diff -Nru sundials-5.7.0+dfsg/cmake/macros/SundialsAddLibrary.cmake sundials-5.8.0+dfsg/cmake/macros/SundialsAddLibrary.cmake --- sundials-5.7.0+dfsg/cmake/macros/SundialsAddLibrary.cmake 2021-01-29 21:48:12.000000000 +0000 +++ sundials-5.8.0+dfsg/cmake/macros/SundialsAddLibrary.cmake 2021-09-30 19:05:25.000000000 +0000 @@ -357,6 +357,15 @@ endif() endif() + # -------------------------------------------------------------------------- + # List of installed SUNDIALS components + # -------------------------------------------------------------------------- + + if(NOT sundials_add_library_OBJECT_LIB_ONLY) + string(REPLACE "sundials_" "" _comp_name "${target}") + set(_SUNDIALS_INSTALLED_COMPONENTS "${_comp_name};${_SUNDIALS_INSTALLED_COMPONENTS}" CACHE INTERNAL "" FORCE) + endif() + endmacro(sundials_add_library) diff -Nru sundials-5.7.0+dfsg/cmake/macros/SundialsCMakeMacros.cmake sundials-5.8.0+dfsg/cmake/macros/SundialsCMakeMacros.cmake --- sundials-5.7.0+dfsg/cmake/macros/SundialsCMakeMacros.cmake 2021-01-29 21:48:12.000000000 +0000 +++ sundials-5.8.0+dfsg/cmake/macros/SundialsCMakeMacros.cmake 2021-09-30 19:05:25.000000000 +0000 @@ -65,7 +65,7 @@ message(${_mode} ${MSG}) endmacro() -# Macro to print error messages. Takes +# Macro to print error messages. 
macro(print_error message) set(options ) @@ -111,9 +111,28 @@ list2string(tmp_list ${example_string}) endmacro(EXAMPLES2STRING) +# Sets the SUNDIALS_GIT_VERSION variable + +function(sundials_git_version) + find_package(Git QUIET) + + set(_tmp "") + + if(EXISTS ${CMAKE_CURRENT_LIST_DIR}/.git AND ${GIT_FOUND}) + execute_process(COMMAND git describe --abbrev=12 --dirty --always --tags + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} + OUTPUT_VARIABLE _tmp) + string(STRIP "${_tmp}" _tmp) + endif() + + set(SUNDIALS_GIT_VERSION "${_tmp}" CACHE INTERNAL "") + unset(_tmp) +endfunction() + # Macros from other files + include(SundialsAddLibrary) include(SundialsAddTest) include(SundialsAddTestInstall) include(SundialsInstallExamples) -include(SundialsOption) \ No newline at end of file +include(SundialsOption) diff -Nru sundials-5.7.0+dfsg/cmake/macros/SundialsInstallExamples.cmake sundials-5.8.0+dfsg/cmake/macros/SundialsInstallExamples.cmake --- sundials-5.7.0+dfsg/cmake/macros/SundialsInstallExamples.cmake 2021-01-29 21:48:12.000000000 +0000 +++ sundials-5.8.0+dfsg/cmake/macros/SundialsInstallExamples.cmake 2021-09-30 19:05:25.000000000 +0000 @@ -40,6 +40,8 @@ # # The SUNDIALS_TARGETS option is a list of CMake targets in the SUNDIALS:: namespace that the examples need to be linked to. # +# The OTHER_TARGETS option is a list of CMake targets that the examples need to be linked to. +# # The EXAMPLE_DEPENDENCIES option is a list of additional source files that the examples are dependent on. # # The EXTRA_FILES option is a list of files to install that are not example source code. 
@@ -51,7 +53,7 @@ set(options ) set(oneValueArgs SOLVER_LIBRARY DESTINATION CMAKE_TEMPLATE MAKE_TEMPLATE TEST_INSTALL) - set(multiValueArgs SUNDIALS_TARGETS EXAMPLES_DEPENDENCIES EXTRA_FILES EXTRA_INCLUDES) + set(multiValueArgs SUNDIALS_TARGETS OTHER_TARGETS EXAMPLES_DEPENDENCIES EXTRA_FILES EXTRA_INCLUDES) # Parse keyword arguments/options cmake_parse_arguments(sundials_install_examples @@ -81,10 +83,18 @@ string(TOUPPER "${MODULE}" SOLVER) set(SOLVER_LIB "${sundials_install_examples_SOLVER_LIBRARY}") set(EXAMPLES_DEPENDENCIES "${sundials_install_examples_EXAMPLES_DEPENDENCIES}") - set(EXAMPLES_CMAKE_TARGETS "${sundials_install_examples_SUNDIALS_TARGETS}") set(EXTRA_INCLUDES "${sundials_install_examples_EXTRA_INCLUDES}") examples2string(${EXAMPLES_VAR} EXAMPLES) + set(target_list "") + foreach(target ${sundials_install_examples_SUNDIALS_TARGETS}) + list(APPEND target_list SUNDIALS::${target}) + endforeach() + foreach(target ${sundials_install_examples_OTHER_TARGETS}) + list(APPEND target_list ${target}) + endforeach() + list2string(target_list EXAMPLES_CMAKE_TARGETS) + # Regardless of the platform we're on, we will generate and install # CMakeLists.txt file for building the examples. This file can then # be used as a template for the user's own programs. @@ -125,4 +135,4 @@ sundials_add_test_install(${MODULE} ${sundials_install_examples_TEST_INSTALL}) endif() -endmacro() \ No newline at end of file +endmacro() diff -Nru sundials-5.7.0+dfsg/cmake/SundialsBuildOptionsPost.cmake sundials-5.8.0+dfsg/cmake/SundialsBuildOptionsPost.cmake --- sundials-5.7.0+dfsg/cmake/SundialsBuildOptionsPost.cmake 2021-01-29 21:48:12.000000000 +0000 +++ sundials-5.8.0+dfsg/cmake/SundialsBuildOptionsPost.cmake 2021-09-30 19:05:25.000000000 +0000 @@ -20,10 +20,10 @@ # Currently only available in CVODE. 
# --------------------------------------------------------------- -sundials_option(SUNDIALS_BUILD_PACKAGE_FUSED_KERNELS BOOL "Build specialized fused CUDA kernels" OFF - DEPENDS_ON ENABLE_CUDA CMAKE_CUDA_COMPILER BUILD_CVODE +sundials_option(SUNDIALS_BUILD_PACKAGE_FUSED_KERNELS BOOL "Build specialized fused GPU kernels" OFF + DEPENDS_ON BUILD_CVODE DEPENDS_ON_THROW_ERROR - SHOW_IF ENABLE_CUDA CMAKE_CUDA_COMPILER BUILD_CVODE) + SHOW_IF BUILD_CVODE) # --------------------------------------------------------------- # Options to enable/disable build for NVECTOR modules. @@ -124,16 +124,21 @@ ADVANCED) list(APPEND SUNDIALS_BUILD_LIST "BUILD_SUNMATRIX_CUSPARSE") -sundials_option(BUILD_SUNMATRIX_SLUNRLOC BOOL "Build the SUNMATRIX_SLUNRLOC module (requires SuperLU_DIST)" ON - DEPENDS_ON ENABLE_SUPERLUDIST SUPERLUDIST_WORKS - ADVANCED) -list(APPEND SUNDIALS_BUILD_LIST "BUILD_SUNMATRIX_SLUNRLOC") - sundials_option(BUILD_SUNMATRIX_MAGMADENSE BOOL "Build the SUNMATRIX_MAGMADENSE module (requires MAGMA)" ON DEPENDS_ON ENABLE_MAGMA MAGMA_WORKS ADVANCED) list(APPEND SUNDIALS_BUILD_LIST "BUILD_SUNMATRIX_MAGMADENSE") +sundials_option(BUILD_SUNMATRIX_ONEMKLDENSE BOOL "Build the SUNMATRIX_ONEMKLDENSE module (requires oneMKL)" ON + DEPENDS_ON ENABLE_ONEMKL ONEMKL_WORKS + ADVANCED) +list(APPEND SUNDIALS_BUILD_LIST "BUILD_SUNMATRIX_ONEMKLDENSE") + +sundials_option(BUILD_SUNMATRIX_SLUNRLOC BOOL "Build the SUNMATRIX_SLUNRLOC module (requires SuperLU_DIST)" ON + DEPENDS_ON ENABLE_SUPERLUDIST SUPERLUDIST_WORKS + ADVANCED) +list(APPEND SUNDIALS_BUILD_LIST "BUILD_SUNMATRIX_SLUNRLOC") + # --------------------------------------------------------------- # Options to enable/disable build for SUNLINSOL modules. 
# --------------------------------------------------------------- @@ -179,6 +184,11 @@ ADVANCED) list(APPEND SUNDIALS_BUILD_LIST "BUILD_SUNLINSOL_MAGMADENSE") +sundials_option(BUILD_SUNLINSOL_ONEMKLDENSE BOOL "Build the SUNLINSOL_ONEMKLDENSE module (requires oneMKL)" ON + DEPENDS_ON ENABLE_ONEMKL ONEMKL_WORKS + ADVANCED) +list(APPEND SUNDIALS_BUILD_LIST "BUILD_SUNLINSOL_ONEMKLDENSE") + sundials_option(BUILD_SUNLINSOL_SUPERLUDIST BOOL "Build the SUNLINSOL_SUPERLUDIST module (requires SUPERLUDIST)" ON DEPENDS_ON ENABLE_SUPERLUDIST SUPERLUDIST_WORKS BUILD_SUNMATRIX_SLUNRLOC ADVANCED) diff -Nru sundials-5.7.0+dfsg/cmake/SUNDIALSConfig.cmake.in sundials-5.8.0+dfsg/cmake/SUNDIALSConfig.cmake.in --- sundials-5.7.0+dfsg/cmake/SUNDIALSConfig.cmake.in 2021-01-29 21:48:12.000000000 +0000 +++ sundials-5.8.0+dfsg/cmake/SUNDIALSConfig.cmake.in 2021-09-30 19:05:25.000000000 +0000 @@ -12,10 +12,34 @@ # SUNDIALS Copyright End # --------------------------------------------------------------- +@PACKAGE_INIT@ + include(CMakeFindDependencyMacro) + +### ------- Set FOUND status for SUNDIALS components + +set(_installed_components "@_SUNDIALS_INSTALLED_COMPONENTS@") + +set(_comp_not_found "") +foreach(_comp ${SUNDIALS_FIND_COMPONENTS}) + if(_comp IN_LIST _installed_components) + set(SUNDIALS_${_comp}_FOUND TRUE) + else() + set(SUNDIALS_${_comp}_FOUND FALSE) + set(_comp_not_found "${_comp} ${_comp_not_found}") + endif() +endforeach() + +if(_comp_not_found) + set(SUNDIALS_NOT_FOUND_MESSAGE "Component(s) not found: ${_comp_not_found}") +endif() + +### ------- Import SUNDIALS targets + include("${CMAKE_CURRENT_LIST_DIR}/SUNDIALSTargets.cmake") ### ------- Alias targets + set(_SUNDIALS_ALIAS_TARGETS "@_SUNDIALS_ALIAS_TARGETS@") foreach(ptr ${_SUNDIALS_ALIAS_TARGETS}) string(REGEX REPLACE "sundials_" "" ptr "${ptr}") @@ -23,20 +47,22 @@ _matches "${ptr}") set(_pointer ${CMAKE_MATCH_1}) set(_pointee ${CMAKE_MATCH_2}) - set_target_properties(SUNDIALS::${_pointee} PROPERTIES IMPORTED_GLOBAL TRUE) 
- add_library(SUNDIALS::${_pointer} ALIAS SUNDIALS::${_pointee}) + if(NOT TARGET SUNDIALS::${_pointer}) + add_library(SUNDIALS::${_pointer} INTERFACE IMPORTED) + target_link_libraries(SUNDIALS::${_pointer} INTERFACE SUNDIALS::${_pointee}) + endif() endforeach() ### ------- Create TPL imported targets if(@ENABLE_HYPRE@ AND NOT TARGET SUNDIALS::HYPRE) - add_library(SUNDIALS::HYPRE INTERFACE IMPORTED GLOBAL) + add_library(SUNDIALS::HYPRE INTERFACE IMPORTED) target_link_libraries(SUNDIALS::HYPRE INTERFACE "@HYPRE_LIBRARIES@") set_target_properties(SUNDIALS::HYPRE PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "@HYPRE_INCLUDE_DIR@") endif() if(@ENABLE_KLU@ AND NOT TARGET SUNDIALS::KLU) - add_library(SUNDIALS::KLU INTERFACE IMPORTED GLOBAL) + add_library(SUNDIALS::KLU INTERFACE IMPORTED) target_link_libraries(SUNDIALS::KLU INTERFACE "@KLU_LIBRARIES@") set_target_properties(SUNDIALS::KLU PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "@KLU_INCLUDE_DIR@") endif() @@ -93,19 +119,23 @@ endif() if(@ENABLE_MAGMA@ AND NOT TARGET SUNDIALS::MAGMA) - add_library(SUNDIALS::MAGMA INTERFACE IMPORTED GLOBAL) + add_library(SUNDIALS::MAGMA INTERFACE IMPORTED) target_link_libraries(SUNDIALS::MAGMA INTERFACE "@MAGMA_LIBRARIES@") set_target_properties(SUNDIALS::MAGMA PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "@MAGMA_INCLUDE_DIR@") endif() +if(@ENABLE_ONEMKL@ AND NOT TARGET MKL) + find_package(MKL PATHS @ONEMKL_DIR@) +endif() + if(@ENABLE_SUPERLUDIST@ AND NOT TARGET SUNDIALS::SUPERLUDIST) - add_library(SUNDIALS::SUPERLUDIST INTERFACE IMPORTED GLOBAL) + add_library(SUNDIALS::SUPERLUDIST INTERFACE IMPORTED) target_link_libraries(SUNDIALS::SUPERLUDIST INTERFACE "@SUPERLUDIST_LIBRARIES@") set_target_properties(SUNDIALS::SUPERLUDIST PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "@SUPERLUDIST_INCLUDE_DIR@") endif() if(@ENABLE_SUPERLUMT@ AND NOT TARGET SUNDIALS::SUPERLUMT) - add_library(SUNDIALS::SUPERLUMT INTERFACE IMPORTED GLOBAL) + add_library(SUNDIALS::SUPERLUMT INTERFACE IMPORTED) 
target_link_libraries(SUNDIALS::SUPERLUMT INTERFACE "@SUPERLUMT_LIBRARIES@") set_target_properties(SUNDIALS::SUPERLUMT PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "@SUPERLUMT_INCLUDE_DIR@") endif() @@ -115,13 +145,17 @@ endif() if(@ENABLE_TRILINOS@ AND NOT TARGET SUNDIALS::TRILINOS) - add_library(SUNDIALS::TRILINOS INTERFACE IMPORTED GLOBAL) + add_library(SUNDIALS::TRILINOS INTERFACE IMPORTED) target_link_libraries(SUNDIALS::TRILINOS INTERFACE "@Trilinos_LIBRARIES@") set_target_properties(SUNDIALS::TRILINOS PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "@Trilinos_INCLUDE_DIRS@") endif() if(@ENABLE_XBRAID@ AND NOT TARGET SUNDIALS::XBRAID) - add_library(SUNDIALS::XBRAID INTERFACE IMPORTED GLOBAL) + add_library(SUNDIALS::XBRAID INTERFACE IMPORTED) target_link_libraries(SUNDIALS::XBRAID INTERFACE "@XBRAID_LIBRARIES@") set_target_properties(SUNDIALS::XBRAID PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "@XBRAID_INCLUDE_DIR@") endif() + +### ------- Check if required components were found + +check_required_components(SUNDIALS) diff -Nru sundials-5.7.0+dfsg/cmake/SundialsExampleOptions.cmake sundials-5.8.0+dfsg/cmake/SundialsExampleOptions.cmake --- sundials-5.7.0+dfsg/cmake/SundialsExampleOptions.cmake 2021-01-29 21:48:12.000000000 +0000 +++ sundials-5.8.0+dfsg/cmake/SundialsExampleOptions.cmake 2021-09-30 19:05:25.000000000 +0000 @@ -22,7 +22,8 @@ # Some TPLs only have C++ examples. Default the C++ examples to ON if any of # these are enabled on the initial configuration pass. 
-if (ENABLE_TRILINOS OR ENABLE_SUPERLUDIST OR ENABLE_XBRAID OR ENABLE_HIP OR ENABLE_MAGMA) +if (ENABLE_TRILINOS OR ENABLE_SUPERLUDIST OR ENABLE_XBRAID OR ENABLE_HIP OR + ENABLE_MAGMA OR ENABLE_SYCL OR ENABLE_ONEMKL OR ENABLE_RAJA) sundials_option(EXAMPLES_ENABLE_CXX BOOL "Build SUNDIALS C++ examples" ON) else() sundials_option(EXAMPLES_ENABLE_CXX BOOL "Build SUNDIALS C++ examples" OFF) diff -Nru sundials-5.7.0+dfsg/cmake/SundialsSetupCompilers.cmake sundials-5.8.0+dfsg/cmake/SundialsSetupCompilers.cmake --- sundials-5.7.0+dfsg/cmake/SundialsSetupCompilers.cmake 2021-01-29 21:48:12.000000000 +0000 +++ sundials-5.8.0+dfsg/cmake/SundialsSetupCompilers.cmake 2021-09-30 19:05:25.000000000 +0000 @@ -214,10 +214,21 @@ endif() # =============================================================== +# Default flags for build types +# =============================================================== + +set(CMAKE_C_FLAGS_DEV "${CMAKE_C_FLAGS_DEV} -g -O0 -Wall -Wpedantic -Wextra -Wno-unused-parameter -Werror") +set(CMAKE_CXX_FLAGS_DEV "${CMAKE_CXX_FLAGS_DEV} -g -O0 -Wall -Wpedantic -Wextra -Wno-unused-parameter -Werror") +set(CMAKE_Fortran_FLAGS_DEV "${CMAKE_Fortran_FLAGS_DEV} -g -O0 -Wall -Wpedantic -ffpe-summary=none") +set(CMAKE_C_FLAGS_DEVSTRICT "-std=c89 ${CMAKE_C_FLAGS_DEV}") +set(CMAKE_CXX_FLAGS_DEVSTRICT "${CMAKE_CXX_FLAGS_DEV}") +set(CMAKE_Fortran_FLAGS_DEVSTRICT "${CMAKE_Fortran_FLAGS_DEV}") + +# =============================================================== # Configure presentation of language options # =============================================================== -set(build_types DEBUG RELEASE RELWITHDEBINFO MINSIZEREL) +set(build_types DEBUG RELEASE RELWITHDEBINFO MINSIZEREL DEV DEVSTRICT) set(_SUNDIALS_ENABLED_LANGS "C") if(CXX_FOUND) diff -Nru sundials-5.7.0+dfsg/cmake/SundialsSetupCXX.cmake sundials-5.8.0+dfsg/cmake/SundialsSetupCXX.cmake --- sundials-5.7.0+dfsg/cmake/SundialsSetupCXX.cmake 2021-01-29 21:48:12.000000000 +0000 +++ 
sundials-5.8.0+dfsg/cmake/SundialsSetupCXX.cmake 2021-09-30 19:05:25.000000000 +0000 @@ -28,4 +28,9 @@ sundials_option(CMAKE_CXX_STANDARD STRING "${DOCSTR}" "11" OPTIONS "98;11;14;17;20") +# SYCL requries C++17 +if(ENABLE_SYCL AND (CMAKE_CXX_STANDARD LESS "17")) + set(CMAKE_CXX_STANDARD "17" CACHE STRING "${DOCSTR}" FORCE) +endif() + message(STATUS "CXX standard set to ${CMAKE_CXX_STANDARD}") diff -Nru sundials-5.7.0+dfsg/cmake/SundialsSetupFortran.cmake sundials-5.8.0+dfsg/cmake/SundialsSetupFortran.cmake --- sundials-5.7.0+dfsg/cmake/SundialsSetupFortran.cmake 2021-01-29 21:48:12.000000000 +0000 +++ sundials-5.8.0+dfsg/cmake/SundialsSetupFortran.cmake 2021-09-30 19:05:25.000000000 +0000 @@ -257,7 +257,7 @@ "SET(CMAKE_C_FLAGS_RELWITHDEBUGINFO \"${CMAKE_C_FLAGS_RELWITHDEBUGINFO}\")\n" "SET(CMAKE_C_FLAGS_MINSIZE \"${CMAKE_C_FLAGS_MINSIZE}\")\n" "ADD_EXECUTABLE(ctest2 ctest2.c)\n" - "FIND_LIBRARY(FLIB flib ${FortranTest_DIR})\n" + "FIND_LIBRARY(FLIB flib \"${FortranTest_DIR}\")\n" "TARGET_LINK_LIBRARIES(ctest2 \${FLIB})\n") set(options my_sub my_sub_ my_sub__ MY_SUB MY_SUB_ MY_SUB__) @@ -336,4 +336,4 @@ message(STATUS "Determining Fortran name-mangling scheme... 
FAILED") endif() -endif() \ No newline at end of file +endif() diff -Nru sundials-5.7.0+dfsg/cmake/SundialsSetupTPLs.cmake sundials-5.8.0+dfsg/cmake/SundialsSetupTPLs.cmake --- sundials-5.7.0+dfsg/cmake/SundialsSetupTPLs.cmake 2021-01-29 21:48:12.000000000 +0000 +++ sundials-5.8.0+dfsg/cmake/SundialsSetupTPLs.cmake 2021-09-30 19:05:25.000000000 +0000 @@ -69,6 +69,15 @@ endif() # --------------------------------------------------------------- +# Find (and test) the oneMKL libraries +# --------------------------------------------------------------- + +if(ENABLE_ONEMKL) + include(SundialsONEMKL) + list(APPEND SUNDIALS_TPL_LIST "ONEMKL") +endif() + +# --------------------------------------------------------------- # Find (and test) the SuperLUDIST libraries # --------------------------------------------------------------- @@ -144,4 +153,4 @@ # Check for POSIX timers # --------------------------------------------------------------- -include(SundialsPOSIXTimers) \ No newline at end of file +include(SundialsPOSIXTimers) diff -Nru sundials-5.7.0+dfsg/cmake/SundialsTPLOptions.cmake sundials-5.8.0+dfsg/cmake/SundialsTPLOptions.cmake --- sundials-5.7.0+dfsg/cmake/SundialsTPLOptions.cmake 2021-01-29 21:48:12.000000000 +0000 +++ sundials-5.8.0+dfsg/cmake/SundialsTPLOptions.cmake 2021-09-30 19:05:25.000000000 +0000 @@ -81,7 +81,7 @@ sundials_option(MAGMA_DIR PATH "Path to the root of a MAGMA installation" "${MAGMA_DIR}" SHOW_IF ENABLE_MAGMA) -sundials_option(SUNDIALS_MAGMA_BACKENDS STRING "Which MAGMA backend under the SUNDIALS MAGMA interfaces (CUDA, HIP)" "CUDA" +sundials_option(SUNDIALS_MAGMA_BACKENDS STRING "Which MAGMA backend to use under the SUNDIALS MAGMA interfaces (CUDA, HIP)" "CUDA" OPTIONS "CUDA;HIP" SHOW_IF ENABLE_MAGMA) @@ -193,8 +193,8 @@ sundials_option(RAJA_DIR PATH "Path to root of RAJA installation" "${RAJA_DIR}" SHOW_IF ENABLE_RAJA) -sundials_option(SUNDIALS_RAJA_BACKENDS STRING "Which RAJA backend under the SUNDIALS RAJA interfaces (CUDA, HIP)" "CUDA" - 
OPTIONS "CUDA;HIP" +sundials_option(SUNDIALS_RAJA_BACKENDS STRING "Which RAJA backend under the SUNDIALS RAJA interfaces (CUDA, HIP, SYCL)" "CUDA" + OPTIONS "CUDA;HIP;SYCL" SHOW_IF ENABLE_RAJA) # --------------------------------------------------------------- @@ -252,5 +252,15 @@ ADVANCED) sundials_option(XBRAID_WORKS BOOL "Set to ON to force CMake to accept a given XBraid configuration" OFF - DEPENDS_ON ENABLE_XBRAID + SHOW_IF ENABLE_XBRAID + ADVANCED) + +# ------------------------------------------------------------- +# Enable oneMKL support? +# ------------------------------------------------------------- + +sundials_option(ENABLE_ONEMKL BOOL "Enable oneMKL support" OFF) + +sundials_option(ONEMKL_WORKS BOOL "Set to ON to force CMake to accept a given oneMKL configuration" OFF + SHOW_IF ENABLE_ONEMKL ADVANCED) diff -Nru sundials-5.7.0+dfsg/cmake/tpl/FindMAGMA.cmake sundials-5.8.0+dfsg/cmake/tpl/FindMAGMA.cmake --- sundials-5.7.0+dfsg/cmake/tpl/FindMAGMA.cmake 2021-01-29 21:48:12.000000000 +0000 +++ sundials-5.8.0+dfsg/cmake/tpl/FindMAGMA.cmake 2021-09-30 19:05:25.000000000 +0000 @@ -66,7 +66,8 @@ set(_interface_libraires ) foreach(lib ${_libraries_list}) if(NOT (lib STREQUAL "-lmagma" OR lib STREQUAL "-lmagma_sparse" OR lib STREQUAL "-L\${libdir}" OR lib STREQUAL "") ) - string(REPLACE "-l" "" lib ${lib}) + # remove -l only from the beginning of the string + string(REGEX REPLACE "^-l" "" lib ${lib}) list(APPEND _interface_libraires ${lib}) endif() endforeach() diff -Nru sundials-5.7.0+dfsg/cmake/tpl/SundialsONEMKL.cmake sundials-5.8.0+dfsg/cmake/tpl/SundialsONEMKL.cmake --- sundials-5.7.0+dfsg/cmake/tpl/SundialsONEMKL.cmake 1970-01-01 00:00:00.000000000 +0000 +++ sundials-5.8.0+dfsg/cmake/tpl/SundialsONEMKL.cmake 2021-09-30 19:05:25.000000000 +0000 @@ -0,0 +1,70 @@ +# ----------------------------------------------------------------------------- +# Programmer(s): David J.
Gardner @ LLNL +# ----------------------------------------------------------------------------- +# SUNDIALS Copyright Start +# Copyright (c) 2002-2021, Lawrence Livermore National Security +# and Southern Methodist University. +# All rights reserved. +# +# See the top-level LICENSE and NOTICE files for details. +# +# SPDX-License-Identifier: BSD-3-Clause +# SUNDIALS Copyright End +# ----------------------------------------------------------------------------- +# Module to find and setup oneMKL correctly. +# Created from the SundialsTPL.cmake template. +# All SUNDIALS modules that find and setup a TPL must: +# +# 1. Check to make sure the SUNDIALS configuration and the TPL is compatible. +# 2. Find the TPL. +# 3. Check if the TPL works with SUNDIALS, UNLESS the override option +# TPL_WORKS is TRUE - in this case the tests should not be performed and it +# should be assumed that the TPL works with SUNDIALS. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# Section 1: Include guard +# ----------------------------------------------------------------------------- + +if(NOT DEFINED SUNDIALS_ONEMKL_INCLUDED) + set(SUNDIALS_ONEMKL_INCLUDED TRUE) +else() + return() +endif() + +# ----------------------------------------------------------------------------- +# Section 2: Check to make sure options are compatible +# ----------------------------------------------------------------------------- + +# oneMKL does not support extended precision +if(SUNDIALS_PRECISION MATCHES "EXTENDED") + message(FATAL_ERROR + "oneMKL is not compatible with ${SUNDIALS_PRECISION} precision") +endif() + +# oneMKL does not support 32-bit index sizes +if(SUNDIALS_INDEX_SIZE MATCHES "32") + message(FATAL_ERROR + "oneMKL is not compatible with ${SUNDIALS_INDEX_SIZE}-bit indices") +endif() + +# ----------------------------------------------------------------------------- +# Section 3: Find the
TPL +# ----------------------------------------------------------------------------- + +# Look for CMake configuration file in oneMKL installation +find_package(MKL CONFIG + PATHS "${ONEMKL_DIR}" "${ONEMKL_DIR}/lib/cmake/mkl" + NO_DEFAULT_PATH + REQUIRED) + +# ----------------------------------------------------------------------------- +# Section 4: Test the TPL +# ----------------------------------------------------------------------------- + +if(MKL_FOUND AND (NOT ONEMKL_WORKS)) + message(STATUS "Checking if oneMKL works... OK") + set(ONEMKL_WORKS TRUE CACHE BOOL "oneMKL works with SUNDIALS as configured" FORCE) +else() + message(STATUS "Skipped oneMKL tests, assuming oneMKL works with SUNDIALS.") +endif() diff -Nru sundials-5.7.0+dfsg/cmake/tpl/SundialsRAJA.cmake sundials-5.8.0+dfsg/cmake/tpl/SundialsRAJA.cmake --- sundials-5.7.0+dfsg/cmake/tpl/SundialsRAJA.cmake 2021-01-29 21:48:12.000000000 +0000 +++ sundials-5.8.0+dfsg/cmake/tpl/SundialsRAJA.cmake 2021-09-30 19:05:25.000000000 +0000 @@ -36,6 +36,18 @@ # Section 2: Check to make sure options are compatible # ----------------------------------------------------------------------------- +if((SUNDIALS_RAJA_BACKENDS MATCHES "CUDA") AND (NOT ENABLE_CUDA)) + message(FATAL_ERROR "RAJA with a CUDA backend requires ENABLE_CUDA = ON") +endif() + +if((SUNDIALS_RAJA_BACKENDS MATCHES "HIP") AND (NOT ENABLE_HIP)) + message(FATAL_ERROR "RAJA with a HIP backend requires ENABLE_HIP = ON") +endif() + +if((SUNDIALS_RAJA_BACKENDS MATCHES "SYCL") AND (NOT ENABLE_SYCL)) + message(FATAL_ERROR "RAJA with a SYCL backend requires ENABLE_SYCL = ON") +endif() + # ----------------------------------------------------------------------------- # Section 3: Find the TPL # ----------------------------------------------------------------------------- @@ -54,7 +66,7 @@ REQUIRED) # determine the backends -foreach(_backend CUDA HIP OPENMP TARGET_OPENMP) +foreach(_backend CUDA HIP OPENMP TARGET_OPENMP SYCL) file(STRINGS "${RAJA_CONFIGHPP_PATH}" 
_raja_has_backend REGEX "^#define RAJA_ENABLE_${_backend}\$") if(_raja_has_backend) set(RAJA_BACKENDS "${_backend};${RAJA_BACKENDS}") @@ -85,3 +97,8 @@ if(NOT ENABLE_OPENMP_DEVICE AND RAJA_BACKENDS MATCHES "TARGET_OPENMP") print_error("RAJA was built with OpenMP device offloading, but OpenMP with device offloading is not enabled. Set ENABLE_OPENMP_DEVICE to ON.") endif() + +if((SUNDIALS_RAJA_BACKENDS MATCHES "SYCL") AND + (NOT RAJA_BACKENDS MATCHES "SYCL")) + print_error("Requested that SUNDIALS uses the SYCL RAJA backend, but RAJA was not built with the SYCL backend.") +endif() diff -Nru sundials-5.7.0+dfsg/CMakeLists.txt sundials-5.8.0+dfsg/CMakeLists.txt --- sundials-5.7.0+dfsg/CMakeLists.txt 2021-01-29 21:48:12.000000000 +0000 +++ sundials-5.8.0+dfsg/CMakeLists.txt 2021-09-30 19:05:25.000000000 +0000 @@ -25,16 +25,31 @@ # sets PROJECT_SOURCE_DIR and PROJECT_BINARY_DIR variables. project(SUNDIALS C) +# Specify the location of additional CMAKE modules +set(CMAKE_MODULE_PATH + ${PROJECT_SOURCE_DIR}/cmake + ${PROJECT_SOURCE_DIR}/cmake/macros + ${PROJECT_SOURCE_DIR}/cmake/tpl + ) + +# MACRO definitions +include(SundialsCMakeMacros) +include(FindPackageHandleStandardArgs) +include(CMakePrintHelpers) + # Set some variables with info on the SUNDIALS project set(PACKAGE_BUGREPORT "sundials-users@llnl.gov") set(PACKAGE_NAME "SUNDIALS") -set(PACKAGE_STRING "SUNDIALS 5.7.0") +set(PACKAGE_STRING "SUNDIALS 5.8.0") set(PACKAGE_TARNAME "sundials") -# set SUNDIALS version numbers +# Set SUNDIALS version numbers +sundials_git_version() # sets SUNDIALS_GIT_VERSION +message(STATUS "SUNDIALS_GIT_VERSION: ${SUNDIALS_GIT_VERSION}") + # (use "" for the version label if none is needed) set(PACKAGE_VERSION_MAJOR "5") -set(PACKAGE_VERSION_MINOR "7") +set(PACKAGE_VERSION_MINOR "8") set(PACKAGE_VERSION_PATCH "0") set(PACKAGE_VERSION_LABEL "") @@ -50,37 +65,37 @@ # Specify the VERSION and SOVERSION for shared libraries -set(arkodelib_VERSION "4.7.0") +set(arkodelib_VERSION "4.8.0") 
set(arkodelib_SOVERSION "4") -set(cvodelib_VERSION "5.7.0") +set(cvodelib_VERSION "5.8.0") set(cvodelib_SOVERSION "5") -set(cvodeslib_VERSION "5.7.0") +set(cvodeslib_VERSION "5.8.0") set(cvodeslib_SOVERSION "5") -set(idalib_VERSION "5.7.0") +set(idalib_VERSION "5.8.0") set(idalib_SOVERSION "5") -set(idaslib_VERSION "4.7.0") +set(idaslib_VERSION "4.8.0") set(idaslib_SOVERSION "4") -set(kinsollib_VERSION "5.7.0") +set(kinsollib_VERSION "5.8.0") set(kinsollib_SOVERSION "5") set(cpodeslib_VERSION "0.0.0") set(cpodeslib_SOVERSION "0") -set(nveclib_VERSION "5.7.0") +set(nveclib_VERSION "5.8.0") set(nveclib_SOVERSION "5") -set(sunmatrixlib_VERSION "3.7.0") +set(sunmatrixlib_VERSION "3.8.0") set(sunmatrixlib_SOVERSION "3") -set(sunlinsollib_VERSION "3.7.0") +set(sunlinsollib_VERSION "3.8.0") set(sunlinsollib_SOVERSION "3") -set(sunnonlinsollib_VERSION "2.7.0") +set(sunnonlinsollib_VERSION "2.8.0") set(sunnonlinsollib_SOVERSION "2") set(sundialslib_VERSION @@ -89,20 +104,6 @@ set(sundialslib_SOVERSION "${PACKAGE_VERSION_MAJOR}") # =============================================================== -# SUNDIALS CMake Macros -# =============================================================== - -# Specify the location of additional CMAKE modules -set(CMAKE_MODULE_PATH - ${PROJECT_SOURCE_DIR}/cmake - ${PROJECT_SOURCE_DIR}/cmake/macros - ${PROJECT_SOURCE_DIR}/cmake/tpl - ) - -# MACRO definitions -include(SundialsCMakeMacros) - -# =============================================================== # Initial Setup # =============================================================== @@ -192,8 +193,6 @@ add_subdirectory(examples) endif() - - # =============================================================== # Install configuration header files and license file. 
# =============================================================== @@ -254,9 +253,10 @@ ) # install SUNDIALSConfig.cmake -configure_file( - ${PROJECT_SOURCE_DIR}/cmake/SUNDIALSConfig.cmake.in SUNDIALSConfig.cmake - @ONLY +configure_package_config_file( + "${PROJECT_SOURCE_DIR}/cmake/SUNDIALSConfig.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/SUNDIALSConfig.cmake" + INSTALL_DESTINATION "${SUNDIALS_INSTALL_CMAKEDIR}" ) install(FILES "${CMAKE_CURRENT_BINARY_DIR}/SUNDIALSConfig.cmake" "${CMAKE_CURRENT_BINARY_DIR}/SUNDIALSConfigVersion.cmake" diff -Nru sundials-5.7.0+dfsg/debian/changelog sundials-5.8.0+dfsg/debian/changelog --- sundials-5.7.0+dfsg/debian/changelog 2021-07-06 07:23:34.000000000 +0000 +++ sundials-5.8.0+dfsg/debian/changelog 2021-10-31 12:36:26.000000000 +0000 @@ -1,8 +1,26 @@ -sundials (5.7.0+dfsg-1~exp1ubuntu1) impish; urgency=medium +sundials (5.8.0+dfsg-1) unstable; urgency=medium - * Link pthread to avoid FTBFS with LTO enabled + * [d4636af] Add simple autopkgtest + * Upload into unstable - -- Graham Inggs Tue, 06 Jul 2021 07:23:34 +0000 + -- Anton Gladky Sun, 31 Oct 2021 13:36:26 +0100 + +sundials (5.8.0+dfsg-1~exp1) experimental; urgency=medium + + [ Drew Parsons ] + * [1918f64] debian/rules: configure for change of hypre lib from + libHYPRE_core.so to libHYPRE.so from hypre 2.20. + + [ Anton Gladky ] + * [e38fe53] New upstream version 5.8.0+dfsg + + -- Anton Gladky Sat, 23 Oct 2021 23:07:30 +0200 + +sundials (5.7.0+dfsg-1) unstable; urgency=medium + + * Upload into unstable + + -- Anton Gladky Fri, 20 Aug 2021 21:51:43 +0200 sundials (5.7.0+dfsg-1~exp1) experimental; urgency=medium @@ -24,6 +42,12 @@ -- Anton Gladky Sat, 27 Mar 2021 21:56:00 +0100 +sundials (4.1.0+dfsg-4) unstable; urgency=medium + + * [5c80d16] Install libsundials_*sunnonlinsol*.so.*. (Closes: #988551) + + -- Anton Gladky Sat, 15 May 2021 16:51:20 +0200 + sundials (4.1.0+dfsg-3) unstable; urgency=medium * Team upload. 
diff -Nru sundials-5.7.0+dfsg/debian/control sundials-5.8.0+dfsg/debian/control --- sundials-5.7.0+dfsg/debian/control 2021-07-06 07:23:34.000000000 +0000 +++ sundials-5.8.0+dfsg/debian/control 2021-08-20 18:47:38.000000000 +0000 @@ -1,6 +1,5 @@ Source: sundials -Maintainer: Ubuntu Developers -XSBC-Original-Maintainer: Debian Science Team +Maintainer: Debian Science Team Uploaders: Dima Kogan , James Tocknell , Anton Gladky diff -Nru sundials-5.7.0+dfsg/debian/patches/0002-Fix-library-paths-for-multiarch.patch sundials-5.8.0+dfsg/debian/patches/0002-Fix-library-paths-for-multiarch.patch --- sundials-5.7.0+dfsg/debian/patches/0002-Fix-library-paths-for-multiarch.patch 2020-12-20 13:20:47.000000000 +0000 +++ sundials-5.8.0+dfsg/debian/patches/0002-Fix-library-paths-for-multiarch.patch 2021-10-28 21:22:31.000000000 +0000 @@ -6,13 +6,13 @@ CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) -Index: sundials-5.7.0/CMakeLists.txt +Index: sundials-5.8.0/CMakeLists.txt =================================================================== ---- sundials-5.7.0.orig/CMakeLists.txt -+++ sundials-5.7.0/CMakeLists.txt -@@ -25,6 +25,10 @@ cmake_minimum_required(VERSION 3.12) - # sets PROJECT_SOURCE_DIR and PROJECT_BINARY_DIR variables. 
- project(SUNDIALS C) +--- sundials-5.8.0.orig/CMakeLists.txt ++++ sundials-5.8.0/CMakeLists.txt +@@ -37,6 +37,10 @@ include(SundialsCMakeMacros) + include(FindPackageHandleStandardArgs) + include(CMakePrintHelpers) +# Get correct build paths automatically + diff -Nru sundials-5.7.0+dfsg/debian/patches/0003-PETSc-is-now-findable-by-CMake.patch sundials-5.8.0+dfsg/debian/patches/0003-PETSc-is-now-findable-by-CMake.patch --- sundials-5.7.0+dfsg/debian/patches/0003-PETSc-is-now-findable-by-CMake.patch 2020-12-20 13:20:47.000000000 +0000 +++ sundials-5.8.0+dfsg/debian/patches/0003-PETSc-is-now-findable-by-CMake.patch 2021-10-28 21:22:31.000000000 +0000 @@ -9,8 +9,10 @@ Reviewed-By: Last-Update: 2021-03-27 ---- sundials-5.7.0+dfsg.orig/cmake/tpl/FindPETSC.cmake -+++ sundials-5.7.0+dfsg/cmake/tpl/FindPETSC.cmake +Index: sundials-5.8.0/cmake/tpl/FindPETSC.cmake +=================================================================== +--- sundials-5.8.0.orig/cmake/tpl/FindPETSC.cmake ++++ sundials-5.8.0/cmake/tpl/FindPETSC.cmake @@ -1,777 +1,26 @@ -# ------------------------------------------------------------------------------ -# Programmer(s): Cody J. Balos and David J. 
Gardner @ LLNL @@ -814,8 +816,10 @@ +set(PETSC_LIBRARIES ${PETSC_LIBRARY} ) +set(PETSC_INCLUDE_DIRS ${PC_PETSC_INCLUDE_DIRS} ${MPI_INCLUDE_DIR}) +set(PETSC_FOUND TRUE) ---- sundials-5.7.0+dfsg.orig/cmake/tpl/SundialsPETSC.cmake -+++ sundials-5.7.0+dfsg/cmake/tpl/SundialsPETSC.cmake +Index: sundials-5.8.0/cmake/tpl/SundialsPETSC.cmake +=================================================================== +--- sundials-5.8.0.orig/cmake/tpl/SundialsPETSC.cmake ++++ sundials-5.8.0/cmake/tpl/SundialsPETSC.cmake @@ -57,6 +57,8 @@ message(STATUS "PETSC_INCLUDES: ${PET message(STATUS "PETSC_INDEX_SIZE: ${PETSC_INDEX_SIZE}") message(STATUS "PETSC_PRECISION: ${PETSC_PRECISION}\n") @@ -825,8 +829,10 @@ # ----------------------------------------------------------------------------- # Section 4: Test the TPL # ----------------------------------------------------------------------------- ---- sundials-5.7.0+dfsg.orig/src/nvector/petsc/CMakeLists.txt -+++ sundials-5.7.0+dfsg/src/nvector/petsc/CMakeLists.txt +Index: sundials-5.8.0/src/nvector/petsc/CMakeLists.txt +=================================================================== +--- sundials-5.8.0.orig/src/nvector/petsc/CMakeLists.txt ++++ sundials-5.8.0/src/nvector/petsc/CMakeLists.txt @@ -35,7 +35,7 @@ sundials_add_library(sundials_nvecpetsc OBJECT_LIBRARIES sundials_generic_obj @@ -836,8 +842,10 @@ OUTPUT_NAME sundials_nvecpetsc VERSION ---- sundials-5.7.0+dfsg.orig/src/sunnonlinsol/petscsnes/CMakeLists.txt -+++ sundials-5.7.0+dfsg/src/sunnonlinsol/petscsnes/CMakeLists.txt +Index: sundials-5.8.0/src/sunnonlinsol/petscsnes/CMakeLists.txt +=================================================================== +--- sundials-5.8.0.orig/src/sunnonlinsol/petscsnes/CMakeLists.txt ++++ sundials-5.8.0/src/sunnonlinsol/petscsnes/CMakeLists.txt @@ -34,7 +34,7 @@ sundials_add_library(sundials_sunnonlins OBJECT_LIBRARIES sundials_generic_obj diff -Nru sundials-5.7.0+dfsg/debian/patches/add_some_more_headers.patch 
sundials-5.8.0+dfsg/debian/patches/add_some_more_headers.patch --- sundials-5.7.0+dfsg/debian/patches/add_some_more_headers.patch 2021-03-27 18:29:58.000000000 +0000 +++ sundials-5.8.0+dfsg/debian/patches/add_some_more_headers.patch 2021-10-28 21:22:31.000000000 +0000 @@ -23,8 +23,10 @@ Reviewed-By: Last-Update: 2021-03-27 ---- sundials-5.7.0+dfsg.orig/CMakeLists.txt -+++ sundials-5.7.0+dfsg/CMakeLists.txt +Index: sundials-5.8.0/CMakeLists.txt +=================================================================== +--- sundials-5.8.0.orig/CMakeLists.txt ++++ sundials-5.8.0/CMakeLists.txt @@ -266,3 +266,10 @@ install(FILES "${CMAKE_CURRENT_BINARY_DI "${CMAKE_CURRENT_BINARY_DIR}/SUNDIALSConfigVersion.cmake" DESTINATION "${SUNDIALS_INSTALL_CMAKEDIR}" diff -Nru sundials-5.7.0+dfsg/debian/patches/disable_test sundials-5.8.0+dfsg/debian/patches/disable_test --- sundials-5.7.0+dfsg/debian/patches/disable_test 2021-03-27 18:31:51.000000000 +0000 +++ sundials-5.8.0+dfsg/debian/patches/disable_test 2021-10-28 21:22:31.000000000 +0000 @@ -24,10 +24,10 @@ Reviewed-By: Last-Update: 2021-03-27 -Index: sundials-5.7.0+dfsg/examples/sunnonlinsol/petsc/CMakeLists.txt +Index: sundials-5.8.0/examples/sunnonlinsol/petsc/CMakeLists.txt =================================================================== ---- sundials-5.7.0+dfsg.orig/examples/sunnonlinsol/petsc/CMakeLists.txt -+++ sundials-5.7.0+dfsg/examples/sunnonlinsol/petsc/CMakeLists.txt +--- sundials-5.8.0.orig/examples/sunnonlinsol/petsc/CMakeLists.txt ++++ sundials-5.8.0/examples/sunnonlinsol/petsc/CMakeLists.txt @@ -82,10 +82,10 @@ foreach(example_tuple ${examples}) endif() @@ -43,10 +43,10 @@ if(EXAMPLES_INSTALL) install(FILES ${shared_SOURCES} ${example}.c -Index: sundials-5.7.0+dfsg/examples/sunlinsol/klu/CMakeLists.txt +Index: sundials-5.8.0/examples/sunlinsol/klu/CMakeLists.txt =================================================================== ---- sundials-5.7.0+dfsg.orig/examples/sunlinsol/klu/CMakeLists.txt -+++ 
sundials-5.7.0+dfsg/examples/sunlinsol/klu/CMakeLists.txt +--- sundials-5.8.0.orig/examples/sunlinsol/klu/CMakeLists.txt ++++ sundials-5.8.0/examples/sunlinsol/klu/CMakeLists.txt @@ -87,10 +87,10 @@ foreach(example_tuple ${sunlinsol_klu_ex endif() diff -Nru sundials-5.7.0+dfsg/debian/patches/link-pthread.patch sundials-5.8.0+dfsg/debian/patches/link-pthread.patch --- sundials-5.7.0+dfsg/debian/patches/link-pthread.patch 2021-07-06 07:23:34.000000000 +0000 +++ sundials-5.8.0+dfsg/debian/patches/link-pthread.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,16 +0,0 @@ -Description: Link pthread to avoid FTFS with LTO enabled -Origin: vendor, https://build.opensuse.org/request/show/873097 -Author: Atri Bhattacharya -Last-Update: 2021-02-17 - ---- a/src/nvector/pthreads/CMakeLists.txt -+++ b/src/nvector/pthreads/CMakeLists.txt -@@ -26,6 +26,8 @@ - nvector - OBJECT_LIBRARIES - sundials_generic_obj -+ LINK_LIBRARIES -+ PRIVATE pthread - OUTPUT_NAME - sundials_nvecpthreads - VERSION diff -Nru sundials-5.7.0+dfsg/debian/patches/series sundials-5.8.0+dfsg/debian/patches/series --- sundials-5.7.0+dfsg/debian/patches/series 2021-07-06 07:20:23.000000000 +0000 +++ sundials-5.8.0+dfsg/debian/patches/series 2021-08-20 18:47:38.000000000 +0000 @@ -3,4 +3,3 @@ 0003-PETSc-is-now-findable-by-CMake.patch disable_test add_some_more_headers.patch -link-pthread.patch diff -Nru sundials-5.7.0+dfsg/debian/rules sundials-5.8.0+dfsg/debian/rules --- sundials-5.7.0+dfsg/debian/rules 2021-03-27 19:07:38.000000000 +0000 +++ sundials-5.8.0+dfsg/debian/rules 2021-10-28 21:22:31.000000000 +0000 @@ -5,6 +5,9 @@ include /usr/share/dpkg/architecture.mk +HYPRE_VERSION=$(shell dpkg -s libhypre-dev | grep Version | awk '{print $$2}') +HYPRE_LIB=$(shell if dpkg --compare-versions $(HYPRE_VERSION) lt 2.20.0-1exp3~ ; then echo HYPRE_core; else echo HYPRE; fi) + extra_flags += \ -DCMAKE_Fortran_COMPILER=gfortran \ -DBUILD_SHARED_LIBS:BOOL=ON \ @@ -30,7 +33,7 @@ \ -DENABLE_HYPRE:BOOL=ON \ 
-DHYPRE_INCLUDE_DIR=/usr/include/hypre \ - -DHYPRE_LIBRARY=-lHYPRE_core \ + -DHYPRE_LIBRARY=-l$(HYPRE_LIB) \ \ -DCMAKE_C_FLAGS=-fcommon diff -Nru sundials-5.7.0+dfsg/debian/tests/arkode sundials-5.8.0+dfsg/debian/tests/arkode --- sundials-5.7.0+dfsg/debian/tests/arkode 1970-01-01 00:00:00.000000000 +0000 +++ sundials-5.8.0+dfsg/debian/tests/arkode 2021-10-28 21:22:31.000000000 +0000 @@ -0,0 +1,27 @@ +#!/bin/sh +# (C) 2021 Anton Gladky + +set -e + +WORKDIR=$(mktemp -d) +trap "rm -rf $WORKDIR" 0 INT QUIT ABRT PIPE TERM +mkdir -p $WORKDIR/src +mkdir -p $WORKDIR/build + +cp examples/arkode/CXX_serial/* $WORKDIR/src/ + +cd $WORKDIR/build +g++ ../src/ark_analytic_sys.cpp -lsundials_sunmatrixdense -lsundials_nvecserial -lsundials_arkode -o demo1 +g++ ../src/ark_heat2D.cpp -lsundials_sunmatrixdense -lsundials_nvecserial -lsundials_arkode -o demo2 + +echo "build: OK" + +[ -x demo1 ] +./demo1 + +echo "run: demo1 OK" + +[ -x demo2 ] +./demo2 + +echo "run: demo2 OK" diff -Nru sundials-5.7.0+dfsg/debian/tests/control sundials-5.8.0+dfsg/debian/tests/control --- sundials-5.7.0+dfsg/debian/tests/control 1970-01-01 00:00:00.000000000 +0000 +++ sundials-5.8.0+dfsg/debian/tests/control 2021-10-28 21:35:35.000000000 +0000 @@ -0,0 +1,2 @@ +Tests: arkode +Depends: build-essential, libsundials-dev diff -Nru sundials-5.7.0+dfsg/examples/arkode/C_parallel/ark_brusselator1D_task_local_nls.c sundials-5.8.0+dfsg/examples/arkode/C_parallel/ark_brusselator1D_task_local_nls.c --- sundials-5.7.0+dfsg/examples/arkode/C_parallel/ark_brusselator1D_task_local_nls.c 2021-01-29 21:48:12.000000000 +0000 +++ sundials-5.8.0+dfsg/examples/arkode/C_parallel/ark_brusselator1D_task_local_nls.c 2021-09-30 19:05:25.000000000 +0000 @@ -1165,6 +1165,7 @@ SUNNonlinearSolver TaskLocalNewton(N_Vector y, FILE* DFID) { + void* tmp_comm; SUNNonlinearSolver NLS; TaskLocalNewton_Content content; @@ -1201,8 +1202,11 @@ NLS->content = content; /* Fill general content */ - content->comm = *((MPI_Comm*) 
N_VGetCommunicator(y)); - if (content->comm == NULL) { SUNNonlinSolFree(NLS); return NULL; } + tmp_comm = N_VGetCommunicator(y); + if (tmp_comm == NULL) { SUNNonlinSolFree(NLS); return NULL; } + + content->comm = *((MPI_Comm*) tmp_comm); + if (content->comm == MPI_COMM_NULL) { SUNNonlinSolFree(NLS); return NULL; } content->local_nls = SUNNonlinSol_Newton(N_VGetLocalVector_MPIPlusX(y)); if (content->local_nls == NULL) { SUNNonlinSolFree(NLS); return NULL; } diff -Nru sundials-5.7.0+dfsg/examples/arkode/C_serial/ark_analytic_mels.c sundials-5.8.0+dfsg/examples/arkode/C_serial/ark_analytic_mels.c --- sundials-5.7.0+dfsg/examples/arkode/C_serial/ark_analytic_mels.c 1970-01-01 00:00:00.000000000 +0000 +++ sundials-5.8.0+dfsg/examples/arkode/C_serial/ark_analytic_mels.c 2021-09-30 19:05:25.000000000 +0000 @@ -0,0 +1,325 @@ +/*----------------------------------------------------------------- + * Programmer(s): Daniel R. Reynolds @ SMU + *--------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2021, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + *--------------------------------------------------------------- + * Example problem: + * + * The following is a simple example problem with analytical + * solution, + * dy/dt = lamda*y + 1/(1+t^2) - lamda*atan(t) + * for t in the interval [0.0, 10.0], with initial condition: y=0. + * + * The stiffness of the problem is directly proportional to the + * value of "lamda". The value of lamda should be negative to + * result in a well-posed ODE; for values with magnitude larger + * than 100 the problem becomes quite stiff. + * + * This program solves the problem with the DIRK method and a + * custom 'matrix-embedded' SUNLinearSolver. 
Output is printed + * every 1.0 units of time (10 total). Run statistics (optional + * outputs) are printed at the end. + *-----------------------------------------------------------------*/ + +/* Header files */ +#include +#include +#include /* prototypes for ARKStep fcts., consts */ +#include /* serial N_Vector types, fcts., macros */ +#include /* definition of type realtype */ + +#if defined(SUNDIALS_EXTENDED_PRECISION) +#define GSYM "Lg" +#define ESYM "Le" +#define FSYM "Lf" +#else +#define GSYM "g" +#define ESYM "e" +#define FSYM "f" +#endif + +/* User-supplied functions called by ARKStep */ +static int f(realtype t, N_Vector y, N_Vector ydot, void *user_data); + +/* Custom linear solver data structure, accessor macros, and routines */ +static SUNLinearSolver MatrixEmbeddedLS(void *arkode_mem); +static SUNLinearSolver_Type MatrixEmbeddedLSType(SUNLinearSolver S); +static int MatrixEmbeddedLSSolve(SUNLinearSolver S, SUNMatrix A, N_Vector x, + N_Vector b, realtype tol); +static int MatrixEmbeddedLSFree(SUNLinearSolver S); + +/* Private function to check function return values */ +static int check_retval(void *returnvalue, const char *funcname, int opt); + +/* Private function to check computed solution */ +static int check_ans(N_Vector y, realtype t, realtype rtol, realtype atol); + +/* Main Program */ +int main() +{ + /* general problem parameters */ + realtype T0 = RCONST(0.0); /* initial time */ + realtype Tf = RCONST(10.0); /* final time */ + realtype dTout = RCONST(1.0); /* time between outputs */ + sunindextype NEQ = 1; /* number of dependent vars. 
*/ + realtype reltol = RCONST(1.0e-6); /* tolerances */ + realtype abstol = RCONST(1.0e-10); + realtype lamda = RCONST(-100.0); /* stiffness parameter */ + + /* general problem variables */ + int retval; /* reusable error-checking flag */ + N_Vector y = NULL; /* empty vector for storing solution */ + SUNLinearSolver LS = NULL; /* empty linear solver object */ + void *arkode_mem = NULL; /* empty ARKODE memory structure */ + realtype t, tout; + long int nst, nst_a, nfe, nfi, nsetups, nje, nfeLS, nni, ncfn, netf; + + /* Initial diagnostics output */ + printf("\nAnalytical ODE test problem:\n"); + printf(" lamda = %"GSYM"\n", lamda); + printf(" reltol = %.1"ESYM"\n", reltol); + printf(" abstol = %.1"ESYM"\n\n",abstol); + + /* Initialize data structures */ + y = N_VNew_Serial(NEQ); /* Create serial vector for solution */ + if (check_retval((void *)y, "N_VNew_Serial", 0)) return 1; + N_VConst(RCONST(0.0), y); /* Specify initial condition */ + + /* Call ARKStepCreate to initialize the ARK timestepper module and + specify the right-hand side function in y'=f(t,y), the inital time + T0, and the initial dependent variable vector y. Note: since this + problem is fully implicit, we set f_E to NULL and f_I to f. 
*/ + arkode_mem = ARKStepCreate(NULL, f, T0, y); + if (check_retval((void *)arkode_mem, "ARKStepCreate", 0)) return 1; + + /* Set routines */ + retval = ARKStepSetUserData(arkode_mem, (void *) &lamda); /* Pass lamda to user functions */ + if (check_retval(&retval, "ARKStepSetUserData", 1)) return 1; + retval = ARKStepSStolerances(arkode_mem, reltol, abstol); /* Specify tolerances */ + if (check_retval(&retval, "ARKStepSStolerances", 1)) return 1; + + /* Initialize custom matrix-embedded linear solver */ + LS = MatrixEmbeddedLS(arkode_mem); + if (check_retval((void *)LS, "MatrixEmbeddedLS", 0)) return 1; + retval = ARKStepSetLinearSolver(arkode_mem, LS, NULL); /* Attach linear solver */ + if (check_retval(&retval, "ARKStepSetLinearSolver", 1)) return 1; + + /* Specify linearly implicit RHS, with non-time-dependent Jacobian */ + retval = ARKStepSetLinear(arkode_mem, 0); + if (check_retval(&retval, "ARKStepSetLinear", 1)) return 1; + + /* Main time-stepping loop: calls ARKStepEvolve to perform the integration, then + prints results. Stops when the final time has been reached. */ + t = T0; + tout = T0+dTout; + printf(" t u\n"); + printf(" ---------------------\n"); + while (Tf - t > 1.0e-15) { + + retval = ARKStepEvolve(arkode_mem, tout, y, &t, ARK_NORMAL); /* call integrator */ + if (check_retval(&retval, "ARKStepEvolve", 1)) break; + printf(" %10.6"FSYM" %10.6"FSYM"\n", t, NV_Ith_S(y,0)); /* access/print solution */ + if (retval >= 0) { /* successful solve: update time */ + tout += dTout; + tout = (tout > Tf) ? 
Tf : tout; + } else { /* unsuccessful solve: break */ + fprintf(stderr,"Solver failure, stopping integration\n"); + break; + } + } + printf(" ---------------------\n"); + + /* Get/print some final statistics on how the solve progressed */ + retval = ARKStepGetNumSteps(arkode_mem, &nst); + check_retval(&retval, "ARKStepGetNumSteps", 1); + retval = ARKStepGetNumStepAttempts(arkode_mem, &nst_a); + check_retval(&retval, "ARKStepGetNumStepAttempts", 1); + retval = ARKStepGetNumRhsEvals(arkode_mem, &nfe, &nfi); + check_retval(&retval, "ARKStepGetNumRhsEvals", 1); + retval = ARKStepGetNumLinSolvSetups(arkode_mem, &nsetups); + check_retval(&retval, "ARKStepGetNumLinSolvSetups", 1); + retval = ARKStepGetNumErrTestFails(arkode_mem, &netf); + check_retval(&retval, "ARKStepGetNumErrTestFails", 1); + retval = ARKStepGetNumNonlinSolvIters(arkode_mem, &nni); + check_retval(&retval, "ARKStepGetNumNonlinSolvIters", 1); + retval = ARKStepGetNumNonlinSolvConvFails(arkode_mem, &ncfn); + check_retval(&retval, "ARKStepGetNumNonlinSolvConvFails", 1); + retval = ARKStepGetNumJacEvals(arkode_mem, &nje); + check_retval(&retval, "ARKStepGetNumJacEvals", 1); + retval = ARKStepGetNumLinRhsEvals(arkode_mem, &nfeLS); + check_retval(&retval, "ARKStepGetNumLinRhsEvals", 1); + + printf("\nFinal Solver Statistics:\n"); + printf(" Internal solver steps = %li (attempted = %li)\n", nst, nst_a); + printf(" Total RHS evals: Fe = %li, Fi = %li\n", nfe, nfi); + printf(" Total linear solver setups = %li\n", nsetups); + printf(" Total RHS evals for setting up the linear system = %li\n", nfeLS); + printf(" Total number of Jacobian evaluations = %li\n", nje); + printf(" Total number of Newton iterations = %li\n", nni); + printf(" Total number of linear solver convergence failures = %li\n", ncfn); + printf(" Total number of error test failures = %li\n\n", netf); + + /* check the solution error */ + retval = check_ans(y, t, reltol, abstol); + + /* Clean up and return */ + N_VDestroy(y); /* Free y vector */ + 
ARKStepFree(&arkode_mem); /* Free integrator memory */ + SUNLinSolFree(LS); /* Free linear solver */ + + return retval; +} + +/*------------------------------- + * Functions called by the solver + *-------------------------------*/ + +/* f routine to compute the ODE RHS function f(t,y). */ +static int f(realtype t, N_Vector y, N_Vector ydot, void *user_data) +{ + realtype *rdata = (realtype *) user_data; /* cast user_data to realtype */ + realtype lamda = rdata[0]; /* set shortcut for stiffness parameter */ + realtype u = NV_Ith_S(y,0); /* access current solution value */ + + /* fill in the RHS function: "NV_Ith_S" accesses the 0th entry of ydot */ + NV_Ith_S(ydot,0) = lamda*u + RCONST(1.0)/(RCONST(1.0)+t*t) - lamda*atan(t); + + return 0; /* return with success */ +} + +/*------------------------------------- + * Custom matrix-embedded linear solver + *-------------------------------------*/ + +/* constructor */ +static SUNLinearSolver MatrixEmbeddedLS(void *arkode_mem) +{ + /* Create an empty linear solver */ + SUNLinearSolver LS = SUNLinSolNewEmpty(); + if (LS == NULL) return NULL; + + /* Attach operations */ + LS->ops->gettype = MatrixEmbeddedLSType; + LS->ops->solve = MatrixEmbeddedLSSolve; + LS->ops->free = MatrixEmbeddedLSFree; + + /* Set content pointer to ARKODE memory */ + LS->content = arkode_mem; + + /* Return solver */ + return(LS); +} + +/* type descriptor */ +static SUNLinearSolver_Type MatrixEmbeddedLSType(SUNLinearSolver S) +{ + return(SUNLINEARSOLVER_MATRIX_EMBEDDED); +} + +/* linear solve routine */ +static int MatrixEmbeddedLSSolve(SUNLinearSolver LS, SUNMatrix A, N_Vector x, + N_Vector b, realtype tol) +{ + /* temporary variables */ + int retval; + N_Vector z, zpred, Fi, sdata; + realtype tcur, gamma; + void *user_data; + realtype *rdata; + realtype lamda; + + /* retrieve implicit system data from ARKStep */ + retval = ARKStepGetNonlinearSystemData(LS->content, &tcur, &zpred, &z, &Fi, + &gamma, &sdata, &user_data); + if (check_retval((void 
*)&retval, "ARKStepGetNonlinearSystemData", 1)) + return(-1); + + /* extract stiffness parameter from user_data */ + rdata = (realtype *) user_data; + lamda = rdata[0]; + + /* perform linear solve: (1-gamma*lamda)*x = b */ + NV_Ith_S(x,0) = NV_Ith_S(b,0) / (1-gamma*lamda); + + /* return with success */ + return(SUNLS_SUCCESS); +} + +/* destructor */ +static int MatrixEmbeddedLSFree(SUNLinearSolver LS) +{ + if (LS == NULL) return(SUNLS_SUCCESS); + LS->content = NULL; + SUNLinSolFreeEmpty(LS); + return(SUNLS_SUCCESS); +} + +/*------------------------------- + * Private helper functions + *-------------------------------*/ + +/* Check function return value... + opt == 0 means SUNDIALS function allocates memory so check if + returned NULL pointer + opt == 1 means SUNDIALS function returns a flag so check if + flag >= 0 + opt == 2 means function allocates memory so check if returned + NULL pointer +*/ +static int check_retval(void *returnvalue, const char *funcname, int opt) +{ + int *retval; + + /* Check if SUNDIALS function returned NULL pointer - no memory allocated */ + if (opt == 0 && returnvalue == NULL) { + fprintf(stderr, "\nSUNDIALS_ERROR: %s() failed - returned NULL pointer\n\n", + funcname); + return 1; } + + /* Check if flag < 0 */ + else if (opt == 1) { + retval = (int *) returnvalue; + if (*retval < 0) { + fprintf(stderr, "\nSUNDIALS_ERROR: %s() failed with flag = %d\n\n", + funcname, *retval); + return 1; }} + + /* Check if function returned NULL pointer - no memory allocated */ + else if (opt == 2 && returnvalue == NULL) { + fprintf(stderr, "\nMEMORY_ERROR: %s() failed - returned NULL pointer\n\n", + funcname); + return 1; } + + return 0; +} + +/* check the computed solution */ +static int check_ans(N_Vector y, realtype t, realtype rtol, realtype atol) +{ + int passfail=0; /* answer pass (0) or fail (1) flag */ + realtype ans, err, ewt; /* answer data, error, and error weight */ + + /* compute solution error */ + ans = atan(t); + ewt = RCONST(1.0) / 
(rtol * fabs(ans) + atol); + err = ewt * fabs(NV_Ith_S(y,0) - ans); + + /* is the solution within the tolerances? */ + passfail = (err < RCONST(1.0)) ? 0 : 1; + + if (passfail) { + fprintf(stdout, "\nSUNDIALS_WARNING: check_ans error=%"GSYM"\n\n", err); + } + + return(passfail); +} + +/*---- end of file ----*/ diff -Nru sundials-5.7.0+dfsg/examples/arkode/C_serial/ark_analytic_mels.out sundials-5.8.0+dfsg/examples/arkode/C_serial/ark_analytic_mels.out --- sundials-5.7.0+dfsg/examples/arkode/C_serial/ark_analytic_mels.out 1970-01-01 00:00:00.000000000 +0000 +++ sundials-5.8.0+dfsg/examples/arkode/C_serial/ark_analytic_mels.out 2021-09-30 19:05:25.000000000 +0000 @@ -0,0 +1,30 @@ + +Analytical ODE test problem: + lamda = -100 + reltol = 1.0e-06 + abstol = 1.0e-10 + + t u + --------------------- + 1.000000 0.785398 + 2.000000 1.107149 + 3.000000 1.249046 + 4.000000 1.325818 + 5.000000 1.373401 + 6.000000 1.405648 + 7.000000 1.428899 + 8.000000 1.446441 + 9.000000 1.460139 + 10.000000 1.471128 + --------------------- + +Final Solver Statistics: + Internal solver steps = 559 (attempted = 559) + Total RHS evals: Fe = 0, Fi = 5593 + Total linear solver setups = 0 + Total RHS evals for setting up the linear system = 0 + Total number of Jacobian evaluations = 0 + Total number of Newton iterations = 2795 + Total number of linear solver convergence failures = 0 + Total number of error test failures = 0 + diff -Nru sundials-5.7.0+dfsg/examples/arkode/C_serial/ark_kpr_mri_0_0.002.out sundials-5.8.0+dfsg/examples/arkode/C_serial/ark_kpr_mri_0_0.002.out --- sundials-5.7.0+dfsg/examples/arkode/C_serial/ark_kpr_mri_0_0.002.out 2021-01-29 21:48:12.000000000 +0000 +++ sundials-5.8.0+dfsg/examples/arkode/C_serial/ark_kpr_mri_0_0.002.out 2021-09-30 19:05:25.000000000 +0000 @@ -24,19 +24,19 @@ 1.100000 1.107609 1.000489 3.45e-09 5.18e-10 1.200000 1.086821 1.677552 3.13e-09 5.29e-10 1.300000 1.064777 1.277775 2.77e-09 5.29e-10 - 1.400000 1.041625 1.342455 2.38e-09 5.24e-10 - 1.500000 
1.017531 1.642940 1.94e-09 5.15e-10 + 1.400000 1.041625 1.342455 2.38e-09 5.25e-10 + 1.500000 1.017531 1.642940 1.94e-09 5.14e-10 1.600000 0.992673 1.012112 1.46e-09 4.94e-10 1.700000 0.967253 1.714058 9.32e-10 4.72e-10 - 1.800000 0.941488 1.183867 3.54e-10 4.38e-10 + 1.800000 0.941488 1.183867 3.54e-10 4.39e-10 1.900000 0.915617 1.437465 2.75e-10 4.01e-10 2.000000 0.889903 1.577082 9.56e-10 3.55e-10 2.100000 0.864625 1.056467 1.69e-09 3.05e-10 - 2.200000 0.840089 1.730920 2.47e-09 2.39e-10 + 2.200000 0.840089 1.730920 2.47e-09 2.38e-10 2.300000 0.816616 1.101047 3.30e-09 1.73e-10 - 2.400000 0.794546 1.525051 4.15e-09 9.32e-11 - 2.500000 0.774227 1.496993 5.01e-09 8.75e-12 - 2.600000 0.756013 1.126857 5.86e-09 7.99e-11 + 2.400000 0.794546 1.525051 4.15e-09 9.31e-11 + 2.500000 0.774227 1.496993 5.01e-09 8.65e-12 + 2.600000 0.756013 1.126857 5.86e-09 8.00e-11 2.700000 0.740246 1.727536 6.67e-09 1.81e-10 2.800000 0.727247 1.038393 7.38e-09 2.78e-10 2.900000 0.717301 1.600759 7.97e-09 3.85e-10 diff -Nru sundials-5.7.0+dfsg/examples/arkode/C_serial/ark_kpr_mri_5_0.002.out sundials-5.8.0+dfsg/examples/arkode/C_serial/ark_kpr_mri_5_0.002.out --- sundials-5.7.0+dfsg/examples/arkode/C_serial/ark_kpr_mri_5_0.002.out 2021-01-29 21:48:12.000000000 +0000 +++ sundials-5.8.0+dfsg/examples/arkode/C_serial/ark_kpr_mri_5_0.002.out 2021-09-30 19:05:25.000000000 +0000 @@ -21,28 +21,28 @@ 0.800000 1.161186 1.374632 1.50e-10 6.28e-11 0.900000 1.144904 1.245763 1.42e-10 6.63e-11 1.000000 1.127010 1.691839 1.33e-10 6.94e-11 - 1.100000 1.107609 1.000489 1.23e-10 7.42e-11 - 1.200000 1.086821 1.677552 1.12e-10 7.19e-11 - 1.300000 1.064777 1.277775 9.90e-11 7.36e-11 - 1.400000 1.041625 1.342455 8.49e-11 7.24e-11 - 1.500000 1.017531 1.642940 6.95e-11 6.92e-11 - 1.600000 0.992673 1.012112 5.24e-11 6.94e-11 - 1.700000 0.967253 1.714058 3.37e-11 6.25e-11 - 1.800000 0.941488 1.183867 1.32e-11 6.04e-11 - 1.900000 0.915617 1.437465 9.11e-12 5.37e-11 - 2.000000 0.889903 1.577082 3.33e-11 4.65e-11 - 
2.100000 0.864625 1.056467 5.94e-11 3.59e-11 - 2.200000 0.840089 1.730920 8.72e-11 3.16e-11 - 2.300000 0.816616 1.101047 1.16e-10 1.82e-11 - 2.400000 0.794546 1.525051 1.47e-10 1.03e-11 - 2.500000 0.774227 1.496993 1.78e-10 1.43e-12 - 2.600000 0.756013 1.126857 2.08e-10 1.62e-11 - 2.700000 0.740246 1.727536 2.36e-10 2.57e-11 - 2.800000 0.727247 1.038393 2.62e-10 4.37e-11 - 2.900000 0.717301 1.600759 2.83e-10 5.42e-11 - 3.000000 0.710636 1.406380 2.99e-10 6.93e-11 - 3.100000 0.707412 1.214353 3.07e-10 8.38e-11 - 3.200000 0.707709 1.704026 3.08e-10 9.31e-11 + 1.100000 1.107609 1.000489 1.23e-10 7.12e-11 + 1.200000 1.086821 1.677552 1.12e-10 7.21e-11 + 1.300000 1.064777 1.277775 9.90e-11 7.21e-11 + 1.400000 1.041625 1.342455 8.49e-11 7.13e-11 + 1.500000 1.017531 1.642940 6.94e-11 6.96e-11 + 1.600000 0.992673 1.012112 5.24e-11 6.69e-11 + 1.700000 0.967253 1.714058 3.37e-11 6.34e-11 + 1.800000 0.941488 1.183867 1.32e-11 5.89e-11 + 1.900000 0.915617 1.437465 9.12e-12 5.33e-11 + 2.000000 0.889903 1.577082 3.33e-11 4.69e-11 + 2.100000 0.864625 1.056467 5.94e-11 3.62e-11 + 2.200000 0.840089 1.730920 8.72e-11 3.20e-11 + 2.300000 0.816616 1.101047 1.16e-10 1.85e-11 + 2.400000 0.794546 1.525051 1.47e-10 1.05e-11 + 2.500000 0.774227 1.496993 1.78e-10 1.19e-12 + 2.600000 0.756013 1.126857 2.08e-10 1.60e-11 + 2.700000 0.740246 1.727536 2.36e-10 2.55e-11 + 2.800000 0.727247 1.038393 2.62e-10 4.36e-11 + 2.900000 0.717301 1.600759 2.83e-10 5.41e-11 + 3.000000 0.710636 1.406380 2.99e-10 6.92e-11 + 3.100000 0.707412 1.214353 3.07e-10 8.37e-11 + 3.200000 0.707709 1.704026 3.08e-10 9.30e-11 3.300000 0.711520 1.004391 3.02e-10 1.08e-10 3.400000 0.718750 1.661225 2.88e-10 1.14e-10 3.500000 0.729227 1.310102 2.68e-10 1.23e-10 @@ -58,12 +58,12 @@ 4.500000 0.945834 1.126875 8.93e-12 9.14e-11 4.600000 0.971557 1.496974 3.00e-11 7.93e-11 4.700000 0.996898 1.525070 4.92e-11 6.88e-11 - 4.800000 1.021641 1.101030 6.67e-11 6.11e-11 - 4.900000 1.045589 1.730922 8.26e-11 4.66e-11 + 4.800000 1.021641 
1.101030 6.67e-11 6.10e-11 + 4.900000 1.045589 1.730922 8.26e-11 4.65e-11 5.000000 1.068565 1.056480 9.70e-11 4.02e-11 ------------------------------------------------------ Final Solver Statistics: - Steps: nsts = 2501, nstf = 250250 - u error = 1.665e-10, v error = 7.703e-11, total error = 1.297e-10 - Total RHS evals: Fs = 12506, Ff = 1003503 + Steps: nsts = 2501, nstf = 250100 + u error = 1.665e-10, v error = 7.685e-11, total error = 1.296e-10 + Total RHS evals: Fs = 12506, Ff = 1002903 diff -Nru sundials-5.7.0+dfsg/examples/arkode/C_serial/ark_kpr_mri_6_0.005.out sundials-5.8.0+dfsg/examples/arkode/C_serial/ark_kpr_mri_6_0.005.out --- sundials-5.7.0+dfsg/examples/arkode/C_serial/ark_kpr_mri_6_0.005.out 2021-01-29 21:48:12.000000000 +0000 +++ sundials-5.8.0+dfsg/examples/arkode/C_serial/ark_kpr_mri_6_0.005.out 2021-09-30 19:05:25.000000000 +0000 @@ -31,12 +31,12 @@ 1.800000 0.941488 1.183867 6.13e-10 2.48e-09 1.900000 0.915617 1.437465 4.00e-10 2.25e-09 2.000000 0.889903 1.577082 1.50e-09 1.98e-09 - 2.100000 0.864625 1.056467 2.68e-09 1.67e-09 + 2.100000 0.864625 1.056467 2.68e-09 1.66e-09 2.200000 0.840089 1.730920 3.94e-09 1.30e-09 - 2.300000 0.816616 1.101047 5.27e-09 9.00e-10 + 2.300000 0.816616 1.101047 5.27e-09 8.99e-10 2.400000 0.794546 1.525051 6.65e-09 4.49e-10 2.500000 0.774227 1.496993 8.05e-09 4.16e-11 - 2.600000 0.756013 1.126857 9.42e-09 5.70e-10 + 2.600000 0.756013 1.126857 9.42e-09 5.71e-10 2.700000 0.740246 1.727536 1.07e-08 1.13e-09 2.800000 0.727247 1.038393 1.19e-08 1.71e-09 2.900000 0.717301 1.600759 1.29e-08 2.31e-09 @@ -57,13 +57,13 @@ 4.400000 0.919964 1.727533 6.51e-10 4.18e-09 4.500000 0.945834 1.126875 3.93e-10 3.79e-09 4.600000 0.971557 1.496974 1.35e-09 3.38e-09 - 4.700000 0.996898 1.525070 2.23e-09 2.95e-09 + 4.700000 0.996898 1.525070 2.23e-09 2.94e-09 4.800000 1.021641 1.101030 3.02e-09 2.50e-09 4.900000 1.045589 1.730922 3.75e-09 2.06e-09 5.000000 1.068565 1.056480 4.40e-09 1.61e-09 
------------------------------------------------------ Final Solver Statistics: - Steps: nsts = 1001, nstf = 100205 + Steps: nsts = 1001, nstf = 100100 u error = 7.569e-09, v error = 3.238e-09, total error = 5.821e-09 - Total RHS evals: Fs = 5006, Ff = 301618 + Total RHS evals: Fs = 5006, Ff = 301303 diff -Nru sundials-5.7.0+dfsg/examples/arkode/C_serial/CMakeLists.txt sundials-5.8.0+dfsg/examples/arkode/C_serial/CMakeLists.txt --- sundials-5.7.0+dfsg/examples/arkode/C_serial/CMakeLists.txt 2021-01-29 21:48:12.000000000 +0000 +++ sundials-5.8.0+dfsg/examples/arkode/C_serial/CMakeLists.txt 2021-09-30 19:05:25.000000000 +0000 @@ -21,6 +21,7 @@ # Examples using SUNDIALS linear solvers set(ARKODE_examples "ark_analytic\;\;" + "ark_analytic_mels\;\;develop" "ark_analytic_nonlin\;\;develop" "ark_brusselator\;\;develop" "ark_brusselator_fp\;\;develop" diff -Nru sundials-5.7.0+dfsg/examples/arkode/CXX_parallel/ark_brusselator1D_task_local_nls.cpp sundials-5.8.0+dfsg/examples/arkode/CXX_parallel/ark_brusselator1D_task_local_nls.cpp --- sundials-5.7.0+dfsg/examples/arkode/CXX_parallel/ark_brusselator1D_task_local_nls.cpp 2021-01-29 21:48:12.000000000 +0000 +++ sundials-5.8.0+dfsg/examples/arkode/CXX_parallel/ark_brusselator1D_task_local_nls.cpp 2021-09-30 19:05:25.000000000 +0000 @@ -1072,6 +1072,7 @@ SUNNonlinearSolver TaskLocalNewton(N_Vector y, FILE* DFID) { + void* tmp_comm; SUNNonlinearSolver NLS; TaskLocalNewton_Content content; @@ -1108,8 +1109,11 @@ NLS->content = content; /* Fill general content */ - content->comm = *((MPI_Comm*) N_VGetCommunicator(y)); - if (content->comm == NULL) { SUNNonlinSolFree(NLS); return NULL; } + tmp_comm = N_VGetCommunicator(y); + if (tmp_comm == NULL) { SUNNonlinSolFree(NLS); return NULL; } + + content->comm = *((MPI_Comm*) tmp_comm); + if (content->comm == MPI_COMM_NULL) { SUNNonlinSolFree(NLS); return NULL; } content->local_nls = SUNNonlinSol_Newton(N_VGetLocalVector_MPIPlusX(y)); if (content->local_nls == NULL) { 
SUNNonlinSolFree(NLS); return NULL; } diff -Nru sundials-5.7.0+dfsg/examples/arkode/CXX_parallel/ark_diffusion_reaction_p.cpp sundials-5.8.0+dfsg/examples/arkode/CXX_parallel/ark_diffusion_reaction_p.cpp --- sundials-5.7.0+dfsg/examples/arkode/CXX_parallel/ark_diffusion_reaction_p.cpp 1970-01-01 00:00:00.000000000 +0000 +++ sundials-5.8.0+dfsg/examples/arkode/CXX_parallel/ark_diffusion_reaction_p.cpp 2021-09-30 19:05:25.000000000 +0000 @@ -0,0 +1,3119 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2021, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * This example simulates the 2D diffusion-reaction (Brusselator) equation, + * + * u_t = Dux u_xx + Duy u_yy + A + u * u * v - (B + 1) * u + * v_t = Dvx u_xx + Dvy u_yy + B * u - u * u * v + * + * where u and v represent the concentrations of the two chemcial species, the + * diffusion rates are Dux = Duy = Dvx = Dvy = 1e-3, and the species with + * constant concentration over time are A = 1 and B = 3. + * + * The system is evolved from t = 0 to t = 10 on a square domain centered at + * the origin with sizes of length 1. The initial condition is + * + * u(x,y) = A + 0.5 * sin(2 pi (x - xl) / wx) * sin(2 pi (y - yl) / wy) + * v(x,y) = B / A + * + * where xl and yl are the lower bounds of the domain in the x and y directions + * respectively, wx is the width of the domain, and wy is the height of the + * domain. + * + * The system is evolved in time using one of the following approaches: + * + * 1. 
A single rate IMEX method (ARKStep) with implicit diffusion and + * explicit reactions. + * + * 2. A solve-decoupled implicit MRI-GARK method (MRIStep) paired with one of + * the following fast time scale integrators: + * + * a. An explicit method (ARKStep) integrating all the reaction systems + * simultaneously. + * + * b. A user-defined custom inner stepper wrapping CVODE and integrating + * all the reaction systems simultaneously (default). + * + * c. A user-defined custom inner stepper wrapping CVODE and integrating + * the MPI task-local reaction systems independently. + * + * When CVODE is used as the fast time scale integrator variable order implicit + * Adams methods are used and the nonlinear implicit systems are solved with the + * Anderson accelerated fixed point solver. + * + * Several command line options are available to change the problem parameters + * and ARKStep/MRIStep/CVODE settings. Use the flag --help for more information. + * ---------------------------------------------------------------------------*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +// Include MPI +#include "mpi.h" + +// Include desired integrators, vectors, linear solvers, and nonlinear sovlers +#include "arkode/arkode_arkstep.h" +#include "arkode/arkode_mristep.h" +#include "cvode/cvode.h" +#include "nvector/nvector_serial.h" +#include "nvector/nvector_mpiplusx.h" +#include "sunlinsol/sunlinsol_pcg.h" +#include "sunlinsol/sunlinsol_spgmr.h" +#include "sunnonlinsol/sunnonlinsol_fixedpoint.h" + +// Macros for problem constants +#define PI RCONST(3.141592653589793238462643383279502884197169) +#define ZERO RCONST(0.0) +#define ONE RCONST(1.0) +#define TWO RCONST(2.0) + +#define NSPECIES 2 + +#define WIDTH (10 + numeric_limits::digits10) + +// Macro to access each species at an (x,y) location in a 1D array +#define UIDX(x,y,nx) (NSPECIES * ((nx) * (y) + (x))) +#define VIDX(x,y,nx) (NSPECIES * ((nx) * (y) + (x)) + 1) + +using namespace std; + 
+ +// ----------------------------------------------------------------------------- +// Simple timer class +// ----------------------------------------------------------------------------- + + +class Timer +{ +public: + Timer() : total_(0.0), start_(0.0), end_(0.0) {} + void start() + { + start_ = MPI_Wtime(); + } + void stop() + { + end_ = MPI_Wtime(); + total_ += (end_-start_); + } + realtype total() const + { + return total_; + } + realtype max(MPI_Comm comm) + { + double maxtime = 0.0; + MPI_Reduce(&total_, &maxtime, 1, MPI_DOUBLE, MPI_MAX, 0, comm); + return maxtime; + } +private: + realtype total_; + realtype start_; + realtype end_; +}; + + +// ----------------------------------------------------------------------------- +// User data structure +// ----------------------------------------------------------------------------- + + +struct UserData +{ + // ------------------ + // Problem parameters + // ------------------ + + // Diffusion coefficients for u and v + realtype Dux = RCONST(1.0e-3); + realtype Duy = RCONST(1.0e-3); + realtype Dvx = RCONST(1.0e-3); + realtype Dvy = RCONST(1.0e-3); + + // Feed and reaction rates + realtype A = RCONST(1.0); + realtype B = RCONST(3.0); + + // Final simulation time + realtype tf = RCONST(10.0); + + // Domain boundaries in x and y directions + realtype xl = RCONST(-0.5); + realtype xu = RCONST(0.5); + realtype yl = RCONST(-0.5); + realtype yu = RCONST(0.5); + + // Enable/disable RHS terms + bool diffusion = true; + bool reaction = true; + + // -------------------------- + // Discretization parameteres + // -------------------------- + + // Global and local number of nodes in the x and y directions + sunindextype nx = 128; + sunindextype ny = 128; + sunindextype nx_loc = 0; + sunindextype ny_loc = 0; + + // Mesh spacing in the x and y directions + realtype dx = (xu - xl) / nx; + realtype dy = (yu - yl) / ny; + + // Global and local number of equations + sunindextype neq = NSPECIES * nx * ny; + sunindextype neq_loc = 0; + 
+ // Subdomain global starting and ending x and y indices + sunindextype is = 0; + sunindextype ie = 0; + sunindextype js = 0; + sunindextype je = 0; + + // ------------- + // MPI variables + // ------------- + + // Cartesian communicator + MPI_Comm comm = MPI_COMM_NULL; + + // MPI processes total, in the x and y directions, and process ID + int nprocs = 1; + int npx = 0; + int npy = 0; + int myid = 0; + + // Output from this process + bool outproc = false; + + // ------------------ + // Exchange variables + // ------------------ + + // Neighbor IDs + int ipW = -1; + int ipE = -1; + int ipS = -1; + int ipN = -1; + int ipSW = -1; + int ipNE = -1; + + // Number of elements in buffers + int xbufcount = 0; + int ybufcount = 0; + + // Receive and send buffers + realtype *Wrecv = NULL; + realtype *Erecv = NULL; + realtype *Srecv = NULL; + realtype *Nrecv = NULL; + + realtype *Wsend = NULL; + realtype *Esend = NULL; + realtype *Ssend = NULL; + realtype *Nsend = NULL; + + realtype *SWsend = NULL; + realtype *NErecv = NULL; + + // Recieve and send requests + MPI_Request reqRW, reqRE, reqRS, reqRN; + MPI_Request reqSW, reqSE, reqSS, reqSN; + MPI_Request reqRC, reqSC; + + // ------------------ + // Integrator options + // ------------------ + + // Flag to change integration method + // 0 = ARKStep IMEX + // 1 = MRIStep with ARKStep global inner integrator + // 2 = MRIStep with CVODE global inner integrator + // 3 = MRIStep with CVODE local inner integrator + int integrator = 2; + + // ------------- + // IMEX settings + // ------------- + + // Relative and absolute tolerances + realtype rtol_imex = RCONST(1.e-4); + realtype atol_imex = RCONST(1.e-8); + + // Step size selection (ZERO = adaptive steps) + realtype h_imex = ZERO; + + // Method order + int order_imex = 3; + + // ------------ + // MRI settings + // ------------ + + // Relative and absolute tolerances (slow and fast) + realtype rtol_slow = RCONST(1.e-4); + realtype atol_slow = RCONST(1.e-8); + realtype rtol_fast = 
RCONST(1.e-5); + realtype atol_fast = RCONST(1.e-9); + + // Fixed step size (slow and fast) + realtype h_slow = RCONST(-1.0); // use multiple of CFL + realtype h_fast = ZERO; // use adaptive stepping + + // Inner ARKODE method order + int order_fast = 3; + + // Inner stepper memory + MRIStepInnerStepper stepper = NULL; + + // ---------------------------- + // Shared IMEX and MRI settings + // ---------------------------- + + int controller = 0; // step size adaptivity method (0 = use default) + int maxsteps = 0; // max steps between outputs (0 = use default) + bool linear = true; // enable/disable linearly implicit option + bool diagnostics = false; // output diagnostics + FILE *diagfp = NULL; // diagnostics output file + + // ----------------------------------------- + // Nonlinear solver settings + // ----------------------------------------- + + int fp_iters = 10; // max number of fixed-point iterations with CVODE + int fp_aa = 3; // Anderson acceleration depth with fixed-point + + // ----------------------------------------- + // Linear solver and preconditioner settings + // ----------------------------------------- + + bool pcg = true; // use PCG (true) or GMRES (false) + bool prec = true; // preconditioner on/off + bool lsinfo = false; // output residual history + int liniters = 5; // number of linear iterations + int msbp = 0; // preconditioner setup frequency (0 = default) + realtype epslin = ZERO; // linear solver tolerance factor (ZERO = default) + N_Vector diag = NULL; // inverse of Jacobian diagonal + + // --------------- + // Ouput variables + // --------------- + + int output = 1; // 0 = no output, 1 = output stats, 2 = write to disk + int nout = 20; // number of output times + ofstream uout; // output file stream + + // ---------------- + // Timing variables + // ---------------- + + bool timing = false; // print timings + Timer evolve; // evolve time (excluding output) + Timer rhsD; // diffustion rhs time (including exchange) + Timer rhsR; // 
reaction rhs time + Timer psolve; // preconditioner solve time + Timer exchange; // MPI exchange time + + // --------- + // Debugging + // --------- + + // Run in one step mode for fixed number of steps (0 = normal mode) + int onestep = 0; +}; + + +// ----------------------------------------------------------------------------- +// Custom inner stepper content and functions +// ----------------------------------------------------------------------------- + + +struct InnerStepperContent +{ + void* cvode_mem = NULL; // CVODE memory structure + void* user_data = NULL; // user data pointer + bool local = false; // global or task-local inner integrator + + // saved integrator stats + long int nst = 0; // time steps + long int netf = 0; // error test fails + long int nfe = 0; // rhs evals + long int nni = 0; // nonlinear iterations + long int nncf = 0; // nonlinear convergence failures +}; + +static int CVodeInnerStepper_Evolve(MRIStepInnerStepper stepper, + realtype t0, realtype tout, N_Vector y); +static int CVodeInnerStepper_FullRhs(MRIStepInnerStepper stepper, + realtype t, N_Vector y, N_Vector f, + int mode); +static int CVodeInnerStepper_Reset(MRIStepInnerStepper stepper, + realtype tR, N_Vector yR); + + +// ----------------------------------------------------------------------------- +// Functions provided to the SUNDIALS integrator +// ----------------------------------------------------------------------------- + + +// ODE right hand side functions +static int diffusion(realtype t, N_Vector u, N_Vector f, void *user_data); +static int reaction(realtype t, N_Vector u, N_Vector f, void *user_data); + +// Preconditioner solve function +static int PSolve(realtype t, N_Vector u, N_Vector f, N_Vector r, + N_Vector z, realtype gamma, realtype delta, int lr, + void *user_data); + + +// ----------------------------------------------------------------------------- +// Helper functions +// ----------------------------------------------------------------------------- + + 
+// Setup the parallel decomposition +static int SetupDecomp(UserData *udata); + +// Integrator setup functions +static int SetupARK(UserData *udata, N_Vector u, SUNLinearSolver LS, + void **arkode_mem); +static int SetupMRI(UserData *udata, N_Vector u, SUNLinearSolver LS, + void **arkode_mem, MRIStepInnerStepper *stepper); +static int SetupMRICVODE(UserData *udata, N_Vector u, SUNLinearSolver LS, + SUNNonlinearSolver *NLS, void **arkode_mem, + MRIStepInnerStepper *stepper); + +// Perform neighbor exchange +static int StartExchange(N_Vector y, UserData *udata); +static int EndExchange(UserData *udata); + +// Exchange boundary data for output +static int ExchangeBC(N_Vector y, UserData *udata); + + +// ----------------------------------------------------------------------------- +// UserData and input functions +// ----------------------------------------------------------------------------- + + +// Free memory allocated within UserData +static int FreeUserData(UserData *udata); + +// Read the command line inputs and set UserData values +static int ReadInputs(int *argc, char ***argv, UserData *udata); + + +// ----------------------------------------------------------------------------- +// Output and utility functions +// ----------------------------------------------------------------------------- + + +// Compute the initial condition +static int SetIC(N_Vector u, UserData *udata); + +// Print the command line options +static void InputHelp(); + +// Print some UserData information +static int PrintUserData(UserData *udata); + +// Output solution +static int OpenOutput(UserData *udata); +static int WriteOutput(realtype t, N_Vector u, UserData *udata); +static int CloseOutput(UserData *udata); + +// Print integration statistics +static int OutputStatsIMEX(void *arkode_mem, UserData *udata); +static int OutputStatsMRI(void *arkode_mem, MRIStepInnerStepper stepper, + UserData *udata); +static int OutputStatsMRICVODE(void *arkode_mem, + MRIStepInnerStepper stepper, + 
UserData* udata); + +// Print integration timing +static int OutputTiming(UserData *udata); + +// Check function return values +static int check_flag(void *flagvalue, const string funcname, int opt); + + +// ----------------------------------------------------------------------------- +// Main Program +// ----------------------------------------------------------------------------- + + +int main(int argc, char* argv[]) +{ + // Reusable error-checking flag + int flag = 0; + + // Initialize MPI + flag = MPI_Init(&argc, &argv); + if (check_flag(&flag, "MPI_Init", 1)) return 1; + + // MPI process ID + int myid; + flag = MPI_Comm_rank(MPI_COMM_WORLD, &myid); + if (check_flag(&flag, "MPI_Comm_rank", 1)) return 1; + + // Set output process flag + bool outproc = (myid == 0); + + // --------------- + // Setup user data + // --------------- + + UserData udata; + + udata.outproc = outproc; + + flag = ReadInputs(&argc, &argv, &udata); + if (flag != 0) return 1; + + // ---------------------------- + // Setup parallel decomposition + // ---------------------------- + + flag = SetupDecomp(&udata); + if (check_flag(&flag, "SetupDecomp", 1)) return 1; + + // Output problem setup/options + if (outproc) + { + flag = PrintUserData(&udata); + if (check_flag(&flag, "PrintUserData", 1)) return 1; + } + + // -------------- + // Create vectors + // -------------- + + N_Vector u = N_VMake_MPIPlusX(udata.comm, N_VNew_Serial(udata.neq_loc)); + if (check_flag((void *) u, "N_VNew_MPIPlusX", 0)) return 1; + + // -------------------- + // Create linear solver + // -------------------- + + // Open diagnostics file + if (outproc && (udata.diagnostics || udata.lsinfo)) + { + udata.diagfp = fopen("diagnostics.txt", "w"); + if (check_flag((void *) (udata.diagfp), "fopen", 0)) return 1; + } + + // Preconditioning type + int prectype = (udata.prec) ? 
PREC_RIGHT : PREC_NONE; + + // Linear solver memory structure + SUNLinearSolver LS = NULL; + + if (udata.pcg) + { + LS = SUNLinSol_PCG(u, prectype, udata.liniters); + if (check_flag((void *) LS, "SUNLinSol_PCG", 0)) return 1; + + if (udata.lsinfo && outproc) + { + flag = SUNLinSolSetPrintLevel_PCG(LS, 1); + if (check_flag(&flag, "SUNLinSolSetPrintLevel_PCG", 1)) return(1); + + flag = SUNLinSolSetInfoFile_PCG(LS, udata.diagfp); + if (check_flag(&flag, "SUNLinSolSetInfoFile_PCG", 1)) return(1); + } + } + else + { + LS = SUNLinSol_SPGMR(u, prectype, udata.liniters); + if (check_flag((void *) LS, "SUNLinSol_SPGMR", 0)) return 1; + + if (udata.lsinfo && outproc) + { + flag = SUNLinSolSetPrintLevel_SPGMR(LS, 1); + if (check_flag(&flag, "SUNLinSolSetPrintLevel_SPGMR", 1)) return(1); + + flag = SUNLinSolSetInfoFile_SPGMR(LS, udata.diagfp); + if (check_flag(&flag, "SUNLinSolSetInfoFile_SPGMR", 1)) return(1); + } + } + + // Allocate preconditioner workspace + if (udata.prec) + { + udata.diag = N_VClone(u); + if (check_flag((void *) (udata.diag), "N_VClone", 0)) return 1; + } + + // --------------------- + // Set initial condition + // --------------------- + + flag = SetIC(u, &udata); + if (check_flag(&flag, "SetIC", 1)) return 1; + + // ---------------- + // Setup Integrator + // ---------------- + + // ARKODE memory structure + void *arkode_mem = NULL; + + // Custom inner stepper memory (CVODE) + MRIStepInnerStepper stepper = NULL; + + // Inner stepper nonlinear solver (CVODE) + SUNNonlinearSolver NLS = NULL; + + // Create integrator + switch(udata.integrator) + { + case(0): + flag = SetupARK(&udata, u, LS, &arkode_mem); + if (check_flag((void *) arkode_mem, "SetupARK", 0)) return 1; + break; + case(1): + flag = SetupMRI(&udata, u, LS, &arkode_mem, &stepper); + if (check_flag((void *) arkode_mem, "SetupMRI", 0)) return 1; + break; + case(2): + case(3): + flag = SetupMRICVODE(&udata, u, LS, &NLS, &arkode_mem, &stepper); + if (check_flag((void *) arkode_mem, "SetupMRICVODE", 
0)) return 1; + break; + default: + cerr << "Invalid integrator option" << endl; + break; + } + + // ---------------------- + // Evolve problem in time + // ---------------------- + + // Set the step mode + int stepmode = ARK_NORMAL; + + if (udata.onestep) + { + udata.nout = udata.onestep; + stepmode = ARK_ONE_STEP; + } + + // Initial time, time between outputs, output time + realtype t = ZERO; + realtype dTout = udata.tf / udata.nout; + realtype tout = dTout; + + // Inital output + flag = OpenOutput(&udata); + if (check_flag(&flag, "OpenOutput", 1)) return 1; + + flag = WriteOutput(t, u, &udata); + if (check_flag(&flag, "WriteOutput", 1)) return 1; + + // Loop over output times + for (int iout = 0; iout < udata.nout; iout++) + { + // Start timer + udata.evolve.start(); + + // Evolve + if (udata.integrator) + { + flag = MRIStepEvolve(arkode_mem, tout, u, &t, stepmode); + if (check_flag(&flag, "MRIStepEvolve", 1)) break; + } + else + { + flag = ARKStepEvolve(arkode_mem, tout, u, &t, stepmode); + if (check_flag(&flag, "ARKStepEvolve", 1)) break; + } + + // Stop timer + udata.evolve.stop(); + + // Output solution + flag = WriteOutput(t, u, &udata); + if (check_flag(&flag, "WriteOutput", 1)) return 1; + + // Update output time + tout += dTout; + tout = (tout > udata.tf) ? 
udata.tf : tout; + } + + // Close output + flag = CloseOutput(&udata); + if (check_flag(&flag, "CloseOutput", 1)) return 1; + + // ------------- + // Final outputs + // ------------- + + // Print final integrator stats + if (udata.output > 0 && outproc) + { + cout << "Final integrator statistics:" << endl; + switch(udata.integrator) + { + case(0): + flag = OutputStatsIMEX(arkode_mem, &udata); + if (check_flag(&flag, "OutputStatsIMEX", 1)) return 1; + break; + case(1): + flag = OutputStatsMRI(arkode_mem, stepper, &udata); + if (check_flag(&flag, "OutputStatsMRI", 1)) return 1; + break; + case(2): + case(3): + flag = OutputStatsMRICVODE(arkode_mem, stepper, &udata); + if (check_flag(&flag, "OutputStatsMRICVODE", 1)) return 1; + break; + default: + cerr << "Invalid integrator option" << endl; + break; + } + } + + // Print timing + if (udata.timing) + { + flag = OutputTiming(&udata); + if (check_flag(&flag, "OutputTiming", 1)) return 1; + } + + // -------------------- + // Clean up and return + // -------------------- + + if (outproc && (udata.diagnostics || udata.lsinfo)) fclose(udata.diagfp); + + switch(udata.integrator) + { + case(0): + ARKStepFree(&arkode_mem); + break; + case(1): + { + void* inner_arkode_mem = NULL; + MRIStepInnerStepper_GetContent(stepper, &inner_arkode_mem); + ARKStepFree(&inner_arkode_mem); + MRIStepInnerStepper_Free(&stepper); + MRIStepFree(&arkode_mem); + break; + } + case(2): + case(3): + { + void* inner_content = NULL; + MRIStepInnerStepper_GetContent(stepper, &inner_content); + InnerStepperContent* content = (InnerStepperContent *) inner_content; + CVodeFree(&(content->cvode_mem)); + free(inner_content); + MRIStepInnerStepper_Free(&stepper); + SUNNonlinSolFree(NLS); + MRIStepFree(&arkode_mem); + break; + } + default: + cerr << "Invalid integrator option" << endl; + break; + } + + SUNLinSolFree(LS); // Free linear solver + N_VDestroy(N_VGetLocalVector_MPIPlusX(u)); // Free vectors + N_VDestroy(u); + FreeUserData(&udata); // Free user data + 
flag = MPI_Finalize(); // Finalize MPI + return 0; +} + + +// ----------------------------------------------------------------------------- +// Setup the parallel decomposition +// ----------------------------------------------------------------------------- + + +static int SetupDecomp(UserData *udata) +{ + int flag; + + // Check that this has not been called before + if (udata->Erecv != NULL || udata->Wrecv != NULL || + udata->Srecv != NULL || udata->Nrecv != NULL) + { + cerr << "SetupDecomp error: parallel decomposition already set up" << endl; + return -1; + } + + // Get the number of processes + flag = MPI_Comm_size(MPI_COMM_WORLD, &(udata->nprocs)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Comm_size = " << flag << endl; + return -1; + } + + // Set up 2D Cartesian communicator + int dims[2]; + dims[0] = (udata->npx > 0) ? udata->npx : 0; + dims[1] = (udata->npy > 0) ? udata->npy : 0; + + int periods[2]; + periods[0] = 1; + periods[1] = 1; + + flag = MPI_Dims_create(udata->nprocs, 2, dims); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Dims_create = " << flag << endl; + return -1; + } + + udata->npx = dims[0]; + udata->npy = dims[1]; + + flag = MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 0, &(udata->comm)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Cart_create = " << flag << endl; + return -1; + } + + // Get my rank in the new Cartesian communicator + flag = MPI_Comm_rank(udata->comm, &(udata->myid)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Comm_rank = " << flag << endl; + return -1; + } + + if (udata->myid == 0) udata->outproc = true; + + // Get dimension of the Cartesian communicator and my coordinates + int coords[2]; + flag = MPI_Cart_get(udata->comm, 2, dims, periods, coords); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Cart_get = " << flag << endl; + return -1; + } + + // Determine local extents in x-direction + int idx = coords[0]; + sunindextype qx = udata->nx / dims[0]; + sunindextype 
rx = udata->nx % dims[0]; + + udata->is = qx * idx + (idx < rx ? idx : rx); + udata->ie = udata->is + qx - 1 + (idx < rx ? 1 : 0); + + // Sanity check + if (udata->ie > (udata->nx - 1)) + { + cerr << "Error ie > nx - 1" << endl; + return -1; + } + + // Determine local extents in y-direction + int idy = coords[1]; + sunindextype qy = udata->ny / dims[1]; + sunindextype ry = udata->ny % dims[1]; + + udata->js = qy * idy + (idy < ry ? idy : ry); + udata->je = udata->js + qy - 1 + (idy < ry ? 1 : 0); + + // Sanity check + if (udata->je > (udata->ny - 1)) + { + cerr << "Error je > ny - 1" << endl; + return -1; + } + + // Number of local nodes + udata->nx_loc = (udata->ie) - (udata->is) + 1; + udata->ny_loc = (udata->je) - (udata->js) + 1; + + // Initialize global and local vector lengths + udata->neq = NSPECIES * udata->nx * udata->ny; + udata->neq_loc = NSPECIES * udata->nx_loc * udata->ny_loc; + + // Allocate exchange buffers if necessary + udata->ybufcount = NSPECIES * udata->ny_loc; + udata->Wrecv = new realtype[udata->ybufcount]; + udata->Wsend = new realtype[udata->ybufcount]; + udata->Erecv = new realtype[udata->ybufcount]; + udata->Esend = new realtype[udata->ybufcount]; + + udata->xbufcount = NSPECIES * udata->nx_loc; + udata->Srecv = new realtype[udata->xbufcount]; + udata->Ssend = new realtype[udata->xbufcount]; + udata->Nrecv = new realtype[udata->xbufcount]; + udata->Nsend = new realtype[udata->xbufcount]; + + udata->SWsend = new realtype[NSPECIES]; + udata->NErecv = new realtype[NSPECIES]; + + // MPI neighborhood information + int nbcoords[2]; + + // West neighbor + nbcoords[0] = coords[0]-1; + nbcoords[1] = coords[1]; + flag = MPI_Cart_rank(udata->comm, nbcoords, &(udata->ipW)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Cart_rank = " << flag << endl; + return -1; + } + + // East neighbor + nbcoords[0] = coords[0]+1; + nbcoords[1] = coords[1]; + flag = MPI_Cart_rank(udata->comm, nbcoords, &(udata->ipE)); + if (flag != MPI_SUCCESS) + { + cerr 
<< "Error in MPI_Cart_rank = " << flag << endl; + return -1; + } + + // South neighbor + nbcoords[0] = coords[0]; + nbcoords[1] = coords[1]-1; + flag = MPI_Cart_rank(udata->comm, nbcoords, &(udata->ipS)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Cart_rank = " << flag << endl; + return -1; + } + + // North neighbor + nbcoords[0] = coords[0]; + nbcoords[1] = coords[1]+1; + flag = MPI_Cart_rank(udata->comm, nbcoords, &(udata->ipN)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Cart_rank = " << flag << endl; + return -1; + } + + // Opposite Corners for periodic BC output + if (udata->is == 0 && udata->js == 0) + { + nbcoords[0] = coords[0] - 1; + nbcoords[1] = coords[1] - 1; + flag = MPI_Cart_rank(udata->comm, nbcoords, &(udata->ipSW)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Cart_rank = " << flag << endl; + return -1; + } + } + + if (udata->ie == udata->nx - 1 && udata->je == udata->ny - 1) + { + nbcoords[0] = coords[0] + 1; + nbcoords[1] = coords[1] + 1; + flag = MPI_Cart_rank(udata->comm, nbcoords, &(udata->ipNE)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Cart_rank = " << flag << endl; + return -1; + } + } + + // Return success + return 0; +} + + +// ----------------------------------------------------------------------------- +// Setup the integrator +// ----------------------------------------------------------------------------- + + +static int SetupARK(UserData* udata, N_Vector u, SUNLinearSolver LS, + void** arkode_mem) +{ + int flag; + + // Optionally enable/disable diffusion or reactions (helpful for debugging) + ARKRhsFn fe = udata->reaction ? reaction : NULL; + ARKRhsFn fi = udata->diffusion ? 
diffusion : NULL; + + // Create ARKStep memory with explicit reactions and implicit diffusion + *arkode_mem = ARKStepCreate(fe, fi, ZERO, u); + if (check_flag((void *) *arkode_mem, "ARKStepCreate", 0)) return 1; + + // Specify tolerances + flag = ARKStepSStolerances(*arkode_mem, udata->rtol_imex, udata->atol_imex); + if (check_flag(&flag, "ARKStepSStolerances", 1)) return 1; + + // Attach user data + flag = ARKStepSetUserData(*arkode_mem, (void *) udata); + if (check_flag(&flag, "ARKStepSetUserData", 1)) return 1; + + if (udata->diffusion) + { + // Attach linear solver + flag = ARKStepSetLinearSolver(*arkode_mem, LS, NULL); + if (check_flag(&flag, "ARKStepSetLinearSolver", 1)) return 1; + + if (udata->prec) + { + // Attach preconditioner + flag = ARKStepSetPreconditioner(*arkode_mem, NULL, PSolve); + if (check_flag(&flag, "ARKStepSetPreconditioner", 1)) return 1; + + // Set linear solver setup frequency (update preconditioner) + flag = ARKStepSetLSetupFrequency(*arkode_mem, udata->msbp); + if (check_flag(&flag, "ARKStepSetLSetupFrequency", 1)) return 1; + } + + // Set linear solver tolerance factor + flag = ARKStepSetEpsLin(*arkode_mem, udata->epslin); + if (check_flag(&flag, "ARKStepSetEpsLin", 1)) return 1; + + // Specify linearly implicit non-time-dependent RHS + if (udata->linear) + { + flag = ARKStepSetLinear(*arkode_mem, 0); + if (check_flag(&flag, "ARKStepSetLinear", 1)) return 1; + } + } + + // Select method order + flag = ARKStepSetOrder(*arkode_mem, udata->order_imex); + if (check_flag(&flag, "ARKStepSetOrder", 1)) return 1; + + // Set fixed step size or adaptivity method + if (udata->h_imex > ZERO) + { + flag = ARKStepSetFixedStep(*arkode_mem, udata->h_imex); + if (check_flag(&flag, "ARKStepSetFixedStep", 1)) return 1; + } + else + { + flag = ARKStepSetAdaptivityMethod(*arkode_mem, udata->controller, SUNTRUE, + SUNFALSE, NULL); + if (check_flag(&flag, "ARKStepSetAdaptivityMethod", 1)) return 1; + } + + // Set max steps between outputs + flag = 
ARKStepSetMaxNumSteps(*arkode_mem, udata->maxsteps); + if (check_flag(&flag, "ARKStepSetMaxNumSteps", 1)) return 1; + + // Set stopping time + flag = ARKStepSetStopTime(*arkode_mem, udata->tf); + if (check_flag(&flag, "ARKStepSetStopTime", 1)) return 1; + + // Set diagnostics output file + if (udata->diagnostics && udata->outproc) + { + flag = ARKStepSetDiagnostics(*arkode_mem, udata->diagfp); + if (check_flag(&flag, "ARKStepSetDiagnostics", 1)) return 1; + } + + return 0; +} + + +static int SetupMRI(UserData* udata, N_Vector y, SUNLinearSolver LS, + void** arkode_mem, MRIStepInnerStepper* stepper) +{ + int flag; + + // ------------------------- + // Setup the fast integrator + // ------------------------- + + // Create fast explicit integrator for reactions + void *inner_arkode_mem = ARKStepCreate(reaction, NULL, ZERO, y); + if (check_flag((void *) inner_arkode_mem, "ARKStepCreate", 0)) return 1; + + // Specify tolerances + flag = ARKStepSStolerances(inner_arkode_mem, udata->rtol_fast, + udata->atol_fast); + if (check_flag(&flag, "ARKStepSStolerances", 1)) return 1; + + // Attach user data + flag = ARKStepSetUserData(inner_arkode_mem, (void *) udata); + if (check_flag(&flag, "ARKStepSetUserData", 1)) return 1; + + // Select method order + flag = ARKStepSetOrder(inner_arkode_mem, udata->order_fast); + if (check_flag(&flag, "ARKStepSetOrder", 1)) return 1; + + // Set fixed step size or adaptivity method + if (udata->h_fast > ZERO) + { + flag = ARKStepSetFixedStep(inner_arkode_mem, udata->h_fast); + if (check_flag(&flag, "ARKStepSetFixedStep", 1)) return 1; + } + else + { + flag = ARKStepSetAdaptivityMethod(inner_arkode_mem, udata->controller, + SUNTRUE, SUNFALSE, NULL); + if (check_flag(&flag, "ARKStepSetAdaptivityMethod", 1)) return 1; + } + + // Set max steps between outputs + flag = ARKStepSetMaxNumSteps(inner_arkode_mem, udata->maxsteps); + if (check_flag(&flag, "ARKStepSetMaxNumSteps", 1)) return 1; + + // Set diagnostics output file + if (udata->diagnostics && 
udata->outproc) + { + flag = ARKStepSetDiagnostics(inner_arkode_mem, udata->diagfp); + if (check_flag(&flag, "ARKStepSetDiagnostics", 1)) return 1; + } + + // Wrap ARKODE as an MRIStepInnerStepper + flag = ARKStepCreateMRIStepInnerStepper(inner_arkode_mem, + stepper); + if (check_flag(&flag, "ARKStepCreateMRIStepInnerStepper", 1)) return 1; + + // ------------------------- + // Setup the slow integrator + // ------------------------- + + // Create slow integrator for diffusion and attach fast integrator + *arkode_mem = MRIStepCreate(diffusion, ZERO, y, MRISTEP_CUSTOM, + *stepper); + if (check_flag((void *)*arkode_mem, "MRIStepCreate", 0)) return 1; + + // Set method coupling table (solve-decoupled implicit method) + MRIStepCoupling C = MRIStepCoupling_LoadTable(MRI_GARK_ESDIRK34a); + if (check_flag((void*)C, "MRIStepCoupling_LoadTable", 1)) return 1; + + flag = MRIStepSetCoupling(*arkode_mem, C); + if (check_flag(&flag, "MRIStepSetCoupling", 1)) return 1; + + MRIStepCoupling_Free(C); + + // Set the slow step size + flag = MRIStepSetFixedStep(*arkode_mem, udata->h_slow); + if (check_flag(&flag, "MRIStepSetFixedStep", 1)) return 1; + + // Specify tolerances + flag = MRIStepSStolerances(*arkode_mem, udata->rtol_slow, udata->atol_slow); + if (check_flag(&flag, "MRIStepSStolerances", 1)) return 1; + + // Attach user data + flag = MRIStepSetUserData(*arkode_mem, (void *) udata); + if (check_flag(&flag, "MRIStepSetUserData", 1)) return 1; + + // Attach linear solver + flag = MRIStepSetLinearSolver(*arkode_mem, LS, NULL); + if (check_flag(&flag, "MRIStepSetLinearSolver", 1)) return 1; + + if (udata->prec) + { + // Attach preconditioner + flag = MRIStepSetPreconditioner(*arkode_mem, NULL, PSolve); + if (check_flag(&flag, "MRIStepSetPreconditioner", 1)) return 1; + + // Set linear solver setup frequency (update preconditioner) + flag = MRIStepSetLSetupFrequency(*arkode_mem, udata->msbp); + if (check_flag(&flag, "MRIStepSetLSetupFrequency", 1)) return 1; + } + + // Set linear 
solver tolerance factor + flag = MRIStepSetEpsLin(*arkode_mem, udata->epslin); + if (check_flag(&flag, "MRIStepSetEpsLin", 1)) return 1; + + // Specify linearly implicit non-time-dependent RHS + if (udata->linear) + { + flag = MRIStepSetLinear(*arkode_mem, 0); + if (check_flag(&flag, "MRIStepSetLinear", 1)) return 1; + } + + // Set max steps between outputs + flag = MRIStepSetMaxNumSteps(*arkode_mem, udata->maxsteps); + if (check_flag(&flag, "MRIStepSetMaxNumSteps", 1)) return 1; + + // Set stopping time + flag = MRIStepSetStopTime(*arkode_mem, udata->tf); + if (check_flag(&flag, "MRIStepSetStopTime", 1)) return 1; + + // Set diagnostics output file + if (udata->diagnostics && udata->outproc) + { + flag = MRIStepSetDiagnostics(*arkode_mem, udata->diagfp); + if (check_flag(&flag, "MRIStepSetDiagnostics", 1)) return 1; + } + + return 0; +} + + +static int SetupMRICVODE(UserData *udata, N_Vector y, SUNLinearSolver LS, + SUNNonlinearSolver *NLS, void **arkode_mem, + MRIStepInnerStepper* stepper) +{ + int flag; + + // ------------------------- + // Setup the fast integrator + // ------------------------- + + // Use the global or local state vector to create the inner integrator + N_Vector y_vec; + if (udata->integrator == 2) + { + y_vec = y; + } + else if (udata->integrator == 3) + { + y_vec = N_VGetLocalVector_MPIPlusX(y); + } + else + { + cerr << "ERROR: Invalid MRIStep + CVODE option" << endl; + return -1; + } + + // Create the solver memory and specify the Adams methods + void *cvode_mem = CVodeCreate(CV_ADAMS); + if (check_flag((void *)cvode_mem, "CVodeCreate", 0)) return(1); + + // Initialize the integrator memory + flag = CVodeInit(cvode_mem, reaction, ZERO, y_vec); + if (check_flag(&flag, "CVodeInit", 1)) return(1); + + // Specify tolerances + flag = CVodeSStolerances(cvode_mem, udata->rtol_fast, udata->atol_fast); + if (check_flag(&flag, "CVodeSVtolerances", 1)) return(1); + + // Attach user data + flag = CVodeSetUserData(cvode_mem, (void *) udata); + if 
(check_flag(&flag, "CVodeSetUserData", 1)) return 1; + + // Create and attach fixed-point nonlinear solver + *NLS = SUNNonlinSol_FixedPoint(y_vec, udata->fp_aa); + if(check_flag((void *)*NLS, "SUNNonlinSol_FixedPoint", 0)) return(1); + + flag = CVodeSetNonlinearSolver(cvode_mem, *NLS); + if(check_flag(&flag, "CVodeSetNonlinearSolver", 1)) return(1); + + // Set max number of fixed-point iterations + flag = CVodeSetMaxNonlinIters(cvode_mem, udata->fp_iters); + if(check_flag(&flag, "CVodeSetMaxNonlinIters", 1)) return(1); + + // Set max steps between outputs + flag = CVodeSetMaxNumSteps(cvode_mem, udata->maxsteps); + if (check_flag(&flag, "CVodeSetMaxNumSteps", 1)) return 1; + + // Create the inner stepper wrapper + flag = MRIStepInnerStepper_Create(stepper); + if (check_flag(&flag, "MRIStepInnerStepper_Create", 1)) return 1; + + // Attach memory and operations + InnerStepperContent *inner_content = new InnerStepperContent; + + inner_content->cvode_mem = cvode_mem; + inner_content->user_data = udata; + inner_content->local = (udata->integrator == 2) ? 
false : true; + + flag = MRIStepInnerStepper_SetContent(*stepper, inner_content); + if (check_flag(&flag, "MRIStepInnerStepper_SetContent", 1)) return 1; + + flag = MRIStepInnerStepper_SetEvolveFn(*stepper, + CVodeInnerStepper_Evolve); + if (check_flag(&flag, "MRIStepInnerStepper_SetEvolve", 1)) return 1; + + flag = MRIStepInnerStepper_SetFullRhsFn(*stepper, + CVodeInnerStepper_FullRhs); + if (check_flag(&flag, "MRIStepInnerStepper_SetFullRhsFn", 1)) return 1; + + flag = MRIStepInnerStepper_SetResetFn(*stepper, + CVodeInnerStepper_Reset); + if (check_flag(&flag, "MRIStepInnerStepper_SetResetFn", 1)) return 1; + + // Attach inner stepper memory to user data + udata->stepper = *stepper; + + // ------------------------- + // Setup the slow integrator + // ------------------------- + + // Create slow integrator for diffusion and attach fast integrator + *arkode_mem = MRIStepCreate(diffusion, ZERO, y, MRISTEP_CUSTOM, + *stepper); + if (check_flag((void *)*arkode_mem, "MRIStepCreate", 0)) return 1; + + // Set method coupling table (solve-decoupled implicit method) + MRIStepCoupling C = MRIStepCoupling_LoadTable(MRI_GARK_ESDIRK34a); + if (check_flag((void*)C, "MRIStepCoupling_LoadTable", 1)) return 1; + + flag = MRIStepSetCoupling(*arkode_mem, C); + if (check_flag(&flag, "MRIStepSetCoupling", 1)) return 1; + + MRIStepCoupling_Free(C); + + // Set the slow step size + flag = MRIStepSetFixedStep(*arkode_mem, udata->h_slow); + if (check_flag(&flag, "MRIStepSetFixedStep", 1)) return 1; + + // Specify tolerances + flag = MRIStepSStolerances(*arkode_mem, udata->rtol_slow, udata->atol_slow); + if (check_flag(&flag, "MRIStepSStolerances", 1)) return 1; + + // Attach user data + flag = MRIStepSetUserData(*arkode_mem, (void *) udata); + if (check_flag(&flag, "MRIStepSetUserData", 1)) return 1; + + // Attach linear solver + flag = MRIStepSetLinearSolver(*arkode_mem, LS, NULL); + if (check_flag(&flag, "MRIStepSetLinearSolver", 1)) return 1; + + if (udata->prec) + { + // Attach 
preconditioner + flag = MRIStepSetPreconditioner(*arkode_mem, NULL, PSolve); + if (check_flag(&flag, "MRIStepSetPreconditioner", 1)) return 1; + + // Set linear solver setup frequency (update preconditioner) + flag = MRIStepSetLSetupFrequency(*arkode_mem, udata->msbp); + if (check_flag(&flag, "MRIStepSetLSetupFrequency", 1)) return 1; + } + + // Set linear solver tolerance factor + flag = MRIStepSetEpsLin(*arkode_mem, udata->epslin); + if (check_flag(&flag, "MRIStepSetEpsLin", 1)) return 1; + + // Specify linearly implicit non-time-dependent RHS + if (udata->linear) + { + flag = MRIStepSetLinear(*arkode_mem, 0); + if (check_flag(&flag, "MRIStepSetLinear", 1)) return 1; + } + + // Set max steps between outputs + flag = MRIStepSetMaxNumSteps(*arkode_mem, udata->maxsteps); + if (check_flag(&flag, "MRIStepSetMaxNumSteps", 1)) return 1; + + // Set stopping time + flag = MRIStepSetStopTime(*arkode_mem, udata->tf); + if (check_flag(&flag, "MRIStepSetStopTime", 1)) return 1; + + // Set diagnostics output file + if (udata->diagnostics && udata->outproc) + { + flag = MRIStepSetDiagnostics(*arkode_mem, udata->diagfp); + if (check_flag(&flag, "MRIStepSetDiagnostics", 1)) return 1; + } + + return 0; +} + + +// ----------------------------------------------------------------------------- +// Custom inner stepper functions +// ----------------------------------------------------------------------------- + + +static int CVodeInnerStepper_Evolve(MRIStepInnerStepper stepper, + realtype t0, realtype tout, N_Vector y) +{ + int flag; + realtype tret; + void* inner_content = NULL; + + flag = MRIStepInnerStepper_GetContent(stepper, &inner_content); + if (check_flag(&flag, "MRIStepInnerStepper_GetContent", 1)) return -1; + + InnerStepperContent *content = (InnerStepperContent*) inner_content; + + N_Vector y_vec; + if (content->local) + { + // Using local inner stepper, extract the local serial vector + y_vec = N_VGetLocalVector_MPIPlusX(y); + } + else + { + // Using global inner stepper, 
use the MPIPlusX vector + y_vec = y; + } + + flag = CVodeSetStopTime(content->cvode_mem, tout); + if (check_flag(&flag, "CVodeSetStopTime", 1)) return -1; + + flag = CVode(content->cvode_mem, tout, y_vec, &tret, CV_NORMAL); + if (flag < 0) return -1; + + return 0; +} + + +static int CVodeInnerStepper_FullRhs(MRIStepInnerStepper stepper, + realtype t, N_Vector y, N_Vector f, + int mode) +{ + int flag; + void* inner_content = NULL; + + flag = MRIStepInnerStepper_GetContent(stepper, &inner_content); + if (check_flag(&flag, "MRIStepInnerStepper_GetContent", 1)) return -1; + + InnerStepperContent *content = (InnerStepperContent*) inner_content; + UserData *udata = (UserData *) content->user_data; + + // disable forcing + int integrator = udata->integrator; + udata->integrator = 0; + + flag = reaction(t, y, f, content->user_data); + if (flag != 0) return -1; + + // enable forcing + udata->integrator = integrator; + + return 0; +} + + +static int CVodeInnerStepper_Reset(MRIStepInnerStepper stepper, + realtype tR, N_Vector yR) +{ + int flag; + void* inner_content = NULL; + + flag = MRIStepInnerStepper_GetContent(stepper, &inner_content); + if (check_flag(&flag, "MRIStepInnerStepper_GetContent", 1)) return -1; + + InnerStepperContent *content = (InnerStepperContent*) inner_content; + + N_Vector yR_vec; + if (content->local) + { + yR_vec = N_VGetLocalVector_MPIPlusX(yR); + } + else + { + yR_vec = yR; + } + + // Save current stats before reinitializing + long int nst, netf, nfe, nni, nncf; + + flag = CVodeGetNumSteps(content->cvode_mem, &nst); + if (check_flag(&flag, "CVodeGetNumSteps", 1)) return -1; + content->nst += nst; + + flag = CVodeGetNumErrTestFails(content->cvode_mem, &netf); + if (check_flag(&flag, "CVodeGetNumErrTestFails", 1)) return -1; + content->netf += netf; + + flag = CVodeGetNumRhsEvals(content->cvode_mem, &nfe); + if (check_flag(&flag, "CVodeGetNumRhsEvals", 1)) return -1; + content->nfe += nfe; + + flag = CVodeGetNumNonlinSolvIters(content->cvode_mem, 
&nni); + if (check_flag(&flag, "CVodeGetNumNonlinSolvIters", 1)) return -1; + content->nni += nni; + + flag = CVodeGetNumNonlinSolvConvFails(content->cvode_mem, &nncf); + if (check_flag(&flag, "CVodeGetNumNonlinSolvConvFails", 1)) return -1; + content->nncf += nncf; + + // Reinitialize CVODE with new state + flag = CVodeReInit(content->cvode_mem, tR, yR_vec); + if (check_flag(&flag, "CVodeReInit", 1)) return -1; + + return 0; +} + + +// ----------------------------------------------------------------------------- +// Functions called by the integrator +// ----------------------------------------------------------------------------- + + +// Routine to compute the ODE diffusion RHS function +static int diffusion(realtype t, N_Vector y, N_Vector f, void *user_data) +{ + int flag; + + // Access problem data + UserData *udata = (UserData *) user_data; + + // Start timer + udata->rhsD.start(); + + // Open exchange receives and exchange data + flag = StartExchange(y, udata); + if (check_flag(&flag, "StartExchange", 1)) return -1; + + // Constants for computing diffusion term + realtype cxu = udata->Dux / (udata->dx * udata->dx); + realtype cyu = udata->Duy / (udata->dy * udata->dy); + realtype ccu = -TWO * (cxu + cyu); + + realtype cxv = udata->Dvx / (udata->dx * udata->dx); + realtype cyv = udata->Dvy / (udata->dy * udata->dy); + realtype ccv = -TWO * (cxv + cyv); + + // Access data arrays + realtype *ydata = N_VGetArrayPointer(y); + if (check_flag((void *) ydata, "N_VGetArrayPointer", 0)) return -1; + + realtype *fdata = N_VGetArrayPointer(f); + if (check_flag((void *) fdata, "N_VGetArrayPointer", 0)) return -1; + + // Shortcuts to array indices (center, west, east, south, north) + sunindextype uc, uw, ue, us, un; + sunindextype vc, vw, ve, vs, vn; + + // Shortcuts to local number of nodes + sunindextype ny_loc = udata->ny_loc; + sunindextype nx_loc = udata->nx_loc; + + // Compute diffusion term on subdomain + for (sunindextype j = 1; j < ny_loc - 1; j++) + { + for 
(sunindextype i = 1; i < nx_loc - 1; i++) + { + uc = UIDX(i, j, nx_loc); + uw = UIDX(i-1, j, nx_loc); + ue = UIDX(i+1, j, nx_loc); + us = UIDX(i, j-1, nx_loc); + un = UIDX(i, j+1, nx_loc); + + vc = VIDX(i, j, nx_loc); + vw = VIDX(i-1, j, nx_loc); + ve = VIDX(i+1, j, nx_loc); + vs = VIDX(i, j-1, nx_loc); + vn = VIDX(i, j+1, nx_loc); + + fdata[uc] = ccu * ydata[uc] + + cxu * (ydata[uw] + ydata[ue]) + + cyu * (ydata[us] + ydata[un]); + + fdata[vc] = ccv * ydata[vc] + + cxv * (ydata[vw] + ydata[ve]) + + cyv * (ydata[vs] + ydata[vn]); + } + } + + // Wait for exchange receives and compute diffusion term on subdomain boundary + flag = EndExchange(udata); + if (check_flag(&flag, "EndExchange", 1)) return -1; + + realtype *Wdata = udata->Wrecv; + realtype *Edata = udata->Erecv; + realtype *Sdata = udata->Srecv; + realtype *Ndata = udata->Nrecv; + + sunindextype i, j; + + // ----------------------------------------------------- + // West face (updates south-west and north-west corners) + // ----------------------------------------------------- + i = 0; + + // South-West corner + j = 0; + + uc = UIDX(i, j, nx_loc); + ue = UIDX(i+1, j, nx_loc); + un = UIDX(i, j+1, nx_loc); + + vc = VIDX(i, j, nx_loc); + ve = VIDX(i+1, j, nx_loc); + vn = VIDX(i, j+1, nx_loc); + + fdata[uc] = ccu * ydata[uc] + + cxu * (Wdata[NSPECIES * j] + ydata[ue]) + + cyu * (Sdata[NSPECIES * i] + ydata[un]); + + fdata[vc] = ccv * ydata[vc] + + cxv * (Wdata[NSPECIES * j + 1] + ydata[ve]) + + cyv * (Sdata[NSPECIES * i + 1] + ydata[vn]); + + // West face interior + for (j = 1; j < ny_loc - 1; j++) + { + uc = UIDX(i, j, nx_loc); + ue = UIDX(i+1, j, nx_loc); + us = UIDX(i, j-1, nx_loc); + un = UIDX(i, j+1, nx_loc); + + vc = VIDX(i, j, nx_loc); + ve = VIDX(i+1, j, nx_loc); + vs = VIDX(i, j-1, nx_loc); + vn = VIDX(i, j+1, nx_loc); + + fdata[uc] = ccu * ydata[uc] + + cxu * (Wdata[NSPECIES * j] + ydata[ue]) + + cyu * (ydata[us] + ydata[un]); + + fdata[vc] = ccv * ydata[vc] + + cxv * (Wdata[NSPECIES * j + 1] + 
ydata[ve]) + + cyv * (ydata[vs] + ydata[vn]); + } + + // North-West corner + j = ny_loc - 1; + + uc = UIDX(i, j, nx_loc); + ue = UIDX(i+1, j, nx_loc); + us = UIDX(i, j-1, nx_loc); + + vc = VIDX(i, j, nx_loc); + ve = VIDX(i+1, j, nx_loc); + vs = VIDX(i, j-1, nx_loc); + + fdata[uc] = ccu * ydata[uc] + + cxu * (Wdata[NSPECIES * j] + ydata[ue]) + + cyu * (ydata[us] + Ndata[NSPECIES * i]); + + fdata[vc] = ccv * ydata[vc] + + cxv * (Wdata[NSPECIES * j + 1] + ydata[ve]) + + cyv * (ydata[vs] + Ndata[NSPECIES * i + 1]); + + // ----------------------------------------------------- + // East face (updates south-east and north-east corners) + // ----------------------------------------------------- + i = nx_loc - 1; + + // South-East corner + j = 0; + + uc = UIDX(i, j, nx_loc); + uw = UIDX(i-1, j, nx_loc); + un = UIDX(i, j+1, nx_loc); + + vc = VIDX(i, j, nx_loc); + vw = VIDX(i-1, j, nx_loc); + vn = VIDX(i, j+1, nx_loc); + + fdata[uc] = ccu * ydata[uc] + + cxu * (ydata[uw] + Edata[NSPECIES * j]) + + cyu * (Sdata[NSPECIES * i] + ydata[un]); + + fdata[vc] = ccv * ydata[vc] + + cxv * (ydata[vw] + Edata[NSPECIES * j + 1]) + + cyv * (Sdata[NSPECIES * i + 1] + ydata[vn]); + + // East face interior + for (j = 1; j < ny_loc - 1; j++) + { + uc = UIDX(i, j, nx_loc); + uw = UIDX(i-1, j, nx_loc); + us = UIDX(i, j-1, nx_loc); + un = UIDX(i, j+1, nx_loc); + + vc = VIDX(i, j, nx_loc); + vw = VIDX(i-1, j, nx_loc); + vs = VIDX(i, j-1, nx_loc); + vn = VIDX(i, j+1, nx_loc); + + fdata[uc] = ccu * ydata[uc] + + cxu * (ydata[uw] + Edata[NSPECIES * j]) + + cyu * (ydata[us] + ydata[un]); + + fdata[vc] = ccv * ydata[vc] + + cxv * (ydata[vw] + Edata[NSPECIES * j + 1]) + + cyv * (ydata[vs] + ydata[vn]); + } + + // North-East corner + j = ny_loc - 1; + + uc = UIDX(i, j, nx_loc); + uw = UIDX(i-1, j, nx_loc); + us = UIDX(i, j-1, nx_loc); + + vc = VIDX(i, j, nx_loc); + vw = VIDX(i-1, j, nx_loc); + vs = VIDX(i, j-1, nx_loc); + + fdata[uc] = ccu * ydata[uc] + + cxu * (ydata[uw] + Edata[NSPECIES * j]) + + cyu * 
(ydata[us] + Ndata[NSPECIES * i]); + + fdata[vc] = ccv * ydata[vc] + + cxv * (ydata[vw] + Edata[NSPECIES * j + 1]) + + cyv * (ydata[vs] + Ndata[NSPECIES * i + 1]); + + // ----------------------------- + // South face (excludes corners) + // ----------------------------- + j = 0; + + for (i = 1; i < nx_loc - 1; i++) + { + uc = UIDX(i, j, nx_loc); + uw = UIDX(i-1, j, nx_loc); + ue = UIDX(i+1, j, nx_loc); + un = UIDX(i, j+1, nx_loc); + + vc = VIDX(i, j, nx_loc); + vw = VIDX(i-1, j, nx_loc); + ve = VIDX(i+1, j, nx_loc); + vn = VIDX(i, j+1, nx_loc); + + fdata[uc] = ccu * ydata[uc] + + cxu * (ydata[uw] + ydata[ue]) + + cyu * (Sdata[NSPECIES * i] + ydata[un]); + + fdata[vc] = ccv * ydata[vc] + + cxv * (ydata[vw] + ydata[ve]) + + cyv * (Sdata[NSPECIES * i + 1] + ydata[vn]); + } + + // ----------------------------- + // North face (excludes corners) + // ----------------------------- + j = ny_loc - 1; + + for (i = 1; i < nx_loc - 1; i++) + { + uc = UIDX(i, j, nx_loc); + uw = UIDX(i-1, j, nx_loc); + ue = UIDX(i+1, j, nx_loc); + us = UIDX(i, j-1, nx_loc); + + vc = VIDX(i, j, nx_loc); + vw = VIDX(i-1, j, nx_loc); + ve = VIDX(i+1, j, nx_loc); + vs = VIDX(i, j-1, nx_loc); + + fdata[uc] = ccu * ydata[uc] + + cxu * (ydata[uw] + ydata[ue]) + + cyu * (ydata[us] + Ndata[NSPECIES * i]); + + fdata[vc] = ccv * ydata[vc] + + cxv * (ydata[vw] + ydata[ve]) + + cyv * (ydata[vs] + Ndata[NSPECIES * i + 1]); + } + + // Stop timer + udata->rhsD.stop(); + + // Return success + return 0; +} + + +// Routine to compute the ODE reaction RHS function +static int reaction(realtype t, N_Vector y, N_Vector f, void *user_data) +{ + // Access problem data + UserData *udata = (UserData *) user_data; + + // Start timer + udata->rhsR.start(); + + // Access data arrays + realtype *ydata = N_VGetArrayPointer(y); + if (check_flag((void *) ydata, "N_VGetArrayPointer", 0)) return -1; + + realtype *fdata = N_VGetArrayPointer(f); + if (check_flag((void *) fdata, "N_VGetArrayPointer", 0)) return -1; + + // Shortcuts 
to local number of nodes + sunindextype ny_loc = udata->ny_loc; + sunindextype nx_loc = udata->nx_loc; + + // Compute reaction term on the subdomain + realtype u, v; + + for (sunindextype j = 0; j < ny_loc; j++) + { + for (sunindextype i = 0; i < nx_loc; i++) + { + u = ydata[UIDX(i,j,nx_loc)]; + v = ydata[VIDX(i,j,nx_loc)]; + + fdata[UIDX(i,j,nx_loc)] = udata->A + u * u * v - (udata->B + 1) * u; + fdata[VIDX(i,j,nx_loc)] = udata->B * u - u * u * v; + } + } + + // Apply inner forcing for MRI + CVODE + if (udata->integrator > 1) + { + int flag; + + if (udata->integrator == 2) + { + // With a global inner stepper the RHS vector f and the forcing vectors + // from the outer integrator are both MPIPlusX vectors as such we can use + // a utility function to add the forcing to the RHS vector + MRIStepInnerStepper_AddForcing(udata->stepper, t, f); + } + else if (udata->integrator == 3) + { + int nforcing; + realtype tshift, tscale; + N_Vector *forcing; + + // With a local inner stepper the RHS vector f is a serial vector and the + // forcing vectors from the outer integrator are MPIPlusX vectors as such + // we need to extract the local serial vectors and apply the forcing + flag = MRIStepInnerStepper_GetForcingData(udata->stepper, + &tshift, &tscale, + &forcing, &nforcing); + if (flag != 0) return flag; + + N_Vector forcing_loc; + realtype tau = (t - tshift) / tscale; + realtype taui = ONE; + + for (int i = 0; i < nforcing; i++) + { + forcing_loc = N_VGetLocalVector_MPIPlusX(forcing[i]); + N_VLinearSum(ONE, f, taui, forcing_loc, f); + taui *= tau; + } + } + else + { + cerr << "ERROR: Invalid MRIStep + CVODE option" << endl; + return -1; + } + } + + // Stop timer + udata->rhsR.stop(); + + // Return success + return 0; +} + + +// Preconditioner solve routine for Pz = r +static int PSolve(realtype t, N_Vector u, N_Vector f, N_Vector r, + N_Vector z, realtype gamma, realtype delta, int lr, + void *user_data) +{ + // Access user_data structure + UserData *udata = (UserData *) 
user_data; + + // Start timer + udata->psolve.start(); + + // Constants for computing diffusion + realtype cxu = udata->Dux / (udata->dx * udata->dx); + realtype cyu = udata->Duy / (udata->dy * udata->dy); + realtype ccu = -TWO * (cxu + cyu); + + realtype cxv = udata->Dvx / (udata->dx * udata->dx); + realtype cyv = udata->Dvy / (udata->dy * udata->dy); + realtype ccv = -TWO * (cxv + cyv); + + // Access data arrays + realtype *rdata = N_VGetArrayPointer(r); + if (check_flag((void *) rdata, "N_VGetArrayPointer", 0)) return -1; + + realtype *zdata = N_VGetArrayPointer(z); + if (check_flag((void *) zdata, "N_VGetArrayPointer", 0)) return -1; + + // Shortcuts to local number of nodes + sunindextype ny_loc = udata->ny_loc; + sunindextype nx_loc = udata->nx_loc; + + // Set all entries of diag to the inverse diagonal values + realtype du = ONE / (ONE - gamma * ccu); + realtype dv = ONE / (ONE - gamma * ccv); + + for (sunindextype j = 0; j < ny_loc; j++) + { + for (sunindextype i = 0; i < nx_loc; i++) + { + zdata[UIDX(i, j, nx_loc)] = du * rdata[UIDX(i, j, nx_loc)]; + zdata[VIDX(i, j, nx_loc)] = dv * rdata[VIDX(i, j, nx_loc)]; + } + } + + // Stop timer + udata->psolve.stop(); + + // Return success + return 0; +} + + +// ----------------------------------------------------------------------------- +// RHS helper functions +// ----------------------------------------------------------------------------- + + +// Open exchange receives +static int StartExchange(N_Vector y, UserData *udata) +{ + int flag; + + // Start timer + udata->exchange.start(); + + // East face (from neighbor's West face) + flag = MPI_Irecv(udata->Erecv, udata->ybufcount, MPI_SUNREALTYPE, + udata->ipE, 0, udata->comm, &(udata->reqRE)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Irecv = " << flag << endl; + return -1; + } + + // West face (from neighbor's East face) + flag = MPI_Irecv(udata->Wrecv, udata->ybufcount, MPI_SUNREALTYPE, + udata->ipW, 1, udata->comm, &(udata->reqRW)); + if (flag != 
MPI_SUCCESS) + { + cerr << "Error in MPI_Irecv = " << flag << endl; + return -1; + } + + // North face (from neighbor's South face) + flag = MPI_Irecv(udata->Nrecv, udata->xbufcount, MPI_SUNREALTYPE, + udata->ipN, 2, udata->comm, &(udata->reqRN)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Irecv = " << flag << endl; + return -1; + } + + // South face (from neighbor's North face) + flag = MPI_Irecv(udata->Srecv, udata->xbufcount, MPI_SUNREALTYPE, + udata->ipS, 3, udata->comm, &(udata->reqRS)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Irecv = " << flag << endl; + return -1; + } + + // Shortcuts to local number of nodes + sunindextype ny_loc = udata->ny_loc; + sunindextype nx_loc = udata->nx_loc; + + // Access data array + realtype *ydata = N_VGetArrayPointer(y); + if (check_flag((void *) ydata, "N_VGetArrayPointer", 0)) return -1; + + // Send West face data to neighbor's East face + for (sunindextype i = 0; i < ny_loc; i++) + { + udata->Wsend[NSPECIES * i] = ydata[UIDX(0,i,nx_loc)]; + udata->Wsend[NSPECIES * i + 1] = ydata[VIDX(0,i,nx_loc)]; + } + flag = MPI_Isend(udata->Wsend, udata->ybufcount, MPI_SUNREALTYPE, + udata->ipW, 0, udata->comm, &(udata->reqSW)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Isend = " << flag << endl; + return -1; + } + + // Send East face data to neighbor's West face + for (sunindextype i = 0; i < ny_loc; i++) + { + udata->Esend[NSPECIES * i] = ydata[UIDX(nx_loc-1,i,nx_loc)]; + udata->Esend[NSPECIES * i + 1] = ydata[VIDX(nx_loc-1,i,nx_loc)]; + } + flag = MPI_Isend(udata->Esend, udata->ybufcount, MPI_SUNREALTYPE, + udata->ipE, 1, udata->comm, &(udata->reqSE)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Isend = " << flag << endl; + return -1; + } + + // Send South face data to neighbor's North face + for (sunindextype i = 0; i < nx_loc; i++) + { + udata->Ssend[NSPECIES * i] = ydata[UIDX(i,0,nx_loc)]; + udata->Ssend[NSPECIES * i + 1] = ydata[VIDX(i,0,nx_loc)]; + } + flag = 
MPI_Isend(udata->Ssend, udata->xbufcount, MPI_SUNREALTYPE, + udata->ipS, 2, udata->comm, &(udata->reqSS)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Isend = " << flag << endl; + return -1; + } + + // Send North face data to neighbor's South face + for (sunindextype i = 0; i < nx_loc; i++) + { + udata->Nsend[NSPECIES * i] = ydata[UIDX(i,ny_loc-1,nx_loc)]; + udata->Nsend[NSPECIES * i + 1] = ydata[VIDX(i,ny_loc-1,nx_loc)]; + } + flag = MPI_Isend(udata->Nsend, udata->xbufcount, MPI_SUNREALTYPE, + udata->ipN, 3, udata->comm, &(udata->reqSN)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Isend = " << flag << endl; + return -1; + } + + // Stop timer + udata->exchange.stop(); + + // Return success + return 0; +} + + +// Wait for exchange data +static int EndExchange(UserData *udata) +{ + // Local variables + int flag; + MPI_Status stat; + + // Start timer + udata->exchange.start(); + + // Wait for messages to finish + flag = MPI_Wait(&(udata->reqRW), &stat); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Wait = " << flag << endl; + return -1; + } + flag = MPI_Wait(&(udata->reqSW), &stat); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Wait = " << flag << endl; + return -1; + } + + flag = MPI_Wait(&(udata->reqRE), &stat); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Wait = " << flag << endl; + return -1; + } + flag = MPI_Wait(&(udata->reqSE), &stat); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Wait = " << flag << endl; + return -1; + } + + flag = MPI_Wait(&(udata->reqRS), &stat); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Wait = " << flag << endl; + return -1; + } + flag = MPI_Wait(&(udata->reqSS), &stat); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Wait = " << flag << endl; + return -1; + } + + flag = MPI_Wait(&(udata->reqRN), &stat); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Wait = " << flag << endl; + return -1; + } + flag = MPI_Wait(&(udata->reqSN), &stat); + if (flag != 
MPI_SUCCESS) + { + cerr << "Error in MPI_Wait = " << flag << endl; + return -1; + } + + // Stop timer + udata->exchange.stop(); + + // Return success + return 0; +} + + +// Send exchange data +static int ExchangeBC(N_Vector y, UserData *udata) +{ + int flag; + MPI_Status stat; + + // Shortcuts to local number of nodes + sunindextype ny_loc = udata->ny_loc; + sunindextype nx_loc = udata->nx_loc; + + // Post East face exchange receives + if (udata->ie == udata->nx - 1) + { + flag = MPI_Irecv(udata->Erecv, udata->ybufcount, MPI_SUNREALTYPE, + udata->ipE, MPI_ANY_TAG, udata->comm, &(udata->reqRE)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Irecv = " << flag << endl; + return -1; + } + } + + // Post North face exchange receives + if (udata->je == udata->ny - 1) + { + flag = MPI_Irecv(udata->Nrecv, udata->xbufcount, MPI_SUNREALTYPE, + udata->ipN, MPI_ANY_TAG, udata->comm, &(udata->reqRN)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Irecv = " << flag << endl; + return -1; + } + } + + // Post North-East corner exchange receive + if (udata->ie == udata->nx - 1 && udata->je == udata->ny - 1) + { + flag = MPI_Irecv(udata->NErecv, NSPECIES, MPI_SUNREALTYPE, + udata->ipNE, MPI_ANY_TAG, udata->comm, &(udata->reqRC)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Irecv = " << flag << endl; + return -1; + } + } + + realtype *ydata = N_VGetArrayPointer(y); + if (check_flag((void *) ydata, "N_VGetArrayPointer", 0)) return -1; + + // Send West face data + if (udata->is == 0) + { + for (sunindextype i = 0; i < ny_loc; i++) + { + udata->Wsend[NSPECIES * i] = ydata[UIDX(0,i,nx_loc)]; + udata->Wsend[NSPECIES * i + 1] = ydata[VIDX(0,i,nx_loc)]; + } + flag = MPI_Isend(udata->Wsend, udata->ybufcount, MPI_SUNREALTYPE, + udata->ipW, 0, udata->comm, &(udata->reqSW)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Isend = " << flag << endl; + return -1; + } + } + + // Send South face data + if (udata->js == 0) + { + for (sunindextype i = 0; i < nx_loc; 
i++) + { + udata->Ssend[NSPECIES * i] = ydata[UIDX(i,0,nx_loc)]; + udata->Ssend[NSPECIES * i + 1] = ydata[VIDX(i,0,nx_loc)]; + } + flag = MPI_Isend(udata->Ssend, udata->xbufcount, MPI_SUNREALTYPE, + udata->ipS, 2, udata->comm, &(udata->reqSS)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Isend = " << flag << endl; + return -1; + } + } + + // Send South-West corner data + if (udata->is == 0 && udata->js == 0) + { + udata->SWsend[0] = ydata[UIDX(0,0,nx_loc)]; + udata->SWsend[1] = ydata[VIDX(0,0,nx_loc)]; + flag = MPI_Isend(udata->SWsend, NSPECIES, MPI_SUNREALTYPE, + udata->ipSW, 2, udata->comm, &(udata->reqSC)); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Isend = " << flag << endl; + return -1; + } + } + + // Wait for messages to finish + if (udata->ie == udata->nx - 1) + { + flag = MPI_Wait(&(udata->reqRE), &stat); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Wait = " << flag << endl; + return -1; + } + } + + if (udata->je == udata->ny - 1) + { + flag = MPI_Wait(&(udata->reqRN), &stat); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Wait = " << flag << endl; + return -1; + } + } + + if (udata->ie == udata->nx - 1 && udata->je == udata->ny - 1) + { + flag = MPI_Wait(&(udata->reqRC), &stat); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Wait = " << flag << endl; + return -1; + } + } + + if (udata->is == 0) + { + flag = MPI_Wait(&(udata->reqSW), &stat); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Wait = " << flag << endl; + return -1; + } + } + + if (udata->js == 0) + { + flag = MPI_Wait(&(udata->reqSS), &stat); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Wait = " << flag << endl; + return -1; + } + } + + if (udata->is == 0 && udata->js == 0) + { + flag = MPI_Wait(&(udata->reqSC), &stat); + if (flag != MPI_SUCCESS) + { + cerr << "Error in MPI_Wait = " << flag << endl; + return -1; + } + } + + return(0); +} + + +// ----------------------------------------------------------------------------- +// 
UserData and input functions +// ----------------------------------------------------------------------------- + + +// Free memory allocated within Userdata +static int FreeUserData(UserData *udata) +{ + // Free exchange buffers + if (udata->Wrecv != NULL) delete[] udata->Wrecv; + if (udata->Wsend != NULL) delete[] udata->Wsend; + if (udata->Erecv != NULL) delete[] udata->Erecv; + if (udata->Esend != NULL) delete[] udata->Esend; + if (udata->Srecv != NULL) delete[] udata->Srecv; + if (udata->Ssend != NULL) delete[] udata->Ssend; + if (udata->Nrecv != NULL) delete[] udata->Nrecv; + if (udata->Nsend != NULL) delete[] udata->Nsend; + if (udata->NErecv != NULL) delete[] udata->NErecv; + if (udata->SWsend != NULL) delete[] udata->SWsend; + + // Free preconditioner data + if (udata->diag) + { + N_VDestroy(udata->diag); + udata->diag = NULL; + } + + // Free MPI Cartesian communicator + if (udata->comm != MPI_COMM_NULL) + { + MPI_Comm_free(&(udata->comm)); + udata->comm = MPI_COMM_NULL; + } + + // Return success + return 0; +} + + +// Read command line inputs +static int ReadInputs(int *argc, char ***argv, UserData *udata) +{ + // Check for input args + int arg_idx = 1; + + while (arg_idx < (*argc)) + { + string arg = (*argv)[arg_idx++]; + + // Mesh points + if (arg == "--mesh") + { + udata->nx = stoi((*argv)[arg_idx++]); + udata->ny = stoi((*argv)[arg_idx++]); + } + // MPI processes + else if (arg == "--np") + { + udata->npx = stoi((*argv)[arg_idx++]); + udata->npy = stoi((*argv)[arg_idx++]); + } + // Domain bounds + else if (arg == "--domain") + { + udata->xl = stoi((*argv)[arg_idx++]); + udata->xu = stoi((*argv)[arg_idx++]); + udata->yl = stoi((*argv)[arg_idx++]); + udata->yu = stoi((*argv)[arg_idx++]); + } + // Diffusion parameters + else if (arg == "--D") + { + udata->Dux = stod((*argv)[arg_idx++]); + udata->Duy = stod((*argv)[arg_idx++]); + udata->Dvx = stod((*argv)[arg_idx++]); + udata->Dvy = stod((*argv)[arg_idx++]); + } + // Reaction parameters + else if (arg == 
"--A") + { + udata->A = stod((*argv)[arg_idx++]); + } + else if (arg == "--B") + { + udata->B = stod((*argv)[arg_idx++]); + } + // Temporal domain settings + else if (arg == "--tf") + { + udata->tf = stod((*argv)[arg_idx++]); + } + // Integrator options + else if (arg == "--imex") + { + udata->integrator = 0; + } + else if (arg == "--mri-arkstep") + { + udata->integrator = 1; + } + else if (arg == "--mri-cvode-global") + { + udata->integrator = 2; + } + else if (arg == "--mri-cvode-local") + { + udata->integrator = 3; + } + // IMEX integrator settings + else if (arg == "--rtol_imex") + { + udata->rtol_imex = stod((*argv)[arg_idx++]); + } + else if (arg == "--atol_imex") + { + udata->atol_imex = stod((*argv)[arg_idx++]); + } + else if (arg == "--h_imex") + { + udata->h_imex = stod((*argv)[arg_idx++]); + } + else if (arg == "--order_imex") + { + udata->order_imex = stoi((*argv)[arg_idx++]); + } + // MRI integrator settings + else if (arg == "--rtol_slow") + { + udata->rtol_fast = stod((*argv)[arg_idx++]); + } + else if (arg == "--atol_slow") + { + udata->atol_fast = stod((*argv)[arg_idx++]); + } + else if (arg == "--rtol_fast") + { + udata->rtol_fast = stod((*argv)[arg_idx++]); + } + else if (arg == "--atol_fast") + { + udata->atol_fast = stod((*argv)[arg_idx++]); + } + else if (arg == "--h_slow") + { + udata->h_slow = stod((*argv)[arg_idx++]); + } + else if (arg == "--h_fast") + { + udata->h_fast = stod((*argv)[arg_idx++]); + } + // Shared IMEX and MRI settings + else if (arg == "--controller") + { + udata->controller = stoi((*argv)[arg_idx++]); + } + else if (arg == "--nonlinear") + { + udata->linear = false; + } + else if (arg == "--diagnostics") + { + udata->diagnostics = true; + } + // Linear solver settings + else if (arg == "--gmres") + { + udata->pcg = false; + } + else if (arg == "--lsinfo") + { + udata->lsinfo = true; + } + else if (arg == "--liniters") + { + udata->liniters = stoi((*argv)[arg_idx++]); + } + else if (arg == "--epslin") + { + udata->epslin = 
stod((*argv)[arg_idx++]); + } + // Preconditioner settings + else if (arg == "--noprec") + { + udata->prec = false; + } + else if (arg == "--msbp") + { + udata->msbp = stoi((*argv)[arg_idx++]); + } + // Output settings + else if (arg == "--output") + { + udata->output = stoi((*argv)[arg_idx++]); + } + else if (arg == "--nout") + { + udata->nout = stoi((*argv)[arg_idx++]); + } + else if (arg == "--maxsteps") + { + udata->maxsteps = stoi((*argv)[arg_idx++]); + } + else if (arg == "--timing") + { + udata->timing = true; + } + // Debugging + else if (arg == "--onestep") + { + udata->onestep = stoi((*argv)[arg_idx++]); + } + else if (arg == "--no_diffusion") + { + udata->diffusion = false; + } + else if (arg == "--no_reaction") + { + udata->reaction = false; + } + // Help + else if (arg == "--help") + { + if (udata->outproc) InputHelp(); + return -1; + } + // Unknown input + else + { + if (udata->outproc) + { + cerr << "ERROR: Invalid input " << arg << endl; + InputHelp(); + } + return -1; + } + } + + // Recompute total number of equations + udata->neq = NSPECIES * udata->nx * udata->ny; + + // Recompute x and y mesh spacing with periodic boundary conditions + udata->dx = (udata->xu - udata->xl) / udata->nx; + udata->dy = (udata->yu - udata->yl) / udata->ny; + + // Compute slow step size based on CFL if not set by input + if (udata->h_slow < ZERO) + { + realtype cfl_u = RCONST(0.5) / ((udata->Dux / (udata->dx * udata->dx)) + + (udata->Duy / (udata->dy * udata->dy))); + realtype cfl_v = RCONST(0.5) / ((udata->Dvx / (udata->dx * udata->dx)) + + (udata->Dvy / (udata->dy * udata->dy))); + udata->h_slow = RCONST(5.0) * min(cfl_u, cfl_v); + } + + // Return success + return 0; +} + + +// ----------------------------------------------------------------------------- +// Output and utility functions +// ----------------------------------------------------------------------------- + + +// Compute the initial condition +static int SetIC(N_Vector u, UserData *udata) +{ + realtype x, 
y, a, b; + + // Shortcuts to local number of nodes + sunindextype ny_loc = udata->ny_loc; + sunindextype nx_loc = udata->nx_loc; + + // Gaussian random number generator + default_random_engine generator; + normal_distribution dist(RCONST(0.0), RCONST(0.001)); + + realtype *data = N_VGetArrayPointer(u); + if (check_flag((void *) data, "N_VGetArrayPointer", 0)) return -1; + + for (sunindextype j = 0; j < ny_loc; j++) + { + for (sunindextype i = 0; i < nx_loc; i++) + { + x = udata->xl + (udata->is + i) * udata->dx; + y = udata->yl + (udata->js + j) * udata->dy; + + a = TWO * PI * (x - udata->xl) / (udata->xu - udata->xl); + b = TWO * PI * (y - udata->yl) / (udata->yu - udata->yl); + + data[UIDX(i,j,nx_loc)] = udata->A + RCONST(0.5) * sin(a) * sin(b); + data[VIDX(i,j,nx_loc)] = udata->B / udata->A; + } + } + + return 0; +} + + +// Print command line options +static void InputHelp() +{ + cout << endl; + cout << "Command line options:" << endl; + cout << " --mesh : number of mesh points" << endl; + cout << " --np : number of MPI processes" << endl; + cout << " --domain : domain boundaries" << endl; + cout << " --D : diffusion coefficients" << endl; + cout << " --A : species A concentration" << endl; + cout << " --B : species B concentration" << endl; + cout << " --tf