diff -Nru mariadb-5.5-5.5.39/client/mysql.cc mariadb-5.5-5.5.40/client/mysql.cc --- mariadb-5.5-5.5.39/client/mysql.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/client/mysql.cc 2014-10-08 13:19:52.000000000 +0000 @@ -89,7 +89,7 @@ #if defined(__WIN__) #include #else -#include +#include #define HAVE_READLINE #define USE_POPEN #endif @@ -1294,6 +1294,16 @@ sig_handler mysql_end(int sig) { +#ifndef _WIN32 + /* + Ingnoring SIGQUIT and SIGINT signals when cleanup process starts. + This will help in resolving the double free issues, which occures in case + the signal handler function is started in between the clean up function. + */ + signal(SIGQUIT, SIG_IGN); + signal(SIGINT, SIG_IGN); +#endif + mysql_close(&mysql); #ifdef HAVE_READLINE if (!status.batch && !quick && !opt_html && !opt_xml && diff -Nru mariadb-5.5-5.5.39/cmake/build_configurations/mysql_release.cmake mariadb-5.5-5.5.40/cmake/build_configurations/mysql_release.cmake --- mariadb-5.5-5.5.39/cmake/build_configurations/mysql_release.cmake 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/cmake/build_configurations/mysql_release.cmake 2014-10-08 13:19:52.000000000 +0000 @@ -121,14 +121,9 @@ IF(UNIX) SET(WITH_EXTRA_CHARSETS all CACHE STRING "") - IF(EXISTS "${CMAKE_SOURCE_DIR}/COPYING") - OPTION(WITH_READLINE "" ON) - ELSE() - OPTION(WITH_LIBEDIT "" ON) - ENDIF() - IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") + SET(WITH_JEMALLOC "static" CACHE STRING "") IF(NOT IGNORE_AIO_CHECK) # Ensure aio is available on Linux (required by InnoDB) diff -Nru mariadb-5.5-5.5.39/cmake/install_layout.cmake mariadb-5.5-5.5.40/cmake/install_layout.cmake --- mariadb-5.5-5.5.39/cmake/install_layout.cmake 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/cmake/install_layout.cmake 2014-10-08 13:19:51.000000000 +0000 @@ -103,16 +103,6 @@ ENDIF() # -# plugin_tests's value should not be used by imported plugins, -# just use if(INSTALL_PLUGINTESTDIR). 
-# The plugin must set its own install path for tests -# -FILE(GLOB plugin_tests - ${CMAKE_SOURCE_DIR}/plugin/*/tests - ${CMAKE_SOURCE_DIR}/internal/plugin/*/tests -) - -# # STANDALONE layout # SET(INSTALL_BINDIR_STANDALONE "bin") @@ -136,7 +126,6 @@ SET(INSTALL_SUPPORTFILESDIR_STANDALONE "support-files") # SET(INSTALL_MYSQLDATADIR_STANDALONE "data") -SET(INSTALL_PLUGINTESTDIR_STANDALONE ${plugin_tests}) SET(INSTALL_UNIX_ADDRDIR_STANDALONE "/tmp/mysql.sock") # @@ -170,7 +159,6 @@ SET(INSTALL_SUPPORTFILESDIR_RPM "share/mysql") # SET(INSTALL_MYSQLDATADIR_RPM "/var/lib/mysql") -SET(INSTALL_PLUGINTESTDIR_RPM ${plugin_tests}) SET(INSTALL_UNIX_ADDRDIR_RPM "${INSTALL_MYSQLDATADIR_RPM}/mysql.sock") @@ -199,7 +187,6 @@ SET(INSTALL_SUPPORTFILESDIR_DEB "share/mysql") # SET(INSTALL_MYSQLDATADIR_DEB "/var/lib/mysql") -SET(INSTALL_PLUGINTESTDIR_DEB ${plugin_tests}) SET(INSTALL_UNIX_ADDRDIR_DEB "/var/run/mysqld/mysqld.sock") # @@ -226,7 +213,6 @@ SET(INSTALL_SUPPORTFILESDIR_SVR4 "support-files") # SET(INSTALL_MYSQLDATADIR_SVR4 "/var/lib/mysql") -SET(INSTALL_PLUGINTESTDIR_SVR4 ${plugin_tests}) SET(INSTALL_UNIX_ADDRDIR_SVR "/tmp/mysql.sock") @@ -242,7 +228,7 @@ # will be defined as ${INSTALL_BINDIR_STANDALONE} by default if STANDALONE # layout is chosen) FOREACH(var BIN SBIN LIB MYSQLSHARE SHARE PLUGIN INCLUDE SCRIPT DOC MAN SYSCONF SYSCONF2 - INFO MYSQLTEST SQLBENCH DOCREADME SUPPORTFILES MYSQLDATA PLUGINTEST UNIX_ADDR) + INFO MYSQLTEST SQLBENCH DOCREADME SUPPORTFILES MYSQLDATA UNIX_ADDR) SET(INSTALL_${var}DIR ${INSTALL_${var}DIR_${INSTALL_LAYOUT}} CACHE STRING "${var} installation directory" ${FORCE}) MARK_AS_ADVANCED(INSTALL_${var}DIR) diff -Nru mariadb-5.5-5.5.39/cmake/jemalloc.cmake mariadb-5.5-5.5.40/cmake/jemalloc.cmake --- mariadb-5.5-5.5.39/cmake/jemalloc.cmake 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/cmake/jemalloc.cmake 2014-10-08 13:19:51.000000000 +0000 @@ -1,65 +1,34 @@ -# old cmake does not have ExternalProject file -IF(CMAKE_VERSION VERSION_LESS 
"2.8.6") - MACRO (CHECK_JEMALLOC) - ENDMACRO() - RETURN() -ENDIF() - -INCLUDE(ExternalProject) - -MACRO (USE_BUNDLED_JEMALLOC) - SET(SOURCE_DIR "${CMAKE_SOURCE_DIR}/extra/jemalloc") - SET(BINARY_DIR "${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/extra/jemalloc/build") - SET(LIBJEMALLOC "libjemalloc") - SET(JEMALLOC_CONFIGURE_OPTS "CC=${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_ARG1}" "--with-private-namespace=jemalloc_internal_" "--enable-cc-silence") - IF (CMAKE_BUILD_TYPE MATCHES "Debug" AND NOT APPLE) # see the comment in CMakeLists.txt - LIST(APPEND JEMALLOC_CONFIGURE_OPTS --enable-debug) +INCLUDE (CheckLibraryExists) + +SET(WITH_JEMALLOC auto CACHE STRING + "Build with jemalloc (possible values are 'yes', 'no', 'auto')") + +MACRO (CHECK_JEMALLOC) + # compatibility with old WITH_JEMALLOC values + IF(WITH_JEMALLOC STREQUAL "bundled") + MESSAGE(FATAL_ERROR "MariaDB no longer bundles jemalloc") ENDIF() - - IF(CMAKE_GENERATOR MATCHES "Makefiles") - SET(MAKE_COMMAND ${CMAKE_MAKE_PROGRAM}) - ELSE() # Xcode/Ninja generators - SET(MAKE_COMMAND make) + IF(WITH_JEMALLOC STREQUAL "system") + SET(WITH_JEMALLOC "yes") ENDIF() - - ExternalProject_Add(jemalloc - PREFIX extra/jemalloc - SOURCE_DIR ${SOURCE_DIR} - BINARY_DIR ${BINARY_DIR} - STAMP_DIR ${BINARY_DIR} - CONFIGURE_COMMAND "${SOURCE_DIR}/configure" ${JEMALLOC_CONFIGURE_OPTS} - BUILD_COMMAND ${MAKE_COMMAND} "build_lib_static" - INSTALL_COMMAND "" - ) - ADD_LIBRARY(libjemalloc STATIC IMPORTED) - SET_TARGET_PROPERTIES(libjemalloc PROPERTIES IMPORTED_LOCATION "${BINARY_DIR}/lib/libjemalloc_pic.a") - ADD_DEPENDENCIES(libjemalloc jemalloc) -ENDMACRO() -IF(CMAKE_SYSTEM_NAME MATCHES "Linux" OR APPLE) - # Linux and OSX are the only systems where bundled jemalloc can be built without problems, - # as they both have GNU make and jemalloc actually compiles. 
- # Also, BSDs use jemalloc as malloc already - SET(WITH_JEMALLOC_DEFAULT "yes") -ELSE() - SET(WITH_JEMALLOC_DEFAULT "no") -ENDIF() + IF(WITH_JEMALLOC STREQUAL "yes" OR WITH_JEMALLOC STREQUAL "auto" OR + WITH_JEMALLOC STREQUAL "static") -SET(WITH_JEMALLOC ${WITH_JEMALLOC_DEFAULT} CACHE STRING - "Which jemalloc to use (possible values are 'no', 'bundled', 'system', 'yes' (system if possible, otherwise bundled)") + IF(WITH_JEMALLOC STREQUAL "static") + SET(libname jemalloc_pic) + SET(CMAKE_REQUIRED_LIBRARIES pthread dl m) + ELSE() + SET(libname jemalloc) + ENDIF() + + CHECK_LIBRARY_EXISTS(${libname} malloc_stats_print "" HAVE_JEMALLOC) + SET(CMAKE_REQUIRED_LIBRARIES) -MACRO (CHECK_JEMALLOC) - IF(WITH_JEMALLOC STREQUAL "system" OR WITH_JEMALLOC STREQUAL "yes") - CHECK_LIBRARY_EXISTS(jemalloc malloc_stats_print "" HAVE_JEMALLOC) IF (HAVE_JEMALLOC) - SET(LIBJEMALLOC jemalloc) - ELSEIF (WITH_JEMALLOC STREQUAL "system") - MESSAGE(FATAL_ERROR "system jemalloc is not found") - ELSEIF (WITH_JEMALLOC STREQUAL "yes") - SET(trybundled 1) + SET(LIBJEMALLOC ${libname}) + ELSEIF (NOT WITH_JEMALLOC STREQUAL "auto") + MESSAGE(FATAL_ERROR "${libname} is not found") ENDIF() ENDIF() - IF(WITH_JEMALLOC STREQUAL "bundled" OR trybundled) - USE_BUNDLED_JEMALLOC() - ENDIF() ENDMACRO() diff -Nru mariadb-5.5-5.5.39/cmake/readline.cmake mariadb-5.5-5.5.40/cmake/readline.cmake --- mariadb-5.5-5.5.39/cmake/readline.cmake 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/cmake/readline.cmake 2014-10-08 13:19:52.000000000 +0000 @@ -116,24 +116,23 @@ MACRO (MYSQL_USE_BUNDLED_READLINE) SET(USE_NEW_READLINE_INTERFACE 1) SET(HAVE_HIST_ENTRY 0 CACHE INTERNAL "" FORCE) - SET(READLINE_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/cmd-line-utils) - SET(READLINE_LIBRARY readline) + SET(MY_READLINE_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/cmd-line-utils/readline) + SET(MY_READLINE_LIBRARY readline) ADD_SUBDIRECTORY(${CMAKE_SOURCE_DIR}/cmd-line-utils/readline) ENDMACRO() MACRO (MYSQL_FIND_SYSTEM_READLINE) - 
FIND_PATH(READLINE_INCLUDE_DIR readline/readline.h ) + FIND_PATH(READLINE_INCLUDE_DIR readline.h PATH_SUFFIXES readline) FIND_LIBRARY(READLINE_LIBRARY NAMES readline) MARK_AS_ADVANCED(READLINE_INCLUDE_DIR READLINE_LIBRARY) - SET(CMAKE_REQUIRES_LIBRARIES ${READLINE_LIBRARY} ${CURSES_LIBRARY}) - IF(READLINE_LIBRARY AND READLINE_INCLUDE_DIR) SET(CMAKE_REQUIRED_LIBRARIES ${READLINE_LIBRARY} ${CURSES_LIBRARY}) + SET(CMAKE_REQUIRED_INCLUDES ${READLINE_INCLUDE_DIR}) CHECK_CXX_SOURCE_COMPILES(" #include - #include + #include int main(int argc, char **argv) { rl_completion_func_t *func1= (rl_completion_func_t*)0; @@ -141,19 +140,9 @@ }" NEW_READLINE_INTERFACE) - CHECK_CXX_SOURCE_COMPILES(" - #include - #include - int main(int argc, char **argv) - { - HIST_ENTRY entry; - return 0; - }" - HAVE_HIST_ENTRY) - CHECK_C_SOURCE_COMPILES(" #include - #include + #include #if RL_VERSION_MAJOR > 5 #error #endif @@ -176,30 +165,27 @@ ENDIF(READLINE_V5) ENDIF(NEW_READLINE_INTERFACE) ENDIF() - SET(CMAKE_REQUIRES_LIBRARIES ) ENDMACRO() MACRO (MYSQL_FIND_SYSTEM_LIBEDIT) - - FIND_PATH(READLINE_INCLUDE_DIR readline/readline.h ) - FIND_LIBRARY(READLINE_LIBRARY NAMES readline) - MARK_AS_ADVANCED(READLINE_INCLUDE_DIR READLINE_LIBRARY) - - SET(CMAKE_REQUIRES_LIBRARIES ${READLINE_LIBRARY}) - - IF(READLINE_LIBRARY AND READLINE_INCLUDE_DIR) + FIND_PATH(LIBEDIT_INCLUDE_DIR readline.h PATH_SUFFIXES editline edit/readline) + FIND_LIBRARY(LIBEDIT_LIBRARY edit) + MARK_AS_ADVANCED(LIBEDIT_INCLUDE_DIR LIBEDIT_LIBRARY) + + IF(LIBEDIT_LIBRARY AND LIBEDIT_INCLUDE_DIR) + SET(CMAKE_REQUIRED_LIBRARIES ${LIBEDIT_LIBRARY}) + SET(CMAKE_REQUIRED_INCLUDES ${LIBEDIT_INCLUDE_DIR}) CHECK_CXX_SOURCE_COMPILES(" #include - #include + #include int main(int argc, char **argv) { - char res= *(*rl_completion_entry_function)(0,0); + int res= (*rl_completion_entry_function)(0,0); completion_matches(0,0); }" LIBEDIT_INTERFACE) SET(USE_LIBEDIT_INTERFACE ${LIBEDIT_INTERFACE}) ENDIF() - SET(CMAKE_REQUIRES_LIBRARIES) ENDMACRO() @@ 
-216,15 +202,33 @@ IF (NOT APPLE) MYSQL_FIND_SYSTEM_READLINE() ENDIF() - IF(NOT USE_NEW_READLINE_INTERFACE) + IF(USE_NEW_READLINE_INTERFACE) + SET(MY_READLINE_INCLUDE_DIR ${READLINE_INCLUDE_DIR}) + SET(MY_READLINE_LIBRARY ${READLINE_LIBRARY} ${CURSES_LIBRARY}) + ELSE() MYSQL_FIND_SYSTEM_LIBEDIT() - IF(NOT USE_LIBEDIT_INTERFACE) + IF(USE_LIBEDIT_INTERFACE) + SET(MY_READLINE_INCLUDE_DIR ${LIBEDIT_INCLUDE_DIR}) + SET(MY_READLINE_LIBRARY ${LIBEDIT_LIBRARY} ${CURSES_LIBRARY}) + ELSE() MYSQL_USE_BUNDLED_READLINE() ENDIF() ENDIF() ENDIF() - SET(MY_READLINE_INCLUDE_DIR ${READLINE_INCLUDE_DIR}) - SET(MY_READLINE_LIBRARY ${READLINE_LIBRARY} ${CURSES_LIBRARY}) + + SET(CMAKE_REQUIRED_LIBRARIES ${MY_READLINE_LIBRARY}) + SET(CMAKE_REQUIRED_INCLUDES ${MY_READLINE_INCLUDE_DIR}) + CHECK_CXX_SOURCE_COMPILES(" + #include + #include + int main(int argc, char **argv) + { + HIST_ENTRY entry; + return 0; + }" + HAVE_HIST_ENTRY) + SET(CMAKE_REQUIRED_LIBRARIES) + SET(CMAKE_REQUIRED_INCLUDES) ENDIF(NOT WIN32) ENDMACRO() diff -Nru mariadb-5.5-5.5.39/CMakeLists.txt mariadb-5.5-5.5.40/CMakeLists.txt --- mariadb-5.5-5.5.39/CMakeLists.txt 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/CMakeLists.txt 2014-10-08 13:19:53.000000000 +0000 @@ -479,8 +479,11 @@ INSTALL_DOCUMENTATION(README COPYING COPYING.LESSER EXCEPTIONS-CLIENT COMPONENT Readme) -INSTALL_DOCUMENTATION(${CMAKE_BINARY_DIR}/Docs/INFO_SRC - ${CMAKE_BINARY_DIR}/Docs/INFO_BIN) + +# MDEV-6526 these files are not installed anymore +#INSTALL_DOCUMENTATION(${CMAKE_BINARY_DIR}/Docs/INFO_SRC +# ${CMAKE_BINARY_DIR}/Docs/INFO_BIN) + IF(UNIX) INSTALL_DOCUMENTATION(Docs/INSTALL-BINARY COMPONENT Readme) ENDIF() diff -Nru mariadb-5.5-5.5.39/cmd-line-utils/readline/CMakeLists.txt mariadb-5.5-5.5.40/cmd-line-utils/readline/CMakeLists.txt --- mariadb-5.5-5.5.39/cmd-line-utils/readline/CMakeLists.txt 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/cmd-line-utils/readline/CMakeLists.txt 2014-10-08 13:19:51.000000000 +0000 @@ -13,8 
+13,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include - ${CMAKE_SOURCE_DIR}/cmd-line-utils) +INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}) ADD_DEFINITIONS(-DHAVE_CONFIG_H -DNO_KILL_INTR) diff -Nru mariadb-5.5-5.5.39/cmd-line-utils/readline/history.h mariadb-5.5-5.5.40/cmd-line-utils/readline/history.h --- mariadb-5.5-5.5.39/cmd-line-utils/readline/history.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/cmd-line-utils/readline/history.h 2014-10-08 13:19:52.000000000 +0000 @@ -32,8 +32,8 @@ # include "rlstdc.h" # include "rltypedefs.h" #else -# include -# include +# include +# include #endif #ifdef __STDC__ diff -Nru mariadb-5.5-5.5.39/cmd-line-utils/readline/keymaps.h mariadb-5.5-5.5.40/cmd-line-utils/readline/keymaps.h --- mariadb-5.5-5.5.39/cmd-line-utils/readline/keymaps.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/cmd-line-utils/readline/keymaps.h 2014-10-08 13:19:52.000000000 +0000 @@ -32,9 +32,9 @@ # include "chardefs.h" # include "rltypedefs.h" #else -# include -# include -# include +# include +# include +# include #endif /* A keymap contains one entry for each key in the ASCII set. diff -Nru mariadb-5.5-5.5.39/cmd-line-utils/readline/readline.h mariadb-5.5-5.5.40/cmd-line-utils/readline/readline.h --- mariadb-5.5-5.5.39/cmd-line-utils/readline/readline.h 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/cmd-line-utils/readline/readline.h 2014-10-08 13:19:52.000000000 +0000 @@ -33,10 +33,10 @@ # include "keymaps.h" # include "tilde.h" #else -# include -# include -# include -# include +# include +# include +# include +# include #endif /* Hex-encoded Readline version number. 
*/ diff -Nru mariadb-5.5-5.5.39/cmd-line-utils/readline/xmalloc.h mariadb-5.5-5.5.40/cmd-line-utils/readline/xmalloc.h --- mariadb-5.5-5.5.39/cmd-line-utils/readline/xmalloc.h 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/cmd-line-utils/readline/xmalloc.h 2014-10-08 13:19:52.000000000 +0000 @@ -26,7 +26,7 @@ #if defined (READLINE_LIBRARY) # include "rlstdc.h" #else -# include +# include #endif #ifndef PTR_T diff -Nru mariadb-5.5-5.5.39/config.h.cmake mariadb-5.5-5.5.40/config.h.cmake --- mariadb-5.5-5.5.39/config.h.cmake 2014-08-03 12:00:43.000000000 +0000 +++ mariadb-5.5-5.5.40/config.h.cmake 2014-10-08 13:19:51.000000000 +0000 @@ -641,4 +641,17 @@ #cmakedefine SIZEOF_TIME_T @SIZEOF_TIME_T@ #cmakedefine TIME_T_UNSIGNED @TIME_T_UNSIGNED@ +/* + stat structure (from ) is conditionally defined + to have different layout and size depending on the defined macros. + The correct macro is defined in my_config.h, which means it MUST be + included first (or at least before - so, practically, + before including any system headers). + + __GLIBC__ is defined in +*/ +#ifdef __GLIBC__ +#error MUST be included first! +#endif + #endif diff -Nru mariadb-5.5-5.5.39/debian/changelog mariadb-5.5-5.5.40/debian/changelog --- mariadb-5.5-5.5.39/debian/changelog 2014-09-01 10:19:05.000000000 +0000 +++ mariadb-5.5-5.5.40/debian/changelog 2014-11-17 22:04:27.000000000 +0000 @@ -1,3 +1,19 @@ +mariadb-5.5 (5.5.40-0ubuntu0.14.10.1) utopic-security; urgency=medium + + * SECURITY UPDATE: Update to 5.5.40 to fix security issues (LP: #1391676) + - CVE-2014-6507 + - CVE-2014-6491 + - CVE-2014-6500 + - CVE-2014-6469 + - CVE-2014-6555 + - CVE-2014-6559 + - CVE-2014-6494 + - CVE-2014-6496 + - CVE-2014-6464 + * Add bsdutils as mariadb-server dependency like upstream does in 5.5.40. 
+ + -- Otto Kekäläinen Fri, 14 Nov 2014 21:04:24 +0200 + mariadb-5.5 (5.5.39-2) unstable; urgency=low * d/control: Removed Provides: libmysqlclient-dev (Closes: #759309) diff -Nru mariadb-5.5-5.5.39/debian/control mariadb-5.5-5.5.40/debian/control --- mariadb-5.5-5.5.39/debian/control 2014-09-01 10:19:05.000000000 +0000 +++ mariadb-5.5-5.5.40/debian/control 2014-11-17 20:25:03.000000000 +0000 @@ -1,7 +1,8 @@ Source: mariadb-5.5 Section: database Priority: optional -Maintainer: Debian MySQL Maintainers +Maintainer: Ubuntu Developers +XSBC-Original-Maintainer: Debian MySQL Maintainers Uploaders: Otto Kekäläinen , Clint Byrum , James Page @@ -185,7 +186,8 @@ Suggests: mailx, mariadb-test, tinyca Recommends: libhtml-template-perl Pre-Depends: adduser (>= 3.40), debconf, mariadb-common -Depends: libdbi-perl, +Depends: bsdutils, + libdbi-perl, lsb-base (>= 3.0-10), mariadb-client-5.5 (>= ${source:Version}), mariadb-server-core-5.5 (>= ${binary:Version}), diff -Nru mariadb-5.5-5.5.39/debian/patches/20_kfreebsd_tests.diff mariadb-5.5-5.5.40/debian/patches/20_kfreebsd_tests.diff --- mariadb-5.5-5.5.39/debian/patches/20_kfreebsd_tests.diff 2014-08-11 13:21:34.000000000 +0000 +++ mariadb-5.5-5.5.40/debian/patches/20_kfreebsd_tests.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,20 +0,0 @@ -Author: Otto Kekäläinen -Debian-Bug: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=540153 -Subject: tests not getting started on kFreeBSD - As per #670722 I found that the test socket created to test - file name truncation was barfing even for the shortened form. - This patch was adapted from the similar one in MySQL 5.6 package. 
-Forwarded: no -Last-Update: 2014-08-07 ---- a/mysql-test/lib/My/Platform.pm -+++ b/mysql-test/lib/My/Platform.pm -@@ -111,6 +111,9 @@ sub check_socket_path_length { - # See Bug #45771 - return 0 if ($^O eq 'aix'); - -+ # See Debian bug #670722 - failing on kFreeBSD even after setting short path -+ return 0 if length $path < 40; -+ - require IO::Socket::UNIX; - - my $truncated= undef; diff -Nru mariadb-5.5-5.5.39/debian/patches/21_kfreebsd-peercred.diff mariadb-5.5-5.5.40/debian/patches/21_kfreebsd-peercred.diff --- mariadb-5.5-5.5.39/debian/patches/21_kfreebsd-peercred.diff 2014-09-01 10:19:05.000000000 +0000 +++ mariadb-5.5-5.5.40/debian/patches/21_kfreebsd-peercred.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,112 +0,0 @@ -Author: Sergei Golubchik -Descriptiong: Experimental patch from https://mariadb.atlassian.net/browse/MDEV-6577 - -=== modified file 'plugin/auth_socket/CMakeLists.txt' ---- a/plugin/auth_socket/CMakeLists.txt 2013-03-08 18:09:15 +0000 -+++ b/plugin/auth_socket/CMakeLists.txt 2014-08-13 14:44:30 +0000 -@@ -22,18 +22,48 @@ int main() { - getsockopt(0, SOL_SOCKET, SO_PEERCRED, &cred, 0); - }" HAVE_PEERCRED) - --IF (NOT HAVE_PEERCRED) -- # Hi, OpenBSD! -- CHECK_CXX_SOURCE_COMPILES( -- "#include -- #include -- int main() { -- struct sockpeercred cred; -- getsockopt(0, SOL_SOCKET, SO_PEERCRED, &cred, 0); -- }" HAVE_SOCKPEERCRED) -- ADD_DEFINITIONS(-Ducred=sockpeercred) -+IF (HAVE_PEERCRED) -+ ADD_DEFINITIONS(-DHAVE_PEERCRED) -+ SET(ok 1) -+ELSE() -+ -+# Hi, OpenBSD! -+CHECK_CXX_SOURCE_COMPILES( -+"#include -+#include -+int main() { -+ struct sockpeercred cred; -+ getsockopt(0, SOL_SOCKET, SO_PEERCRED, &cred, 0); -+ }" HAVE_SOCKPEERCRED) -+ -+IF (HAVE_SOCKPEERCRED) -+ ADD_DEFINITIONS(-DHAVE_SOCKPEERCRED) -+ SET(ok 1) -+ELSE() -+ -+# FreeBSD, is that you? 
-+CHECK_CXX_SOURCE_COMPILES( -+"#include -+#include -+#include -+#include -+int main() { -+ struct xucred cred; -+ getsockopt(0, 0, LOCAL_PEERCRED, &cred, 0); -+ }" HAVE_XUCRED) -+ -+IF (HAVE_XUCRED) -+ ADD_DEFINITIONS(-DHAVE_XUCRED) -+ SET(ok 1) -+ELSE() -+ -+# What else? C'mon, show your creativity, be different! -+ -+ENDIF() -+ENDIF() - ENDIF() - --IF(HAVE_PEERCRED OR HAVE_SOCKPEERCRED) -+IF(ok) - MYSQL_ADD_PLUGIN(auth_socket auth_socket.c MODULE_ONLY) - ENDIF() -+ - -=== modified file 'plugin/auth_socket/auth_socket.c' ---- a/plugin/auth_socket/auth_socket.c 2012-02-15 17:08:08 +0000 -+++ b/plugin/auth_socket/auth_socket.c 2014-08-13 14:46:42 +0000 -@@ -27,9 +27,29 @@ - #define _GNU_SOURCE 1 /* for struct ucred */ - - #include --#include --#include - #include -+#include -+#include -+#include -+ -+#ifdef HAVE_PEERCRED -+#define level SOL_SOCKET -+ -+#elif defined HAVE_SOCKPEERCRED -+#define level SOL_SOCKET -+#define ucred socketpeercred -+ -+#elif defined HAVE_XUCRED -+#include -+#include -+#define level 0 -+#define SO_PEERCRED LOCAL_PEERCRED -+#define uid cr_uid -+#define ucred xucred -+ -+#else -+#error impossible -+#endif - - /** - perform the unix socket based authentication -@@ -63,7 +83,7 @@ static int socket_auth(MYSQL_PLUGIN_VIO - return CR_ERROR; - - /* get the UID of the client process */ -- if (getsockopt(vio_info.socket, SOL_SOCKET, SO_PEERCRED, &cred, &cred_len)) -+ if (getsockopt(vio_info.socket, level, SO_PEERCRED, &cred, &cred_len)) - return CR_ERROR; - - if (cred_len != sizeof(cred)) - diff -Nru mariadb-5.5-5.5.39/debian/patches/22_hppa_invalid_operands.diff mariadb-5.5-5.5.40/debian/patches/22_hppa_invalid_operands.diff --- mariadb-5.5-5.5.39/debian/patches/22_hppa_invalid_operands.diff 2014-09-01 10:19:05.000000000 +0000 +++ mariadb-5.5-5.5.40/debian/patches/22_hppa_invalid_operands.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,16 +0,0 @@ -Author: Helge Deller -Description: Fix build error in HPPA, see 
https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=751805 - -diff --git a/storage/xtradb/os/os0stacktrace.c b/storage/xtradb/os/os0stacktrace.c -index f7fb121..18b90ea 100644 ---- a/storage/xtradb/os/os0stacktrace.c -+++ b/storage/xtradb/os/os0stacktrace.c -@@ -85,7 +85,7 @@ os_stacktrace_print( - caller_address = (void*) uc->uc_mcontext.gregs[REG_RIP] ; - #elif defined(__hppa__) - ucontext_t* uc = (ucontext_t*) ucontext; -- caller_address = (void*) uc->uc_mcontext.sc_iaoq[0] & ~0x3UL ; -+ caller_address = (void*) (uc->uc_mcontext.sc_iaoq[0] & ~0x3UL); - #elif (defined (__ppc__)) || (defined (__powerpc__)) - ucontext_t* uc = (ucontext_t*) ucontext; - caller_address = (void*) uc->uc_mcontext.regs->nip ; diff -Nru mariadb-5.5-5.5.39/debian/patches/90_spelling.diff mariadb-5.5-5.5.40/debian/patches/90_spelling.diff --- mariadb-5.5-5.5.39/debian/patches/90_spelling.diff 2014-09-01 10:19:05.000000000 +0000 +++ mariadb-5.5-5.5.40/debian/patches/90_spelling.diff 2014-11-17 20:23:22.000000000 +0000 @@ -12,17 +12,6 @@ */ #define SCRAMBLE_LENGTH 20 #define SCRAMBLE_LENGTH_323 8 ---- a/libmysql/errmsg.c -+++ b/libmysql/errmsg.c -@@ -81,7 +81,7 @@ const char *client_errors[]= - "Attempt to read a row while there is no result set associated with the statement", - "This feature is not implemented yet", - "Lost connection to MySQL server at '%s', system error: %d", -- "Statement closed indirectly because of a preceeding %s() call", -+ "Statement closed indirectly because of a preceding %s() call", - "The number of columns in the result set differs from the number of bound buffers. You must reset the statement, rebind the result set columns, and execute the statement again", - "This handle is already connected. 
Use a separate handle for each connection.", - "Authentication plugin '%s' cannot be loaded: %s", --- a/man/mysql-test-run.pl.1 +++ b/man/mysql-test-run.pl.1 @@ -332,7 +332,7 @@ is defined so that @@ -45,50 +34,6 @@ .RE .sp .RS 4 ---- a/mysql-test/extra/rpl_tests/rpl_ddl.test -+++ b/mysql-test/extra/rpl_tests/rpl_ddl.test -@@ -98,8 +98,8 @@ - # --> less switching of AUTOCOMMIT mode on master side. - # - # 4. Never use a test object, which was direct or indirect affected by a --# preceeding test sequence again. --# If one preceeding test sequence hits a (sometimes not visible, -+# preceding test sequence again. -+# If one preceding test sequence hits a (sometimes not visible, - # because the sql error code of the statement might be 0) bug - # and these rules are ignored, a following test sequence might earn ugly - # effects like failing 'sync_slave_with_master', crashes of the slave or ---- a/mysql-test/extra/rpl_tests/rpl_row_basic.test -+++ b/mysql-test/extra/rpl_tests/rpl_row_basic.test -@@ -221,7 +221,7 @@ INSERT INTO t7 VALUES (1,3), (2,6), (3,9); - SELECT * FROM t7 ORDER BY C1; - - # since bug#31552/31609 idempotency is not default any longer. In order --# the preceeding test INSERT INTO t7 to pass the mode is switched -+# the preceding test INSERT INTO t7 to pass the mode is switched - # temprorarily - set @@global.slave_exec_mode= 'IDEMPOTENT'; - -@@ -260,7 +260,7 @@ INSERT INTO t8 VALUES (1,2,3), (2,4,6), (3,6,9); - SELECT * FROM t8 ORDER BY a; - - # since bug#31552/31609 idempotency is not default any longer. In order --# the preceeding test INSERT INTO t8 to pass the mode is switched -+# the preceding test INSERT INTO t8 to pass the mode is switched - # temprorarily - set @@global.slave_exec_mode= 'IDEMPOTENT'; - ---- a/mysql-test/include/wait_until_count_sessions.inc -+++ b/mysql-test/include/wait_until_count_sessions.inc -@@ -10,7 +10,7 @@ - # 1. 
We wait for $current_sessions <= $count_sessions because in the use case - # with count_sessions.inc before and wait_until_count_sessions.inc after - # the core of the test it could happen that the disconnects of sessions --# belonging to the preceeding test are not finished. -+# belonging to the preceding test are not finished. - # sessions at test begin($count_sessions) = m + n - # sessions of the previous test which will be soon disconnected = n (n >= 0) - # sessions at test end ($current sessions, assuming the test disconnects --- a/mysql-test/mysql-test-run.pl +++ b/mysql-test/mysql-test-run.pl @@ -6489,7 +6489,7 @@ Misc options @@ -159,54 +104,6 @@ ); create table t4 ( user_id varchar(50) not null, ---- a/mysql-test/suite/funcs_1/views/func_view.inc -+++ b/mysql-test/suite/funcs_1/views/func_view.inc -@@ -282,7 +282,7 @@ INSERT INTO t1_values SET - # other interesting value - # numbers -> 0 - # strings, blobs, binaries -> not full length of used data type, "exotic" --# characters and preceeding and trailing spaces -+# characters and preceding and trailing spaces - # FIXME enum, set ?? - INSERT INTO t1_values SET - my_char_30 = ' ---äÖüß@µ*$-- ', ---- a/mysql-test/suite/funcs_1/views/views_master.inc -+++ b/mysql-test/suite/funcs_1/views/views_master.inc -@@ -545,7 +545,7 @@ let $message= Testcase 3.3.1.7 ; - # view names are accepted, at creation time, alteration time, - # and drop time. - ############################################################################### --# Note(mleich): non-qualified view name means a view name without preceeding -+# Note(mleich): non-qualified view name means a view name without preceding - # database name - --disable_warnings - DROP VIEW IF EXISTS v1 ; ---- a/mysql-test/suite/rpl/t/rpl_ddl.test -+++ b/mysql-test/suite/rpl/t/rpl_ddl.test -@@ -13,10 +13,10 @@ - # sequences start. - # - # 2. Never use a test object, which was direct or indirect affected by a --# preceeding test sequence again. -+# preceding test sequence again. 
- # Except table d1.t1 where ONLY DML is allowed. - # --# If one preceeding test sequence hits a (sometimes not good visible, -+# If one preceding test sequence hits a (sometimes not good visible, - # because the sql error code of the statement might be 0) bug - # and these rules are ignored, a following test sequence might earn ugly - # effects like failing 'sync_slave_with_master', crashes of the slave or ---- a/mysql-test/suite/rpl/t/rpl_row_basic_11bugs.test -+++ b/mysql-test/suite/rpl/t/rpl_row_basic_11bugs.test -@@ -244,7 +244,7 @@ sync_slave_with_master; - UPDATE t1 SET a = 5, b = 'slave' WHERE a = 1; - SELECT * FROM t1 ORDER BY a; - # since bug#31552/31609 idempotency is not default any longer. In --# order for the preceeding test UPDATE t1 to pass, the mode is switched -+# order for the preceding test UPDATE t1 to pass, the mode is switched - # temprorarily - set @@global.slave_exec_mode= 'IDEMPOTENT'; - --echo **** On Master **** --- a/mysql-test/t/ps.test +++ b/mysql-test/t/ps.test @@ -677,8 +677,8 @@ insert into t2 values ("1", "1", "sup", "0"), ("2", "1", "sup", "1"), @@ -253,17 +150,6 @@ */ enum row_type row_type; uint null_bits; /* NULL bits at start of record */ ---- a/sql/log_event.cc -+++ b/sql/log_event.cc -@@ -3649,7 +3649,7 @@ int Query_log_event::do_apply_event(Relay_log_info const *rli, - if ((error= rows_event_stmt_cleanup(const_cast(rli), thd))) - { - const_cast(rli)->report(ERROR_LEVEL, error, -- "Error in cleaning up after an event preceeding the commit; " -+ "Error in cleaning up after an event preceding the commit; " - "the group log file/position: %s %s", - const_cast(rli)->group_master_log_name, - llstr(const_cast(rli)->group_master_log_pos, --- a/sql/log_event.h +++ b/sql/log_event.h @@ -1860,7 +1860,7 @@ public: @@ -341,7 +227,7 @@ placeholder. 
--- a/sql/sql_select.cc +++ b/sql/sql_select.cc -@@ -20409,7 +20409,7 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, +@@ -20444,7 +20444,7 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, Item *view_ref= NULL; /* If we have found field not by its alias in select list but by its @@ -350,7 +236,7 @@ for this name (in case if we would perform lookup in all tables). */ if (resolution == RESOLVED_BEHIND_ALIAS && !order_item->fixed && -@@ -21432,7 +21432,7 @@ change_to_use_tmp_fields(THD *thd, Item **ref_pointer_array, +@@ -21467,7 +21467,7 @@ change_to_use_tmp_fields(THD *thd, Item **ref_pointer_array, We are replacing the argument of Item_func_set_user_var after its value has been read. The argument's null_value should be set by now, so we must set it explicitly for the replacement argument since the null_value @@ -361,7 +247,7 @@ List list; --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc -@@ -3816,7 +3816,7 @@ static Sys_var_mybool Sys_binlog_annotate_row_events( +@@ -3817,7 +3817,7 @@ static Sys_var_mybool Sys_binlog_annotate_row_events( #ifdef HAVE_REPLICATION static Sys_var_mybool Sys_replicate_annotate_row_events( "replicate_annotate_row_events", @@ -392,17 +278,6 @@ equals to mi_rfirst(), we must restore original state as if failing mi_rfirst() was not called. */ ---- a/storage/myisam/mi_rnext.c -+++ b/storage/myisam/mi_rnext.c -@@ -66,7 +66,7 @@ int mi_rnext(MI_INFO *info, uchar *buf, int inx) - Normally SQL layer would never request "search next" if - "search first" failed. But HANDLER may do anything. - -- As mi_rnext() without preceeding mi_rkey()/mi_rfirst() -+ As mi_rnext() without preceding mi_rkey()/mi_rfirst() - equals to mi_rfirst(), we must restore original state - as if failing mi_rfirst() was not called. 
- */ --- a/storage/ndb/include/ndbapi/NdbEventOperation.hpp +++ b/storage/ndb/include/ndbapi/NdbEventOperation.hpp @@ -63,7 +63,7 @@ class NdbEventOperationImpl; @@ -480,17 +355,6 @@ * -# Let TYPE is 'ITEM' in Table 20, FieldIdentifer is zero * -# Let TYPE is 'ITEM' in Table 20, ColumnNumber is less than one * -# FieldIdentifer is not one of the code valuess in Table 20 ---- a/storage/tokudb/ft-index/ft/ft-flusher.cc -+++ b/storage/tokudb/ft-index/ft/ft-flusher.cc -@@ -1024,7 +1024,7 @@ ft_nonleaf_split( - B->bp[targchild] = node->bp[i]; - memset(&node->bp[i], 0, sizeof(node->bp[0])); - -- // Delete a child, removing the preceeding pivot key. The child number must be > 0 -+ // Delete a child, removing the preceding pivot key. The child number must be > 0 - { - paranoid_invariant(i>0); - if (i>n_children_in_a) { --- a/storage/xtradb/fil/fil0fil.c +++ b/storage/xtradb/fil/fil0fil.c @@ -3946,7 +3946,7 @@ func_exit: diff -Nru mariadb-5.5-5.5.39/debian/patches/series mariadb-5.5-5.5.40/debian/patches/series --- mariadb-5.5-5.5.39/debian/patches/series 2014-09-01 10:19:05.000000000 +0000 +++ mariadb-5.5-5.5.40/debian/patches/series 2014-11-17 20:23:22.000000000 +0000 @@ -1,6 +1,3 @@ -20_kfreebsd_tests.diff -21_kfreebsd-peercred.diff -22_hppa_invalid_operands.diff 33_scripts__mysql_create_system_tables__no_test.diff 38_scripts__mysqld_safe.sh__signals.diff 41_scripts__mysql_install_db.sh__no_test.diff diff -Nru mariadb-5.5-5.5.39/debian/rules mariadb-5.5-5.5.40/debian/rules --- mariadb-5.5-5.5.39/debian/rules 2014-09-01 10:19:05.000000000 +0000 +++ mariadb-5.5-5.5.40/debian/rules 2014-11-17 20:23:22.000000000 +0000 @@ -16,10 +16,10 @@ TAOCRYPT_OPT="-DTAOCRYPT_DISABLE_X86ASM" endif -# Skip TokuDB if arch is not amd64 -ifneq ($(ARCH), amd64) - TOKUDB_OPT:=-DWITHOUT_TOKUDB=true -endif +# Disable TokuDB due to failing test suite +# TokuDB is disabled in Trusty and can very well +# be disabled in Utopic too +TOKUDB_OPT:=-DWITHOUT_TOKUDB=true export 
MYSQL_BUILD_CC=$(DEB_HOST_GNU_TYPE)-gcc export MYSQL_BUILD_CXX=$(DEB_HOST_GNU_TYPE)-g++ diff -Nru mariadb-5.5-5.5.39/Docs/INFO_SRC mariadb-5.5-5.5.40/Docs/INFO_SRC --- mariadb-5.5-5.5.39/Docs/INFO_SRC 2014-08-03 12:00:48.000000000 +0000 +++ mariadb-5.5-5.5.40/Docs/INFO_SRC 2014-10-08 13:19:59.000000000 +0000 @@ -1,8 +1,8 @@ -revision-id: sergii@pisem.net-20140803113854-ku86z409wqigl8s6 -date: 2014-08-03 13:38:54 +0200 -build-date: 2014-08-03 14:00:45 +0200 -revno: 4264 +revision-id: sergii@pisem.net-20141008073500-4nlkv72m59ztgtex +date: 2014-10-08 09:35:00 +0200 +build-date: 2014-10-08 15:19:54 +0200 +revno: 4321 branch-nick: 5.5 -MySQL source 5.5.39 +MySQL source 5.5.40 diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/autogen.sh mariadb-5.5-5.5.40/extra/jemalloc/autogen.sh --- mariadb-5.5-5.5.39/extra/jemalloc/autogen.sh 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/autogen.sh 1970-01-01 00:00:00.000000000 +0000 @@ -1,17 +0,0 @@ -#!/bin/sh - -for i in autoconf; do - echo "$i" - $i - if [ $? -ne 0 ]; then - echo "Error $? in $i" - exit 1 - fi -done - -echo "./configure --enable-autogen $@" -./configure --enable-autogen $@ -if [ $? -ne 0 ]; then - echo "Error $? 
in ./configure" - exit 1 -fi diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/bin/jemalloc.sh mariadb-5.5-5.5.40/extra/jemalloc/bin/jemalloc.sh --- mariadb-5.5-5.5.39/extra/jemalloc/bin/jemalloc.sh 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/bin/jemalloc.sh 1970-01-01 00:00:00.000000000 +0000 @@ -1,9 +0,0 @@ -#!/bin/sh - -prefix=/usr/local -exec_prefix=/usr/local -libdir=${exec_prefix}/lib - -LD_PRELOAD=${libdir}/libjemalloc.so.1 -export LD_PRELOAD -exec "$@" diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/bin/jemalloc.sh.in mariadb-5.5-5.5.40/extra/jemalloc/bin/jemalloc.sh.in --- mariadb-5.5-5.5.39/extra/jemalloc/bin/jemalloc.sh.in 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/bin/jemalloc.sh.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,9 +0,0 @@ -#!/bin/sh - -prefix=@prefix@ -exec_prefix=@exec_prefix@ -libdir=@libdir@ - -@LD_PRELOAD_VAR@=${libdir}/libjemalloc.@SOREV@ -export @LD_PRELOAD_VAR@ -exec "$@" diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/bin/pprof mariadb-5.5-5.5.40/extra/jemalloc/bin/pprof --- mariadb-5.5-5.5.39/extra/jemalloc/bin/pprof 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/bin/pprof 1970-01-01 00:00:00.000000000 +0000 @@ -1,5348 +0,0 @@ -#! /usr/bin/env perl - -# Copyright (c) 1998-2007, Google Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. 
nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# --- -# Program for printing the profile generated by common/profiler.cc, -# or by the heap profiler (common/debugallocation.cc) -# -# The profile contains a sequence of entries of the form: -# -# This program parses the profile, and generates user-readable -# output. -# -# Examples: -# -# % tools/pprof "program" "profile" -# Enters "interactive" mode -# -# % tools/pprof --text "program" "profile" -# Generates one line per procedure -# -# % tools/pprof --gv "program" "profile" -# Generates annotated call-graph and displays via "gv" -# -# % tools/pprof --gv --focus=Mutex "program" "profile" -# Restrict to code paths that involve an entry that matches "Mutex" -# -# % tools/pprof --gv --focus=Mutex --ignore=string "program" "profile" -# Restrict to code paths that involve an entry that matches "Mutex" -# and does not match "string" -# -# % tools/pprof --list=IBF_CheckDocid "program" "profile" -# Generates disassembly listing of all routines with at least one -# sample that match the --list= pattern. 
The listing is -# annotated with the flat and cumulative sample counts at each line. -# -# % tools/pprof --disasm=IBF_CheckDocid "program" "profile" -# Generates disassembly listing of all routines with at least one -# sample that match the --disasm= pattern. The listing is -# annotated with the flat and cumulative sample counts at each PC value. -# -# TODO: Use color to indicate files? - -use strict; -use warnings; -use Getopt::Long; - -my $PPROF_VERSION = "2.0"; - -# These are the object tools we use which can come from a -# user-specified location using --tools, from the PPROF_TOOLS -# environment variable, or from the environment. -my %obj_tool_map = ( - "objdump" => "objdump", - "nm" => "nm", - "addr2line" => "addr2line", - "c++filt" => "c++filt", - ## ConfigureObjTools may add architecture-specific entries: - #"nm_pdb" => "nm-pdb", # for reading windows (PDB-format) executables - #"addr2line_pdb" => "addr2line-pdb", # ditto - #"otool" => "otool", # equivalent of objdump on OS X -); -# NOTE: these are lists, so you can put in commandline flags if you want. 
-my @DOT = ("dot"); # leave non-absolute, since it may be in /usr/local -my @GV = ("gv"); -my @EVINCE = ("evince"); # could also be xpdf or perhaps acroread -my @KCACHEGRIND = ("kcachegrind"); -my @PS2PDF = ("ps2pdf"); -# These are used for dynamic profiles -my @URL_FETCHER = ("curl", "-s"); - -# These are the web pages that servers need to support for dynamic profiles -my $HEAP_PAGE = "/pprof/heap"; -my $PROFILE_PAGE = "/pprof/profile"; # must support cgi-param "?seconds=#" -my $PMUPROFILE_PAGE = "/pprof/pmuprofile(?:\\?.*)?"; # must support cgi-param - # ?seconds=#&event=x&period=n -my $GROWTH_PAGE = "/pprof/growth"; -my $CONTENTION_PAGE = "/pprof/contention"; -my $WALL_PAGE = "/pprof/wall(?:\\?.*)?"; # accepts options like namefilter -my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?"; -my $CENSUSPROFILE_PAGE = "/pprof/censusprofile(?:\\?.*)?"; # must support cgi-param - # "?seconds=#", - # "?tags_regexp=#" and - # "?type=#". -my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST -my $PROGRAM_NAME_PAGE = "/pprof/cmdline"; - -# These are the web pages that can be named on the command line. -# All the alternatives must begin with /. -my $PROFILES = "($HEAP_PAGE|$PROFILE_PAGE|$PMUPROFILE_PAGE|" . - "$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|" . - "$FILTEREDPROFILE_PAGE|$CENSUSPROFILE_PAGE)"; - -# default binary name -my $UNKNOWN_BINARY = "(unknown)"; - -# There is a pervasive dependency on the length (in hex characters, -# i.e., nibbles) of an address, distinguishing between 32-bit and -# 64-bit profiles. To err on the safe size, default to 64-bit here: -my $address_length = 16; - -my $dev_null = "/dev/null"; -if (! -e $dev_null && $^O =~ /MSWin/) { # $^O is the OS perl was built for - $dev_null = "nul"; -} - -# A list of paths to search for shared object files -my @prefix_list = (); - -# Special routine name that should not have any symbols. -# Used as separator to parse "addr2line -i" output. 
-my $sep_symbol = '_fini'; -my $sep_address = undef; - -##### Argument parsing ##### - -sub usage_string { - return < - is a space separated list of profile names. -pprof [options] - is a list of profile files where each file contains - the necessary symbol mappings as well as profile data (likely generated - with --raw). -pprof [options] - is a remote form. Symbols are obtained from host:port$SYMBOL_PAGE - - Each name can be: - /path/to/profile - a path to a profile file - host:port[/] - a location of a service to get profile from - - The / can be $HEAP_PAGE, $PROFILE_PAGE, /pprof/pmuprofile, - $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall, - $CENSUSPROFILE_PAGE, or /pprof/filteredprofile. - For instance: - pprof http://myserver.com:80$HEAP_PAGE - If / is omitted, the service defaults to $PROFILE_PAGE (cpu profiling). -pprof --symbols - Maps addresses to symbol names. In this mode, stdin should be a - list of library mappings, in the same format as is found in the heap- - and cpu-profile files (this loosely matches that of /proc/self/maps - on linux), followed by a list of hex addresses to map, one per line. 
- - For more help with querying remote servers, including how to add the - necessary server-side support code, see this filename (or one like it): - - /usr/doc/gperftools-$PPROF_VERSION/pprof_remote_servers.html - -Options: - --cum Sort by cumulative data - --base= Subtract from before display - --interactive Run in interactive mode (interactive "help" gives help) [default] - --seconds= Length of time for dynamic profiles [default=30 secs] - --add_lib= Read additional symbols and line info from the given library - --lib_prefix= Comma separated list of library path prefixes - -Reporting Granularity: - --addresses Report at address level - --lines Report at source line level - --functions Report at function level [default] - --files Report at source file level - -Output type: - --text Generate text report - --callgrind Generate callgrind format to stdout - --gv Generate Postscript and display - --evince Generate PDF and display - --web Generate SVG and display - --list= Generate source listing of matching routines - --disasm= Generate disassembly of matching routines - --symbols Print demangled symbol names found at given addresses - --dot Generate DOT file to stdout - --ps Generate Postcript to stdout - --pdf Generate PDF to stdout - --svg Generate SVG to stdout - --gif Generate GIF to stdout - --raw Generate symbolized pprof data (useful with remote fetch) - -Heap-Profile Options: - --inuse_space Display in-use (mega)bytes [default] - --inuse_objects Display in-use objects - --alloc_space Display allocated (mega)bytes - --alloc_objects Display allocated objects - --show_bytes Display space in bytes - --drop_negative Ignore negative differences - -Contention-profile options: - --total_delay Display total delay at each region [default] - --contentions Display number of delays at each region - --mean_delay Display mean delay at each region - -Call-graph Options: - --nodecount= Show at most so many nodes [default=80] - --nodefraction= Hide nodes below *total 
[default=.005] - --edgefraction= Hide edges below *total [default=.001] - --maxdegree= Max incoming/outgoing edges per node [default=8] - --focus= Focus on nodes matching - --ignore= Ignore nodes matching - --scale= Set GV scaling [default=0] - --heapcheck Make nodes with non-0 object counts - (i.e. direct leak generators) more visible - -Miscellaneous: - --tools=[,...] \$PATH for object tool pathnames - --test Run unit tests - --help This message - --version Version information - -Environment Variables: - PPROF_TMPDIR Profiles directory. Defaults to \$HOME/pprof - PPROF_TOOLS Prefix for object tools pathnames - -Examples: - -pprof /bin/ls ls.prof - Enters "interactive" mode -pprof --text /bin/ls ls.prof - Outputs one line per procedure -pprof --web /bin/ls ls.prof - Displays annotated call-graph in web browser -pprof --gv /bin/ls ls.prof - Displays annotated call-graph via 'gv' -pprof --gv --focus=Mutex /bin/ls ls.prof - Restricts to code paths including a .*Mutex.* entry -pprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof - Code paths including Mutex but not string -pprof --list=getdir /bin/ls ls.prof - (Per-line) annotated source listing for getdir() -pprof --disasm=getdir /bin/ls ls.prof - (Per-PC) annotated disassembly for getdir() - -pprof http://localhost:1234/ - Enters "interactive" mode -pprof --text localhost:1234 - Outputs one line per procedure for localhost:1234 -pprof --raw localhost:1234 > ./local.raw -pprof --text ./local.raw - Fetches a remote profile for later analysis and then - analyzes it in text mode. -EOF -} - -sub version_string { - return < \$main::opt_help, - "version!" => \$main::opt_version, - "cum!" => \$main::opt_cum, - "base=s" => \$main::opt_base, - "seconds=i" => \$main::opt_seconds, - "add_lib=s" => \$main::opt_lib, - "lib_prefix=s" => \$main::opt_lib_prefix, - "functions!" => \$main::opt_functions, - "lines!" => \$main::opt_lines, - "addresses!" => \$main::opt_addresses, - "files!" => \$main::opt_files, - "text!" 
=> \$main::opt_text, - "callgrind!" => \$main::opt_callgrind, - "list=s" => \$main::opt_list, - "disasm=s" => \$main::opt_disasm, - "symbols!" => \$main::opt_symbols, - "gv!" => \$main::opt_gv, - "evince!" => \$main::opt_evince, - "web!" => \$main::opt_web, - "dot!" => \$main::opt_dot, - "ps!" => \$main::opt_ps, - "pdf!" => \$main::opt_pdf, - "svg!" => \$main::opt_svg, - "gif!" => \$main::opt_gif, - "raw!" => \$main::opt_raw, - "interactive!" => \$main::opt_interactive, - "nodecount=i" => \$main::opt_nodecount, - "nodefraction=f" => \$main::opt_nodefraction, - "edgefraction=f" => \$main::opt_edgefraction, - "maxdegree=i" => \$main::opt_maxdegree, - "focus=s" => \$main::opt_focus, - "ignore=s" => \$main::opt_ignore, - "scale=i" => \$main::opt_scale, - "heapcheck" => \$main::opt_heapcheck, - "inuse_space!" => \$main::opt_inuse_space, - "inuse_objects!" => \$main::opt_inuse_objects, - "alloc_space!" => \$main::opt_alloc_space, - "alloc_objects!" => \$main::opt_alloc_objects, - "show_bytes!" => \$main::opt_show_bytes, - "drop_negative!" => \$main::opt_drop_negative, - "total_delay!" => \$main::opt_total_delay, - "contentions!" => \$main::opt_contentions, - "mean_delay!" => \$main::opt_mean_delay, - "tools=s" => \$main::opt_tools, - "test!" => \$main::opt_test, - "debug!" 
=> \$main::opt_debug, - # Undocumented flags used only by unittests: - "test_stride=i" => \$main::opt_test_stride, - ) || usage("Invalid option(s)"); - - # Deal with the standard --help and --version - if ($main::opt_help) { - print usage_string(); - exit(0); - } - - if ($main::opt_version) { - print version_string(); - exit(0); - } - - # Disassembly/listing/symbols mode requires address-level info - if ($main::opt_disasm || $main::opt_list || $main::opt_symbols) { - $main::opt_functions = 0; - $main::opt_lines = 0; - $main::opt_addresses = 1; - $main::opt_files = 0; - } - - # Check heap-profiling flags - if ($main::opt_inuse_space + - $main::opt_inuse_objects + - $main::opt_alloc_space + - $main::opt_alloc_objects > 1) { - usage("Specify at most on of --inuse/--alloc options"); - } - - # Check output granularities - my $grains = - $main::opt_functions + - $main::opt_lines + - $main::opt_addresses + - $main::opt_files + - 0; - if ($grains > 1) { - usage("Only specify one output granularity option"); - } - if ($grains == 0) { - $main::opt_functions = 1; - } - - # Check output modes - my $modes = - $main::opt_text + - $main::opt_callgrind + - ($main::opt_list eq '' ? 0 : 1) + - ($main::opt_disasm eq '' ? 0 : 1) + - ($main::opt_symbols == 0 ? 
0 : 1) + - $main::opt_gv + - $main::opt_evince + - $main::opt_web + - $main::opt_dot + - $main::opt_ps + - $main::opt_pdf + - $main::opt_svg + - $main::opt_gif + - $main::opt_raw + - $main::opt_interactive + - 0; - if ($modes > 1) { - usage("Only specify one output mode"); - } - if ($modes == 0) { - if (-t STDOUT) { # If STDOUT is a tty, activate interactive mode - $main::opt_interactive = 1; - } else { - $main::opt_text = 1; - } - } - - if ($main::opt_test) { - RunUnitTests(); - # Should not return - exit(1); - } - - # Binary name and profile arguments list - $main::prog = ""; - @main::pfile_args = (); - - # Remote profiling without a binary (using $SYMBOL_PAGE instead) - if (@ARGV > 0) { - if (IsProfileURL($ARGV[0])) { - $main::use_symbol_page = 1; - } elsif (IsSymbolizedProfileFile($ARGV[0])) { - $main::use_symbolized_profile = 1; - $main::prog = $UNKNOWN_BINARY; # will be set later from the profile file - } - } - - if ($main::use_symbol_page || $main::use_symbolized_profile) { - # We don't need a binary! - my %disabled = ('--lines' => $main::opt_lines, - '--disasm' => $main::opt_disasm); - for my $option (keys %disabled) { - usage("$option cannot be used without a binary") if $disabled{$option}; - } - # Set $main::prog later... 
- scalar(@ARGV) || usage("Did not specify profile file"); - } elsif ($main::opt_symbols) { - # --symbols needs a binary-name (to run nm on, etc) but not profiles - $main::prog = shift(@ARGV) || usage("Did not specify program"); - } else { - $main::prog = shift(@ARGV) || usage("Did not specify program"); - scalar(@ARGV) || usage("Did not specify profile file"); - } - - # Parse profile file/location arguments - foreach my $farg (@ARGV) { - if ($farg =~ m/(.*)\@([0-9]+)(|\/.*)$/ ) { - my $machine = $1; - my $num_machines = $2; - my $path = $3; - for (my $i = 0; $i < $num_machines; $i++) { - unshift(@main::pfile_args, "$i.$machine$path"); - } - } else { - unshift(@main::pfile_args, $farg); - } - } - - if ($main::use_symbol_page) { - unless (IsProfileURL($main::pfile_args[0])) { - error("The first profile should be a remote form to use $SYMBOL_PAGE\n"); - } - CheckSymbolPage(); - $main::prog = FetchProgramName(); - } elsif (!$main::use_symbolized_profile) { # may not need objtools! - ConfigureObjTools($main::prog) - } - - # Break the opt_lib_prefix into the prefix_list array - @prefix_list = split (',', $main::opt_lib_prefix); - - # Remove trailing / from the prefixes, in the list to prevent - # searching things like /my/path//lib/mylib.so - foreach (@prefix_list) { - s|/+$||; - } -} - -sub Main() { - Init(); - $main::collected_profile = undef; - @main::profile_files = (); - $main::op_time = time(); - - # Printing symbols is special and requires a lot less info that most. 
- if ($main::opt_symbols) { - PrintSymbols(*STDIN); # Get /proc/maps and symbols output from stdin - return; - } - - # Fetch all profile data - FetchDynamicProfiles(); - - # this will hold symbols that we read from the profile files - my $symbol_map = {}; - - # Read one profile, pick the last item on the list - my $data = ReadProfile($main::prog, pop(@main::profile_files)); - my $profile = $data->{profile}; - my $pcs = $data->{pcs}; - my $libs = $data->{libs}; # Info about main program and shared libraries - $symbol_map = MergeSymbols($symbol_map, $data->{symbols}); - - # Add additional profiles, if available. - if (scalar(@main::profile_files) > 0) { - foreach my $pname (@main::profile_files) { - my $data2 = ReadProfile($main::prog, $pname); - $profile = AddProfile($profile, $data2->{profile}); - $pcs = AddPcs($pcs, $data2->{pcs}); - $symbol_map = MergeSymbols($symbol_map, $data2->{symbols}); - } - } - - # Subtract base from profile, if specified - if ($main::opt_base ne '') { - my $base = ReadProfile($main::prog, $main::opt_base); - $profile = SubtractProfile($profile, $base->{profile}); - $pcs = AddPcs($pcs, $base->{pcs}); - $symbol_map = MergeSymbols($symbol_map, $base->{symbols}); - } - - # Get total data in profile - my $total = TotalProfile($profile); - - # Collect symbols - my $symbols; - if ($main::use_symbolized_profile) { - $symbols = FetchSymbols($pcs, $symbol_map); - } elsif ($main::use_symbol_page) { - $symbols = FetchSymbols($pcs); - } else { - # TODO(csilvers): $libs uses the /proc/self/maps data from profile1, - # which may differ from the data from subsequent profiles, especially - # if they were run on different machines. Use appropriate libs for - # each pc somehow. - $symbols = ExtractSymbols($libs, $pcs); - } - - # Remove uniniteresting stack items - $profile = RemoveUninterestingFrames($symbols, $profile); - - # Focus? - if ($main::opt_focus ne '') { - $profile = FocusProfile($symbols, $profile, $main::opt_focus); - } - - # Ignore? 
- if ($main::opt_ignore ne '') { - $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore); - } - - my $calls = ExtractCalls($symbols, $profile); - - # Reduce profiles to required output granularity, and also clean - # each stack trace so a given entry exists at most once. - my $reduced = ReduceProfile($symbols, $profile); - - # Get derived profiles - my $flat = FlatProfile($reduced); - my $cumulative = CumulativeProfile($reduced); - - # Print - if (!$main::opt_interactive) { - if ($main::opt_disasm) { - PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm); - } elsif ($main::opt_list) { - PrintListing($total, $libs, $flat, $cumulative, $main::opt_list, 0); - } elsif ($main::opt_text) { - # Make sure the output is empty when have nothing to report - # (only matters when --heapcheck is given but we must be - # compatible with old branches that did not pass --heapcheck always): - if ($total != 0) { - printf("Total: %s %s\n", Unparse($total), Units()); - } - PrintText($symbols, $flat, $cumulative, -1); - } elsif ($main::opt_raw) { - PrintSymbolizedProfile($symbols, $profile, $main::prog); - } elsif ($main::opt_callgrind) { - PrintCallgrind($calls); - } else { - if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { - if ($main::opt_gv) { - RunGV(TempName($main::next_tmpfile, "ps"), ""); - } elsif ($main::opt_evince) { - RunEvince(TempName($main::next_tmpfile, "pdf"), ""); - } elsif ($main::opt_web) { - my $tmp = TempName($main::next_tmpfile, "svg"); - RunWeb($tmp); - # The command we run might hand the file name off - # to an already running browser instance and then exit. - # Normally, we'd remove $tmp on exit (right now), - # but fork a child to remove $tmp a little later, so that the - # browser has time to load it first. 
- delete $main::tempnames{$tmp}; - if (fork() == 0) { - sleep 5; - unlink($tmp); - exit(0); - } - } - } else { - cleanup(); - exit(1); - } - } - } else { - InteractiveMode($profile, $symbols, $libs, $total); - } - - cleanup(); - exit(0); -} - -##### Entry Point ##### - -Main(); - -# Temporary code to detect if we're running on a Goobuntu system. -# These systems don't have the right stuff installed for the special -# Readline libraries to work, so as a temporary workaround, we default -# to using the normal stdio code, rather than the fancier readline-based -# code -sub ReadlineMightFail { - if (-e '/lib/libtermcap.so.2') { - return 0; # libtermcap exists, so readline should be okay - } else { - return 1; - } -} - -sub RunGV { - my $fname = shift; - my $bg = shift; # "" or " &" if we should run in background - if (!system(ShellEscape(@GV, "--version") . " >$dev_null 2>&1")) { - # Options using double dash are supported by this gv version. - # Also, turn on noantialias to better handle bug in gv for - # postscript files with large dimensions. - # TODO: Maybe we should not pass the --noantialias flag - # if the gv version is known to work properly without the flag. - system(ShellEscape(@GV, "--scale=$main::opt_scale", "--noantialias", $fname) - . $bg); - } else { - # Old gv version - only supports options that use single dash. - print STDERR ShellEscape(@GV, "-scale", $main::opt_scale) . "\n"; - system(ShellEscape(@GV, "-scale", "$main::opt_scale", $fname) . $bg); - } -} - -sub RunEvince { - my $fname = shift; - my $bg = shift; # "" or " &" if we should run in background - system(ShellEscape(@EVINCE, $fname) . $bg); -} - -sub RunWeb { - my $fname = shift; - print STDERR "Loading web page file:///$fname\n"; - - if (`uname` =~ /Darwin/) { - # OS X: open will use standard preference for SVG files. - system("/usr/bin/open", $fname); - return; - } - - # Some kind of Unix; try generic symlinks, then specific browsers. - # (Stop once we find one.) 
- # Works best if the browser is already running. - my @alt = ( - "/etc/alternatives/gnome-www-browser", - "/etc/alternatives/x-www-browser", - "google-chrome", - "firefox", - ); - foreach my $b (@alt) { - if (system($b, $fname) == 0) { - return; - } - } - - print STDERR "Could not load web browser.\n"; -} - -sub RunKcachegrind { - my $fname = shift; - my $bg = shift; # "" or " &" if we should run in background - print STDERR "Starting '@KCACHEGRIND " . $fname . $bg . "'\n"; - system(ShellEscape(@KCACHEGRIND, $fname) . $bg); -} - - -##### Interactive helper routines ##### - -sub InteractiveMode { - $| = 1; # Make output unbuffered for interactive mode - my ($orig_profile, $symbols, $libs, $total) = @_; - - print STDERR "Welcome to pprof! For help, type 'help'.\n"; - - # Use ReadLine if it's installed and input comes from a console. - if ( -t STDIN && - !ReadlineMightFail() && - defined(eval {require Term::ReadLine}) ) { - my $term = new Term::ReadLine 'pprof'; - while ( defined ($_ = $term->readline('(pprof) '))) { - $term->addhistory($_) if /\S/; - if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { - last; # exit when we get an interactive command to quit - } - } - } else { # don't have readline - while (1) { - print STDERR "(pprof) "; - $_ = ; - last if ! defined $_ ; - s/\r//g; # turn windows-looking lines into unix-looking lines - - # Save some flags that might be reset by InteractiveCommand() - my $save_opt_lines = $main::opt_lines; - - if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { - last; # exit when we get an interactive command to quit - } - - # Restore flags - $main::opt_lines = $save_opt_lines; - } - } -} - -# Takes two args: orig profile, and command to run. 
-# Returns 1 if we should keep going, or 0 if we were asked to quit -sub InteractiveCommand { - my($orig_profile, $symbols, $libs, $total, $command) = @_; - $_ = $command; # just to make future m//'s easier - if (!defined($_)) { - print STDERR "\n"; - return 0; - } - if (m/^\s*quit/) { - return 0; - } - if (m/^\s*help/) { - InteractiveHelpMessage(); - return 1; - } - # Clear all the mode options -- mode is controlled by "$command" - $main::opt_text = 0; - $main::opt_callgrind = 0; - $main::opt_disasm = 0; - $main::opt_list = 0; - $main::opt_gv = 0; - $main::opt_evince = 0; - $main::opt_cum = 0; - - if (m/^\s*(text|top)(\d*)\s*(.*)/) { - $main::opt_text = 1; - - my $line_limit = ($2 ne "") ? int($2) : 10; - - my $routine; - my $ignore; - ($routine, $ignore) = ParseInteractiveArgs($3); - - my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); - my $reduced = ReduceProfile($symbols, $profile); - - # Get derived profiles - my $flat = FlatProfile($reduced); - my $cumulative = CumulativeProfile($reduced); - - PrintText($symbols, $flat, $cumulative, $line_limit); - return 1; - } - if (m/^\s*callgrind\s*([^ \n]*)/) { - $main::opt_callgrind = 1; - - # Get derived profiles - my $calls = ExtractCalls($symbols, $orig_profile); - my $filename = $1; - if ( $1 eq '' ) { - $filename = TempName($main::next_tmpfile, "callgrind"); - } - PrintCallgrind($calls, $filename); - if ( $1 eq '' ) { - RunKcachegrind($filename, " & "); - $main::next_tmpfile++; - } - - return 1; - } - if (m/^\s*(web)?list\s*(.+)/) { - my $html = (defined($1) && ($1 eq "web")); - $main::opt_list = 1; - - my $routine; - my $ignore; - ($routine, $ignore) = ParseInteractiveArgs($2); - - my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); - my $reduced = ReduceProfile($symbols, $profile); - - # Get derived profiles - my $flat = FlatProfile($reduced); - my $cumulative = CumulativeProfile($reduced); - - PrintListing($total, $libs, $flat, $cumulative, $routine, $html); - 
return 1; - } - if (m/^\s*disasm\s*(.+)/) { - $main::opt_disasm = 1; - - my $routine; - my $ignore; - ($routine, $ignore) = ParseInteractiveArgs($1); - - # Process current profile to account for various settings - my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); - my $reduced = ReduceProfile($symbols, $profile); - - # Get derived profiles - my $flat = FlatProfile($reduced); - my $cumulative = CumulativeProfile($reduced); - - PrintDisassembly($libs, $flat, $cumulative, $routine); - return 1; - } - if (m/^\s*(gv|web|evince)\s*(.*)/) { - $main::opt_gv = 0; - $main::opt_evince = 0; - $main::opt_web = 0; - if ($1 eq "gv") { - $main::opt_gv = 1; - } elsif ($1 eq "evince") { - $main::opt_evince = 1; - } elsif ($1 eq "web") { - $main::opt_web = 1; - } - - my $focus; - my $ignore; - ($focus, $ignore) = ParseInteractiveArgs($2); - - # Process current profile to account for various settings - my $profile = ProcessProfile($total, $orig_profile, $symbols, - $focus, $ignore); - my $reduced = ReduceProfile($symbols, $profile); - - # Get derived profiles - my $flat = FlatProfile($reduced); - my $cumulative = CumulativeProfile($reduced); - - if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { - if ($main::opt_gv) { - RunGV(TempName($main::next_tmpfile, "ps"), " &"); - } elsif ($main::opt_evince) { - RunEvince(TempName($main::next_tmpfile, "pdf"), " &"); - } elsif ($main::opt_web) { - RunWeb(TempName($main::next_tmpfile, "svg")); - } - $main::next_tmpfile++; - } - return 1; - } - if (m/^\s*$/) { - return 1; - } - print STDERR "Unknown command: try 'help'.\n"; - return 1; -} - - -sub ProcessProfile { - my $total_count = shift; - my $orig_profile = shift; - my $symbols = shift; - my $focus = shift; - my $ignore = shift; - - # Process current profile to account for various settings - my $profile = $orig_profile; - printf("Total: %s %s\n", Unparse($total_count), Units()); - if ($focus ne '') { - $profile = FocusProfile($symbols, 
$profile, $focus); - my $focus_count = TotalProfile($profile); - printf("After focusing on '%s': %s %s of %s (%0.1f%%)\n", - $focus, - Unparse($focus_count), Units(), - Unparse($total_count), ($focus_count*100.0) / $total_count); - } - if ($ignore ne '') { - $profile = IgnoreProfile($symbols, $profile, $ignore); - my $ignore_count = TotalProfile($profile); - printf("After ignoring '%s': %s %s of %s (%0.1f%%)\n", - $ignore, - Unparse($ignore_count), Units(), - Unparse($total_count), - ($ignore_count*100.0) / $total_count); - } - - return $profile; -} - -sub InteractiveHelpMessage { - print STDERR <{$k}; - my @addrs = split(/\n/, $k); - if ($#addrs >= 0) { - my $depth = $#addrs + 1; - # int(foo / 2**32) is the only reliable way to get rid of bottom - # 32 bits on both 32- and 64-bit systems. - print pack('L*', $count & 0xFFFFFFFF, int($count / 2**32)); - print pack('L*', $depth & 0xFFFFFFFF, int($depth / 2**32)); - - foreach my $full_addr (@addrs) { - my $addr = $full_addr; - $addr =~ s/0x0*//; # strip off leading 0x, zeroes - if (length($addr) > 16) { - print STDERR "Invalid address in profile: $full_addr\n"; - next; - } - my $low_addr = substr($addr, -8); # get last 8 hex chars - my $high_addr = substr($addr, -16, 8); # get up to 8 more hex chars - print pack('L*', hex('0x' . $low_addr), hex('0x' . $high_addr)); - } - } - } -} - -# Print symbols and profile data -sub PrintSymbolizedProfile { - my $symbols = shift; - my $profile = shift; - my $prog = shift; - - $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $symbol_marker = $&; - - print '--- ', $symbol_marker, "\n"; - if (defined($prog)) { - print 'binary=', $prog, "\n"; - } - while (my ($pc, $name) = each(%{$symbols})) { - my $sep = ' '; - print '0x', $pc; - # We have a list of function names, which include the inlined - # calls. They are separated (and terminated) by --, which is - # illegal in function names. 
- for (my $j = 2; $j <= $#{$name}; $j += 3) { - print $sep, $name->[$j]; - $sep = '--'; - } - print "\n"; - } - print '---', "\n"; - - $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $profile_marker = $&; - print '--- ', $profile_marker, "\n"; - if (defined($main::collected_profile)) { - # if used with remote fetch, simply dump the collected profile to output. - open(SRC, "<$main::collected_profile"); - while () { - print $_; - } - close(SRC); - } else { - # dump a cpu-format profile to standard out - PrintProfileData($profile); - } -} - -# Print text output -sub PrintText { - my $symbols = shift; - my $flat = shift; - my $cumulative = shift; - my $line_limit = shift; - - my $total = TotalProfile($flat); - - # Which profile to sort by? - my $s = $main::opt_cum ? $cumulative : $flat; - - my $running_sum = 0; - my $lines = 0; - foreach my $k (sort { GetEntry($s, $b) <=> GetEntry($s, $a) || $a cmp $b } - keys(%{$cumulative})) { - my $f = GetEntry($flat, $k); - my $c = GetEntry($cumulative, $k); - $running_sum += $f; - - my $sym = $k; - if (exists($symbols->{$k})) { - $sym = $symbols->{$k}->[0] . " " . $symbols->{$k}->[1]; - if ($main::opt_addresses) { - $sym = $k . " " . $sym; - } - } - - if ($f != 0 || $c != 0) { - printf("%8s %6s %6s %8s %6s %s\n", - Unparse($f), - Percent($f, $total), - Percent($running_sum, $total), - Unparse($c), - Percent($c, $total), - $sym); - } - $lines++; - last if ($line_limit >= 0 && $lines >= $line_limit); - } -} - -# Callgrind format has a compression for repeated function and file -# names. You show the name the first time, and just use its number -# subsequently. This can cut down the file to about a third or a -# quarter of its uncompressed size. $key and $val are the key/value -# pair that would normally be printed by callgrind; $map is a map from -# value to number. 
-sub CompressedCGName { - my($key, $val, $map) = @_; - my $idx = $map->{$val}; - # For very short keys, providing an index hurts rather than helps. - if (length($val) <= 3) { - return "$key=$val\n"; - } elsif (defined($idx)) { - return "$key=($idx)\n"; - } else { - # scalar(keys $map) gives the number of items in the map. - $idx = scalar(keys(%{$map})) + 1; - $map->{$val} = $idx; - return "$key=($idx) $val\n"; - } -} - -# Print the call graph in a way that's suiteable for callgrind. -sub PrintCallgrind { - my $calls = shift; - my $filename; - my %filename_to_index_map; - my %fnname_to_index_map; - - if ($main::opt_interactive) { - $filename = shift; - print STDERR "Writing callgrind file to '$filename'.\n" - } else { - $filename = "&STDOUT"; - } - open(CG, ">$filename"); - printf CG ("events: Hits\n\n"); - foreach my $call ( map { $_->[0] } - sort { $a->[1] cmp $b ->[1] || - $a->[2] <=> $b->[2] } - map { /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/; - [$_, $1, $2] } - keys %$calls ) { - my $count = int($calls->{$call}); - $call =~ /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/; - my ( $caller_file, $caller_line, $caller_function, - $callee_file, $callee_line, $callee_function ) = - ( $1, $2, $3, $5, $6, $7 ); - - # TODO(csilvers): for better compression, collect all the - # caller/callee_files and functions first, before printing - # anything, and only compress those referenced more than once. 
- printf CG CompressedCGName("fl", $caller_file, \%filename_to_index_map); - printf CG CompressedCGName("fn", $caller_function, \%fnname_to_index_map); - if (defined $6) { - printf CG CompressedCGName("cfl", $callee_file, \%filename_to_index_map); - printf CG CompressedCGName("cfn", $callee_function, \%fnname_to_index_map); - printf CG ("calls=$count $callee_line\n"); - } - printf CG ("$caller_line $count\n\n"); - } -} - -# Print disassembly for all all routines that match $main::opt_disasm -sub PrintDisassembly { - my $libs = shift; - my $flat = shift; - my $cumulative = shift; - my $disasm_opts = shift; - - my $total = TotalProfile($flat); - - foreach my $lib (@{$libs}) { - my $symbol_table = GetProcedureBoundaries($lib->[0], $disasm_opts); - my $offset = AddressSub($lib->[1], $lib->[3]); - foreach my $routine (sort ByName keys(%{$symbol_table})) { - my $start_addr = $symbol_table->{$routine}->[0]; - my $end_addr = $symbol_table->{$routine}->[1]; - # See if there are any samples in this routine - my $length = hex(AddressSub($end_addr, $start_addr)); - my $addr = AddressAdd($start_addr, $offset); - for (my $i = 0; $i < $length; $i++) { - if (defined($cumulative->{$addr})) { - PrintDisassembledFunction($lib->[0], $offset, - $routine, $flat, $cumulative, - $start_addr, $end_addr, $total); - last; - } - $addr = AddressInc($addr); - } - } - } -} - -# Return reference to array of tuples of the form: -# [start_address, filename, linenumber, instruction, limit_address] -# E.g., -# ["0x806c43d", "/foo/bar.cc", 131, "ret", "0x806c440"] -sub Disassemble { - my $prog = shift; - my $offset = shift; - my $start_addr = shift; - my $end_addr = shift; - - my $objdump = $obj_tool_map{"objdump"}; - my $cmd = ShellEscape($objdump, "-C", "-d", "-l", "--no-show-raw-insn", - "--start-address=0x$start_addr", - "--stop-address=0x$end_addr", $prog); - open(OBJDUMP, "$cmd |") || error("$cmd: $!\n"); - my @result = (); - my $filename = ""; - my $linenumber = -1; - my $last = ["", "", "", 
""]; - while () { - s/\r//g; # turn windows-looking lines into unix-looking lines - chop; - if (m|\s*([^:\s]+):(\d+)\s*$|) { - # Location line of the form: - # : - $filename = $1; - $linenumber = $2; - } elsif (m/^ +([0-9a-f]+):\s*(.*)/) { - # Disassembly line -- zero-extend address to full length - my $addr = HexExtend($1); - my $k = AddressAdd($addr, $offset); - $last->[4] = $k; # Store ending address for previous instruction - $last = [$k, $filename, $linenumber, $2, $end_addr]; - push(@result, $last); - } - } - close(OBJDUMP); - return @result; -} - -# The input file should contain lines of the form /proc/maps-like -# output (same format as expected from the profiles) or that looks -# like hex addresses (like "0xDEADBEEF"). We will parse all -# /proc/maps output, and for all the hex addresses, we will output -# "short" symbol names, one per line, in the same order as the input. -sub PrintSymbols { - my $maps_and_symbols_file = shift; - - # ParseLibraries expects pcs to be in a set. Fine by us... - my @pclist = (); # pcs in sorted order - my $pcs = {}; - my $map = ""; - foreach my $line (<$maps_and_symbols_file>) { - $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines - if ($line =~ /\b(0x[0-9a-f]+)\b/i) { - push(@pclist, HexExtend($1)); - $pcs->{$pclist[-1]} = 1; - } else { - $map .= $line; - } - } - - my $libs = ParseLibraries($main::prog, $map, $pcs); - my $symbols = ExtractSymbols($libs, $pcs); - - foreach my $pc (@pclist) { - # ->[0] is the shortname, ->[2] is the full name - print(($symbols->{$pc}->[0] || "??") . 
"\n"); - } -} - - -# For sorting functions by name -sub ByName { - return ShortFunctionName($a) cmp ShortFunctionName($b); -} - -# Print source-listing for all all routines that match $list_opts -sub PrintListing { - my $total = shift; - my $libs = shift; - my $flat = shift; - my $cumulative = shift; - my $list_opts = shift; - my $html = shift; - - my $output = \*STDOUT; - my $fname = ""; - - if ($html) { - # Arrange to write the output to a temporary file - $fname = TempName($main::next_tmpfile, "html"); - $main::next_tmpfile++; - if (!open(TEMP, ">$fname")) { - print STDERR "$fname: $!\n"; - return; - } - $output = \*TEMP; - print $output HtmlListingHeader(); - printf $output ("
%s
Total: %s %s
\n", - $main::prog, Unparse($total), Units()); - } - - my $listed = 0; - foreach my $lib (@{$libs}) { - my $symbol_table = GetProcedureBoundaries($lib->[0], $list_opts); - my $offset = AddressSub($lib->[1], $lib->[3]); - foreach my $routine (sort ByName keys(%{$symbol_table})) { - # Print if there are any samples in this routine - my $start_addr = $symbol_table->{$routine}->[0]; - my $end_addr = $symbol_table->{$routine}->[1]; - my $length = hex(AddressSub($end_addr, $start_addr)); - my $addr = AddressAdd($start_addr, $offset); - for (my $i = 0; $i < $length; $i++) { - if (defined($cumulative->{$addr})) { - $listed += PrintSource( - $lib->[0], $offset, - $routine, $flat, $cumulative, - $start_addr, $end_addr, - $html, - $output); - last; - } - $addr = AddressInc($addr); - } - } - } - - if ($html) { - if ($listed > 0) { - print $output HtmlListingFooter(); - close($output); - RunWeb($fname); - } else { - close($output); - unlink($fname); - } - } -} - -sub HtmlListingHeader { - return <<'EOF'; - - - -Pprof listing - - - - -EOF -} - -sub HtmlListingFooter { - return <<'EOF'; - - -EOF -} - -sub HtmlEscape { - my $text = shift; - $text =~ s/&/&/g; - $text =~ s//>/g; - return $text; -} - -# Returns the indentation of the line, if it has any non-whitespace -# characters. Otherwise, returns -1. -sub Indentation { - my $line = shift; - if (m/^(\s*)\S/) { - return length($1); - } else { - return -1; - } -} - -# If the symbol table contains inlining info, Disassemble() may tag an -# instruction with a location inside an inlined function. But for -# source listings, we prefer to use the location in the function we -# are listing. So use MapToSymbols() to fetch full location -# information for each instruction and then pick out the first -# location from a location list (location list contains callers before -# callees in case of inlining). 
-# -# After this routine has run, each entry in $instructions contains: -# [0] start address -# [1] filename for function we are listing -# [2] line number for function we are listing -# [3] disassembly -# [4] limit address -# [5] most specific filename (may be different from [1] due to inlining) -# [6] most specific line number (may be different from [2] due to inlining) -sub GetTopLevelLineNumbers { - my ($lib, $offset, $instructions) = @_; - my $pcs = []; - for (my $i = 0; $i <= $#{$instructions}; $i++) { - push(@{$pcs}, $instructions->[$i]->[0]); - } - my $symbols = {}; - MapToSymbols($lib, $offset, $pcs, $symbols); - for (my $i = 0; $i <= $#{$instructions}; $i++) { - my $e = $instructions->[$i]; - push(@{$e}, $e->[1]); - push(@{$e}, $e->[2]); - my $addr = $e->[0]; - my $sym = $symbols->{$addr}; - if (defined($sym)) { - if ($#{$sym} >= 2 && $sym->[1] =~ m/^(.*):(\d+)$/) { - $e->[1] = $1; # File name - $e->[2] = $2; # Line number - } - } - } -} - -# Print source-listing for one routine -sub PrintSource { - my $prog = shift; - my $offset = shift; - my $routine = shift; - my $flat = shift; - my $cumulative = shift; - my $start_addr = shift; - my $end_addr = shift; - my $html = shift; - my $output = shift; - - # Disassemble all instructions (just to get line numbers) - my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); - GetTopLevelLineNumbers($prog, $offset, \@instructions); - - # Hack 1: assume that the first source file encountered in the - # disassembly contains the routine - my $filename = undef; - for (my $i = 0; $i <= $#instructions; $i++) { - if ($instructions[$i]->[2] >= 0) { - $filename = $instructions[$i]->[1]; - last; - } - } - if (!defined($filename)) { - print STDERR "no filename found in $routine\n"; - return 0; - } - - # Hack 2: assume that the largest line number from $filename is the - # end of the procedure. 
This is typically safe since if P1 contains - # an inlined call to P2, then P2 usually occurs earlier in the - # source file. If this does not work, we might have to compute a - # density profile or just print all regions we find. - my $lastline = 0; - for (my $i = 0; $i <= $#instructions; $i++) { - my $f = $instructions[$i]->[1]; - my $l = $instructions[$i]->[2]; - if (($f eq $filename) && ($l > $lastline)) { - $lastline = $l; - } - } - - # Hack 3: assume the first source location from "filename" is the start of - # the source code. - my $firstline = 1; - for (my $i = 0; $i <= $#instructions; $i++) { - if ($instructions[$i]->[1] eq $filename) { - $firstline = $instructions[$i]->[2]; - last; - } - } - - # Hack 4: Extend last line forward until its indentation is less than - # the indentation we saw on $firstline - my $oldlastline = $lastline; - { - if (!open(FILE, "<$filename")) { - print STDERR "$filename: $!\n"; - return 0; - } - my $l = 0; - my $first_indentation = -1; - while () { - s/\r//g; # turn windows-looking lines into unix-looking lines - $l++; - my $indent = Indentation($_); - if ($l >= $firstline) { - if ($first_indentation < 0 && $indent >= 0) { - $first_indentation = $indent; - last if ($first_indentation == 0); - } - } - if ($l >= $lastline && $indent >= 0) { - if ($indent >= $first_indentation) { - $lastline = $l+1; - } else { - last; - } - } - } - close(FILE); - } - - # Assign all samples to the range $firstline,$lastline, - # Hack 4: If an instruction does not occur in the range, its samples - # are moved to the next instruction that occurs in the range. 
- my $samples1 = {}; # Map from line number to flat count - my $samples2 = {}; # Map from line number to cumulative count - my $running1 = 0; # Unassigned flat counts - my $running2 = 0; # Unassigned cumulative counts - my $total1 = 0; # Total flat counts - my $total2 = 0; # Total cumulative counts - my %disasm = (); # Map from line number to disassembly - my $running_disasm = ""; # Unassigned disassembly - my $skip_marker = "---\n"; - if ($html) { - $skip_marker = ""; - for (my $l = $firstline; $l <= $lastline; $l++) { - $disasm{$l} = ""; - } - } - my $last_dis_filename = ''; - my $last_dis_linenum = -1; - my $last_touched_line = -1; # To detect gaps in disassembly for a line - foreach my $e (@instructions) { - # Add up counts for all address that fall inside this instruction - my $c1 = 0; - my $c2 = 0; - for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { - $c1 += GetEntry($flat, $a); - $c2 += GetEntry($cumulative, $a); - } - - if ($html) { - my $dis = sprintf(" %6s %6s \t\t%8s: %s ", - HtmlPrintNumber($c1), - HtmlPrintNumber($c2), - UnparseAddress($offset, $e->[0]), - CleanDisassembly($e->[3])); - - # Append the most specific source line associated with this instruction - if (length($dis) < 80) { $dis .= (' ' x (80 - length($dis))) }; - $dis = HtmlEscape($dis); - my $f = $e->[5]; - my $l = $e->[6]; - if ($f ne $last_dis_filename) { - $dis .= sprintf("%s:%d", - HtmlEscape(CleanFileName($f)), $l); - } elsif ($l ne $last_dis_linenum) { - # De-emphasize the unchanged file name portion - $dis .= sprintf("%s" . 
- ":%d", - HtmlEscape(CleanFileName($f)), $l); - } else { - # De-emphasize the entire location - $dis .= sprintf("%s:%d", - HtmlEscape(CleanFileName($f)), $l); - } - $last_dis_filename = $f; - $last_dis_linenum = $l; - $running_disasm .= $dis; - $running_disasm .= "\n"; - } - - $running1 += $c1; - $running2 += $c2; - $total1 += $c1; - $total2 += $c2; - my $file = $e->[1]; - my $line = $e->[2]; - if (($file eq $filename) && - ($line >= $firstline) && - ($line <= $lastline)) { - # Assign all accumulated samples to this line - AddEntry($samples1, $line, $running1); - AddEntry($samples2, $line, $running2); - $running1 = 0; - $running2 = 0; - if ($html) { - if ($line != $last_touched_line && $disasm{$line} ne '') { - $disasm{$line} .= "\n"; - } - $disasm{$line} .= $running_disasm; - $running_disasm = ''; - $last_touched_line = $line; - } - } - } - - # Assign any leftover samples to $lastline - AddEntry($samples1, $lastline, $running1); - AddEntry($samples2, $lastline, $running2); - if ($html) { - if ($lastline != $last_touched_line && $disasm{$lastline} ne '') { - $disasm{$lastline} .= "\n"; - } - $disasm{$lastline} .= $running_disasm; - } - - if ($html) { - printf $output ( - "

%s

%s\n
\n" .
-      "Total:%6s %6s (flat / cumulative %s)\n",
-      HtmlEscape(ShortFunctionName($routine)),
-      HtmlEscape(CleanFileName($filename)),
-      Unparse($total1),
-      Unparse($total2),
-      Units());
-  } else {
-    printf $output (
-      "ROUTINE ====================== %s in %s\n" .
-      "%6s %6s Total %s (flat / cumulative)\n",
-      ShortFunctionName($routine),
-      CleanFileName($filename),
-      Unparse($total1),
-      Unparse($total2),
-      Units());
-  }
-  if (!open(FILE, "<$filename")) {
-    print STDERR "$filename: $!\n";
-    return 0;
-  }
-  my $l = 0;
-  while () {
-    s/\r//g;         # turn windows-looking lines into unix-looking lines
-    $l++;
-    if ($l >= $firstline - 5 &&
-        (($l <= $oldlastline + 5) || ($l <= $lastline))) {
-      chop;
-      my $text = $_;
-      if ($l == $firstline) { print $output $skip_marker; }
-      my $n1 = GetEntry($samples1, $l);
-      my $n2 = GetEntry($samples2, $l);
-      if ($html) {
-        # Emit a span that has one of the following classes:
-        #    livesrc -- has samples
-        #    deadsrc -- has disassembly, but with no samples
-        #    nop     -- has no matching disasembly
-        # Also emit an optional span containing disassembly.
-        my $dis = $disasm{$l};
-        my $asm = "";
-        if (defined($dis) && $dis ne '') {
-          $asm = "" . $dis . "";
-        }
-        my $source_class = (($n1 + $n2 > 0) 
-                            ? "livesrc" 
-                            : (($asm ne "") ? "deadsrc" : "nop"));
-        printf $output (
-          "%5d " .
-          "%6s %6s %s%s\n",
-          $l, $source_class,
-          HtmlPrintNumber($n1),
-          HtmlPrintNumber($n2),
-          HtmlEscape($text),
-          $asm);
-      } else {
-        printf $output(
-          "%6s %6s %4d: %s\n",
-          UnparseAlt($n1),
-          UnparseAlt($n2),
-          $l,
-          $text);
-      }
-      if ($l == $lastline)  { print $output $skip_marker; }
-    };
-  }
-  close(FILE);
-  if ($html) {
-    print $output "
\n"; - } - return 1; -} - -# Return the source line for the specified file/linenumber. -# Returns undef if not found. -sub SourceLine { - my $file = shift; - my $line = shift; - - # Look in cache - if (!defined($main::source_cache{$file})) { - if (100 < scalar keys(%main::source_cache)) { - # Clear the cache when it gets too big - $main::source_cache = (); - } - - # Read all lines from the file - if (!open(FILE, "<$file")) { - print STDERR "$file: $!\n"; - $main::source_cache{$file} = []; # Cache the negative result - return undef; - } - my $lines = []; - push(@{$lines}, ""); # So we can use 1-based line numbers as indices - while () { - push(@{$lines}, $_); - } - close(FILE); - - # Save the lines in the cache - $main::source_cache{$file} = $lines; - } - - my $lines = $main::source_cache{$file}; - if (($line < 0) || ($line > $#{$lines})) { - return undef; - } else { - return $lines->[$line]; - } -} - -# Print disassembly for one routine with interspersed source if available -sub PrintDisassembledFunction { - my $prog = shift; - my $offset = shift; - my $routine = shift; - my $flat = shift; - my $cumulative = shift; - my $start_addr = shift; - my $end_addr = shift; - my $total = shift; - - # Disassemble all instructions - my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); - - # Make array of counts per instruction - my @flat_count = (); - my @cum_count = (); - my $flat_total = 0; - my $cum_total = 0; - foreach my $e (@instructions) { - # Add up counts for all address that fall inside this instruction - my $c1 = 0; - my $c2 = 0; - for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { - $c1 += GetEntry($flat, $a); - $c2 += GetEntry($cumulative, $a); - } - push(@flat_count, $c1); - push(@cum_count, $c2); - $flat_total += $c1; - $cum_total += $c2; - } - - # Print header with total counts - printf("ROUTINE ====================== %s\n" . 
- "%6s %6s %s (flat, cumulative) %.1f%% of total\n", - ShortFunctionName($routine), - Unparse($flat_total), - Unparse($cum_total), - Units(), - ($cum_total * 100.0) / $total); - - # Process instructions in order - my $current_file = ""; - for (my $i = 0; $i <= $#instructions; ) { - my $e = $instructions[$i]; - - # Print the new file name whenever we switch files - if ($e->[1] ne $current_file) { - $current_file = $e->[1]; - my $fname = $current_file; - $fname =~ s|^\./||; # Trim leading "./" - - # Shorten long file names - if (length($fname) >= 58) { - $fname = "..." . substr($fname, -55); - } - printf("-------------------- %s\n", $fname); - } - - # TODO: Compute range of lines to print together to deal with - # small reorderings. - my $first_line = $e->[2]; - my $last_line = $first_line; - my %flat_sum = (); - my %cum_sum = (); - for (my $l = $first_line; $l <= $last_line; $l++) { - $flat_sum{$l} = 0; - $cum_sum{$l} = 0; - } - - # Find run of instructions for this range of source lines - my $first_inst = $i; - while (($i <= $#instructions) && - ($instructions[$i]->[2] >= $first_line) && - ($instructions[$i]->[2] <= $last_line)) { - $e = $instructions[$i]; - $flat_sum{$e->[2]} += $flat_count[$i]; - $cum_sum{$e->[2]} += $cum_count[$i]; - $i++; - } - my $last_inst = $i - 1; - - # Print source lines - for (my $l = $first_line; $l <= $last_line; $l++) { - my $line = SourceLine($current_file, $l); - if (!defined($line)) { - $line = "?\n"; - next; - } else { - $line =~ s/^\s+//; - } - printf("%6s %6s %5d: %s", - UnparseAlt($flat_sum{$l}), - UnparseAlt($cum_sum{$l}), - $l, - $line); - } - - # Print disassembly - for (my $x = $first_inst; $x <= $last_inst; $x++) { - my $e = $instructions[$x]; - printf("%6s %6s %8s: %6s\n", - UnparseAlt($flat_count[$x]), - UnparseAlt($cum_count[$x]), - UnparseAddress($offset, $e->[0]), - CleanDisassembly($e->[3])); - } - } -} - -# Print DOT graph -sub PrintDot { - my $prog = shift; - my $symbols = shift; - my $raw = shift; - my $flat = 
shift; - my $cumulative = shift; - my $overall_total = shift; - - # Get total - my $local_total = TotalProfile($flat); - my $nodelimit = int($main::opt_nodefraction * $local_total); - my $edgelimit = int($main::opt_edgefraction * $local_total); - my $nodecount = $main::opt_nodecount; - - # Find nodes to include - my @list = (sort { abs(GetEntry($cumulative, $b)) <=> - abs(GetEntry($cumulative, $a)) - || $a cmp $b } - keys(%{$cumulative})); - my $last = $nodecount - 1; - if ($last > $#list) { - $last = $#list; - } - while (($last >= 0) && - (abs(GetEntry($cumulative, $list[$last])) <= $nodelimit)) { - $last--; - } - if ($last < 0) { - print STDERR "No nodes to print\n"; - return 0; - } - - if ($nodelimit > 0 || $edgelimit > 0) { - printf STDERR ("Dropping nodes with <= %s %s; edges with <= %s abs(%s)\n", - Unparse($nodelimit), Units(), - Unparse($edgelimit), Units()); - } - - # Open DOT output file - my $output; - my $escaped_dot = ShellEscape(@DOT); - my $escaped_ps2pdf = ShellEscape(@PS2PDF); - if ($main::opt_gv) { - my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "ps")); - $output = "| $escaped_dot -Tps2 >$escaped_outfile"; - } elsif ($main::opt_evince) { - my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "pdf")); - $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - $escaped_outfile"; - } elsif ($main::opt_ps) { - $output = "| $escaped_dot -Tps2"; - } elsif ($main::opt_pdf) { - $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - -"; - } elsif ($main::opt_web || $main::opt_svg) { - # We need to post-process the SVG, so write to a temporary file always. 
- my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "svg")); - $output = "| $escaped_dot -Tsvg >$escaped_outfile"; - } elsif ($main::opt_gif) { - $output = "| $escaped_dot -Tgif"; - } else { - $output = ">&STDOUT"; - } - open(DOT, $output) || error("$output: $!\n"); - - # Title - printf DOT ("digraph \"%s; %s %s\" {\n", - $prog, - Unparse($overall_total), - Units()); - if ($main::opt_pdf) { - # The output is more printable if we set the page size for dot. - printf DOT ("size=\"8,11\"\n"); - } - printf DOT ("node [width=0.375,height=0.25];\n"); - - # Print legend - printf DOT ("Legend [shape=box,fontsize=24,shape=plaintext," . - "label=\"%s\\l%s\\l%s\\l%s\\l%s\\l\"];\n", - $prog, - sprintf("Total %s: %s", Units(), Unparse($overall_total)), - sprintf("Focusing on: %s", Unparse($local_total)), - sprintf("Dropped nodes with <= %s abs(%s)", - Unparse($nodelimit), Units()), - sprintf("Dropped edges with <= %s %s", - Unparse($edgelimit), Units()) - ); - - # Print nodes - my %node = (); - my $nextnode = 1; - foreach my $a (@list[0..$last]) { - # Pick font size - my $f = GetEntry($flat, $a); - my $c = GetEntry($cumulative, $a); - - my $fs = 8; - if ($local_total > 0) { - $fs = 8 + (50.0 * sqrt(abs($f * 1.0 / $local_total))); - } - - $node{$a} = $nextnode++; - my $sym = $a; - $sym =~ s/\s+/\\n/g; - $sym =~ s/::/\\n/g; - - # Extra cumulative info to print for non-leaves - my $extra = ""; - if ($f != $c) { - $extra = sprintf("\\rof %s (%s)", - Unparse($c), - Percent($c, $local_total)); - } - my $style = ""; - if ($main::opt_heapcheck) { - if ($f > 0) { - # make leak-causing nodes more visible (add a background) - $style = ",style=filled,fillcolor=gray" - } elsif ($f < 0) { - # make anti-leak-causing nodes (which almost never occur) - # stand out as well (triple border) - $style = ",peripheries=3" - } - } - - printf DOT ("N%d [label=\"%s\\n%s (%s)%s\\r" . 
- "\",shape=box,fontsize=%.1f%s];\n", - $node{$a}, - $sym, - Unparse($f), - Percent($f, $local_total), - $extra, - $fs, - $style, - ); - } - - # Get edges and counts per edge - my %edge = (); - my $n; - my $fullname_to_shortname_map = {}; - FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map); - foreach my $k (keys(%{$raw})) { - # TODO: omit low %age edges - $n = $raw->{$k}; - my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k); - for (my $i = 1; $i <= $#translated; $i++) { - my $src = $translated[$i]; - my $dst = $translated[$i-1]; - #next if ($src eq $dst); # Avoid self-edges? - if (exists($node{$src}) && exists($node{$dst})) { - my $edge_label = "$src\001$dst"; - if (!exists($edge{$edge_label})) { - $edge{$edge_label} = 0; - } - $edge{$edge_label} += $n; - } - } - } - - # Print edges (process in order of decreasing counts) - my %indegree = (); # Number of incoming edges added per node so far - my %outdegree = (); # Number of outgoing edges added per node so far - foreach my $e (sort { $edge{$b} <=> $edge{$a} } keys(%edge)) { - my @x = split(/\001/, $e); - $n = $edge{$e}; - - # Initialize degree of kept incoming and outgoing edges if necessary - my $src = $x[0]; - my $dst = $x[1]; - if (!exists($outdegree{$src})) { $outdegree{$src} = 0; } - if (!exists($indegree{$dst})) { $indegree{$dst} = 0; } - - my $keep; - if ($indegree{$dst} == 0) { - # Keep edge if needed for reachability - $keep = 1; - } elsif (abs($n) <= $edgelimit) { - # Drop if we are below --edgefraction - $keep = 0; - } elsif ($outdegree{$src} >= $main::opt_maxdegree || - $indegree{$dst} >= $main::opt_maxdegree) { - # Keep limited number of in/out edges per node - $keep = 0; - } else { - $keep = 1; - } - - if ($keep) { - $outdegree{$src}++; - $indegree{$dst}++; - - # Compute line width based on edge count - my $fraction = abs($local_total ? 
(3 * ($n / $local_total)) : 0); - if ($fraction > 1) { $fraction = 1; } - my $w = $fraction * 2; - if ($w < 1 && ($main::opt_web || $main::opt_svg)) { - # SVG output treats line widths < 1 poorly. - $w = 1; - } - - # Dot sometimes segfaults if given edge weights that are too large, so - # we cap the weights at a large value - my $edgeweight = abs($n) ** 0.7; - if ($edgeweight > 100000) { $edgeweight = 100000; } - $edgeweight = int($edgeweight); - - my $style = sprintf("setlinewidth(%f)", $w); - if ($x[1] =~ m/\(inline\)/) { - $style .= ",dashed"; - } - - # Use a slightly squashed function of the edge count as the weight - printf DOT ("N%s -> N%s [label=%s, weight=%d, style=\"%s\"];\n", - $node{$x[0]}, - $node{$x[1]}, - Unparse($n), - $edgeweight, - $style); - } - } - - print DOT ("}\n"); - close(DOT); - - if ($main::opt_web || $main::opt_svg) { - # Rewrite SVG to be more usable inside web browser. - RewriteSvg(TempName($main::next_tmpfile, "svg")); - } - - return 1; -} - -sub RewriteSvg { - my $svgfile = shift; - - open(SVG, $svgfile) || die "open temp svg: $!"; - my @svg = ; - close(SVG); - unlink $svgfile; - my $svg = join('', @svg); - - # Dot's SVG output is - # - # - # - # ... - # - # - # - # Change it to - # - # - # $svg_javascript - # - # - # ... - # - # - # - - # Fix width, height; drop viewBox. - $svg =~ s/(?s) above first - my $svg_javascript = SvgJavascript(); - my $viewport = "\n"; - $svg =~ s/ above . - $svg =~ s/(.*)(<\/svg>)/$1<\/g>$2/; - $svg =~ s/$svgfile") || die "open $svgfile: $!"; - print SVG $svg; - close(SVG); - } -} - -sub SvgJavascript { - return <<'EOF'; - -EOF -} - -# Provides a map from fullname to shortname for cases where the -# shortname is ambiguous. The symlist has both the fullname and -# shortname for all symbols, which is usually fine, but sometimes -- -# such as overloaded functions -- two different fullnames can map to -# the same shortname. In that case, we use the address of the -# function to disambiguate the two. 
This function fills in a map that -# maps fullnames to modified shortnames in such cases. If a fullname -# is not present in the map, the 'normal' shortname provided by the -# symlist is the appropriate one to use. -sub FillFullnameToShortnameMap { - my $symbols = shift; - my $fullname_to_shortname_map = shift; - my $shortnames_seen_once = {}; - my $shortnames_seen_more_than_once = {}; - - foreach my $symlist (values(%{$symbols})) { - # TODO(csilvers): deal with inlined symbols too. - my $shortname = $symlist->[0]; - my $fullname = $symlist->[2]; - if ($fullname !~ /<[0-9a-fA-F]+>$/) { # fullname doesn't end in an address - next; # the only collisions we care about are when addresses differ - } - if (defined($shortnames_seen_once->{$shortname}) && - $shortnames_seen_once->{$shortname} ne $fullname) { - $shortnames_seen_more_than_once->{$shortname} = 1; - } else { - $shortnames_seen_once->{$shortname} = $fullname; - } - } - - foreach my $symlist (values(%{$symbols})) { - my $shortname = $symlist->[0]; - my $fullname = $symlist->[2]; - # TODO(csilvers): take in a list of addresses we care about, and only - # store in the map if $symlist->[1] is in that list. Saves space. - next if defined($fullname_to_shortname_map->{$fullname}); - if (defined($shortnames_seen_more_than_once->{$shortname})) { - if ($fullname =~ /<0*([^>]*)>$/) { # fullname has address at end of it - $fullname_to_shortname_map->{$fullname} = "$shortname\@$1"; - } - } - } -} - -# Return a small number that identifies the argument. -# Multiple calls with the same argument will return the same number. -# Calls with different arguments will return different numbers. 
-sub ShortIdFor { - my $key = shift; - my $id = $main::uniqueid{$key}; - if (!defined($id)) { - $id = keys(%main::uniqueid) + 1; - $main::uniqueid{$key} = $id; - } - return $id; -} - -# Translate a stack of addresses into a stack of symbols -sub TranslateStack { - my $symbols = shift; - my $fullname_to_shortname_map = shift; - my $k = shift; - - my @addrs = split(/\n/, $k); - my @result = (); - for (my $i = 0; $i <= $#addrs; $i++) { - my $a = $addrs[$i]; - - # Skip large addresses since they sometimes show up as fake entries on RH9 - if (length($a) > 8 && $a gt "7fffffffffffffff") { - next; - } - - if ($main::opt_disasm || $main::opt_list) { - # We want just the address for the key - push(@result, $a); - next; - } - - my $symlist = $symbols->{$a}; - if (!defined($symlist)) { - $symlist = [$a, "", $a]; - } - - # We can have a sequence of symbols for a particular entry - # (more than one symbol in the case of inlining). Callers - # come before callees in symlist, so walk backwards since - # the translated stack should contain callees before callers. - for (my $j = $#{$symlist}; $j >= 2; $j -= 3) { - my $func = $symlist->[$j-2]; - my $fileline = $symlist->[$j-1]; - my $fullfunc = $symlist->[$j]; - if (defined($fullname_to_shortname_map->{$fullfunc})) { - $func = $fullname_to_shortname_map->{$fullfunc}; - } - if ($j > 2) { - $func = "$func (inline)"; - } - - # Do not merge nodes corresponding to Callback::Run since that - # causes confusing cycles in dot display. Instead, we synthesize - # a unique name for this frame per caller. - if ($func =~ m/Callback.*::Run$/) { - my $caller = ($i > 0) ? $addrs[$i-1] : 0; - $func = "Run#" . ShortIdFor($caller); - } - - if ($main::opt_addresses) { - push(@result, "$a $func $fileline"); - } elsif ($main::opt_lines) { - if ($func eq '??' 
&& $fileline eq '??:0') { - push(@result, "$a"); - } else { - push(@result, "$func $fileline"); - } - } elsif ($main::opt_functions) { - if ($func eq '??') { - push(@result, "$a"); - } else { - push(@result, $func); - } - } elsif ($main::opt_files) { - if ($fileline eq '??:0' || $fileline eq '') { - push(@result, "$a"); - } else { - my $f = $fileline; - $f =~ s/:\d+$//; - push(@result, $f); - } - } else { - push(@result, $a); - last; # Do not print inlined info - } - } - } - - # print join(",", @addrs), " => ", join(",", @result), "\n"; - return @result; -} - -# Generate percent string for a number and a total -sub Percent { - my $num = shift; - my $tot = shift; - if ($tot != 0) { - return sprintf("%.1f%%", $num * 100.0 / $tot); - } else { - return ($num == 0) ? "nan" : (($num > 0) ? "+inf" : "-inf"); - } -} - -# Generate pretty-printed form of number -sub Unparse { - my $num = shift; - if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { - if ($main::opt_inuse_objects || $main::opt_alloc_objects) { - return sprintf("%d", $num); - } else { - if ($main::opt_show_bytes) { - return sprintf("%d", $num); - } else { - return sprintf("%.1f", $num / 1048576.0); - } - } - } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) { - return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds - } else { - return sprintf("%d", $num); - } -} - -# Alternate pretty-printed form: 0 maps to "." 
-sub UnparseAlt { - my $num = shift; - if ($num == 0) { - return "."; - } else { - return Unparse($num); - } -} - -# Alternate pretty-printed form: 0 maps to "" -sub HtmlPrintNumber { - my $num = shift; - if ($num == 0) { - return ""; - } else { - return Unparse($num); - } -} - -# Return output units -sub Units { - if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { - if ($main::opt_inuse_objects || $main::opt_alloc_objects) { - return "objects"; - } else { - if ($main::opt_show_bytes) { - return "B"; - } else { - return "MB"; - } - } - } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) { - return "seconds"; - } else { - return "samples"; - } -} - -##### Profile manipulation code ##### - -# Generate flattened profile: -# If count is charged to stack [a,b,c,d], in generated profile, -# it will be charged to [a] -sub FlatProfile { - my $profile = shift; - my $result = {}; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - if ($#addrs >= 0) { - AddEntry($result, $addrs[0], $count); - } - } - return $result; -} - -# Generate cumulative profile: -# If count is charged to stack [a,b,c,d], in generated profile, -# it will be charged to [a], [b], [c], [d] -sub CumulativeProfile { - my $profile = shift; - my $result = {}; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - foreach my $a (@addrs) { - AddEntry($result, $a, $count); - } - } - return $result; -} - -# If the second-youngest PC on the stack is always the same, returns -# that pc. Otherwise, returns undef. 
-sub IsSecondPcAlwaysTheSame { - my $profile = shift; - - my $second_pc = undef; - foreach my $k (keys(%{$profile})) { - my @addrs = split(/\n/, $k); - if ($#addrs < 1) { - return undef; - } - if (not defined $second_pc) { - $second_pc = $addrs[1]; - } else { - if ($second_pc ne $addrs[1]) { - return undef; - } - } - } - return $second_pc; -} - -sub ExtractSymbolLocation { - my $symbols = shift; - my $address = shift; - # 'addr2line' outputs "??:0" for unknown locations; we do the - # same to be consistent. - my $location = "??:0:unknown"; - if (exists $symbols->{$address}) { - my $file = $symbols->{$address}->[1]; - if ($file eq "?") { - $file = "??:0" - } - $location = $file . ":" . $symbols->{$address}->[0]; - } - return $location; -} - -# Extracts a graph of calls. -sub ExtractCalls { - my $symbols = shift; - my $profile = shift; - - my $calls = {}; - while( my ($stack_trace, $count) = each %$profile ) { - my @address = split(/\n/, $stack_trace); - my $destination = ExtractSymbolLocation($symbols, $address[0]); - AddEntry($calls, $destination, $count); - for (my $i = 1; $i <= $#address; $i++) { - my $source = ExtractSymbolLocation($symbols, $address[$i]); - my $call = "$source -> $destination"; - AddEntry($calls, $call, $count); - $destination = $source; - } - } - - return $calls; -} - -sub RemoveUninterestingFrames { - my $symbols = shift; - my $profile = shift; - - # List of function names to skip - my %skip = (); - my $skip_regexp = 'NOMATCH'; - if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { - foreach my $name ('calloc', - 'cfree', - 'malloc', - 'free', - 'memalign', - 'posix_memalign', - 'pvalloc', - 'valloc', - 'realloc', - 'tc_calloc', - 'tc_cfree', - 'tc_malloc', - 'tc_free', - 'tc_memalign', - 'tc_posix_memalign', - 'tc_pvalloc', - 'tc_valloc', - 'tc_realloc', - 'tc_new', - 'tc_delete', - 'tc_newarray', - 'tc_deletearray', - 'tc_new_nothrow', - 'tc_newarray_nothrow', - 'do_malloc', - '::do_malloc', # new name -- got moved to an 
unnamed ns - '::do_malloc_or_cpp_alloc', - 'DoSampledAllocation', - 'simple_alloc::allocate', - '__malloc_alloc_template::allocate', - '__builtin_delete', - '__builtin_new', - '__builtin_vec_delete', - '__builtin_vec_new', - 'operator new', - 'operator new[]', - # The entry to our memory-allocation routines on OS X - 'malloc_zone_malloc', - 'malloc_zone_calloc', - 'malloc_zone_valloc', - 'malloc_zone_realloc', - 'malloc_zone_memalign', - 'malloc_zone_free', - # These mark the beginning/end of our custom sections - '__start_google_malloc', - '__stop_google_malloc', - '__start_malloc_hook', - '__stop_malloc_hook') { - $skip{$name} = 1; - $skip{"_" . $name} = 1; # Mach (OS X) adds a _ prefix to everything - } - # TODO: Remove TCMalloc once everything has been - # moved into the tcmalloc:: namespace and we have flushed - # old code out of the system. - $skip_regexp = "TCMalloc|^tcmalloc::"; - } elsif ($main::profile_type eq 'contention') { - foreach my $vname ('base::RecordLockProfileData', - 'base::SubmitMutexProfileData', - 'base::SubmitSpinLockProfileData', - 'Mutex::Unlock', - 'Mutex::UnlockSlow', - 'Mutex::ReaderUnlock', - 'MutexLock::~MutexLock', - 'SpinLock::Unlock', - 'SpinLock::SlowUnlock', - 'SpinLockHolder::~SpinLockHolder') { - $skip{$vname} = 1; - } - } elsif ($main::profile_type eq 'cpu') { - # Drop signal handlers used for CPU profile collection - # TODO(dpeng): this should not be necessary; it's taken - # care of by the general 2nd-pc mechanism below. - foreach my $name ('ProfileData::Add', # historical - 'ProfileData::prof_handler', # historical - 'CpuProfiler::prof_handler', - '__FRAME_END__', - '__pthread_sighandler', - '__restore') { - $skip{$name} = 1; - } - } else { - # Nothing skipped for unknown types - } - - if ($main::profile_type eq 'cpu') { - # If all the second-youngest program counters are the same, - # this STRONGLY suggests that it is an artifact of measurement, - # i.e., stack frames pushed by the CPU profiler signal handler. 
- # Hence, we delete them. - # (The topmost PC is read from the signal structure, not from - # the stack, so it does not get involved.) - while (my $second_pc = IsSecondPcAlwaysTheSame($profile)) { - my $result = {}; - my $func = ''; - if (exists($symbols->{$second_pc})) { - $second_pc = $symbols->{$second_pc}->[0]; - } - print STDERR "Removing $second_pc from all stack traces.\n"; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - splice @addrs, 1, 1; - my $reduced_path = join("\n", @addrs); - AddEntry($result, $reduced_path, $count); - } - $profile = $result; - } - } - - my $result = {}; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - my @path = (); - foreach my $a (@addrs) { - if (exists($symbols->{$a})) { - my $func = $symbols->{$a}->[0]; - if ($skip{$func} || ($func =~ m/$skip_regexp/)) { - next; - } - } - push(@path, $a); - } - my $reduced_path = join("\n", @path); - AddEntry($result, $reduced_path, $count); - } - return $result; -} - -# Reduce profile to granularity given by user -sub ReduceProfile { - my $symbols = shift; - my $profile = shift; - my $result = {}; - my $fullname_to_shortname_map = {}; - FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map); - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k); - my @path = (); - my %seen = (); - $seen{''} = 1; # So that empty keys are skipped - foreach my $e (@translated) { - # To avoid double-counting due to recursion, skip a stack-trace - # entry if it has already been seen - if (!$seen{$e}) { - $seen{$e} = 1; - push(@path, $e); - } - } - my $reduced_path = join("\n", @path); - AddEntry($result, $reduced_path, $count); - } - return $result; -} - -# Does the specified symbol array match the regexp? 
-sub SymbolMatches { - my $sym = shift; - my $re = shift; - if (defined($sym)) { - for (my $i = 0; $i < $#{$sym}; $i += 3) { - if ($sym->[$i] =~ m/$re/ || $sym->[$i+1] =~ m/$re/) { - return 1; - } - } - } - return 0; -} - -# Focus only on paths involving specified regexps -sub FocusProfile { - my $symbols = shift; - my $profile = shift; - my $focus = shift; - my $result = {}; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - foreach my $a (@addrs) { - # Reply if it matches either the address/shortname/fileline - if (($a =~ m/$focus/) || SymbolMatches($symbols->{$a}, $focus)) { - AddEntry($result, $k, $count); - last; - } - } - } - return $result; -} - -# Focus only on paths not involving specified regexps -sub IgnoreProfile { - my $symbols = shift; - my $profile = shift; - my $ignore = shift; - my $result = {}; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - my $matched = 0; - foreach my $a (@addrs) { - # Reply if it matches either the address/shortname/fileline - if (($a =~ m/$ignore/) || SymbolMatches($symbols->{$a}, $ignore)) { - $matched = 1; - last; - } - } - if (!$matched) { - AddEntry($result, $k, $count); - } - } - return $result; -} - -# Get total count in profile -sub TotalProfile { - my $profile = shift; - my $result = 0; - foreach my $k (keys(%{$profile})) { - $result += $profile->{$k}; - } - return $result; -} - -# Add A to B -sub AddProfile { - my $A = shift; - my $B = shift; - - my $R = {}; - # add all keys in A - foreach my $k (keys(%{$A})) { - my $v = $A->{$k}; - AddEntry($R, $k, $v); - } - # add all keys in B - foreach my $k (keys(%{$B})) { - my $v = $B->{$k}; - AddEntry($R, $k, $v); - } - return $R; -} - -# Merges symbol maps -sub MergeSymbols { - my $A = shift; - my $B = shift; - - my $R = {}; - foreach my $k (keys(%{$A})) { - $R->{$k} = $A->{$k}; - } - if (defined($B)) { - foreach my $k (keys(%{$B})) { - $R->{$k} = $B->{$k}; - } - } - return 
$R; -} - - -# Add A to B -sub AddPcs { - my $A = shift; - my $B = shift; - - my $R = {}; - # add all keys in A - foreach my $k (keys(%{$A})) { - $R->{$k} = 1 - } - # add all keys in B - foreach my $k (keys(%{$B})) { - $R->{$k} = 1 - } - return $R; -} - -# Subtract B from A -sub SubtractProfile { - my $A = shift; - my $B = shift; - - my $R = {}; - foreach my $k (keys(%{$A})) { - my $v = $A->{$k} - GetEntry($B, $k); - if ($v < 0 && $main::opt_drop_negative) { - $v = 0; - } - AddEntry($R, $k, $v); - } - if (!$main::opt_drop_negative) { - # Take care of when subtracted profile has more entries - foreach my $k (keys(%{$B})) { - if (!exists($A->{$k})) { - AddEntry($R, $k, 0 - $B->{$k}); - } - } - } - return $R; -} - -# Get entry from profile; zero if not present -sub GetEntry { - my $profile = shift; - my $k = shift; - if (exists($profile->{$k})) { - return $profile->{$k}; - } else { - return 0; - } -} - -# Add entry to specified profile -sub AddEntry { - my $profile = shift; - my $k = shift; - my $n = shift; - if (!exists($profile->{$k})) { - $profile->{$k} = 0; - } - $profile->{$k} += $n; -} - -# Add a stack of entries to specified profile, and add them to the $pcs -# list. -sub AddEntries { - my $profile = shift; - my $pcs = shift; - my $stack = shift; - my $count = shift; - my @k = (); - - foreach my $e (split(/\s+/, $stack)) { - my $pc = HexExtend($e); - $pcs->{$pc} = 1; - push @k, $pc; - } - AddEntry($profile, (join "\n", @k), $count); -} - -##### Code to profile a server dynamically ##### - -sub CheckSymbolPage { - my $url = SymbolPageURL(); - my $command = ShellEscape(@URL_FETCHER, $url); - open(SYMBOL, "$command |") or error($command); - my $line = ; - $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines - close(SYMBOL); - unless (defined($line)) { - error("$url doesn't exist\n"); - } - - if ($line =~ /^num_symbols:\s+(\d+)$/) { - if ($1 == 0) { - error("Stripped binary. 
No symbols available.\n"); - } - } else { - error("Failed to get the number of symbols from $url\n"); - } -} - -sub IsProfileURL { - my $profile_name = shift; - if (-f $profile_name) { - printf STDERR "Using local file $profile_name.\n"; - return 0; - } - return 1; -} - -sub ParseProfileURL { - my $profile_name = shift; - - if (!defined($profile_name) || $profile_name eq "") { - return (); - } - - # Split profile URL - matches all non-empty strings, so no test. - $profile_name =~ m,^(https?://)?([^/]+)(.*?)(/|$PROFILES)?$,; - - my $proto = $1 || "http://"; - my $hostport = $2; - my $prefix = $3; - my $profile = $4 || "/"; - - my $host = $hostport; - $host =~ s/:.*//; - - my $baseurl = "$proto$hostport$prefix"; - return ($host, $baseurl, $profile); -} - -# We fetch symbols from the first profile argument. -sub SymbolPageURL { - my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); - return "$baseURL$SYMBOL_PAGE"; -} - -sub FetchProgramName() { - my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); - my $url = "$baseURL$PROGRAM_NAME_PAGE"; - my $command_line = ShellEscape(@URL_FETCHER, $url); - open(CMDLINE, "$command_line |") or error($command_line); - my $cmdline = ; - $cmdline =~ s/\r//g; # turn windows-looking lines into unix-looking lines - close(CMDLINE); - error("Failed to get program name from $url\n") unless defined($cmdline); - $cmdline =~ s/\x00.+//; # Remove argv[1] and latters. - $cmdline =~ s!\n!!g; # Remove LFs. - return $cmdline; -} - -# Gee, curl's -L (--location) option isn't reliable at least -# with its 7.12.3 version. Curl will forget to post data if -# there is a redirection. This function is a workaround for -# curl. Redirection happens on borg hosts. 
-sub ResolveRedirectionForCurl { - my $url = shift; - my $command_line = ShellEscape(@URL_FETCHER, "--head", $url); - open(CMDLINE, "$command_line |") or error($command_line); - while () { - s/\r//g; # turn windows-looking lines into unix-looking lines - if (/^Location: (.*)/) { - $url = $1; - } - } - close(CMDLINE); - return $url; -} - -# Add a timeout flat to URL_FETCHER. Returns a new list. -sub AddFetchTimeout { - my $timeout = shift; - my @fetcher = shift; - if (defined($timeout)) { - if (join(" ", @fetcher) =~ m/\bcurl -s/) { - push(@fetcher, "--max-time", sprintf("%d", $timeout)); - } elsif (join(" ", @fetcher) =~ m/\brpcget\b/) { - push(@fetcher, sprintf("--deadline=%d", $timeout)); - } - } - return @fetcher; -} - -# Reads a symbol map from the file handle name given as $1, returning -# the resulting symbol map. Also processes variables relating to symbols. -# Currently, the only variable processed is 'binary=' which updates -# $main::prog to have the correct program name. -sub ReadSymbols { - my $in = shift; - my $map = {}; - while (<$in>) { - s/\r//g; # turn windows-looking lines into unix-looking lines - # Removes all the leading zeroes from the symbols, see comment below. - if (m/^0x0*([0-9a-f]+)\s+(.+)/) { - $map->{$1} = $2; - } elsif (m/^---/) { - last; - } elsif (m/^([a-z][^=]*)=(.*)$/ ) { - my ($variable, $value) = ($1, $2); - for ($variable, $value) { - s/^\s+//; - s/\s+$//; - } - if ($variable eq "binary") { - if ($main::prog ne $UNKNOWN_BINARY && $main::prog ne $value) { - printf STDERR ("Warning: Mismatched binary name '%s', using '%s'.\n", - $main::prog, $value); - } - $main::prog = $value; - } else { - printf STDERR ("Ignoring unknown variable in symbols list: " . - "'%s' = '%s'\n", $variable, $value); - } - } - } - return $map; -} - -# Fetches and processes symbols to prepare them for use in the profile output -# code. If the optional 'symbol_map' arg is not given, fetches symbols from -# $SYMBOL_PAGE for all PC values found in profile. 
Otherwise, the raw symbols -# are assumed to have already been fetched into 'symbol_map' and are simply -# extracted and processed. -sub FetchSymbols { - my $pcset = shift; - my $symbol_map = shift; - - my %seen = (); - my @pcs = grep { !$seen{$_}++ } keys(%$pcset); # uniq - - if (!defined($symbol_map)) { - my $post_data = join("+", sort((map {"0x" . "$_"} @pcs))); - - open(POSTFILE, ">$main::tmpfile_sym"); - print POSTFILE $post_data; - close(POSTFILE); - - my $url = SymbolPageURL(); - - my $command_line; - if (join(" ", @URL_FETCHER) =~ m/\bcurl -s/) { - $url = ResolveRedirectionForCurl($url); - $command_line = ShellEscape(@URL_FETCHER, "-d", "\@$main::tmpfile_sym", - $url); - } else { - $command_line = (ShellEscape(@URL_FETCHER, "--post", $url) - . " < " . ShellEscape($main::tmpfile_sym)); - } - # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols. - my $escaped_cppfilt = ShellEscape($obj_tool_map{"c++filt"}); - open(SYMBOL, "$command_line | $escaped_cppfilt |") or error($command_line); - $symbol_map = ReadSymbols(*SYMBOL{IO}); - close(SYMBOL); - } - - my $symbols = {}; - foreach my $pc (@pcs) { - my $fullname; - # For 64 bits binaries, symbols are extracted with 8 leading zeroes. - # Then /symbol reads the long symbols in as uint64, and outputs - # the result with a "0x%08llx" format which get rid of the zeroes. - # By removing all the leading zeroes in both $pc and the symbols from - # /symbol, the symbols match and are retrievable from the map. - my $shortpc = $pc; - $shortpc =~ s/^0*//; - # Each line may have a list of names, which includes the function - # and also other functions it has inlined. They are separated (in - # PrintSymbolizedProfile), by --, which is illegal in function names. - my $fullnames; - if (defined($symbol_map->{$shortpc})) { - $fullnames = $symbol_map->{$shortpc}; - } else { - $fullnames = "0x" . 
$pc; # Just use addresses - } - my $sym = []; - $symbols->{$pc} = $sym; - foreach my $fullname (split("--", $fullnames)) { - my $name = ShortFunctionName($fullname); - push(@{$sym}, $name, "?", $fullname); - } - } - return $symbols; -} - -sub BaseName { - my $file_name = shift; - $file_name =~ s!^.*/!!; # Remove directory name - return $file_name; -} - -sub MakeProfileBaseName { - my ($binary_name, $profile_name) = @_; - my ($host, $baseURL, $path) = ParseProfileURL($profile_name); - my $binary_shortname = BaseName($binary_name); - return sprintf("%s.%s.%s", - $binary_shortname, $main::op_time, $host); -} - -sub FetchDynamicProfile { - my $binary_name = shift; - my $profile_name = shift; - my $fetch_name_only = shift; - my $encourage_patience = shift; - - if (!IsProfileURL($profile_name)) { - return $profile_name; - } else { - my ($host, $baseURL, $path) = ParseProfileURL($profile_name); - if ($path eq "" || $path eq "/") { - # Missing type specifier defaults to cpu-profile - $path = $PROFILE_PAGE; - } - - my $profile_file = MakeProfileBaseName($binary_name, $profile_name); - - my $url = "$baseURL$path"; - my $fetch_timeout = undef; - if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/) { - if ($path =~ m/[?]/) { - $url .= "&"; - } else { - $url .= "?"; - } - $url .= sprintf("seconds=%d", $main::opt_seconds); - $fetch_timeout = $main::opt_seconds * 1.01 + 60; - } else { - # For non-CPU profiles, we add a type-extension to - # the target profile file name. - my $suffix = $path; - $suffix =~ s,/,.,g; - $profile_file .= $suffix; - } - - my $profile_dir = $ENV{"PPROF_TMPDIR"} || ($ENV{HOME} . "/pprof"); - if (! 
-d $profile_dir) { - mkdir($profile_dir) - || die("Unable to create profile directory $profile_dir: $!\n"); - } - my $tmp_profile = "$profile_dir/.tmp.$profile_file"; - my $real_profile = "$profile_dir/$profile_file"; - - if ($fetch_name_only > 0) { - return $real_profile; - } - - my @fetcher = AddFetchTimeout($fetch_timeout, @URL_FETCHER); - my $cmd = ShellEscape(@fetcher, $url) . " > " . ShellEscape($tmp_profile); - if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE|$CENSUSPROFILE_PAGE/){ - print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n"; - if ($encourage_patience) { - print STDERR "Be patient...\n"; - } - } else { - print STDERR "Fetching $path profile from $url to\n ${real_profile}\n"; - } - - (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n"); - (system("mv", $tmp_profile, $real_profile) == 0) || error("Unable to rename profile\n"); - print STDERR "Wrote profile to $real_profile\n"; - $main::collected_profile = $real_profile; - return $main::collected_profile; - } -} - -# Collect profiles in parallel -sub FetchDynamicProfiles { - my $items = scalar(@main::pfile_args); - my $levels = log($items) / log(2); - - if ($items == 1) { - $main::profile_files[0] = FetchDynamicProfile($main::prog, $main::pfile_args[0], 0, 1); - } else { - # math rounding issues - if ((2 ** $levels) < $items) { - $levels++; - } - my $count = scalar(@main::pfile_args); - for (my $i = 0; $i < $count; $i++) { - $main::profile_files[$i] = FetchDynamicProfile($main::prog, $main::pfile_args[$i], 1, 0); - } - print STDERR "Fetching $count profiles, Be patient...\n"; - FetchDynamicProfilesRecurse($levels, 0, 0); - $main::collected_profile = join(" \\\n ", @main::profile_files); - } -} - -# Recursively fork a process to get enough processes -# collecting profiles -sub FetchDynamicProfilesRecurse { - my $maxlevel = shift; - my $level = shift; - my $position = shift; - - if (my $pid = fork()) { - $position = 0 | ($position << 1); - 
TryCollectProfile($maxlevel, $level, $position); - wait; - } else { - $position = 1 | ($position << 1); - TryCollectProfile($maxlevel, $level, $position); - cleanup(); - exit(0); - } -} - -# Collect a single profile -sub TryCollectProfile { - my $maxlevel = shift; - my $level = shift; - my $position = shift; - - if ($level >= ($maxlevel - 1)) { - if ($position < scalar(@main::pfile_args)) { - FetchDynamicProfile($main::prog, $main::pfile_args[$position], 0, 0); - } - } else { - FetchDynamicProfilesRecurse($maxlevel, $level+1, $position); - } -} - -##### Parsing code ##### - -# Provide a small streaming-read module to handle very large -# cpu-profile files. Stream in chunks along a sliding window. -# Provides an interface to get one 'slot', correctly handling -# endian-ness differences. A slot is one 32-bit or 64-bit word -# (depending on the input profile). We tell endianness and bit-size -# for the profile by looking at the first 8 bytes: in cpu profiles, -# the second slot is always 3 (we'll accept anything that's not 0). -BEGIN { - package CpuProfileStream; - - sub new { - my ($class, $file, $fname) = @_; - my $self = { file => $file, - base => 0, - stride => 512 * 1024, # must be a multiple of bitsize/8 - slots => [], - unpack_code => "", # N for big-endian, V for little - perl_is_64bit => 1, # matters if profile is 64-bit - }; - bless $self, $class; - # Let unittests adjust the stride - if ($main::opt_test_stride > 0) { - $self->{stride} = $main::opt_test_stride; - } - # Read the first two slots to figure out bitsize and endianness. - my $slots = $self->{slots}; - my $str; - read($self->{file}, $str, 8); - # Set the global $address_length based on what we see here. - # 8 is 32-bit (8 hexadecimal chars); 16 is 64-bit (16 hexadecimal chars). - $address_length = ($str eq (chr(0)x8)) ? 16 : 8; - if ($address_length == 8) { - if (substr($str, 6, 2) eq chr(0)x2) { - $self->{unpack_code} = 'V'; # Little-endian. 
- } elsif (substr($str, 4, 2) eq chr(0)x2) { - $self->{unpack_code} = 'N'; # Big-endian - } else { - ::error("$fname: header size >= 2**16\n"); - } - @$slots = unpack($self->{unpack_code} . "*", $str); - } else { - # If we're a 64-bit profile, check if we're a 64-bit-capable - # perl. Otherwise, each slot will be represented as a float - # instead of an int64, losing precision and making all the - # 64-bit addresses wrong. We won't complain yet, but will - # later if we ever see a value that doesn't fit in 32 bits. - my $has_q = 0; - eval { $has_q = pack("Q", "1") ? 1 : 1; }; - if (!$has_q) { - $self->{perl_is_64bit} = 0; - } - read($self->{file}, $str, 8); - if (substr($str, 4, 4) eq chr(0)x4) { - # We'd love to use 'Q', but it's a) not universal, b) not endian-proof. - $self->{unpack_code} = 'V'; # Little-endian. - } elsif (substr($str, 0, 4) eq chr(0)x4) { - $self->{unpack_code} = 'N'; # Big-endian - } else { - ::error("$fname: header size >= 2**32\n"); - } - my @pair = unpack($self->{unpack_code} . "*", $str); - # Since we know one of the pair is 0, it's fine to just add them. - @$slots = (0, $pair[0] + $pair[1]); - } - return $self; - } - - # Load more data when we access slots->get(X) which is not yet in memory. - sub overflow { - my ($self) = @_; - my $slots = $self->{slots}; - $self->{base} += $#$slots + 1; # skip over data we're replacing - my $str; - read($self->{file}, $str, $self->{stride}); - if ($address_length == 8) { # the 32-bit case - # This is the easy case: unpack provides 32-bit unpacking primitives. - @$slots = unpack($self->{unpack_code} . "*", $str); - } else { - # We need to unpack 32 bits at a time and combine. - my @b32_values = unpack($self->{unpack_code} . "*", $str); - my @b64_values = (); - for (my $i = 0; $i < $#b32_values; $i += 2) { - # TODO(csilvers): if this is a 32-bit perl, the math below - # could end up in a too-large int, which perl will promote - # to a double, losing necessary precision. Deal with that. 
- # Right now, we just die. - my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]); - if ($self->{unpack_code} eq 'N') { # big-endian - ($lo, $hi) = ($hi, $lo); - } - my $value = $lo + $hi * (2**32); - if (!$self->{perl_is_64bit} && # check value is exactly represented - (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) { - ::error("Need a 64-bit perl to process this 64-bit profile.\n"); - } - push(@b64_values, $value); - } - @$slots = @b64_values; - } - } - - # Access the i-th long in the file (logically), or -1 at EOF. - sub get { - my ($self, $idx) = @_; - my $slots = $self->{slots}; - while ($#$slots >= 0) { - if ($idx < $self->{base}) { - # The only time we expect a reference to $slots[$i - something] - # after referencing $slots[$i] is reading the very first header. - # Since $stride > |header|, that shouldn't cause any lookback - # errors. And everything after the header is sequential. - print STDERR "Unexpected look-back reading CPU profile"; - return -1; # shrug, don't know what better to return - } elsif ($idx > $self->{base} + $#$slots) { - $self->overflow(); - } else { - return $slots->[$idx - $self->{base}]; - } - } - # If we get here, $slots is [], which means we've reached EOF - return -1; # unique since slots is supposed to hold unsigned numbers - } -} - -# Reads the top, 'header' section of a profile, and returns the last -# line of the header, commonly called a 'header line'. The header -# section of a profile consists of zero or more 'command' lines that -# are instructions to pprof, which pprof executes when reading the -# header. All 'command' lines start with a %. After the command -# lines is the 'header line', which is a profile-specific line that -# indicates what type of profile it is, and perhaps other global -# information about the profile. 
For instance, here's a header line -# for a heap profile: -# heap profile: 53: 38236 [ 5525: 1284029] @ heapprofile -# For historical reasons, the CPU profile does not contain a text- -# readable header line. If the profile looks like a CPU profile, -# this function returns "". If no header line could be found, this -# function returns undef. -# -# The following commands are recognized: -# %warn -- emit the rest of this line to stderr, prefixed by 'WARNING:' -# -# The input file should be in binmode. -sub ReadProfileHeader { - local *PROFILE = shift; - my $firstchar = ""; - my $line = ""; - read(PROFILE, $firstchar, 1); - seek(PROFILE, -1, 1); # unread the firstchar - if ($firstchar !~ /[[:print:]]/) { # is not a text character - return ""; - } - while (defined($line = )) { - $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines - if ($line =~ /^%warn\s+(.*)/) { # 'warn' command - # Note this matches both '%warn blah\n' and '%warn\n'. - print STDERR "WARNING: $1\n"; # print the rest of the line - } elsif ($line =~ /^%/) { - print STDERR "Ignoring unknown command from profile header: $line"; - } else { - # End of commands, must be the header line. - return $line; - } - } - return undef; # got to EOF without seeing a header line -} - -sub IsSymbolizedProfileFile { - my $file_name = shift; - if (!(-e $file_name) || !(-r $file_name)) { - return 0; - } - # Check if the file contains a symbol-section marker. 
- open(TFILE, "<$file_name"); - binmode TFILE; - my $firstline = ReadProfileHeader(*TFILE); - close(TFILE); - if (!$firstline) { - return 0; - } - $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $symbol_marker = $&; - return $firstline =~ /^--- *$symbol_marker/; -} - -# Parse profile generated by common/profiler.cc and return a reference -# to a map: -# $result->{version} Version number of profile file -# $result->{period} Sampling period (in microseconds) -# $result->{profile} Profile object -# $result->{map} Memory map info from profile -# $result->{pcs} Hash of all PC values seen, key is hex address -sub ReadProfile { - my $prog = shift; - my $fname = shift; - my $result; # return value - - $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $contention_marker = $&; - $GROWTH_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $growth_marker = $&; - $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $symbol_marker = $&; - $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $profile_marker = $&; - - # Look at first line to see if it is a heap or a CPU profile. - # CPU profile may start with no header at all, and just binary data - # (starting with \0\0\0\0) -- in that case, don't try to read the - # whole firstline, since it may be gigabytes(!) of data. - open(PROFILE, "<$fname") || error("$fname: $!\n"); - binmode PROFILE; # New perls do UTF-8 processing - my $header = ReadProfileHeader(*PROFILE); - if (!defined($header)) { # means "at EOF" - error("Profile is empty.\n"); - } - - my $symbols; - if ($header =~ m/^--- *$symbol_marker/o) { - # Verify that the user asked for a symbolized profile - if (!$main::use_symbolized_profile) { - # we have both a binary and symbolized profiles, abort - error("FATAL ERROR: Symbolized profile\n $fname\ncannot be used with " . - "a binary arg. 
Try again without passing\n $prog\n"); - } - # Read the symbol section of the symbolized profile file. - $symbols = ReadSymbols(*PROFILE{IO}); - # Read the next line to get the header for the remaining profile. - $header = ReadProfileHeader(*PROFILE) || ""; - } - - $main::profile_type = ''; - if ($header =~ m/^heap profile:.*$growth_marker/o) { - $main::profile_type = 'growth'; - $result = ReadHeapProfile($prog, *PROFILE, $header); - } elsif ($header =~ m/^heap profile:/) { - $main::profile_type = 'heap'; - $result = ReadHeapProfile($prog, *PROFILE, $header); - } elsif ($header =~ m/^--- *$contention_marker/o) { - $main::profile_type = 'contention'; - $result = ReadSynchProfile($prog, *PROFILE); - } elsif ($header =~ m/^--- *Stacks:/) { - print STDERR - "Old format contention profile: mistakenly reports " . - "condition variable signals as lock contentions.\n"; - $main::profile_type = 'contention'; - $result = ReadSynchProfile($prog, *PROFILE); - } elsif ($header =~ m/^--- *$profile_marker/) { - # the binary cpu profile data starts immediately after this line - $main::profile_type = 'cpu'; - $result = ReadCPUProfile($prog, $fname, *PROFILE); - } else { - if (defined($symbols)) { - # a symbolized profile contains a format we don't recognize, bail out - error("$fname: Cannot recognize profile section after symbols.\n"); - } - # no ascii header present -- must be a CPU profile - $main::profile_type = 'cpu'; - $result = ReadCPUProfile($prog, $fname, *PROFILE); - } - - close(PROFILE); - - # if we got symbols along with the profile, return those as well - if (defined($symbols)) { - $result->{symbols} = $symbols; - } - - return $result; -} - -# Subtract one from caller pc so we map back to call instr. -# However, don't do this if we're reading a symbolized profile -# file, in which case the subtract-one was done when the file -# was written. -# -# We apply the same logic to all readers, though ReadCPUProfile uses an -# independent implementation. 
-sub FixCallerAddresses { - my $stack = shift; - if ($main::use_symbolized_profile) { - return $stack; - } else { - $stack =~ /(\s)/; - my $delimiter = $1; - my @addrs = split(' ', $stack); - my @fixedaddrs; - $#fixedaddrs = $#addrs; - if ($#addrs >= 0) { - $fixedaddrs[0] = $addrs[0]; - } - for (my $i = 1; $i <= $#addrs; $i++) { - $fixedaddrs[$i] = AddressSub($addrs[$i], "0x1"); - } - return join $delimiter, @fixedaddrs; - } -} - -# CPU profile reader -sub ReadCPUProfile { - my $prog = shift; - my $fname = shift; # just used for logging - local *PROFILE = shift; - my $version; - my $period; - my $i; - my $profile = {}; - my $pcs = {}; - - # Parse string into array of slots. - my $slots = CpuProfileStream->new(*PROFILE, $fname); - - # Read header. The current header version is a 5-element structure - # containing: - # 0: header count (always 0) - # 1: header "words" (after this one: 3) - # 2: format version (0) - # 3: sampling period (usec) - # 4: unused padding (always 0) - if ($slots->get(0) != 0 ) { - error("$fname: not a profile file, or old format profile file\n"); - } - $i = 2 + $slots->get(1); - $version = $slots->get(2); - $period = $slots->get(3); - # Do some sanity checking on these header values. - if ($version > (2**32) || $period > (2**32) || $i > (2**32) || $i < 5) { - error("$fname: not a profile file, or corrupted profile file\n"); - } - - # Parse profile - while ($slots->get($i) != -1) { - my $n = $slots->get($i++); - my $d = $slots->get($i++); - if ($d > (2**16)) { # TODO(csilvers): what's a reasonable max-stack-depth? - my $addr = sprintf("0%o", $i * ($address_length == 8 ? 4 : 8)); - print STDERR "At index $i (address $addr):\n"; - error("$fname: stack trace depth >= 2**32\n"); - } - if ($slots->get($i) == 0) { - # End of profile data marker - $i += $d; - last; - } - - # Make key out of the stack entries - my @k = (); - for (my $j = 0; $j < $d; $j++) { - my $pc = $slots->get($i+$j); - # Subtract one from caller pc so we map back to call instr. 
- # However, don't do this if we're reading a symbolized profile - # file, in which case the subtract-one was done when the file - # was written. - if ($j > 0 && !$main::use_symbolized_profile) { - $pc--; - } - $pc = sprintf("%0*x", $address_length, $pc); - $pcs->{$pc} = 1; - push @k, $pc; - } - - AddEntry($profile, (join "\n", @k), $n); - $i += $d; - } - - # Parse map - my $map = ''; - seek(PROFILE, $i * 4, 0); - read(PROFILE, $map, (stat PROFILE)[7]); - - my $r = {}; - $r->{version} = $version; - $r->{period} = $period; - $r->{profile} = $profile; - $r->{libs} = ParseLibraries($prog, $map, $pcs); - $r->{pcs} = $pcs; - - return $r; -} - -sub ReadHeapProfile { - my $prog = shift; - local *PROFILE = shift; - my $header = shift; - - my $index = 1; - if ($main::opt_inuse_space) { - $index = 1; - } elsif ($main::opt_inuse_objects) { - $index = 0; - } elsif ($main::opt_alloc_space) { - $index = 3; - } elsif ($main::opt_alloc_objects) { - $index = 2; - } - - # Find the type of this profile. The header line looks like: - # heap profile: 1246: 8800744 [ 1246: 8800744] @ /266053 - # There are two pairs , the first inuse objects/space, and the - # second allocated objects/space. This is followed optionally by a profile - # type, and if that is present, optionally by a sampling frequency. - # For remote heap profiles (v1): - # The interpretation of the sampling frequency is that the profiler, for - # each sample, calculates a uniformly distributed random integer less than - # the given value, and records the next sample after that many bytes have - # been allocated. Therefore, the expected sample interval is half of the - # given frequency. By default, if not specified, the expected sample - # interval is 128KB. Only remote-heap-page profiles are adjusted for - # sample size. - # For remote heap profiles (v2): - # The sampling frequency is the rate of a Poisson process. 
This means that - # the probability of sampling an allocation of size X with sampling rate Y - # is 1 - exp(-X/Y) - # For version 2, a typical header line might look like this: - # heap profile: 1922: 127792360 [ 1922: 127792360] @ _v2/524288 - # the trailing number (524288) is the sampling rate. (Version 1 showed - # double the 'rate' here) - my $sampling_algorithm = 0; - my $sample_adjustment = 0; - chomp($header); - my $type = "unknown"; - if ($header =~ m"^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*([^/]*)(/(\d+))?)?") { - if (defined($6) && ($6 ne '')) { - $type = $6; - my $sample_period = $8; - # $type is "heapprofile" for profiles generated by the - # heap-profiler, and either "heap" or "heap_v2" for profiles - # generated by sampling directly within tcmalloc. It can also - # be "growth" for heap-growth profiles. The first is typically - # found for profiles generated locally, and the others for - # remote profiles. - if (($type eq "heapprofile") || ($type !~ /heap/) ) { - # No need to adjust for the sampling rate with heap-profiler-derived data - $sampling_algorithm = 0; - } elsif ($type =~ /_v2/) { - $sampling_algorithm = 2; # version 2 sampling - if (defined($sample_period) && ($sample_period ne '')) { - $sample_adjustment = int($sample_period); - } - } else { - $sampling_algorithm = 1; # version 1 sampling - if (defined($sample_period) && ($sample_period ne '')) { - $sample_adjustment = int($sample_period)/2; - } - } - } else { - # We detect whether or not this is a remote-heap profile by checking - # that the total-allocated stats ($n2,$s2) are exactly the - # same as the in-use stats ($n1,$s1). It is remotely conceivable - # that a non-remote-heap profile may pass this check, but it is hard - # to imagine how that could happen. - # In this case it's so old it's guaranteed to be remote-heap version 1. 
- my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); - if (($n1 == $n2) && ($s1 == $s2)) { - # This is likely to be a remote-heap based sample profile - $sampling_algorithm = 1; - } - } - } - - if ($sampling_algorithm > 0) { - # For remote-heap generated profiles, adjust the counts and sizes to - # account for the sample rate (we sample once every 128KB by default). - if ($sample_adjustment == 0) { - # Turn on profile adjustment. - $sample_adjustment = 128*1024; - print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n"; - } else { - printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n", - $sample_adjustment); - } - if ($sampling_algorithm > 1) { - # We don't bother printing anything for the original version (version 1) - printf STDERR "Heap version $sampling_algorithm\n"; - } - } - - my $profile = {}; - my $pcs = {}; - my $map = ""; - - while () { - s/\r//g; # turn windows-looking lines into unix-looking lines - if (/^MAPPED_LIBRARIES:/) { - # Read the /proc/self/maps data - while () { - s/\r//g; # turn windows-looking lines into unix-looking lines - $map .= $_; - } - last; - } - - if (/^--- Memory map:/) { - # Read /proc/self/maps data as formatted by DumpAddressMap() - my $buildvar = ""; - while () { - s/\r//g; # turn windows-looking lines into unix-looking lines - # Parse "build=" specification if supplied - if (m/^\s*build=(.*)\n/) { - $buildvar = $1; - } - - # Expand "$build" variable if available - $_ =~ s/\$build\b/$buildvar/g; - - $map .= $_; - } - last; - } - - # Read entry of the form: - # : [: ] @ a1 a2 a3 ... an - s/^\s*//; - s/\s*$//; - if (m/^\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]\s+@\s+(.*)$/) { - my $stack = $5; - my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); - - if ($sample_adjustment) { - if ($sampling_algorithm == 2) { - # Remote-heap version 2 - # The sampling frequency is the rate of a Poisson process. 
- # This means that the probability of sampling an allocation of - # size X with sampling rate Y is 1 - exp(-X/Y) - if ($n1 != 0) { - my $ratio = (($s1*1.0)/$n1)/($sample_adjustment); - my $scale_factor = 1/(1 - exp(-$ratio)); - $n1 *= $scale_factor; - $s1 *= $scale_factor; - } - if ($n2 != 0) { - my $ratio = (($s2*1.0)/$n2)/($sample_adjustment); - my $scale_factor = 1/(1 - exp(-$ratio)); - $n2 *= $scale_factor; - $s2 *= $scale_factor; - } - } else { - # Remote-heap version 1 - my $ratio; - $ratio = (($s1*1.0)/$n1)/($sample_adjustment); - if ($ratio < 1) { - $n1 /= $ratio; - $s1 /= $ratio; - } - $ratio = (($s2*1.0)/$n2)/($sample_adjustment); - if ($ratio < 1) { - $n2 /= $ratio; - $s2 /= $ratio; - } - } - } - - my @counts = ($n1, $s1, $n2, $s2); - AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]); - } - } - - my $r = {}; - $r->{version} = "heap"; - $r->{period} = 1; - $r->{profile} = $profile; - $r->{libs} = ParseLibraries($prog, $map, $pcs); - $r->{pcs} = $pcs; - return $r; -} - -sub ReadSynchProfile { - my $prog = shift; - local *PROFILE = shift; - my $header = shift; - - my $map = ''; - my $profile = {}; - my $pcs = {}; - my $sampling_period = 1; - my $cyclespernanosec = 2.8; # Default assumption for old binaries - my $seen_clockrate = 0; - my $line; - - my $index = 0; - if ($main::opt_total_delay) { - $index = 0; - } elsif ($main::opt_contentions) { - $index = 1; - } elsif ($main::opt_mean_delay) { - $index = 2; - } - - while ( $line = ) { - $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines - if ( $line =~ /^\s*(\d+)\s+(\d+) \@\s*(.*?)\s*$/ ) { - my ($cycles, $count, $stack) = ($1, $2, $3); - - # Convert cycles to nanoseconds - $cycles /= $cyclespernanosec; - - # Adjust for sampling done by application - $cycles *= $sampling_period; - $count *= $sampling_period; - - my @values = ($cycles, $count, $cycles / $count); - AddEntries($profile, $pcs, FixCallerAddresses($stack), $values[$index]); - - } elsif ( $line =~ /^(slow 
release).*thread \d+ \@\s*(.*?)\s*$/ || - $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) { - my ($cycles, $stack) = ($1, $2); - if ($cycles !~ /^\d+$/) { - next; - } - - # Convert cycles to nanoseconds - $cycles /= $cyclespernanosec; - - # Adjust for sampling done by application - $cycles *= $sampling_period; - - AddEntries($profile, $pcs, FixCallerAddresses($stack), $cycles); - - } elsif ( $line =~ m/^([a-z][^=]*)=(.*)$/ ) { - my ($variable, $value) = ($1,$2); - for ($variable, $value) { - s/^\s+//; - s/\s+$//; - } - if ($variable eq "cycles/second") { - $cyclespernanosec = $value / 1e9; - $seen_clockrate = 1; - } elsif ($variable eq "sampling period") { - $sampling_period = $value; - } elsif ($variable eq "ms since reset") { - # Currently nothing is done with this value in pprof - # So we just silently ignore it for now - } elsif ($variable eq "discarded samples") { - # Currently nothing is done with this value in pprof - # So we just silently ignore it for now - } else { - printf STDERR ("Ignoring unnknown variable in /contention output: " . - "'%s' = '%s'\n",$variable,$value); - } - } else { - # Memory map entry - $map .= $line; - } - } - - if (!$seen_clockrate) { - printf STDERR ("No cycles/second entry in profile; Guessing %.1f GHz\n", - $cyclespernanosec); - } - - my $r = {}; - $r->{version} = 0; - $r->{period} = $sampling_period; - $r->{profile} = $profile; - $r->{libs} = ParseLibraries($prog, $map, $pcs); - $r->{pcs} = $pcs; - return $r; -} - -# Given a hex value in the form "0x1abcd" or "1abcd", return either -# "0001abcd" or "000000000001abcd", depending on the current (global) -# address length. -sub HexExtend { - my $addr = shift; - - $addr =~ s/^(0x)?0*//; - my $zeros_needed = $address_length - length($addr); - if ($zeros_needed < 0) { - printf STDERR "Warning: address $addr is longer than address length $address_length\n"; - return $addr; - } - return ("0" x $zeros_needed) . 
$addr; -} - -##### Symbol extraction ##### - -# Aggressively search the lib_prefix values for the given library -# If all else fails, just return the name of the library unmodified. -# If the lib_prefix is "/my/path,/other/path" and $file is "/lib/dir/mylib.so" -# it will search the following locations in this order, until it finds a file: -# /my/path/lib/dir/mylib.so -# /other/path/lib/dir/mylib.so -# /my/path/dir/mylib.so -# /other/path/dir/mylib.so -# /my/path/mylib.so -# /other/path/mylib.so -# /lib/dir/mylib.so (returned as last resort) -sub FindLibrary { - my $file = shift; - my $suffix = $file; - - # Search for the library as described above - do { - foreach my $prefix (@prefix_list) { - my $fullpath = $prefix . $suffix; - if (-e $fullpath) { - return $fullpath; - } - } - } while ($suffix =~ s|^/[^/]+/|/|); - return $file; -} - -# Return path to library with debugging symbols. -# For libc libraries, the copy in /usr/lib/debug contains debugging symbols -sub DebuggingLibrary { - my $file = shift; - if ($file =~ m|^/| && -f "/usr/lib/debug$file") { - return "/usr/lib/debug$file"; - } - return undef; -} - -# Parse text section header of a library using objdump -sub ParseTextSectionHeaderFromObjdump { - my $lib = shift; - - my $size = undef; - my $vma; - my $file_offset; - # Get objdump output from the library file to figure out how to - # map between mapped addresses and addresses in the library. - my $cmd = ShellEscape($obj_tool_map{"objdump"}, "-h", $lib); - open(OBJDUMP, "$cmd |") || error("$cmd: $!\n"); - while () { - s/\r//g; # turn windows-looking lines into unix-looking lines - # Idx Name Size VMA LMA File off Algn - # 10 .text 00104b2c 420156f0 420156f0 000156f0 2**4 - # For 64-bit objects, VMA and LMA will be 16 hex digits, size and file - # offset may still be 8. But AddressSub below will still handle that. 
- my @x = split; - if (($#x >= 6) && ($x[1] eq '.text')) { - $size = $x[2]; - $vma = $x[3]; - $file_offset = $x[5]; - last; - } - } - close(OBJDUMP); - - if (!defined($size)) { - return undef; - } - - my $r = {}; - $r->{size} = $size; - $r->{vma} = $vma; - $r->{file_offset} = $file_offset; - - return $r; -} - -# Parse text section header of a library using otool (on OS X) -sub ParseTextSectionHeaderFromOtool { - my $lib = shift; - - my $size = undef; - my $vma = undef; - my $file_offset = undef; - # Get otool output from the library file to figure out how to - # map between mapped addresses and addresses in the library. - my $command = ShellEscape($obj_tool_map{"otool"}, "-l", $lib); - open(OTOOL, "$command |") || error("$command: $!\n"); - my $cmd = ""; - my $sectname = ""; - my $segname = ""; - foreach my $line () { - $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines - # Load command <#> - # cmd LC_SEGMENT - # [...] - # Section - # sectname __text - # segname __TEXT - # addr 0x000009f8 - # size 0x00018b9e - # offset 2552 - # align 2^2 (4) - # We will need to strip off the leading 0x from the hex addresses, - # and convert the offset into hex. 
- if ($line =~ /Load command/) { - $cmd = ""; - $sectname = ""; - $segname = ""; - } elsif ($line =~ /Section/) { - $sectname = ""; - $segname = ""; - } elsif ($line =~ /cmd (\w+)/) { - $cmd = $1; - } elsif ($line =~ /sectname (\w+)/) { - $sectname = $1; - } elsif ($line =~ /segname (\w+)/) { - $segname = $1; - } elsif (!(($cmd eq "LC_SEGMENT" || $cmd eq "LC_SEGMENT_64") && - $sectname eq "__text" && - $segname eq "__TEXT")) { - next; - } elsif ($line =~ /\baddr 0x([0-9a-fA-F]+)/) { - $vma = $1; - } elsif ($line =~ /\bsize 0x([0-9a-fA-F]+)/) { - $size = $1; - } elsif ($line =~ /\boffset ([0-9]+)/) { - $file_offset = sprintf("%016x", $1); - } - if (defined($vma) && defined($size) && defined($file_offset)) { - last; - } - } - close(OTOOL); - - if (!defined($vma) || !defined($size) || !defined($file_offset)) { - return undef; - } - - my $r = {}; - $r->{size} = $size; - $r->{vma} = $vma; - $r->{file_offset} = $file_offset; - - return $r; -} - -sub ParseTextSectionHeader { - # obj_tool_map("otool") is only defined if we're in a Mach-O environment - if (defined($obj_tool_map{"otool"})) { - my $r = ParseTextSectionHeaderFromOtool(@_); - if (defined($r)){ - return $r; - } - } - # If otool doesn't work, or we don't have it, fall back to objdump - return ParseTextSectionHeaderFromObjdump(@_); -} - -# Split /proc/pid/maps dump into a list of libraries -sub ParseLibraries { - return if $main::use_symbol_page; # We don't need libraries info. - my $prog = shift; - my $map = shift; - my $pcs = shift; - - my $result = []; - my $h = "[a-f0-9]+"; - my $zero_offset = HexExtend("0"); - - my $buildvar = ""; - foreach my $l (split("\n", $map)) { - if ($l =~ m/^\s*build=(.*)$/) { - $buildvar = $1; - } - - my $start; - my $finish; - my $offset; - my $lib; - if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*(\.\d+){0,3})?)$/i) { - # Full line from /proc/self/maps. 
Example: - # 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so - $start = HexExtend($1); - $finish = HexExtend($2); - $offset = HexExtend($3); - $lib = $4; - $lib =~ s|\\|/|g; # turn windows-style paths into unix-style paths - } elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.so(\.\d+)*)/) { - # Cooked line from DumpAddressMap. Example: - # 40000000-40015000: /lib/ld-2.3.2.so - $start = HexExtend($1); - $finish = HexExtend($2); - $offset = $zero_offset; - $lib = $3; - } else { - next; - } - - # Expand "$build" variable if available - $lib =~ s/\$build\b/$buildvar/g; - - $lib = FindLibrary($lib); - - # Check for pre-relocated libraries, which use pre-relocated symbol tables - # and thus require adjusting the offset that we'll use to translate - # VM addresses into symbol table addresses. - # Only do this if we're not going to fetch the symbol table from a - # debugging copy of the library. - if (!DebuggingLibrary($lib)) { - my $text = ParseTextSectionHeader($lib); - if (defined($text)) { - my $vma_offset = AddressSub($text->{vma}, $text->{file_offset}); - $offset = AddressAdd($offset, $vma_offset); - } - } - - push(@{$result}, [$lib, $start, $finish, $offset]); - } - - # Append special entry for additional library (not relocated) - if ($main::opt_lib ne "") { - my $text = ParseTextSectionHeader($main::opt_lib); - if (defined($text)) { - my $start = $text->{vma}; - my $finish = AddressAdd($start, $text->{size}); - - push(@{$result}, [$main::opt_lib, $start, $finish, $start]); - } - } - - # Append special entry for the main program. This covers - # 0..max_pc_value_seen, so that we assume pc values not found in one - # of the library ranges will be treated as coming from the main - # program binary. 
- my $min_pc = HexExtend("0"); - my $max_pc = $min_pc; # find the maximal PC value in any sample - foreach my $pc (keys(%{$pcs})) { - if (HexExtend($pc) gt $max_pc) { $max_pc = HexExtend($pc); } - } - push(@{$result}, [$prog, $min_pc, $max_pc, $zero_offset]); - - return $result; -} - -# Add two hex addresses of length $address_length. -# Run pprof --test for unit test if this is changed. -sub AddressAdd { - my $addr1 = shift; - my $addr2 = shift; - my $sum; - - if ($address_length == 8) { - # Perl doesn't cope with wraparound arithmetic, so do it explicitly: - $sum = (hex($addr1)+hex($addr2)) % (0x10000000 * 16); - return sprintf("%08x", $sum); - - } else { - # Do the addition in 7-nibble chunks to trivialize carry handling. - - if ($main::opt_debug and $main::opt_test) { - print STDERR "AddressAdd $addr1 + $addr2 = "; - } - - my $a1 = substr($addr1,-7); - $addr1 = substr($addr1,0,-7); - my $a2 = substr($addr2,-7); - $addr2 = substr($addr2,0,-7); - $sum = hex($a1) + hex($a2); - my $c = 0; - if ($sum > 0xfffffff) { - $c = 1; - $sum -= 0x10000000; - } - my $r = sprintf("%07x", $sum); - - $a1 = substr($addr1,-7); - $addr1 = substr($addr1,0,-7); - $a2 = substr($addr2,-7); - $addr2 = substr($addr2,0,-7); - $sum = hex($a1) + hex($a2) + $c; - $c = 0; - if ($sum > 0xfffffff) { - $c = 1; - $sum -= 0x10000000; - } - $r = sprintf("%07x", $sum) . $r; - - $sum = hex($addr1) + hex($addr2) + $c; - if ($sum > 0xff) { $sum -= 0x100; } - $r = sprintf("%02x", $sum) . $r; - - if ($main::opt_debug and $main::opt_test) { print STDERR "$r\n"; } - - return $r; - } -} - - -# Subtract two hex addresses of length $address_length. -# Run pprof --test for unit test if this is changed. 
-sub AddressSub { - my $addr1 = shift; - my $addr2 = shift; - my $diff; - - if ($address_length == 8) { - # Perl doesn't cope with wraparound arithmetic, so do it explicitly: - $diff = (hex($addr1)-hex($addr2)) % (0x10000000 * 16); - return sprintf("%08x", $diff); - - } else { - # Do the addition in 7-nibble chunks to trivialize borrow handling. - # if ($main::opt_debug) { print STDERR "AddressSub $addr1 - $addr2 = "; } - - my $a1 = hex(substr($addr1,-7)); - $addr1 = substr($addr1,0,-7); - my $a2 = hex(substr($addr2,-7)); - $addr2 = substr($addr2,0,-7); - my $b = 0; - if ($a2 > $a1) { - $b = 1; - $a1 += 0x10000000; - } - $diff = $a1 - $a2; - my $r = sprintf("%07x", $diff); - - $a1 = hex(substr($addr1,-7)); - $addr1 = substr($addr1,0,-7); - $a2 = hex(substr($addr2,-7)) + $b; - $addr2 = substr($addr2,0,-7); - $b = 0; - if ($a2 > $a1) { - $b = 1; - $a1 += 0x10000000; - } - $diff = $a1 - $a2; - $r = sprintf("%07x", $diff) . $r; - - $a1 = hex($addr1); - $a2 = hex($addr2) + $b; - if ($a2 > $a1) { $a1 += 0x100; } - $diff = $a1 - $a2; - $r = sprintf("%02x", $diff) . $r; - - # if ($main::opt_debug) { print STDERR "$r\n"; } - - return $r; - } -} - -# Increment a hex addresses of length $address_length. -# Run pprof --test for unit test if this is changed. -sub AddressInc { - my $addr = shift; - my $sum; - - if ($address_length == 8) { - # Perl doesn't cope with wraparound arithmetic, so do it explicitly: - $sum = (hex($addr)+1) % (0x10000000 * 16); - return sprintf("%08x", $sum); - - } else { - # Do the addition in 7-nibble chunks to trivialize carry handling. - # We are always doing this to step through the addresses in a function, - # and will almost never overflow the first chunk, so we check for this - # case and exit early. - - # if ($main::opt_debug) { print STDERR "AddressInc $addr1 = "; } - - my $a1 = substr($addr,-7); - $addr = substr($addr,0,-7); - $sum = hex($a1) + 1; - my $r = sprintf("%07x", $sum); - if ($sum <= 0xfffffff) { - $r = $addr . 
$r; - # if ($main::opt_debug) { print STDERR "$r\n"; } - return HexExtend($r); - } else { - $r = "0000000"; - } - - $a1 = substr($addr,-7); - $addr = substr($addr,0,-7); - $sum = hex($a1) + 1; - $r = sprintf("%07x", $sum) . $r; - if ($sum <= 0xfffffff) { - $r = $addr . $r; - # if ($main::opt_debug) { print STDERR "$r\n"; } - return HexExtend($r); - } else { - $r = "00000000000000"; - } - - $sum = hex($addr) + 1; - if ($sum > 0xff) { $sum -= 0x100; } - $r = sprintf("%02x", $sum) . $r; - - # if ($main::opt_debug) { print STDERR "$r\n"; } - return $r; - } -} - -# Extract symbols for all PC values found in profile -sub ExtractSymbols { - my $libs = shift; - my $pcset = shift; - - my $symbols = {}; - - # Map each PC value to the containing library. To make this faster, - # we sort libraries by their starting pc value (highest first), and - # advance through the libraries as we advance the pc. Sometimes the - # addresses of libraries may overlap with the addresses of the main - # binary, so to make sure the libraries 'win', we iterate over the - # libraries in reverse order (which assumes the binary doesn't start - # in the middle of a library, which seems a fair assumption). - my @pcs = (sort { $a cmp $b } keys(%{$pcset})); # pcset is 0-extended strings - foreach my $lib (sort {$b->[1] cmp $a->[1]} @{$libs}) { - my $libname = $lib->[0]; - my $start = $lib->[1]; - my $finish = $lib->[2]; - my $offset = $lib->[3]; - - # Get list of pcs that belong in this library. - my $contained = []; - my ($start_pc_index, $finish_pc_index); - # Find smallest finish_pc_index such that $finish < $pc[$finish_pc_index]. - for ($finish_pc_index = $#pcs + 1; $finish_pc_index > 0; - $finish_pc_index--) { - last if $pcs[$finish_pc_index - 1] le $finish; - } - # Find smallest start_pc_index such that $start <= $pc[$start_pc_index]. 
- for ($start_pc_index = $finish_pc_index; $start_pc_index > 0; - $start_pc_index--) { - last if $pcs[$start_pc_index - 1] lt $start; - } - # This keeps PC values higher than $pc[$finish_pc_index] in @pcs, - # in case there are overlaps in libraries and the main binary. - @{$contained} = splice(@pcs, $start_pc_index, - $finish_pc_index - $start_pc_index); - # Map to symbols - MapToSymbols($libname, AddressSub($start, $offset), $contained, $symbols); - } - - return $symbols; -} - -# Map list of PC values to symbols for a given image -sub MapToSymbols { - my $image = shift; - my $offset = shift; - my $pclist = shift; - my $symbols = shift; - - my $debug = 0; - - # Ignore empty binaries - if ($#{$pclist} < 0) { return; } - - # Figure out the addr2line command to use - my $addr2line = $obj_tool_map{"addr2line"}; - my $cmd = ShellEscape($addr2line, "-f", "-C", "-e", $image); - if (exists $obj_tool_map{"addr2line_pdb"}) { - $addr2line = $obj_tool_map{"addr2line_pdb"}; - $cmd = ShellEscape($addr2line, "--demangle", "-f", "-C", "-e", $image); - } - - # If "addr2line" isn't installed on the system at all, just use - # nm to get what info we can (function names, but not line numbers). - if (system(ShellEscape($addr2line, "--help") . " >$dev_null 2>&1") != 0) { - MapSymbolsWithNM($image, $offset, $pclist, $symbols); - return; - } - - # "addr2line -i" can produce a variable number of lines per input - # address, with no separator that allows us to tell when data for - # the next address starts. So we find the address for a special - # symbol (_fini) and interleave this address between all real - # addresses passed to addr2line. The name of this special symbol - # can then be used as a separator. - $sep_address = undef; # May be filled in by MapSymbolsWithNM() - my $nm_symbols = {}; - MapSymbolsWithNM($image, $offset, $pclist, $nm_symbols); - if (defined($sep_address)) { - # Only add " -i" to addr2line if the binary supports it. 
- # addr2line --help returns 0, but not if it sees an unknown flag first. - if (system("$cmd -i --help >$dev_null 2>&1") == 0) { - $cmd .= " -i"; - } else { - $sep_address = undef; # no need for sep_address if we don't support -i - } - } - - # Make file with all PC values with intervening 'sep_address' so - # that we can reliably detect the end of inlined function list - open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n"); - if ($debug) { print("---- $image ---\n"); } - for (my $i = 0; $i <= $#{$pclist}; $i++) { - # addr2line always reads hex addresses, and does not need '0x' prefix. - if ($debug) { printf STDERR ("%s\n", $pclist->[$i]); } - printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset)); - if (defined($sep_address)) { - printf ADDRESSES ("%s\n", $sep_address); - } - } - close(ADDRESSES); - if ($debug) { - print("----\n"); - system("cat", $main::tmpfile_sym); - print("----\n"); - system("$cmd < " . ShellEscape($main::tmpfile_sym)); - print("----\n"); - } - - open(SYMBOLS, "$cmd <" . ShellEscape($main::tmpfile_sym) . " |") - || error("$cmd: $!\n"); - my $count = 0; # Index in pclist - while () { - # Read fullfunction and filelineinfo from next pair of lines - s/\r?\n$//g; - my $fullfunction = $_; - $_ = ; - s/\r?\n$//g; - my $filelinenum = $_; - - if (defined($sep_address) && $fullfunction eq $sep_symbol) { - # Terminating marker for data for this address - $count++; - next; - } - - $filelinenum =~ s|\\|/|g; # turn windows-style paths into unix-style paths - - my $pcstr = $pclist->[$count]; - my $function = ShortFunctionName($fullfunction); - my $nms = $nm_symbols->{$pcstr}; - if (defined($nms)) { - if ($fullfunction eq '??') { - # nm found a symbol for us. - $function = $nms->[0]; - $fullfunction = $nms->[2]; - } else { - # MapSymbolsWithNM tags each routine with its starting address, - # useful in case the image has multiple occurrences of this - # routine. 
(It uses a syntax that resembles template paramters, - # that are automatically stripped out by ShortFunctionName().) - # addr2line does not provide the same information. So we check - # if nm disambiguated our symbol, and if so take the annotated - # (nm) version of the routine-name. TODO(csilvers): this won't - # catch overloaded, inlined symbols, which nm doesn't see. - # Better would be to do a check similar to nm's, in this fn. - if ($nms->[2] =~ m/^\Q$function\E/) { # sanity check it's the right fn - $function = $nms->[0]; - $fullfunction = $nms->[2]; - } - } - } - - # Prepend to accumulated symbols for pcstr - # (so that caller comes before callee) - my $sym = $symbols->{$pcstr}; - if (!defined($sym)) { - $sym = []; - $symbols->{$pcstr} = $sym; - } - unshift(@{$sym}, $function, $filelinenum, $fullfunction); - if ($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); } - if (!defined($sep_address)) { - # Inlining is off, so this entry ends immediately - $count++; - } - } - close(SYMBOLS); -} - -# Use nm to map the list of referenced PCs to symbols. Return true iff we -# are able to read procedure information via nm. -sub MapSymbolsWithNM { - my $image = shift; - my $offset = shift; - my $pclist = shift; - my $symbols = shift; - - # Get nm output sorted by increasing address - my $symbol_table = GetProcedureBoundaries($image, "."); - if (!%{$symbol_table}) { - return 0; - } - # Start addresses are already the right length (8 or 16 hex digits). - my @names = sort { $symbol_table->{$a}->[0] cmp $symbol_table->{$b}->[0] } - keys(%{$symbol_table}); - - if ($#names < 0) { - # No symbols: just use addresses - foreach my $pc (@{$pclist}) { - my $pcstr = "0x" . 
$pc; - $symbols->{$pc} = [$pcstr, "?", $pcstr]; - } - return 0; - } - - # Sort addresses so we can do a join against nm output - my $index = 0; - my $fullname = $names[0]; - my $name = ShortFunctionName($fullname); - foreach my $pc (sort { $a cmp $b } @{$pclist}) { - # Adjust for mapped offset - my $mpc = AddressSub($pc, $offset); - while (($index < $#names) && ($mpc ge $symbol_table->{$fullname}->[1])){ - $index++; - $fullname = $names[$index]; - $name = ShortFunctionName($fullname); - } - if ($mpc lt $symbol_table->{$fullname}->[1]) { - $symbols->{$pc} = [$name, "?", $fullname]; - } else { - my $pcstr = "0x" . $pc; - $symbols->{$pc} = [$pcstr, "?", $pcstr]; - } - } - return 1; -} - -sub ShortFunctionName { - my $function = shift; - while ($function =~ s/\([^()]*\)(\s*const)?//g) { } # Argument types - while ($function =~ s/<[^<>]*>//g) { } # Remove template arguments - $function =~ s/^.*\s+(\w+::)/$1/; # Remove leading type - return $function; -} - -# Trim overly long symbols found in disassembler output -sub CleanDisassembly { - my $d = shift; - while ($d =~ s/\([^()%]*\)(\s*const)?//g) { } # Argument types, not (%rax) - while ($d =~ s/(\w+)<[^<>]*>/$1/g) { } # Remove template arguments - return $d; -} - -# Clean file name for display -sub CleanFileName { - my ($f) = @_; - $f =~ s|^/proc/self/cwd/||; - $f =~ s|^\./||; - return $f; -} - -# Make address relative to section and clean up for display -sub UnparseAddress { - my ($offset, $address) = @_; - $address = AddressSub($address, $offset); - $address =~ s/^0x//; - $address =~ s/^0*//; - return $address; -} - -##### Miscellaneous ##### - -# Find the right versions of the above object tools to use. The -# argument is the program file being analyzed, and should be an ELF -# 32-bit or ELF 64-bit executable file. 
The location of the tools -# is determined by considering the following options in this order: -# 1) --tools option, if set -# 2) PPROF_TOOLS environment variable, if set -# 3) the environment -sub ConfigureObjTools { - my $prog_file = shift; - - # Check for the existence of $prog_file because /usr/bin/file does not - # predictably return error status in prod. - (-e $prog_file) || error("$prog_file does not exist.\n"); - - my $file_type = undef; - if (-e "/usr/bin/file") { - # Follow symlinks (at least for systems where "file" supports that). - my $escaped_prog_file = ShellEscape($prog_file); - $file_type = `/usr/bin/file -L $escaped_prog_file 2>$dev_null || - /usr/bin/file $escaped_prog_file`; - } elsif ($^O == "MSWin32") { - $file_type = "MS Windows"; - } else { - print STDERR "WARNING: Can't determine the file type of $prog_file"; - } - - if ($file_type =~ /64-bit/) { - # Change $address_length to 16 if the program file is ELF 64-bit. - # We can't detect this from many (most?) heap or lock contention - # profiles, since the actual addresses referenced are generally in low - # memory even for 64-bit programs. - $address_length = 16; - } - - if ($file_type =~ /MS Windows/) { - # For windows, we provide a version of nm and addr2line as part of - # the opensource release, which is capable of parsing - # Windows-style PDB executables. It should live in the path, or - # in the same directory as pprof. - $obj_tool_map{"nm_pdb"} = "nm-pdb"; - $obj_tool_map{"addr2line_pdb"} = "addr2line-pdb"; - } - - if ($file_type =~ /Mach-O/) { - # OS X uses otool to examine Mach-O files, rather than objdump. - $obj_tool_map{"otool"} = "otool"; - $obj_tool_map{"addr2line"} = "false"; # no addr2line - $obj_tool_map{"objdump"} = "false"; # no objdump - } - - # Go fill in %obj_tool_map with the pathnames to use: - foreach my $tool (keys %obj_tool_map) { - $obj_tool_map{$tool} = ConfigureTool($obj_tool_map{$tool}); - } -} - -# Returns the path of a caller-specified object tool. 
If --tools or -# PPROF_TOOLS are specified, then returns the full path to the tool -# with that prefix. Otherwise, returns the path unmodified (which -# means we will look for it on PATH). -sub ConfigureTool { - my $tool = shift; - my $path; - - # --tools (or $PPROF_TOOLS) is a comma separated list, where each - # item is either a) a pathname prefix, or b) a map of the form - # :. First we look for an entry of type (b) for our - # tool. If one is found, we use it. Otherwise, we consider all the - # pathname prefixes in turn, until one yields an existing file. If - # none does, we use a default path. - my $tools = $main::opt_tools || $ENV{"PPROF_TOOLS"} || ""; - if ($tools =~ m/(,|^)\Q$tool\E:([^,]*)/) { - $path = $2; - # TODO(csilvers): sanity-check that $path exists? Hard if it's relative. - } elsif ($tools ne '') { - foreach my $prefix (split(',', $tools)) { - next if ($prefix =~ /:/); # ignore "tool:fullpath" entries in the list - if (-x $prefix . $tool) { - $path = $prefix . $tool; - last; - } - } - if (!$path) { - error("No '$tool' found with prefix specified by " . - "--tools (or \$PPROF_TOOLS) '$tools'\n"); - } - } else { - # ... otherwise use the version that exists in the same directory as - # pprof. If there's nothing there, use $PATH. - $0 =~ m,[^/]*$,; # this is everything after the last slash - my $dirname = $`; # this is everything up to and including the last slash - if (-x "$dirname$tool") { - $path = "$dirname$tool"; - } else { - $path = $tool; - } - } - if ($main::opt_debug) { print STDERR "Using '$path' for '$tool'.\n"; } - return $path; -} - -sub ShellEscape { - my @escaped_words = (); - foreach my $word (@_) { - my $escaped_word = $word; - if ($word =~ m![^a-zA-Z0-9/.,_=-]!) 
{ # check for anything not in whitelist - $escaped_word =~ s/'/'\\''/; - $escaped_word = "'$escaped_word'"; - } - push(@escaped_words, $escaped_word); - } - return join(" ", @escaped_words); -} - -sub cleanup { - unlink($main::tmpfile_sym); - unlink(keys %main::tempnames); - - # We leave any collected profiles in $HOME/pprof in case the user wants - # to look at them later. We print a message informing them of this. - if ((scalar(@main::profile_files) > 0) && - defined($main::collected_profile)) { - if (scalar(@main::profile_files) == 1) { - print STDERR "Dynamically gathered profile is in $main::collected_profile\n"; - } - print STDERR "If you want to investigate this profile further, you can do:\n"; - print STDERR "\n"; - print STDERR " pprof \\\n"; - print STDERR " $main::prog \\\n"; - print STDERR " $main::collected_profile\n"; - print STDERR "\n"; - } -} - -sub sighandler { - cleanup(); - exit(1); -} - -sub error { - my $msg = shift; - print STDERR $msg; - cleanup(); - exit(1); -} - - -# Run $nm_command and get all the resulting procedure boundaries whose -# names match "$regexp" and returns them in a hashtable mapping from -# procedure name to a two-element vector of [start address, end address] -sub GetProcedureBoundariesViaNm { - my $escaped_nm_command = shift; # shell-escaped - my $regexp = shift; - - my $symbol_table = {}; - open(NM, "$escaped_nm_command |") || error("$escaped_nm_command: $!\n"); - my $last_start = "0"; - my $routine = ""; - while () { - s/\r//g; # turn windows-looking lines into unix-looking lines - if (m/^\s*([0-9a-f]+) (.) (..*)/) { - my $start_val = $1; - my $type = $2; - my $this_routine = $3; - - # It's possible for two symbols to share the same address, if - # one is a zero-length variable (like __start_google_malloc) or - # one symbol is a weak alias to another (like __libc_malloc). - # In such cases, we want to ignore all values except for the - # actual symbol, which in nm-speak has type "T". 
The logic - # below does this, though it's a bit tricky: what happens when - # we have a series of lines with the same address, is the first - # one gets queued up to be processed. However, it won't - # *actually* be processed until later, when we read a line with - # a different address. That means that as long as we're reading - # lines with the same address, we have a chance to replace that - # item in the queue, which we do whenever we see a 'T' entry -- - # that is, a line with type 'T'. If we never see a 'T' entry, - # we'll just go ahead and process the first entry (which never - # got touched in the queue), and ignore the others. - if ($start_val eq $last_start && $type =~ /t/i) { - # We are the 'T' symbol at this address, replace previous symbol. - $routine = $this_routine; - next; - } elsif ($start_val eq $last_start) { - # We're not the 'T' symbol at this address, so ignore us. - next; - } - - if ($this_routine eq $sep_symbol) { - $sep_address = HexExtend($start_val); - } - - # Tag this routine with the starting address in case the image - # has multiple occurrences of this routine. We use a syntax - # that resembles template paramters that are automatically - # stripped out by ShortFunctionName() - $this_routine .= "<$start_val>"; - - if (defined($routine) && $routine =~ m/$regexp/) { - $symbol_table->{$routine} = [HexExtend($last_start), - HexExtend($start_val)]; - } - $last_start = $start_val; - $routine = $this_routine; - } elsif (m/^Loaded image name: (.+)/) { - # The win32 nm workalike emits information about the binary it is using. - if ($main::opt_debug) { print STDERR "Using Image $1\n"; } - } elsif (m/^PDB file name: (.+)/) { - # The win32 nm workalike emits information about the pdb it is using. - if ($main::opt_debug) { print STDERR "Using PDB $1\n"; } - } - } - close(NM); - # Handle the last line in the nm output. Unfortunately, we don't know - # how big this last symbol is, because we don't know how big the file - # is. 
For now, we just give it a size of 0. - # TODO(csilvers): do better here. - if (defined($routine) && $routine =~ m/$regexp/) { - $symbol_table->{$routine} = [HexExtend($last_start), - HexExtend($last_start)]; - } - return $symbol_table; -} - -# Gets the procedure boundaries for all routines in "$image" whose names -# match "$regexp" and returns them in a hashtable mapping from procedure -# name to a two-element vector of [start address, end address]. -# Will return an empty map if nm is not installed or not working properly. -sub GetProcedureBoundaries { - my $image = shift; - my $regexp = shift; - - # If $image doesn't start with /, then put ./ in front of it. This works - # around an obnoxious bug in our probing of nm -f behavior. - # "nm -f $image" is supposed to fail on GNU nm, but if: - # - # a. $image starts with [BbSsPp] (for example, bin/foo/bar), AND - # b. you have a.out in your current directory (a not uncommon occurence) - # - # then "nm -f $image" succeeds because -f only looks at the first letter of - # the argument, which looks valid because it's [BbSsPp], and then since - # there's no image provided, it looks for a.out and finds it. - # - # This regex makes sure that $image starts with . or /, forcing the -f - # parsing to fail since . and / are not valid formats. - $image =~ s#^[^/]#./$&#; - - # For libc libraries, the copy in /usr/lib/debug contains debugging symbols - my $debugging = DebuggingLibrary($image); - if ($debugging) { - $image = $debugging; - } - - my $nm = $obj_tool_map{"nm"}; - my $cppfilt = $obj_tool_map{"c++filt"}; - - # nm can fail for two reasons: 1) $image isn't a debug library; 2) nm - # binary doesn't support --demangle. In addition, for OS X we need - # to use the -f flag to get 'flat' nm output (otherwise we don't sort - # properly and get incorrect results). Unfortunately, GNU nm uses -f - # in an incompatible way. So first we test whether our nm supports - # --demangle and -f. 
- my $demangle_flag = ""; - my $cppfilt_flag = ""; - my $to_devnull = ">$dev_null 2>&1"; - if (system(ShellEscape($nm, "--demangle", "image") . $to_devnull) == 0) { - # In this mode, we do "nm --demangle " - $demangle_flag = "--demangle"; - $cppfilt_flag = ""; - } elsif (system(ShellEscape($cppfilt, $image) . $to_devnull) == 0) { - # In this mode, we do "nm | c++filt" - $cppfilt_flag = " | " . ShellEscape($cppfilt); - }; - my $flatten_flag = ""; - if (system(ShellEscape($nm, "-f", $image) . $to_devnull) == 0) { - $flatten_flag = "-f"; - } - - # Finally, in the case $imagie isn't a debug library, we try again with - # -D to at least get *exported* symbols. If we can't use --demangle, - # we use c++filt instead, if it exists on this system. - my @nm_commands = (ShellEscape($nm, "-n", $flatten_flag, $demangle_flag, - $image) . " 2>$dev_null $cppfilt_flag", - ShellEscape($nm, "-D", "-n", $flatten_flag, $demangle_flag, - $image) . " 2>$dev_null $cppfilt_flag", - # 6nm is for Go binaries - ShellEscape("6nm", "$image") . " 2>$dev_null | sort", - ); - - # If the executable is an MS Windows PDB-format executable, we'll - # have set up obj_tool_map("nm_pdb"). In this case, we actually - # want to use both unix nm and windows-specific nm_pdb, since - # PDB-format executables can apparently include dwarf .o files. - if (exists $obj_tool_map{"nm_pdb"}) { - push(@nm_commands, - ShellEscape($obj_tool_map{"nm_pdb"}, "--demangle", $image) - . " 2>$dev_null"); - } - - foreach my $nm_command (@nm_commands) { - my $symbol_table = GetProcedureBoundariesViaNm($nm_command, $regexp); - return $symbol_table if (%{$symbol_table}); - } - my $symbol_table = {}; - return $symbol_table; -} - - -# The test vectors for AddressAdd/Sub/Inc are 8-16-nibble hex strings. -# To make them more readable, we add underscores at interesting places. -# This routine removes the underscores, producing the canonical representation -# used by pprof to represent addresses, particularly in the tested routines. 
-sub CanonicalHex { - my $arg = shift; - return join '', (split '_',$arg); -} - - -# Unit test for AddressAdd: -sub AddressAddUnitTest { - my $test_data_8 = shift; - my $test_data_16 = shift; - my $error_count = 0; - my $fail_count = 0; - my $pass_count = 0; - # print STDERR "AddressAddUnitTest: ", 1+$#{$test_data_8}, " tests\n"; - - # First a few 8-nibble addresses. Note that this implementation uses - # plain old arithmetic, so a quick sanity check along with verifying what - # happens to overflow (we want it to wrap): - $address_length = 8; - foreach my $row (@{$test_data_8}) { - if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } - my $sum = AddressAdd ($row->[0], $row->[1]); - if ($sum ne $row->[2]) { - printf STDERR "ERROR: %s != %s + %s = %s\n", $sum, - $row->[0], $row->[1], $row->[2]; - ++$fail_count; - } else { - ++$pass_count; - } - } - printf STDERR "AddressAdd 32-bit tests: %d passes, %d failures\n", - $pass_count, $fail_count; - $error_count = $fail_count; - $fail_count = 0; - $pass_count = 0; - - # Now 16-nibble addresses. - $address_length = 16; - foreach my $row (@{$test_data_16}) { - if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } - my $sum = AddressAdd (CanonicalHex($row->[0]), CanonicalHex($row->[1])); - my $expected = join '', (split '_',$row->[2]); - if ($sum ne CanonicalHex($row->[2])) { - printf STDERR "ERROR: %s != %s + %s = %s\n", $sum, - $row->[0], $row->[1], $row->[2]; - ++$fail_count; - } else { - ++$pass_count; - } - } - printf STDERR "AddressAdd 64-bit tests: %d passes, %d failures\n", - $pass_count, $fail_count; - $error_count += $fail_count; - - return $error_count; -} - - -# Unit test for AddressSub: -sub AddressSubUnitTest { - my $test_data_8 = shift; - my $test_data_16 = shift; - my $error_count = 0; - my $fail_count = 0; - my $pass_count = 0; - # print STDERR "AddressSubUnitTest: ", 1+$#{$test_data_8}, " tests\n"; - - # First a few 8-nibble addresses. 
Note that this implementation uses - # plain old arithmetic, so a quick sanity check along with verifying what - # happens to overflow (we want it to wrap): - $address_length = 8; - foreach my $row (@{$test_data_8}) { - if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } - my $sum = AddressSub ($row->[0], $row->[1]); - if ($sum ne $row->[3]) { - printf STDERR "ERROR: %s != %s - %s = %s\n", $sum, - $row->[0], $row->[1], $row->[3]; - ++$fail_count; - } else { - ++$pass_count; - } - } - printf STDERR "AddressSub 32-bit tests: %d passes, %d failures\n", - $pass_count, $fail_count; - $error_count = $fail_count; - $fail_count = 0; - $pass_count = 0; - - # Now 16-nibble addresses. - $address_length = 16; - foreach my $row (@{$test_data_16}) { - if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } - my $sum = AddressSub (CanonicalHex($row->[0]), CanonicalHex($row->[1])); - if ($sum ne CanonicalHex($row->[3])) { - printf STDERR "ERROR: %s != %s - %s = %s\n", $sum, - $row->[0], $row->[1], $row->[3]; - ++$fail_count; - } else { - ++$pass_count; - } - } - printf STDERR "AddressSub 64-bit tests: %d passes, %d failures\n", - $pass_count, $fail_count; - $error_count += $fail_count; - - return $error_count; -} - - -# Unit test for AddressInc: -sub AddressIncUnitTest { - my $test_data_8 = shift; - my $test_data_16 = shift; - my $error_count = 0; - my $fail_count = 0; - my $pass_count = 0; - # print STDERR "AddressIncUnitTest: ", 1+$#{$test_data_8}, " tests\n"; - - # First a few 8-nibble addresses. 
Note that this implementation uses - # plain old arithmetic, so a quick sanity check along with verifying what - # happens to overflow (we want it to wrap): - $address_length = 8; - foreach my $row (@{$test_data_8}) { - if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } - my $sum = AddressInc ($row->[0]); - if ($sum ne $row->[4]) { - printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum, - $row->[0], $row->[4]; - ++$fail_count; - } else { - ++$pass_count; - } - } - printf STDERR "AddressInc 32-bit tests: %d passes, %d failures\n", - $pass_count, $fail_count; - $error_count = $fail_count; - $fail_count = 0; - $pass_count = 0; - - # Now 16-nibble addresses. - $address_length = 16; - foreach my $row (@{$test_data_16}) { - if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } - my $sum = AddressInc (CanonicalHex($row->[0])); - if ($sum ne CanonicalHex($row->[4])) { - printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum, - $row->[0], $row->[4]; - ++$fail_count; - } else { - ++$pass_count; - } - } - printf STDERR "AddressInc 64-bit tests: %d passes, %d failures\n", - $pass_count, $fail_count; - $error_count += $fail_count; - - return $error_count; -} - - -# Driver for unit tests. -# Currently just the address add/subtract/increment routines for 64-bit. -sub RunUnitTests { - my $error_count = 0; - - # This is a list of tuples [a, b, a+b, a-b, a+1] - my $unit_test_data_8 = [ - [qw(aaaaaaaa 50505050 fafafafa 5a5a5a5a aaaaaaab)], - [qw(50505050 aaaaaaaa fafafafa a5a5a5a6 50505051)], - [qw(ffffffff aaaaaaaa aaaaaaa9 55555555 00000000)], - [qw(00000001 ffffffff 00000000 00000002 00000002)], - [qw(00000001 fffffff0 fffffff1 00000011 00000002)], - ]; - my $unit_test_data_16 = [ - # The implementation handles data in 7-nibble chunks, so those are the - # interesting boundaries. 
- [qw(aaaaaaaa 50505050 - 00_000000f_afafafa 00_0000005_a5a5a5a 00_000000a_aaaaaab)], - [qw(50505050 aaaaaaaa - 00_000000f_afafafa ff_ffffffa_5a5a5a6 00_0000005_0505051)], - [qw(ffffffff aaaaaaaa - 00_000001a_aaaaaa9 00_0000005_5555555 00_0000010_0000000)], - [qw(00000001 ffffffff - 00_0000010_0000000 ff_ffffff0_0000002 00_0000000_0000002)], - [qw(00000001 fffffff0 - 00_000000f_ffffff1 ff_ffffff0_0000011 00_0000000_0000002)], - - [qw(00_a00000a_aaaaaaa 50505050 - 00_a00000f_afafafa 00_a000005_a5a5a5a 00_a00000a_aaaaaab)], - [qw(0f_fff0005_0505050 aaaaaaaa - 0f_fff000f_afafafa 0f_ffefffa_5a5a5a6 0f_fff0005_0505051)], - [qw(00_000000f_fffffff 01_800000a_aaaaaaa - 01_800001a_aaaaaa9 fe_8000005_5555555 00_0000010_0000000)], - [qw(00_0000000_0000001 ff_fffffff_fffffff - 00_0000000_0000000 00_0000000_0000002 00_0000000_0000002)], - [qw(00_0000000_0000001 ff_fffffff_ffffff0 - ff_fffffff_ffffff1 00_0000000_0000011 00_0000000_0000002)], - ]; - - $error_count += AddressAddUnitTest($unit_test_data_8, $unit_test_data_16); - $error_count += AddressSubUnitTest($unit_test_data_8, $unit_test_data_16); - $error_count += AddressIncUnitTest($unit_test_data_8, $unit_test_data_16); - if ($error_count > 0) { - print STDERR $error_count, " errors: FAILED\n"; - } else { - print STDERR "PASS\n"; - } - exit ($error_count); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/ChangeLog mariadb-5.5-5.5.40/extra/jemalloc/ChangeLog --- mariadb-5.5-5.5.39/extra/jemalloc/ChangeLog 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/ChangeLog 1970-01-01 00:00:00.000000000 +0000 @@ -1,425 +0,0 @@ -Following are change highlights associated with official releases. Important -bug fixes are all mentioned, but internal enhancements are omitted here for -brevity (even though they are more fun to write about). 
Much more detail can be -found in the git revision history: - - http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git - git://canonware.com/jemalloc.git - -* 3.3.1 (March 6, 2013) - - This version fixes bugs that are typically encountered only when utilizing - custom run-time options. - - Bug fixes: - - Fix a locking order bug that could cause deadlock during fork if heap - profiling were enabled. - - Fix a chunk recycling bug that could cause the allocator to lose track of - whether a chunk was zeroed. On FreeBSD, NetBSD, and OS X, it could cause - corruption if allocating via sbrk(2) (unlikely unless running with the - "dss:primary" option specified). This was completely harmless on Linux - unless using mlockall(2) (and unlikely even then, unless the - --disable-munmap configure option or the "dss:primary" option was - specified). This regression was introduced in 3.1.0 by the - mlockall(2)/madvise(2) interaction fix. - - Fix TLS-related memory corruption that could occur during thread exit if the - thread never allocated memory. Only the quarantine and prof facilities were - susceptible. - - Fix two quarantine bugs: - + Internal reallocation of the quarantined object array leaked the old - array. - + Reallocation failure for internal reallocation of the quarantined object - array (very unlikely) resulted in memory corruption. - - Fix Valgrind integration to annotate all internally allocated memory in a - way that keeps Valgrind happy about internal data structure access. - - Fix building for s390 systems. - -* 3.3.0 (January 23, 2013) - - This version includes a few minor performance improvements in addition to the - listed new features and bug fixes. - - New features: - - Add clipping support to lg_chunk option processing. - - Add the --enable-ivsalloc option. - - Add the --without-export option. - - Add the --disable-zone-allocator option. - - Bug fixes: - - Fix "arenas.extend" mallctl to output the number of arenas. 
- - Fix chunk_recycyle() to unconditionally inform Valgrind that returned memory - is undefined. - - Fix build break on FreeBSD related to alloca.h. - -* 3.2.0 (November 9, 2012) - - In addition to a couple of bug fixes, this version modifies page run - allocation and dirty page purging algorithms in order to better control - page-level virtual memory fragmentation. - - Incompatible changes: - - Change the "opt.lg_dirty_mult" default from 5 to 3 (32:1 to 8:1). - - Bug fixes: - - Fix dss/mmap allocation precedence code to use recyclable mmap memory only - after primary dss allocation fails. - - Fix deadlock in the "arenas.purge" mallctl. This regression was introduced - in 3.1.0 by the addition of the "arena..purge" mallctl. - -* 3.1.0 (October 16, 2012) - - New features: - - Auto-detect whether running inside Valgrind, thus removing the need to - manually specify MALLOC_CONF=valgrind:true. - - Add the "arenas.extend" mallctl, which allows applications to create - manually managed arenas. - - Add the ALLOCM_ARENA() flag for {,r,d}allocm(). - - Add the "opt.dss", "arena..dss", and "stats.arenas..dss" mallctls, - which provide control over dss/mmap precedence. - - Add the "arena..purge" mallctl, which obsoletes "arenas.purge". - - Define LG_QUANTUM for hppa. - - Incompatible changes: - - Disable tcache by default if running inside Valgrind, in order to avoid - making unallocated objects appear reachable to Valgrind. - - Drop const from malloc_usable_size() argument on Linux. - - Bug fixes: - - Fix heap profiling crash if sampled object is freed via realloc(p, 0). - - Remove const from __*_hook variable declarations, so that glibc can modify - them during process forking. - - Fix mlockall(2)/madvise(2) interaction. - - Fix fork(2)-related deadlocks. - - Fix error return value for "thread.tcache.enabled" mallctl. 
- -* 3.0.0 (May 11, 2012) - - Although this version adds some major new features, the primary focus is on - internal code cleanup that facilitates maintainability and portability, most - of which is not reflected in the ChangeLog. This is the first release to - incorporate substantial contributions from numerous other developers, and the - result is a more broadly useful allocator (see the git revision history for - contribution details). Note that the license has been unified, thanks to - Facebook granting a license under the same terms as the other copyright - holders (see COPYING). - - New features: - - Implement Valgrind support, redzones, and quarantine. - - Add support for additional platforms: - + FreeBSD - + Mac OS X Lion - + MinGW - + Windows (no support yet for replacing the system malloc) - - Add support for additional architectures: - + MIPS - + SH4 - + Tilera - - Add support for cross compiling. - - Add nallocm(), which rounds a request size up to the nearest size class - without actually allocating. - - Implement aligned_alloc() (blame C11). - - Add the "thread.tcache.enabled" mallctl. - - Add the "opt.prof_final" mallctl. - - Update pprof (from gperftools 2.0). - - Add the --with-mangling option. - - Add the --disable-experimental option. - - Add the --disable-munmap option, and make it the default on Linux. - - Add the --enable-mremap option, which disables use of mremap(2) by default. - - Incompatible changes: - - Enable stats by default. - - Enable fill by default. - - Disable lazy locking by default. - - Rename the "tcache.flush" mallctl to "thread.tcache.flush". - - Rename the "arenas.pagesize" mallctl to "arenas.page". - - Change the "opt.lg_prof_sample" default from 0 to 19 (1 B to 512 KiB). - - Change the "opt.prof_accum" default from true to false. - - Removed features: - - Remove the swap feature, including the "config.swap", "swap.avail", - "swap.prezeroed", "swap.nfds", and "swap.fds" mallctls. 
- - Remove highruns statistics, including the - "stats.arenas..bins..highruns" and - "stats.arenas..lruns..highruns" mallctls. - - As part of small size class refactoring, remove the "opt.lg_[qc]space_max", - "arenas.cacheline", "arenas.subpage", "arenas.[tqcs]space_{min,max}", and - "arenas.[tqcs]bins" mallctls. - - Remove the "arenas.chunksize" mallctl. - - Remove the "opt.lg_prof_tcmax" option. - - Remove the "opt.lg_prof_bt_max" option. - - Remove the "opt.lg_tcache_gc_sweep" option. - - Remove the --disable-tiny option, including the "config.tiny" mallctl. - - Remove the --enable-dynamic-page-shift configure option. - - Remove the --enable-sysv configure option. - - Bug fixes: - - Fix a statistics-related bug in the "thread.arena" mallctl that could cause - invalid statistics and crashes. - - Work around TLS deallocation via free() on Linux. This bug could cause - write-after-free memory corruption. - - Fix a potential deadlock that could occur during interval- and - growth-triggered heap profile dumps. - - Fix large calloc() zeroing bugs due to dropping chunk map unzeroed flags. - - Fix chunk_alloc_dss() to stop claiming memory is zeroed. This bug could - cause memory corruption and crashes with --enable-dss specified. - - Fix fork-related bugs that could cause deadlock in children between fork - and exec. - - Fix malloc_stats_print() to honor 'b' and 'l' in the opts parameter. - - Fix realloc(p, 0) to act like free(p). - - Do not enforce minimum alignment in memalign(). - - Check for NULL pointer in malloc_usable_size(). - - Fix an off-by-one heap profile statistics bug that could be observed in - interval- and growth-triggered heap profiles. - - Fix the "epoch" mallctl to update cached stats even if the passed in epoch - is 0. - - Fix bin->runcur management to fix a layout policy bug. This bug did not - affect correctness. - - Fix a bug in choose_arena_hard() that potentially caused more arenas to be - initialized than necessary. 
- - Add missing "opt.lg_tcache_max" mallctl implementation. - - Use glibc allocator hooks to make mixed allocator usage less likely. - - Fix build issues for --disable-tcache. - - Don't mangle pthread_create() when --with-private-namespace is specified. - -* 2.2.5 (November 14, 2011) - - Bug fixes: - - Fix huge_ralloc() race when using mremap(2). This is a serious bug that - could cause memory corruption and/or crashes. - - Fix huge_ralloc() to maintain chunk statistics. - - Fix malloc_stats_print(..., "a") output. - -* 2.2.4 (November 5, 2011) - - Bug fixes: - - Initialize arenas_tsd before using it. This bug existed for 2.2.[0-3], as - well as for --disable-tls builds in earlier releases. - - Do not assume a 4 KiB page size in test/rallocm.c. - -* 2.2.3 (August 31, 2011) - - This version fixes numerous bugs related to heap profiling. - - Bug fixes: - - Fix a prof-related race condition. This bug could cause memory corruption, - but only occurred in non-default configurations (prof_accum:false). - - Fix off-by-one backtracing issues (make sure that prof_alloc_prep() is - excluded from backtraces). - - Fix a prof-related bug in realloc() (only triggered by OOM errors). - - Fix prof-related bugs in allocm() and rallocm(). - - Fix prof_tdata_cleanup() for --disable-tls builds. - - Fix a relative include path, to fix objdir builds. - -* 2.2.2 (July 30, 2011) - - Bug fixes: - - Fix a build error for --disable-tcache. - - Fix assertions in arena_purge() (for real this time). - - Add the --with-private-namespace option. This is a workaround for symbol - conflicts that can inadvertently arise when using static libraries. - -* 2.2.1 (March 30, 2011) - - Bug fixes: - - Implement atomic operations for x86/x64. This fixes compilation failures - for versions of gcc that are still in wide use. - - Fix an assertion in arena_purge(). 
- -* 2.2.0 (March 22, 2011) - - This version incorporates several improvements to algorithms and data - structures that tend to reduce fragmentation and increase speed. - - New features: - - Add the "stats.cactive" mallctl. - - Update pprof (from google-perftools 1.7). - - Improve backtracing-related configuration logic, and add the - --disable-prof-libgcc option. - - Bug fixes: - - Change default symbol visibility from "internal", to "hidden", which - decreases the overhead of library-internal function calls. - - Fix symbol visibility so that it is also set on OS X. - - Fix a build dependency regression caused by the introduction of the .pic.o - suffix for PIC object files. - - Add missing checks for mutex initialization failures. - - Don't use libgcc-based backtracing except on x64, where it is known to work. - - Fix deadlocks on OS X that were due to memory allocation in - pthread_mutex_lock(). - - Heap profiling-specific fixes: - + Fix memory corruption due to integer overflow in small region index - computation, when using a small enough sample interval that profiling - context pointers are stored in small run headers. - + Fix a bootstrap ordering bug that only occurred with TLS disabled. - + Fix a rallocm() rsize bug. - + Fix error detection bugs for aligned memory allocation. - -* 2.1.3 (March 14, 2011) - - Bug fixes: - - Fix a cpp logic regression (due to the "thread.{de,}allocatedp" mallctl fix - for OS X in 2.1.2). - - Fix a "thread.arena" mallctl bug. - - Fix a thread cache stats merging bug. - -* 2.1.2 (March 2, 2011) - - Bug fixes: - - Fix "thread.{de,}allocatedp" mallctl for OS X. - - Add missing jemalloc.a to build system. - -* 2.1.1 (January 31, 2011) - - Bug fixes: - - Fix aligned huge reallocation (affected allocm()). - - Fix the ALLOCM_LG_ALIGN macro definition. - - Fix a heap dumping deadlock. - - Fix a "thread.arena" mallctl bug. 
- -* 2.1.0 (December 3, 2010) - - This version incorporates some optimizations that can't quite be considered - bug fixes. - - New features: - - Use Linux's mremap(2) for huge object reallocation when possible. - - Avoid locking in mallctl*() when possible. - - Add the "thread.[de]allocatedp" mallctl's. - - Convert the manual page source from roff to DocBook, and generate both roff - and HTML manuals. - - Bug fixes: - - Fix a crash due to incorrect bootstrap ordering. This only impacted - --enable-debug --enable-dss configurations. - - Fix a minor statistics bug for mallctl("swap.avail", ...). - -* 2.0.1 (October 29, 2010) - - Bug fixes: - - Fix a race condition in heap profiling that could cause undefined behavior - if "opt.prof_accum" were disabled. - - Add missing mutex unlocks for some OOM error paths in the heap profiling - code. - - Fix a compilation error for non-C99 builds. - -* 2.0.0 (October 24, 2010) - - This version focuses on the experimental *allocm() API, and on improved - run-time configuration/introspection. Nonetheless, numerous performance - improvements are also included. - - New features: - - Implement the experimental {,r,s,d}allocm() API, which provides a superset - of the functionality available via malloc(), calloc(), posix_memalign(), - realloc(), malloc_usable_size(), and free(). These functions can be used to - allocate/reallocate aligned zeroed memory, ask for optional extra memory - during reallocation, prevent object movement during reallocation, etc. - - Replace JEMALLOC_OPTIONS/JEMALLOC_PROF_PREFIX with MALLOC_CONF, which is - more human-readable, and more flexible. For example: - JEMALLOC_OPTIONS=AJP - is now: - MALLOC_CONF=abort:true,fill:true,stats_print:true - - Port to Apple OS X. Sponsored by Mozilla. - - Make it possible for the application to control thread-->arena mappings via - the "thread.arena" mallctl. - - Add compile-time support for all TLS-related functionality via pthreads TSD. 
- This is mainly of interest for OS X, which does not support TLS, but has a - TSD implementation with similar performance. - - Override memalign() and valloc() if they are provided by the system. - - Add the "arenas.purge" mallctl, which can be used to synchronously purge all - dirty unused pages. - - Make cumulative heap profiling data optional, so that it is possible to - limit the amount of memory consumed by heap profiling data structures. - - Add per thread allocation counters that can be accessed via the - "thread.allocated" and "thread.deallocated" mallctls. - - Incompatible changes: - - Remove JEMALLOC_OPTIONS and malloc_options (see MALLOC_CONF above). - - Increase default backtrace depth from 4 to 128 for heap profiling. - - Disable interval-based profile dumps by default. - - Bug fixes: - - Remove bad assertions in fork handler functions. These assertions could - cause aborts for some combinations of configure settings. - - Fix strerror_r() usage to deal with non-standard semantics in GNU libc. - - Fix leak context reporting. This bug tended to cause the number of contexts - to be underreported (though the reported number of objects and bytes were - correct). - - Fix a realloc() bug for large in-place growing reallocation. This bug could - cause memory corruption, but it was hard to trigger. - - Fix an allocation bug for small allocations that could be triggered if - multiple threads raced to create a new run of backing pages. - - Enhance the heap profiler to trigger samples based on usable size, rather - than request size. - - Fix a heap profiling bug due to sometimes losing track of requested object - size for sampled objects. - -* 1.0.3 (August 12, 2010) - - Bug fixes: - - Fix the libunwind-based implementation of stack backtracing (used for heap - profiling). This bug could cause zero-length backtraces to be reported. - - Add a missing mutex unlock in library initialization code. 
If multiple - threads raced to initialize malloc, some of them could end up permanently - blocked. - -* 1.0.2 (May 11, 2010) - - Bug fixes: - - Fix junk filling of large objects, which could cause memory corruption. - - Add MAP_NORESERVE support for chunk mapping, because otherwise virtual - memory limits could cause swap file configuration to fail. Contributed by - Jordan DeLong. - -* 1.0.1 (April 14, 2010) - - Bug fixes: - - Fix compilation when --enable-fill is specified. - - Fix threads-related profiling bugs that affected accuracy and caused memory - to be leaked during thread exit. - - Fix dirty page purging race conditions that could cause crashes. - - Fix crash in tcache flushing code during thread destruction. - -* 1.0.0 (April 11, 2010) - - This release focuses on speed and run-time introspection. Numerous - algorithmic improvements make this release substantially faster than its - predecessors. - - New features: - - Implement autoconf-based configuration system. - - Add mallctl*(), for the purposes of introspection and run-time - configuration. - - Make it possible for the application to manually flush a thread's cache, via - the "tcache.flush" mallctl. - - Base maximum dirty page count on proportion of active memory. - - Compute various addtional run-time statistics, including per size class - statistics for large objects. - - Expose malloc_stats_print(), which can be called repeatedly by the - application. - - Simplify the malloc_message() signature to only take one string argument, - and incorporate an opaque data pointer argument for use by the application - in combination with malloc_stats_print(). - - Add support for allocation backed by one or more swap files, and allow the - application to disable over-commit if swap files are in use. - - Implement allocation profiling and leak checking. - - Removed features: - - Remove the dynamic arena rebalancing code, since thread-specific caching - reduces its utility. 
- - Bug fixes: - - Modify chunk allocation to work when address space layout randomization - (ASLR) is in use. - - Fix thread cleanup bugs related to TLS destruction. - - Handle 0-size allocation requests in posix_memalign(). - - Fix a chunk leak. The leaked chunks were never touched, so this impacted - virtual memory usage, but not physical memory usage. - -* linux_2008082[78]a (August 27/28, 2008) - - These snapshot releases are the simple result of incorporating Linux-specific - support into the FreeBSD malloc sources. - --------------------------------------------------------------------------------- -vim:filetype=text:textwidth=80 diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/config.guess mariadb-5.5-5.5.40/extra/jemalloc/config.guess --- mariadb-5.5-5.5.39/extra/jemalloc/config.guess 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/config.guess 1970-01-01 00:00:00.000000000 +0000 @@ -1,1530 +0,0 @@ -#! /bin/sh -# Attempt to guess a canonical system name. -# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, -# 2011, 2012 Free Software Foundation, Inc. - -timestamp='2012-02-10' - -# This file is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, see . 
-# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - - -# Originally written by Per Bothner. Please send patches (context -# diff format) to and include a ChangeLog -# entry. -# -# This script attempts to guess a canonical system name similar to -# config.sub. If it succeeds, it prints the system name on stdout, and -# exits with 0. Otherwise, it exits with 1. -# -# You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD - -me=`echo "$0" | sed -e 's,.*/,,'` - -usage="\ -Usage: $0 [OPTION] - -Output the configuration name of the system \`$me' is run on. - -Operation modes: - -h, --help print this help, then exit - -t, --time-stamp print date of last modification, then exit - -v, --version print version number, then exit - -Report bugs and patches to ." - -version="\ -GNU config.guess ($timestamp) - -Originally written by Per Bothner. -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, -2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 -Free Software Foundation, Inc. - -This is free software; see the source for copying conditions. There is NO -warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." - -help=" -Try \`$me --help' for more information." - -# Parse command line -while test $# -gt 0 ; do - case $1 in - --time-stamp | --time* | -t ) - echo "$timestamp" ; exit ;; - --version | -v ) - echo "$version" ; exit ;; - --help | --h* | -h ) - echo "$usage"; exit ;; - -- ) # Stop option processing - shift; break ;; - - ) # Use stdin as input. 
- break ;; - -* ) - echo "$me: invalid option $1$help" >&2 - exit 1 ;; - * ) - break ;; - esac -done - -if test $# != 0; then - echo "$me: too many arguments$help" >&2 - exit 1 -fi - -trap 'exit 1' 1 2 15 - -# CC_FOR_BUILD -- compiler used by this script. Note that the use of a -# compiler to aid in system detection is discouraged as it requires -# temporary files to be created and, as you can see below, it is a -# headache to deal with in a portable fashion. - -# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still -# use `HOST_CC' if defined, but it is deprecated. - -# Portable tmp directory creation inspired by the Autoconf team. - -set_cc_for_build=' -trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; -trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; -: ${TMPDIR=/tmp} ; - { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || - { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || - { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || - { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; -dummy=$tmp/dummy ; -tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; -case $CC_FOR_BUILD,$HOST_CC,$CC in - ,,) echo "int x;" > $dummy.c ; - for c in cc gcc c89 c99 ; do - if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then - CC_FOR_BUILD="$c"; break ; - fi ; - done ; - if test x"$CC_FOR_BUILD" = x ; then - CC_FOR_BUILD=no_compiler_found ; - fi - ;; - ,,*) CC_FOR_BUILD=$CC ;; - ,*,*) CC_FOR_BUILD=$HOST_CC ;; -esac ; set_cc_for_build= ;' - -# This is needed to find uname on a Pyramid OSx when run in the BSD universe. 
-# (ghazi@noc.rutgers.edu 1994-08-24) -if (test -f /.attbin/uname) >/dev/null 2>&1 ; then - PATH=$PATH:/.attbin ; export PATH -fi - -UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown -UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown -UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown -UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown - -# Note: order is significant - the case branches are not exclusive. - -case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in - *:NetBSD:*:*) - # NetBSD (nbsd) targets should (where applicable) match one or - # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, - # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently - # switched to ELF, *-*-netbsd* would select the old - # object file format. This provides both forward - # compatibility and a consistent mechanism for selecting the - # object file format. - # - # Note: NetBSD doesn't particularly care about the vendor - # portion of the name. We always set it to "unknown". - sysctl="sysctl -n hw.machine_arch" - UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ - /usr/sbin/$sysctl 2>/dev/null || echo unknown)` - case "${UNAME_MACHINE_ARCH}" in - armeb) machine=armeb-unknown ;; - arm*) machine=arm-unknown ;; - sh3el) machine=shl-unknown ;; - sh3eb) machine=sh-unknown ;; - sh5el) machine=sh5le-unknown ;; - *) machine=${UNAME_MACHINE_ARCH}-unknown ;; - esac - # The Operating System including object format, if it has switched - # to ELF recently, or will in the future. - case "${UNAME_MACHINE_ARCH}" in - arm*|i386|m68k|ns32k|sh3*|sparc|vax) - eval $set_cc_for_build - if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep -q __ELF__ - then - # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). - # Return netbsd for either. FIX? 
- os=netbsd - else - os=netbsdelf - fi - ;; - *) - os=netbsd - ;; - esac - # The OS release - # Debian GNU/NetBSD machines have a different userland, and - # thus, need a distinct triplet. However, they do not need - # kernel version information, so it can be replaced with a - # suitable tag, in the style of linux-gnu. - case "${UNAME_VERSION}" in - Debian*) - release='-gnu' - ;; - *) - release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` - ;; - esac - # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: - # contains redundant information, the shorter form: - # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. - echo "${machine}-${os}${release}" - exit ;; - *:OpenBSD:*:*) - UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` - echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} - exit ;; - *:ekkoBSD:*:*) - echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} - exit ;; - *:SolidBSD:*:*) - echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} - exit ;; - macppc:MirBSD:*:*) - echo powerpc-unknown-mirbsd${UNAME_RELEASE} - exit ;; - *:MirBSD:*:*) - echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} - exit ;; - alpha:OSF1:*:*) - case $UNAME_RELEASE in - *4.0) - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` - ;; - *5.*) - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` - ;; - esac - # According to Compaq, /usr/sbin/psrinfo has been available on - # OSF/1 and Tru64 systems produced since 1995. I hope that - # covers most systems running today. This code pipes the CPU - # types through head -n 1, so we only detect the type of CPU 0. 
- ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` - case "$ALPHA_CPU_TYPE" in - "EV4 (21064)") - UNAME_MACHINE="alpha" ;; - "EV4.5 (21064)") - UNAME_MACHINE="alpha" ;; - "LCA4 (21066/21068)") - UNAME_MACHINE="alpha" ;; - "EV5 (21164)") - UNAME_MACHINE="alphaev5" ;; - "EV5.6 (21164A)") - UNAME_MACHINE="alphaev56" ;; - "EV5.6 (21164PC)") - UNAME_MACHINE="alphapca56" ;; - "EV5.7 (21164PC)") - UNAME_MACHINE="alphapca57" ;; - "EV6 (21264)") - UNAME_MACHINE="alphaev6" ;; - "EV6.7 (21264A)") - UNAME_MACHINE="alphaev67" ;; - "EV6.8CB (21264C)") - UNAME_MACHINE="alphaev68" ;; - "EV6.8AL (21264B)") - UNAME_MACHINE="alphaev68" ;; - "EV6.8CX (21264D)") - UNAME_MACHINE="alphaev68" ;; - "EV6.9A (21264/EV69A)") - UNAME_MACHINE="alphaev69" ;; - "EV7 (21364)") - UNAME_MACHINE="alphaev7" ;; - "EV7.9 (21364A)") - UNAME_MACHINE="alphaev79" ;; - esac - # A Pn.n version is a patched version. - # A Vn.n version is a released version. - # A Tn.n version is a released field test version. - # A Xn.n version is an unreleased experimental baselevel. - # 1.2 uses "1.2" for uname -r. - echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - # Reset EXIT trap before exiting to avoid spurious non-zero exit code. - exitcode=$? - trap '' 0 - exit $exitcode ;; - Alpha\ *:Windows_NT*:*) - # How do we know it's Interix rather than the generic POSIX subsystem? - # Should we change UNAME_MACHINE based on the output of uname instead - # of the specific Alpha model? 
- echo alpha-pc-interix - exit ;; - 21064:Windows_NT:50:3) - echo alpha-dec-winnt3.5 - exit ;; - Amiga*:UNIX_System_V:4.0:*) - echo m68k-unknown-sysv4 - exit ;; - *:[Aa]miga[Oo][Ss]:*:*) - echo ${UNAME_MACHINE}-unknown-amigaos - exit ;; - *:[Mm]orph[Oo][Ss]:*:*) - echo ${UNAME_MACHINE}-unknown-morphos - exit ;; - *:OS/390:*:*) - echo i370-ibm-openedition - exit ;; - *:z/VM:*:*) - echo s390-ibm-zvmoe - exit ;; - *:OS400:*:*) - echo powerpc-ibm-os400 - exit ;; - arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) - echo arm-acorn-riscix${UNAME_RELEASE} - exit ;; - arm:riscos:*:*|arm:RISCOS:*:*) - echo arm-unknown-riscos - exit ;; - SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) - echo hppa1.1-hitachi-hiuxmpp - exit ;; - Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) - # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. - if test "`(/bin/universe) 2>/dev/null`" = att ; then - echo pyramid-pyramid-sysv3 - else - echo pyramid-pyramid-bsd - fi - exit ;; - NILE*:*:*:dcosx) - echo pyramid-pyramid-svr4 - exit ;; - DRS?6000:unix:4.0:6*) - echo sparc-icl-nx6 - exit ;; - DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) - case `/usr/bin/uname -p` in - sparc) echo sparc-icl-nx7; exit ;; - esac ;; - s390x:SunOS:*:*) - echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit ;; - sun4H:SunOS:5.*:*) - echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit ;; - sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) - echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit ;; - i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) - echo i386-pc-auroraux${UNAME_RELEASE} - exit ;; - i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) - eval $set_cc_for_build - SUN_ARCH="i386" - # If there is a compiler, see if it is configured for 64-bit objects. - # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. - # This test works for both compilers. 
- if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then - if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_64BIT_ARCH >/dev/null - then - SUN_ARCH="x86_64" - fi - fi - echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit ;; - sun4*:SunOS:6*:*) - # According to config.sub, this is the proper way to canonicalize - # SunOS6. Hard to guess exactly what SunOS6 will be like, but - # it's likely to be more like Solaris than SunOS4. - echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit ;; - sun4*:SunOS:*:*) - case "`/usr/bin/arch -k`" in - Series*|S4*) - UNAME_RELEASE=`uname -v` - ;; - esac - # Japanese Language versions have a version number like `4.1.3-JL'. - echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` - exit ;; - sun3*:SunOS:*:*) - echo m68k-sun-sunos${UNAME_RELEASE} - exit ;; - sun*:*:4.2BSD:*) - UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` - test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 - case "`/bin/arch`" in - sun3) - echo m68k-sun-sunos${UNAME_RELEASE} - ;; - sun4) - echo sparc-sun-sunos${UNAME_RELEASE} - ;; - esac - exit ;; - aushp:SunOS:*:*) - echo sparc-auspex-sunos${UNAME_RELEASE} - exit ;; - # The situation for MiNT is a little confusing. The machine name - # can be virtually everything (everything which is not - # "atarist" or "atariste" at least should have a processor - # > m68000). The system name ranges from "MiNT" over "FreeMiNT" - # to the lowercase version "mint" (or "freemint"). Finally - # the system name "TOS" denotes a system which is actually not - # MiNT. But MiNT is downward compatible to TOS, so this should - # be no problem. 
- atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit ;; - atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit ;; - *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit ;; - milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) - echo m68k-milan-mint${UNAME_RELEASE} - exit ;; - hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) - echo m68k-hades-mint${UNAME_RELEASE} - exit ;; - *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) - echo m68k-unknown-mint${UNAME_RELEASE} - exit ;; - m68k:machten:*:*) - echo m68k-apple-machten${UNAME_RELEASE} - exit ;; - powerpc:machten:*:*) - echo powerpc-apple-machten${UNAME_RELEASE} - exit ;; - RISC*:Mach:*:*) - echo mips-dec-mach_bsd4.3 - exit ;; - RISC*:ULTRIX:*:*) - echo mips-dec-ultrix${UNAME_RELEASE} - exit ;; - VAX*:ULTRIX*:*:*) - echo vax-dec-ultrix${UNAME_RELEASE} - exit ;; - 2020:CLIX:*:* | 2430:CLIX:*:*) - echo clipper-intergraph-clix${UNAME_RELEASE} - exit ;; - mips:*:*:UMIPS | mips:*:*:RISCos) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c -#ifdef __cplusplus -#include /* for printf() prototype */ - int main (int argc, char *argv[]) { -#else - int main (argc, argv) int argc; char *argv[]; { -#endif - #if defined (host_mips) && defined (MIPSEB) - #if defined (SYSTYPE_SYSV) - printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); - #endif - #if defined (SYSTYPE_SVR4) - printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); - #endif - #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) - printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); - #endif - #endif - exit (-1); - } -EOF - $CC_FOR_BUILD -o $dummy $dummy.c && - dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && - SYSTEM_NAME=`$dummy $dummyarg` && - { echo "$SYSTEM_NAME"; exit; } - echo mips-mips-riscos${UNAME_RELEASE} - exit ;; - Motorola:PowerMAX_OS:*:*) - echo 
powerpc-motorola-powermax - exit ;; - Motorola:*:4.3:PL8-*) - echo powerpc-harris-powermax - exit ;; - Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) - echo powerpc-harris-powermax - exit ;; - Night_Hawk:Power_UNIX:*:*) - echo powerpc-harris-powerunix - exit ;; - m88k:CX/UX:7*:*) - echo m88k-harris-cxux7 - exit ;; - m88k:*:4*:R4*) - echo m88k-motorola-sysv4 - exit ;; - m88k:*:3*:R3*) - echo m88k-motorola-sysv3 - exit ;; - AViiON:dgux:*:*) - # DG/UX returns AViiON for all architectures - UNAME_PROCESSOR=`/usr/bin/uname -p` - if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] - then - if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ - [ ${TARGET_BINARY_INTERFACE}x = x ] - then - echo m88k-dg-dgux${UNAME_RELEASE} - else - echo m88k-dg-dguxbcs${UNAME_RELEASE} - fi - else - echo i586-dg-dgux${UNAME_RELEASE} - fi - exit ;; - M88*:DolphinOS:*:*) # DolphinOS (SVR3) - echo m88k-dolphin-sysv3 - exit ;; - M88*:*:R3*:*) - # Delta 88k system running SVR3 - echo m88k-motorola-sysv3 - exit ;; - XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) - echo m88k-tektronix-sysv3 - exit ;; - Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) - echo m68k-tektronix-bsd - exit ;; - *:IRIX*:*:*) - echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` - exit ;; - ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
- echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id - exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' - i*86:AIX:*:*) - echo i386-ibm-aix - exit ;; - ia64:AIX:*:*) - if [ -x /usr/bin/oslevel ] ; then - IBM_REV=`/usr/bin/oslevel` - else - IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} - fi - echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} - exit ;; - *:AIX:2:3) - if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include - - main() - { - if (!__power_pc()) - exit(1); - puts("powerpc-ibm-aix3.2.5"); - exit(0); - } -EOF - if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` - then - echo "$SYSTEM_NAME" - else - echo rs6000-ibm-aix3.2.5 - fi - elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then - echo rs6000-ibm-aix3.2.4 - else - echo rs6000-ibm-aix3.2 - fi - exit ;; - *:AIX:*:[4567]) - IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` - if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then - IBM_ARCH=rs6000 - else - IBM_ARCH=powerpc - fi - if [ -x /usr/bin/oslevel ] ; then - IBM_REV=`/usr/bin/oslevel` - else - IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} - fi - echo ${IBM_ARCH}-ibm-aix${IBM_REV} - exit ;; - *:AIX:*:*) - echo rs6000-ibm-aix - exit ;; - ibmrt:4.4BSD:*|romp-ibm:BSD:*) - echo romp-ibm-bsd4.4 - exit ;; - ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and - echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to - exit ;; # report: romp-ibm BSD 4.3 - *:BOSX:*:*) - echo rs6000-bull-bosx - exit ;; - DPX/2?00:B.O.S.:*:*) - echo m68k-bull-sysv3 - exit ;; - 9000/[34]??:4.3bsd:1.*:*) - echo m68k-hp-bsd - exit ;; - hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) - echo m68k-hp-bsd4.4 - exit ;; - 9000/[34678]??:HP-UX:*:*) - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` - case "${UNAME_MACHINE}" in - 9000/31? ) HP_ARCH=m68000 ;; - 9000/[34]?? 
) HP_ARCH=m68k ;; - 9000/[678][0-9][0-9]) - if [ -x /usr/bin/getconf ]; then - sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` - sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` - case "${sc_cpu_version}" in - 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 - 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 - 532) # CPU_PA_RISC2_0 - case "${sc_kernel_bits}" in - 32) HP_ARCH="hppa2.0n" ;; - 64) HP_ARCH="hppa2.0w" ;; - '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 - esac ;; - esac - fi - if [ "${HP_ARCH}" = "" ]; then - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - - #define _HPUX_SOURCE - #include - #include - - int main () - { - #if defined(_SC_KERNEL_BITS) - long bits = sysconf(_SC_KERNEL_BITS); - #endif - long cpu = sysconf (_SC_CPU_VERSION); - - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1"); break; - case CPU_PA_RISC2_0: - #if defined(_SC_KERNEL_BITS) - switch (bits) - { - case 64: puts ("hppa2.0w"); break; - case 32: puts ("hppa2.0n"); break; - default: puts ("hppa2.0"); break; - } break; - #else /* !defined(_SC_KERNEL_BITS) */ - puts ("hppa2.0"); break; - #endif - default: puts ("hppa1.0"); break; - } - exit (0); - } -EOF - (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` - test -z "$HP_ARCH" && HP_ARCH=hppa - fi ;; - esac - if [ ${HP_ARCH} = "hppa2.0w" ] - then - eval $set_cc_for_build - - # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating - # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler - # generating 64-bit code. 
GNU and HP use different nomenclature: - # - # $ CC_FOR_BUILD=cc ./config.guess - # => hppa2.0w-hp-hpux11.23 - # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess - # => hppa64-hp-hpux11.23 - - if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | - grep -q __LP64__ - then - HP_ARCH="hppa2.0w" - else - HP_ARCH="hppa64" - fi - fi - echo ${HP_ARCH}-hp-hpux${HPUX_REV} - exit ;; - ia64:HP-UX:*:*) - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` - echo ia64-hp-hpux${HPUX_REV} - exit ;; - 3050*:HI-UX:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include - int - main () - { - long cpu = sysconf (_SC_CPU_VERSION); - /* The order matters, because CPU_IS_HP_MC68K erroneously returns - true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct - results, however. */ - if (CPU_IS_PA_RISC (cpu)) - { - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; - case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; - default: puts ("hppa-hitachi-hiuxwe2"); break; - } - } - else if (CPU_IS_HP_MC68K (cpu)) - puts ("m68k-hitachi-hiuxwe2"); - else puts ("unknown-hitachi-hiuxwe2"); - exit (0); - } -EOF - $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && - { echo "$SYSTEM_NAME"; exit; } - echo unknown-hitachi-hiuxwe2 - exit ;; - 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) - echo hppa1.1-hp-bsd - exit ;; - 9000/8??:4.3bsd:*:*) - echo hppa1.0-hp-bsd - exit ;; - *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) - echo hppa1.0-hp-mpeix - exit ;; - hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) - echo hppa1.1-hp-osf - exit ;; - hp8??:OSF1:*:*) - echo hppa1.0-hp-osf - exit ;; - i*86:OSF1:*:*) - if [ -x /usr/sbin/sysversion ] ; then - echo ${UNAME_MACHINE}-unknown-osf1mk - else - echo ${UNAME_MACHINE}-unknown-osf1 - fi - exit ;; - parisc*:Lites*:*:*) - echo hppa1.1-hp-lites - exit ;; - C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) - echo c1-convex-bsd - exit ;; - C2*:ConvexOS:*:* | 
convex:ConvexOS:C2*:*) - if getsysinfo -f scalar_acc - then echo c32-convex-bsd - else echo c2-convex-bsd - fi - exit ;; - C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) - echo c34-convex-bsd - exit ;; - C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) - echo c38-convex-bsd - exit ;; - C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) - echo c4-convex-bsd - exit ;; - CRAY*Y-MP:*:*:*) - echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit ;; - CRAY*[A-Z]90:*:*:*) - echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ - | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ - -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ - -e 's/\.[^.]*$/.X/' - exit ;; - CRAY*TS:*:*:*) - echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit ;; - CRAY*T3E:*:*:*) - echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit ;; - CRAY*SV1:*:*:*) - echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit ;; - *:UNICOS/mp:*:*) - echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit ;; - F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` - echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit ;; - 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` - echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit ;; - i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) - echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} - exit ;; - sparc*:BSD/OS:*:*) - echo sparc-unknown-bsdi${UNAME_RELEASE} - exit ;; - *:BSD/OS:*:*) - echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} 
- exit ;; - *:FreeBSD:*:*) - UNAME_PROCESSOR=`/usr/bin/uname -p` - case ${UNAME_PROCESSOR} in - amd64) - echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; - *) - echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; - esac - exit ;; - i*:CYGWIN*:*) - echo ${UNAME_MACHINE}-pc-cygwin - exit ;; - *:MINGW*:*) - echo ${UNAME_MACHINE}-pc-mingw32 - exit ;; - i*:MSYS*:*) - echo ${UNAME_MACHINE}-pc-msys - exit ;; - i*:windows32*:*) - # uname -m includes "-pc" on this system. - echo ${UNAME_MACHINE}-mingw32 - exit ;; - i*:PW*:*) - echo ${UNAME_MACHINE}-pc-pw32 - exit ;; - *:Interix*:*) - case ${UNAME_MACHINE} in - x86) - echo i586-pc-interix${UNAME_RELEASE} - exit ;; - authenticamd | genuineintel | EM64T) - echo x86_64-unknown-interix${UNAME_RELEASE} - exit ;; - IA64) - echo ia64-unknown-interix${UNAME_RELEASE} - exit ;; - esac ;; - [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) - echo i${UNAME_MACHINE}-pc-mks - exit ;; - 8664:Windows_NT:*) - echo x86_64-pc-mks - exit ;; - i*:Windows_NT*:* | Pentium*:Windows_NT*:*) - # How do we know it's Interix rather than the generic POSIX subsystem? - # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we - # UNAME_MACHINE based on the output of uname instead of i386? 
- echo i586-pc-interix - exit ;; - i*:UWIN*:*) - echo ${UNAME_MACHINE}-pc-uwin - exit ;; - amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) - echo x86_64-unknown-cygwin - exit ;; - p*:CYGWIN*:*) - echo powerpcle-unknown-cygwin - exit ;; - prep*:SunOS:5.*:*) - echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit ;; - *:GNU:*:*) - # the GNU system - echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` - exit ;; - *:GNU/*:*:*) - # other systems with GNU libc and userland - echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu - exit ;; - i*86:Minix:*:*) - echo ${UNAME_MACHINE}-pc-minix - exit ;; - aarch64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - aarch64_be:Linux:*:*) - UNAME_MACHINE=aarch64_be - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - alpha:Linux:*:*) - case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in - EV5) UNAME_MACHINE=alphaev5 ;; - EV56) UNAME_MACHINE=alphaev56 ;; - PCA56) UNAME_MACHINE=alphapca56 ;; - PCA57) UNAME_MACHINE=alphapca56 ;; - EV6) UNAME_MACHINE=alphaev6 ;; - EV67) UNAME_MACHINE=alphaev67 ;; - EV68*) UNAME_MACHINE=alphaev68 ;; - esac - objdump --private-headers /bin/sh | grep -q ld.so.1 - if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi - echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} - exit ;; - arm*:Linux:*:*) - eval $set_cc_for_build - if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep -q __ARM_EABI__ - then - echo ${UNAME_MACHINE}-unknown-linux-gnu - else - if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep -q __ARM_PCS_VFP - then - echo ${UNAME_MACHINE}-unknown-linux-gnueabi - else - echo ${UNAME_MACHINE}-unknown-linux-gnueabihf - fi - fi - exit ;; - avr32*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - cris:Linux:*:*) - echo ${UNAME_MACHINE}-axis-linux-gnu - exit ;; - crisv32:Linux:*:*) - echo ${UNAME_MACHINE}-axis-linux-gnu - exit ;; - frv:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - hexagon:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - i*86:Linux:*:*) - LIBC=gnu - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #ifdef __dietlibc__ - LIBC=dietlibc - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` - echo "${UNAME_MACHINE}-pc-linux-${LIBC}" - exit ;; - ia64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - m32r*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - m68*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - mips:Linux:*:* | mips64:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #undef CPU - #undef ${UNAME_MACHINE} - #undef ${UNAME_MACHINE}el - #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=${UNAME_MACHINE}el - #else - #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=${UNAME_MACHINE} - #else - CPU= - #endif - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` - test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } - ;; - or32:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - padre:Linux:*:*) - echo sparc-unknown-linux-gnu - 
exit ;; - parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-gnu - exit ;; - parisc:Linux:*:* | hppa:Linux:*:*) - # Look for CPU level - case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in - PA7*) echo hppa1.1-unknown-linux-gnu ;; - PA8*) echo hppa2.0-unknown-linux-gnu ;; - *) echo hppa-unknown-linux-gnu ;; - esac - exit ;; - ppc64:Linux:*:*) - echo powerpc64-unknown-linux-gnu - exit ;; - ppc:Linux:*:*) - echo powerpc-unknown-linux-gnu - exit ;; - s390:Linux:*:* | s390x:Linux:*:*) - echo ${UNAME_MACHINE}-ibm-linux - exit ;; - sh64*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - sh*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - sparc:Linux:*:* | sparc64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - tile*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - vax:Linux:*:*) - echo ${UNAME_MACHINE}-dec-linux-gnu - exit ;; - x86_64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - xtensa*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit ;; - i*86:DYNIX/ptx:4*:*) - # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. - # earlier versions are messed up and put the nodename in both - # sysname and nodename. - echo i386-sequent-sysv4 - exit ;; - i*86:UNIX_SV:4.2MP:2.*) - # Unixware is an offshoot of SVR4, but it has its own version - # number series starting with 2... - # I am not positive that other SVR4 systems won't match this, - # I just have to hope. -- rms. - # Use sysv4.2uw... so that sysv4* matches it. - echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} - exit ;; - i*86:OS/2:*:*) - # If we were able to find `uname', then EMX Unix compatibility - # is probably installed. 
- echo ${UNAME_MACHINE}-pc-os2-emx - exit ;; - i*86:XTS-300:*:STOP) - echo ${UNAME_MACHINE}-unknown-stop - exit ;; - i*86:atheos:*:*) - echo ${UNAME_MACHINE}-unknown-atheos - exit ;; - i*86:syllable:*:*) - echo ${UNAME_MACHINE}-pc-syllable - exit ;; - i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) - echo i386-unknown-lynxos${UNAME_RELEASE} - exit ;; - i*86:*DOS:*:*) - echo ${UNAME_MACHINE}-pc-msdosdjgpp - exit ;; - i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) - UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` - if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then - echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} - else - echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} - fi - exit ;; - i*86:*:5:[678]*) - # UnixWare 7.x, OpenUNIX and OpenServer 6. - case `/bin/uname -X | grep "^Machine"` in - *486*) UNAME_MACHINE=i486 ;; - *Pentium) UNAME_MACHINE=i586 ;; - *Pent*|*Celeron) UNAME_MACHINE=i686 ;; - esac - echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} - exit ;; - i*86:*:3.2:*) - if test -f /usr/options/cb.name; then - UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then - UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` - (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 - (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ - && UNAME_MACHINE=i586 - (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ - && UNAME_MACHINE=i686 - (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ - && UNAME_MACHINE=i686 - echo ${UNAME_MACHINE}-pc-sco$UNAME_REL - else - echo ${UNAME_MACHINE}-pc-sysv32 - fi - exit ;; - pc:*:*:*) - # Left here for compatibility: - # uname -m prints for DJGPP always 'pc', but it prints nothing about - # the processor, so we play safe by assuming i586. - # Note: whatever this is, it MUST be the same as what config.sub - # prints for the "djgpp" host, or else GDB configury will decide that - # this is a cross-build. 
- echo i586-pc-msdosdjgpp - exit ;; - Intel:Mach:3*:*) - echo i386-pc-mach3 - exit ;; - paragon:*:*:*) - echo i860-intel-osf1 - exit ;; - i860:*:4.*:*) # i860-SVR4 - if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then - echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 - else # Add other i860-SVR4 vendors below as they are discovered. - echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 - fi - exit ;; - mini*:CTIX:SYS*5:*) - # "miniframe" - echo m68010-convergent-sysv - exit ;; - mc68k:UNIX:SYSTEM5:3.51m) - echo m68k-convergent-sysv - exit ;; - M680?0:D-NIX:5.3:*) - echo m68k-diab-dnix - exit ;; - M68*:*:R3V[5678]*:*) - test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; - 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) - OS_REL='' - test -r /etc/.relid \ - && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4.3${OS_REL}; exit; } - /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; - 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4; exit; } ;; - NCR*:*:4.2:* | MPRAS*:*:4.2:*) - OS_REL='.3' - test -r /etc/.relid \ - && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4.3${OS_REL}; exit; } - /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && { echo i586-ncr-sysv4.3${OS_REL}; exit; } - /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ - && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; - m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) - echo m68k-unknown-lynxos${UNAME_RELEASE} - exit ;; - mc68030:UNIX_System_V:4.*:*) - echo m68k-atari-sysv4 - exit ;; - 
TSUNAMI:LynxOS:2.*:*) - echo sparc-unknown-lynxos${UNAME_RELEASE} - exit ;; - rs6000:LynxOS:2.*:*) - echo rs6000-unknown-lynxos${UNAME_RELEASE} - exit ;; - PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) - echo powerpc-unknown-lynxos${UNAME_RELEASE} - exit ;; - SM[BE]S:UNIX_SV:*:*) - echo mips-dde-sysv${UNAME_RELEASE} - exit ;; - RM*:ReliantUNIX-*:*:*) - echo mips-sni-sysv4 - exit ;; - RM*:SINIX-*:*:*) - echo mips-sni-sysv4 - exit ;; - *:SINIX-*:*:*) - if uname -p 2>/dev/null >/dev/null ; then - UNAME_MACHINE=`(uname -p) 2>/dev/null` - echo ${UNAME_MACHINE}-sni-sysv4 - else - echo ns32k-sni-sysv - fi - exit ;; - PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort - # says - echo i586-unisys-sysv4 - exit ;; - *:UNIX_System_V:4*:FTX*) - # From Gerald Hewes . - # How about differentiating between stratus architectures? -djm - echo hppa1.1-stratus-sysv4 - exit ;; - *:*:*:FTX*) - # From seanf@swdc.stratus.com. - echo i860-stratus-sysv4 - exit ;; - i*86:VOS:*:*) - # From Paul.Green@stratus.com. - echo ${UNAME_MACHINE}-stratus-vos - exit ;; - *:VOS:*:*) - # From Paul.Green@stratus.com. - echo hppa1.1-stratus-vos - exit ;; - mc68*:A/UX:*:*) - echo m68k-apple-aux${UNAME_RELEASE} - exit ;; - news*:NEWS-OS:6*:*) - echo mips-sony-newsos6 - exit ;; - R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) - if [ -d /usr/nec ]; then - echo mips-nec-sysv${UNAME_RELEASE} - else - echo mips-unknown-sysv${UNAME_RELEASE} - fi - exit ;; - BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. - echo powerpc-be-beos - exit ;; - BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. - echo powerpc-apple-beos - exit ;; - BePC:BeOS:*:*) # BeOS running on Intel PC compatible. - echo i586-pc-beos - exit ;; - BePC:Haiku:*:*) # Haiku running on Intel PC compatible. 
- echo i586-pc-haiku - exit ;; - SX-4:SUPER-UX:*:*) - echo sx4-nec-superux${UNAME_RELEASE} - exit ;; - SX-5:SUPER-UX:*:*) - echo sx5-nec-superux${UNAME_RELEASE} - exit ;; - SX-6:SUPER-UX:*:*) - echo sx6-nec-superux${UNAME_RELEASE} - exit ;; - SX-7:SUPER-UX:*:*) - echo sx7-nec-superux${UNAME_RELEASE} - exit ;; - SX-8:SUPER-UX:*:*) - echo sx8-nec-superux${UNAME_RELEASE} - exit ;; - SX-8R:SUPER-UX:*:*) - echo sx8r-nec-superux${UNAME_RELEASE} - exit ;; - Power*:Rhapsody:*:*) - echo powerpc-apple-rhapsody${UNAME_RELEASE} - exit ;; - *:Rhapsody:*:*) - echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} - exit ;; - *:Darwin:*:*) - UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown - case $UNAME_PROCESSOR in - i386) - eval $set_cc_for_build - if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then - if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_64BIT_ARCH >/dev/null - then - UNAME_PROCESSOR="x86_64" - fi - fi ;; - unknown) UNAME_PROCESSOR=powerpc ;; - esac - echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} - exit ;; - *:procnto*:*:* | *:QNX:[0123456789]*:*) - UNAME_PROCESSOR=`uname -p` - if test "$UNAME_PROCESSOR" = "x86"; then - UNAME_PROCESSOR=i386 - UNAME_MACHINE=pc - fi - echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} - exit ;; - *:QNX:*:4*) - echo i386-pc-qnx - exit ;; - NEO-?:NONSTOP_KERNEL:*:*) - echo neo-tandem-nsk${UNAME_RELEASE} - exit ;; - NSE-?:NONSTOP_KERNEL:*:*) - echo nse-tandem-nsk${UNAME_RELEASE} - exit ;; - NSR-?:NONSTOP_KERNEL:*:*) - echo nsr-tandem-nsk${UNAME_RELEASE} - exit ;; - *:NonStop-UX:*:*) - echo mips-compaq-nonstopux - exit ;; - BS2000:POSIX*:*:*) - echo bs2000-siemens-sysv - exit ;; - DS/*:UNIX_System_V:*:*) - echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} - exit ;; - *:Plan9:*:*) - # "uname -m" is not consistent, so use $cputype instead. 386 - # is converted to i386 for consistency with other x86 - # operating systems. 
- if test "$cputype" = "386"; then - UNAME_MACHINE=i386 - else - UNAME_MACHINE="$cputype" - fi - echo ${UNAME_MACHINE}-unknown-plan9 - exit ;; - *:TOPS-10:*:*) - echo pdp10-unknown-tops10 - exit ;; - *:TENEX:*:*) - echo pdp10-unknown-tenex - exit ;; - KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) - echo pdp10-dec-tops20 - exit ;; - XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) - echo pdp10-xkl-tops20 - exit ;; - *:TOPS-20:*:*) - echo pdp10-unknown-tops20 - exit ;; - *:ITS:*:*) - echo pdp10-unknown-its - exit ;; - SEI:*:*:SEIUX) - echo mips-sei-seiux${UNAME_RELEASE} - exit ;; - *:DragonFly:*:*) - echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` - exit ;; - *:*VMS:*:*) - UNAME_MACHINE=`(uname -p) 2>/dev/null` - case "${UNAME_MACHINE}" in - A*) echo alpha-dec-vms ; exit ;; - I*) echo ia64-dec-vms ; exit ;; - V*) echo vax-dec-vms ; exit ;; - esac ;; - *:XENIX:*:SysV) - echo i386-pc-xenix - exit ;; - i*86:skyos:*:*) - echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' - exit ;; - i*86:rdos:*:*) - echo ${UNAME_MACHINE}-pc-rdos - exit ;; - i*86:AROS:*:*) - echo ${UNAME_MACHINE}-pc-aros - exit ;; - x86_64:VMkernel:*:*) - echo ${UNAME_MACHINE}-unknown-esx - exit ;; -esac - -#echo '(No uname command or uname output not recognized.)' 1>&2 -#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 - -eval $set_cc_for_build -cat >$dummy.c < -# include -#endif -main () -{ -#if defined (sony) -#if defined (MIPSEB) - /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, - I don't know.... 
*/ - printf ("mips-sony-bsd\n"); exit (0); -#else -#include - printf ("m68k-sony-newsos%s\n", -#ifdef NEWSOS4 - "4" -#else - "" -#endif - ); exit (0); -#endif -#endif - -#if defined (__arm) && defined (__acorn) && defined (__unix) - printf ("arm-acorn-riscix\n"); exit (0); -#endif - -#if defined (hp300) && !defined (hpux) - printf ("m68k-hp-bsd\n"); exit (0); -#endif - -#if defined (NeXT) -#if !defined (__ARCHITECTURE__) -#define __ARCHITECTURE__ "m68k" -#endif - int version; - version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; - if (version < 4) - printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); - else - printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); - exit (0); -#endif - -#if defined (MULTIMAX) || defined (n16) -#if defined (UMAXV) - printf ("ns32k-encore-sysv\n"); exit (0); -#else -#if defined (CMU) - printf ("ns32k-encore-mach\n"); exit (0); -#else - printf ("ns32k-encore-bsd\n"); exit (0); -#endif -#endif -#endif - -#if defined (__386BSD__) - printf ("i386-pc-bsd\n"); exit (0); -#endif - -#if defined (sequent) -#if defined (i386) - printf ("i386-sequent-dynix\n"); exit (0); -#endif -#if defined (ns32000) - printf ("ns32k-sequent-dynix\n"); exit (0); -#endif -#endif - -#if defined (_SEQUENT_) - struct utsname un; - - uname(&un); - - if (strncmp(un.version, "V2", 2) == 0) { - printf ("i386-sequent-ptx2\n"); exit (0); - } - if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ - printf ("i386-sequent-ptx1\n"); exit (0); - } - printf ("i386-sequent-ptx\n"); exit (0); - -#endif - -#if defined (vax) -# if !defined (ultrix) -# include -# if defined (BSD) -# if BSD == 43 - printf ("vax-dec-bsd4.3\n"); exit (0); -# else -# if BSD == 199006 - printf ("vax-dec-bsd4.3reno\n"); exit (0); -# else - printf ("vax-dec-bsd\n"); exit (0); -# endif -# endif -# else - printf ("vax-dec-bsd\n"); exit (0); -# endif -# else - printf ("vax-dec-ultrix\n"); exit (0); -# endif -#endif - -#if defined (alliant) && defined (i860) - printf ("i860-alliant-bsd\n"); exit (0); -#endif - - exit (1); -} -EOF - -$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && - { echo "$SYSTEM_NAME"; exit; } - -# Apollos put the system type in the environment. - -test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } - -# Convex versions that predate uname can use getsysinfo(1) - -if [ -x /usr/convex/getsysinfo ] -then - case `getsysinfo -f cpu_type` in - c1*) - echo c1-convex-bsd - exit ;; - c2*) - if getsysinfo -f scalar_acc - then echo c32-convex-bsd - else echo c2-convex-bsd - fi - exit ;; - c34*) - echo c34-convex-bsd - exit ;; - c38*) - echo c38-convex-bsd - exit ;; - c4*) - echo c4-convex-bsd - exit ;; - esac -fi - -cat >&2 < in order to provide the needed -information to handle your system. 
- -config.guess timestamp = $timestamp - -uname -m = `(uname -m) 2>/dev/null || echo unknown` -uname -r = `(uname -r) 2>/dev/null || echo unknown` -uname -s = `(uname -s) 2>/dev/null || echo unknown` -uname -v = `(uname -v) 2>/dev/null || echo unknown` - -/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` -/bin/uname -X = `(/bin/uname -X) 2>/dev/null` - -hostinfo = `(hostinfo) 2>/dev/null` -/bin/universe = `(/bin/universe) 2>/dev/null` -/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` -/bin/arch = `(/bin/arch) 2>/dev/null` -/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` -/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` - -UNAME_MACHINE = ${UNAME_MACHINE} -UNAME_RELEASE = ${UNAME_RELEASE} -UNAME_SYSTEM = ${UNAME_SYSTEM} -UNAME_VERSION = ${UNAME_VERSION} -EOF - -exit 1 - -# Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "timestamp='" -# time-stamp-format: "%:y-%02m-%02d" -# time-stamp-end: "'" -# End: diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/config.sub mariadb-5.5-5.5.40/extra/jemalloc/config.sub --- mariadb-5.5-5.5.39/extra/jemalloc/config.sub 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/config.sub 1970-01-01 00:00:00.000000000 +0000 @@ -1,1773 +0,0 @@ -#! /bin/sh -# Configuration validation subroutine script. -# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, -# 2011, 2012 Free Software Foundation, Inc. - -timestamp='2012-02-10' - -# This file is (in principle) common to ALL GNU software. -# The presence of a machine in this file suggests that SOME GNU software -# can handle that machine. It does not imply ALL GNU software can. -# -# This file is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, see . -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - - -# Please send patches to . Submit a context -# diff and a properly formatted GNU ChangeLog entry. -# -# Configuration subroutine to validate and canonicalize a configuration type. -# Supply the specified configuration type as an argument. -# If it is invalid, we print an error message on stderr and exit with code 1. -# Otherwise, we print the canonical config type on stdout and succeed. - -# You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD - -# This file is supposed to be the same for all GNU packages -# and recognize all the CPU types, system types and aliases -# that are meaningful with *any* GNU software. -# Each package is responsible for reporting which valid configurations -# it does not support. The user should be able to distinguish -# a failure to support a valid configuration from a meaningless -# configuration. - -# The goal of this file is to map all the various variations of a given -# machine specification into a single specification in the form: -# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM -# or in some cases, the newer four-part form: -# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM -# It is wrong to echo any other type of specification. 
- -me=`echo "$0" | sed -e 's,.*/,,'` - -usage="\ -Usage: $0 [OPTION] CPU-MFR-OPSYS - $0 [OPTION] ALIAS - -Canonicalize a configuration name. - -Operation modes: - -h, --help print this help, then exit - -t, --time-stamp print date of last modification, then exit - -v, --version print version number, then exit - -Report bugs and patches to ." - -version="\ -GNU config.sub ($timestamp) - -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, -2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 -Free Software Foundation, Inc. - -This is free software; see the source for copying conditions. There is NO -warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." - -help=" -Try \`$me --help' for more information." - -# Parse command line -while test $# -gt 0 ; do - case $1 in - --time-stamp | --time* | -t ) - echo "$timestamp" ; exit ;; - --version | -v ) - echo "$version" ; exit ;; - --help | --h* | -h ) - echo "$usage"; exit ;; - -- ) # Stop option processing - shift; break ;; - - ) # Use stdin as input. - break ;; - -* ) - echo "$me: invalid option $1$help" - exit 1 ;; - - *local*) - # First pass through any local machine types. - echo $1 - exit ;; - - * ) - break ;; - esac -done - -case $# in - 0) echo "$me: missing argument$help" >&2 - exit 1;; - 1) ;; - *) echo "$me: too many arguments$help" >&2 - exit 1;; -esac - -# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). -# Here we must recognize all the valid KERNEL-OS combinations. 
-maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` -case $maybe_os in - nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ - linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ - knetbsd*-gnu* | netbsd*-gnu* | \ - kopensolaris*-gnu* | \ - storm-chaos* | os2-emx* | rtmk-nova*) - os=-$maybe_os - basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` - ;; - android-linux) - os=-linux-android - basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown - ;; - *) - basic_machine=`echo $1 | sed 's/-[^-]*$//'` - if [ $basic_machine != $1 ] - then os=`echo $1 | sed 's/.*-/-/'` - else os=; fi - ;; -esac - -### Let's recognize common machines as not being operating systems so -### that things like config.sub decstation-3100 work. We also -### recognize some manufacturers as not being operating systems, so we -### can provide default operating systems below. -case $os in - -sun*os*) - # Prevent following clause from handling this invalid input. 
- ;; - -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ - -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ - -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ - -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ - -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ - -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ - -apple | -axis | -knuth | -cray | -microblaze) - os= - basic_machine=$1 - ;; - -bluegene*) - os=-cnk - ;; - -sim | -cisco | -oki | -wec | -winbond) - os= - basic_machine=$1 - ;; - -scout) - ;; - -wrs) - os=-vxworks - basic_machine=$1 - ;; - -chorusos*) - os=-chorusos - basic_machine=$1 - ;; - -chorusrdb) - os=-chorusrdb - basic_machine=$1 - ;; - -hiux*) - os=-hiuxwe2 - ;; - -sco6) - os=-sco5v6 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco5) - os=-sco3.2v5 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco4) - os=-sco3.2v4 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco3.2.[4-9]*) - os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco3.2v[4-9]*) - # Don't forget version if it is 3.2v4 or newer. - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco5v6*) - # Don't forget version if it is 3.2v4 or newer. 
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco*) - os=-sco3.2v2 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -udk*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -isc) - os=-isc2.2 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -clix*) - basic_machine=clipper-intergraph - ;; - -isc*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -lynx*) - os=-lynxos - ;; - -ptx*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` - ;; - -windowsnt*) - os=`echo $os | sed -e 's/windowsnt/winnt/'` - ;; - -psos*) - os=-psos - ;; - -mint | -mint[0-9]*) - basic_machine=m68k-atari - os=-mint - ;; -esac - -# Decode aliases for certain CPU-COMPANY combinations. -case $basic_machine in - # Recognize the basic CPU types without company name. - # Some are omitted here because they have special meanings below. - 1750a | 580 \ - | a29k \ - | aarch64 | aarch64_be \ - | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ - | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ - | am33_2.0 \ - | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \ - | be32 | be64 \ - | bfin \ - | c4x | clipper \ - | d10v | d30v | dlx | dsp16xx \ - | epiphany \ - | fido | fr30 | frv \ - | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ - | hexagon \ - | i370 | i860 | i960 | ia64 \ - | ip2k | iq2000 \ - | le32 | le64 \ - | lm32 \ - | m32c | m32r | m32rle | m68000 | m68k | m88k \ - | maxq | mb | microblaze | mcore | mep | metag \ - | mips | mipsbe | mipseb | mipsel | mipsle \ - | mips16 \ - | mips64 | mips64el \ - | mips64octeon | mips64octeonel \ - | mips64orion | mips64orionel \ - | mips64r5900 | mips64r5900el \ - | mips64vr | mips64vrel \ - | mips64vr4100 | mips64vr4100el \ - | mips64vr4300 | mips64vr4300el \ - | mips64vr5000 | mips64vr5000el \ - | mips64vr5900 | mips64vr5900el \ - | mipsisa32 | mipsisa32el \ - | mipsisa32r2 | mipsisa32r2el \ - | mipsisa64 | 
mipsisa64el \ - | mipsisa64r2 | mipsisa64r2el \ - | mipsisa64sb1 | mipsisa64sb1el \ - | mipsisa64sr71k | mipsisa64sr71kel \ - | mipstx39 | mipstx39el \ - | mn10200 | mn10300 \ - | moxie \ - | mt \ - | msp430 \ - | nds32 | nds32le | nds32be \ - | nios | nios2 \ - | ns16k | ns32k \ - | open8 \ - | or32 \ - | pdp10 | pdp11 | pj | pjl \ - | powerpc | powerpc64 | powerpc64le | powerpcle \ - | pyramid \ - | rl78 | rx \ - | score \ - | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ - | sh64 | sh64le \ - | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ - | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ - | spu \ - | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ - | ubicom32 \ - | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ - | we32k \ - | x86 | xc16x | xstormy16 | xtensa \ - | z8k | z80) - basic_machine=$basic_machine-unknown - ;; - c54x) - basic_machine=tic54x-unknown - ;; - c55x) - basic_machine=tic55x-unknown - ;; - c6x) - basic_machine=tic6x-unknown - ;; - m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip) - basic_machine=$basic_machine-unknown - os=-none - ;; - m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) - ;; - ms1) - basic_machine=mt-unknown - ;; - - strongarm | thumb | xscale) - basic_machine=arm-unknown - ;; - xgate) - basic_machine=$basic_machine-unknown - os=-none - ;; - xscaleeb) - basic_machine=armeb-unknown - ;; - - xscaleel) - basic_machine=armel-unknown - ;; - - # We use `pc' rather than `unknown' - # because (1) that's what they normally are, and - # (2) the word "unknown" tends to confuse beginning users. - i*86 | x86_64) - basic_machine=$basic_machine-pc - ;; - # Object if more than one company name word. - *-*-*) - echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 - exit 1 - ;; - # Recognize the basic CPU types with company name. 
- 580-* \ - | a29k-* \ - | aarch64-* | aarch64_be-* \ - | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ - | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ - | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ - | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ - | avr-* | avr32-* \ - | be32-* | be64-* \ - | bfin-* | bs2000-* \ - | c[123]* | c30-* | [cjt]90-* | c4x-* \ - | clipper-* | craynv-* | cydra-* \ - | d10v-* | d30v-* | dlx-* \ - | elxsi-* \ - | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ - | h8300-* | h8500-* \ - | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ - | hexagon-* \ - | i*86-* | i860-* | i960-* | ia64-* \ - | ip2k-* | iq2000-* \ - | le32-* | le64-* \ - | lm32-* \ - | m32c-* | m32r-* | m32rle-* \ - | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ - | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \ - | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ - | mips16-* \ - | mips64-* | mips64el-* \ - | mips64octeon-* | mips64octeonel-* \ - | mips64orion-* | mips64orionel-* \ - | mips64r5900-* | mips64r5900el-* \ - | mips64vr-* | mips64vrel-* \ - | mips64vr4100-* | mips64vr4100el-* \ - | mips64vr4300-* | mips64vr4300el-* \ - | mips64vr5000-* | mips64vr5000el-* \ - | mips64vr5900-* | mips64vr5900el-* \ - | mipsisa32-* | mipsisa32el-* \ - | mipsisa32r2-* | mipsisa32r2el-* \ - | mipsisa64-* | mipsisa64el-* \ - | mipsisa64r2-* | mipsisa64r2el-* \ - | mipsisa64sb1-* | mipsisa64sb1el-* \ - | mipsisa64sr71k-* | mipsisa64sr71kel-* \ - | mipstx39-* | mipstx39el-* \ - | mmix-* \ - | mt-* \ - | msp430-* \ - | nds32-* | nds32le-* | nds32be-* \ - | nios-* | nios2-* \ - | none-* | np1-* | ns16k-* | ns32k-* \ - | open8-* \ - | orion-* \ - | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ - | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ - | pyramid-* \ - | rl78-* | romp-* | rs6000-* | rx-* \ - | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | 
sheb-* | shbe-* \ - | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ - | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ - | sparclite-* \ - | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \ - | tahoe-* \ - | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ - | tile*-* \ - | tron-* \ - | ubicom32-* \ - | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ - | vax-* \ - | we32k-* \ - | x86-* | x86_64-* | xc16x-* | xps100-* \ - | xstormy16-* | xtensa*-* \ - | ymp-* \ - | z8k-* | z80-*) - ;; - # Recognize the basic CPU types without company name, with glob match. - xtensa*) - basic_machine=$basic_machine-unknown - ;; - # Recognize the various machine names and aliases which stand - # for a CPU type and a company and sometimes even an OS. - 386bsd) - basic_machine=i386-unknown - os=-bsd - ;; - 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) - basic_machine=m68000-att - ;; - 3b*) - basic_machine=we32k-att - ;; - a29khif) - basic_machine=a29k-amd - os=-udi - ;; - abacus) - basic_machine=abacus-unknown - ;; - adobe68k) - basic_machine=m68010-adobe - os=-scout - ;; - alliant | fx80) - basic_machine=fx80-alliant - ;; - altos | altos3068) - basic_machine=m68k-altos - ;; - am29k) - basic_machine=a29k-none - os=-bsd - ;; - amd64) - basic_machine=x86_64-pc - ;; - amd64-*) - basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - amdahl) - basic_machine=580-amdahl - os=-sysv - ;; - amiga | amiga-*) - basic_machine=m68k-unknown - ;; - amigaos | amigados) - basic_machine=m68k-unknown - os=-amigaos - ;; - amigaunix | amix) - basic_machine=m68k-unknown - os=-sysv4 - ;; - apollo68) - basic_machine=m68k-apollo - os=-sysv - ;; - apollo68bsd) - basic_machine=m68k-apollo - os=-bsd - ;; - aros) - basic_machine=i386-pc - os=-aros - ;; - aux) - basic_machine=m68k-apple - os=-aux - ;; - balance) - basic_machine=ns32k-sequent - os=-dynix - ;; - blackfin) - basic_machine=bfin-unknown - 
os=-linux - ;; - blackfin-*) - basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` - os=-linux - ;; - bluegene*) - basic_machine=powerpc-ibm - os=-cnk - ;; - c54x-*) - basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - c55x-*) - basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - c6x-*) - basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - c90) - basic_machine=c90-cray - os=-unicos - ;; - cegcc) - basic_machine=arm-unknown - os=-cegcc - ;; - convex-c1) - basic_machine=c1-convex - os=-bsd - ;; - convex-c2) - basic_machine=c2-convex - os=-bsd - ;; - convex-c32) - basic_machine=c32-convex - os=-bsd - ;; - convex-c34) - basic_machine=c34-convex - os=-bsd - ;; - convex-c38) - basic_machine=c38-convex - os=-bsd - ;; - cray | j90) - basic_machine=j90-cray - os=-unicos - ;; - craynv) - basic_machine=craynv-cray - os=-unicosmp - ;; - cr16 | cr16-*) - basic_machine=cr16-unknown - os=-elf - ;; - crds | unos) - basic_machine=m68k-crds - ;; - crisv32 | crisv32-* | etraxfs*) - basic_machine=crisv32-axis - ;; - cris | cris-* | etrax*) - basic_machine=cris-axis - ;; - crx) - basic_machine=crx-unknown - os=-elf - ;; - da30 | da30-*) - basic_machine=m68k-da30 - ;; - decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) - basic_machine=mips-dec - ;; - decsystem10* | dec10*) - basic_machine=pdp10-dec - os=-tops10 - ;; - decsystem20* | dec20*) - basic_machine=pdp10-dec - os=-tops20 - ;; - delta | 3300 | motorola-3300 | motorola-delta \ - | 3300-motorola | delta-motorola) - basic_machine=m68k-motorola - ;; - delta88) - basic_machine=m88k-motorola - os=-sysv3 - ;; - dicos) - basic_machine=i686-pc - os=-dicos - ;; - djgpp) - basic_machine=i586-pc - os=-msdosdjgpp - ;; - dpx20 | dpx20-*) - basic_machine=rs6000-bull - os=-bosx - ;; - dpx2* | dpx2*-bull) - basic_machine=m68k-bull - os=-sysv3 - ;; - ebmon29k) - basic_machine=a29k-amd - os=-ebmon - ;; - elxsi) - basic_machine=elxsi-elxsi - os=-bsd - ;; - encore 
| umax | mmax) - basic_machine=ns32k-encore - ;; - es1800 | OSE68k | ose68k | ose | OSE) - basic_machine=m68k-ericsson - os=-ose - ;; - fx2800) - basic_machine=i860-alliant - ;; - genix) - basic_machine=ns32k-ns - ;; - gmicro) - basic_machine=tron-gmicro - os=-sysv - ;; - go32) - basic_machine=i386-pc - os=-go32 - ;; - h3050r* | hiux*) - basic_machine=hppa1.1-hitachi - os=-hiuxwe2 - ;; - h8300hms) - basic_machine=h8300-hitachi - os=-hms - ;; - h8300xray) - basic_machine=h8300-hitachi - os=-xray - ;; - h8500hms) - basic_machine=h8500-hitachi - os=-hms - ;; - harris) - basic_machine=m88k-harris - os=-sysv3 - ;; - hp300-*) - basic_machine=m68k-hp - ;; - hp300bsd) - basic_machine=m68k-hp - os=-bsd - ;; - hp300hpux) - basic_machine=m68k-hp - os=-hpux - ;; - hp3k9[0-9][0-9] | hp9[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hp9k2[0-9][0-9] | hp9k31[0-9]) - basic_machine=m68000-hp - ;; - hp9k3[2-9][0-9]) - basic_machine=m68k-hp - ;; - hp9k6[0-9][0-9] | hp6[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hp9k7[0-79][0-9] | hp7[0-79][0-9]) - basic_machine=hppa1.1-hp - ;; - hp9k78[0-9] | hp78[0-9]) - # FIXME: really hppa2.0-hp - basic_machine=hppa1.1-hp - ;; - hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) - # FIXME: really hppa2.0-hp - basic_machine=hppa1.1-hp - ;; - hp9k8[0-9][13679] | hp8[0-9][13679]) - basic_machine=hppa1.1-hp - ;; - hp9k8[0-9][0-9] | hp8[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hppa-next) - os=-nextstep3 - ;; - hppaosf) - basic_machine=hppa1.1-hp - os=-osf - ;; - hppro) - basic_machine=hppa1.1-hp - os=-proelf - ;; - i370-ibm* | ibm*) - basic_machine=i370-ibm - ;; - i*86v32) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv32 - ;; - i*86v4*) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv4 - ;; - i*86v) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv - ;; - i*86sol2) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-solaris2 - ;; - i386mach) - 
basic_machine=i386-mach - os=-mach - ;; - i386-vsta | vsta) - basic_machine=i386-unknown - os=-vsta - ;; - iris | iris4d) - basic_machine=mips-sgi - case $os in - -irix*) - ;; - *) - os=-irix4 - ;; - esac - ;; - isi68 | isi) - basic_machine=m68k-isi - os=-sysv - ;; - m68knommu) - basic_machine=m68k-unknown - os=-linux - ;; - m68knommu-*) - basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'` - os=-linux - ;; - m88k-omron*) - basic_machine=m88k-omron - ;; - magnum | m3230) - basic_machine=mips-mips - os=-sysv - ;; - merlin) - basic_machine=ns32k-utek - os=-sysv - ;; - microblaze) - basic_machine=microblaze-xilinx - ;; - mingw32) - basic_machine=i386-pc - os=-mingw32 - ;; - mingw32ce) - basic_machine=arm-unknown - os=-mingw32ce - ;; - miniframe) - basic_machine=m68000-convergent - ;; - *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) - basic_machine=m68k-atari - os=-mint - ;; - mips3*-*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` - ;; - mips3*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown - ;; - monitor) - basic_machine=m68k-rom68k - os=-coff - ;; - morphos) - basic_machine=powerpc-unknown - os=-morphos - ;; - msdos) - basic_machine=i386-pc - os=-msdos - ;; - ms1-*) - basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` - ;; - msys) - basic_machine=i386-pc - os=-msys - ;; - mvs) - basic_machine=i370-ibm - os=-mvs - ;; - nacl) - basic_machine=le32-unknown - os=-nacl - ;; - ncr3000) - basic_machine=i486-ncr - os=-sysv4 - ;; - netbsd386) - basic_machine=i386-unknown - os=-netbsd - ;; - netwinder) - basic_machine=armv4l-rebel - os=-linux - ;; - news | news700 | news800 | news900) - basic_machine=m68k-sony - os=-newsos - ;; - news1000) - basic_machine=m68030-sony - os=-newsos - ;; - news-3600 | risc-news) - basic_machine=mips-sony - os=-newsos - ;; - necv70) - basic_machine=v70-nec - os=-sysv - ;; - next | m*-next ) - basic_machine=m68k-next - case $os in - -nextstep* ) - ;; - -ns2*) - os=-nextstep2 - ;; - *) - 
os=-nextstep3 - ;; - esac - ;; - nh3000) - basic_machine=m68k-harris - os=-cxux - ;; - nh[45]000) - basic_machine=m88k-harris - os=-cxux - ;; - nindy960) - basic_machine=i960-intel - os=-nindy - ;; - mon960) - basic_machine=i960-intel - os=-mon960 - ;; - nonstopux) - basic_machine=mips-compaq - os=-nonstopux - ;; - np1) - basic_machine=np1-gould - ;; - neo-tandem) - basic_machine=neo-tandem - ;; - nse-tandem) - basic_machine=nse-tandem - ;; - nsr-tandem) - basic_machine=nsr-tandem - ;; - op50n-* | op60c-*) - basic_machine=hppa1.1-oki - os=-proelf - ;; - openrisc | openrisc-*) - basic_machine=or32-unknown - ;; - os400) - basic_machine=powerpc-ibm - os=-os400 - ;; - OSE68000 | ose68000) - basic_machine=m68000-ericsson - os=-ose - ;; - os68k) - basic_machine=m68k-none - os=-os68k - ;; - pa-hitachi) - basic_machine=hppa1.1-hitachi - os=-hiuxwe2 - ;; - paragon) - basic_machine=i860-intel - os=-osf - ;; - parisc) - basic_machine=hppa-unknown - os=-linux - ;; - parisc-*) - basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'` - os=-linux - ;; - pbd) - basic_machine=sparc-tti - ;; - pbb) - basic_machine=m68k-tti - ;; - pc532 | pc532-*) - basic_machine=ns32k-pc532 - ;; - pc98) - basic_machine=i386-pc - ;; - pc98-*) - basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentium | p5 | k5 | k6 | nexgen | viac3) - basic_machine=i586-pc - ;; - pentiumpro | p6 | 6x86 | athlon | athlon_*) - basic_machine=i686-pc - ;; - pentiumii | pentium2 | pentiumiii | pentium3) - basic_machine=i686-pc - ;; - pentium4) - basic_machine=i786-pc - ;; - pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) - basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentiumpro-* | p6-* | 6x86-* | athlon-*) - basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) - basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentium4-*) - basic_machine=i786-`echo $basic_machine | sed 
's/^[^-]*-//'` - ;; - pn) - basic_machine=pn-gould - ;; - power) basic_machine=power-ibm - ;; - ppc | ppcbe) basic_machine=powerpc-unknown - ;; - ppc-* | ppcbe-*) - basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppcle | powerpclittle | ppc-le | powerpc-little) - basic_machine=powerpcle-unknown - ;; - ppcle-* | powerpclittle-*) - basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppc64) basic_machine=powerpc64-unknown - ;; - ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppc64le | powerpc64little | ppc64-le | powerpc64-little) - basic_machine=powerpc64le-unknown - ;; - ppc64le-* | powerpc64little-*) - basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ps2) - basic_machine=i386-ibm - ;; - pw32) - basic_machine=i586-unknown - os=-pw32 - ;; - rdos) - basic_machine=i386-pc - os=-rdos - ;; - rom68k) - basic_machine=m68k-rom68k - os=-coff - ;; - rm[46]00) - basic_machine=mips-siemens - ;; - rtpc | rtpc-*) - basic_machine=romp-ibm - ;; - s390 | s390-*) - basic_machine=s390-ibm - ;; - s390x | s390x-*) - basic_machine=s390x-ibm - ;; - sa29200) - basic_machine=a29k-amd - os=-udi - ;; - sb1) - basic_machine=mipsisa64sb1-unknown - ;; - sb1el) - basic_machine=mipsisa64sb1el-unknown - ;; - sde) - basic_machine=mipsisa32-sde - os=-elf - ;; - sei) - basic_machine=mips-sei - os=-seiux - ;; - sequent) - basic_machine=i386-sequent - ;; - sh) - basic_machine=sh-hitachi - os=-hms - ;; - sh5el) - basic_machine=sh5le-unknown - ;; - sh64) - basic_machine=sh64-unknown - ;; - sparclite-wrs | simso-wrs) - basic_machine=sparclite-wrs - os=-vxworks - ;; - sps7) - basic_machine=m68k-bull - os=-sysv2 - ;; - spur) - basic_machine=spur-unknown - ;; - st2000) - basic_machine=m68k-tandem - ;; - stratus) - basic_machine=i860-stratus - os=-sysv4 - ;; - strongarm-* | thumb-*) - basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - sun2) - basic_machine=m68000-sun - ;; - sun2os3) - 
basic_machine=m68000-sun - os=-sunos3 - ;; - sun2os4) - basic_machine=m68000-sun - os=-sunos4 - ;; - sun3os3) - basic_machine=m68k-sun - os=-sunos3 - ;; - sun3os4) - basic_machine=m68k-sun - os=-sunos4 - ;; - sun4os3) - basic_machine=sparc-sun - os=-sunos3 - ;; - sun4os4) - basic_machine=sparc-sun - os=-sunos4 - ;; - sun4sol2) - basic_machine=sparc-sun - os=-solaris2 - ;; - sun3 | sun3-*) - basic_machine=m68k-sun - ;; - sun4) - basic_machine=sparc-sun - ;; - sun386 | sun386i | roadrunner) - basic_machine=i386-sun - ;; - sv1) - basic_machine=sv1-cray - os=-unicos - ;; - symmetry) - basic_machine=i386-sequent - os=-dynix - ;; - t3e) - basic_machine=alphaev5-cray - os=-unicos - ;; - t90) - basic_machine=t90-cray - os=-unicos - ;; - tile*) - basic_machine=$basic_machine-unknown - os=-linux-gnu - ;; - tx39) - basic_machine=mipstx39-unknown - ;; - tx39el) - basic_machine=mipstx39el-unknown - ;; - toad1) - basic_machine=pdp10-xkl - os=-tops20 - ;; - tower | tower-32) - basic_machine=m68k-ncr - ;; - tpf) - basic_machine=s390x-ibm - os=-tpf - ;; - udi29k) - basic_machine=a29k-amd - os=-udi - ;; - ultra3) - basic_machine=a29k-nyu - os=-sym1 - ;; - v810 | necv810) - basic_machine=v810-nec - os=-none - ;; - vaxv) - basic_machine=vax-dec - os=-sysv - ;; - vms) - basic_machine=vax-dec - os=-vms - ;; - vpp*|vx|vx-*) - basic_machine=f301-fujitsu - ;; - vxworks960) - basic_machine=i960-wrs - os=-vxworks - ;; - vxworks68) - basic_machine=m68k-wrs - os=-vxworks - ;; - vxworks29k) - basic_machine=a29k-wrs - os=-vxworks - ;; - w65*) - basic_machine=w65-wdc - os=-none - ;; - w89k-*) - basic_machine=hppa1.1-winbond - os=-proelf - ;; - xbox) - basic_machine=i686-pc - os=-mingw32 - ;; - xps | xps100) - basic_machine=xps100-honeywell - ;; - xscale-* | xscalee[bl]-*) - basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'` - ;; - ymp) - basic_machine=ymp-cray - os=-unicos - ;; - z8k-*-coff) - basic_machine=z8k-unknown - os=-sim - ;; - z80-*-coff) - basic_machine=z80-unknown - os=-sim - 
;; - none) - basic_machine=none-none - os=-none - ;; - -# Here we handle the default manufacturer of certain CPU types. It is in -# some cases the only manufacturer, in others, it is the most popular. - w89k) - basic_machine=hppa1.1-winbond - ;; - op50n) - basic_machine=hppa1.1-oki - ;; - op60c) - basic_machine=hppa1.1-oki - ;; - romp) - basic_machine=romp-ibm - ;; - mmix) - basic_machine=mmix-knuth - ;; - rs6000) - basic_machine=rs6000-ibm - ;; - vax) - basic_machine=vax-dec - ;; - pdp10) - # there are many clones, so DEC is not a safe bet - basic_machine=pdp10-unknown - ;; - pdp11) - basic_machine=pdp11-dec - ;; - we32k) - basic_machine=we32k-att - ;; - sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele) - basic_machine=sh-unknown - ;; - sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) - basic_machine=sparc-sun - ;; - cydra) - basic_machine=cydra-cydrome - ;; - orion) - basic_machine=orion-highlevel - ;; - orion105) - basic_machine=clipper-highlevel - ;; - mac | mpw | mac-mpw) - basic_machine=m68k-apple - ;; - pmac | pmac-mpw) - basic_machine=powerpc-apple - ;; - *-unknown) - # Make sure to match an already-canonicalized machine name. - ;; - *) - echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 - exit 1 - ;; -esac - -# Here we canonicalize certain aliases for manufacturers. -case $basic_machine in - *-digital*) - basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` - ;; - *-commodore*) - basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` - ;; - *) - ;; -esac - -# Decode manufacturer-specific aliases for certain operating systems. - -if [ x"$os" != x"" ] -then -case $os in - # First match some system type aliases - # that might get confused with valid system types. - # -solaris* is a basic system type, with this one exception. 
- -auroraux) - os=-auroraux - ;; - -solaris1 | -solaris1.*) - os=`echo $os | sed -e 's|solaris1|sunos4|'` - ;; - -solaris) - os=-solaris2 - ;; - -svr4*) - os=-sysv4 - ;; - -unixware*) - os=-sysv4.2uw - ;; - -gnu/linux*) - os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` - ;; - # First accept the basic system types. - # The portable systems comes first. - # Each alternative MUST END IN A *, to match a version number. - # -sysv* is not here because it comes later, after sysvr4. - -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ - | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ - | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ - | -sym* | -kopensolaris* \ - | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ - | -aos* | -aros* \ - | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ - | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ - | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ - | -openbsd* | -solidbsd* \ - | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ - | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ - | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ - | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ - | -chorusos* | -chorusrdb* | -cegcc* \ - | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -mingw32* | -linux-gnu* | -linux-android* \ - | -linux-newlib* | -linux-uclibc* \ - | -uxpv* | -beos* | -mpeix* | -udk* \ - | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ - | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ - | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ - | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ - | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ - | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ - | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*) - # Remember, each alternative MUST END IN *, to match a version 
number. - ;; - -qnx*) - case $basic_machine in - x86-* | i*86-*) - ;; - *) - os=-nto$os - ;; - esac - ;; - -nto-qnx*) - ;; - -nto*) - os=`echo $os | sed -e 's|nto|nto-qnx|'` - ;; - -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ - | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ - | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) - ;; - -mac*) - os=`echo $os | sed -e 's|mac|macos|'` - ;; - -linux-dietlibc) - os=-linux-dietlibc - ;; - -linux*) - os=`echo $os | sed -e 's|linux|linux-gnu|'` - ;; - -sunos5*) - os=`echo $os | sed -e 's|sunos5|solaris2|'` - ;; - -sunos6*) - os=`echo $os | sed -e 's|sunos6|solaris3|'` - ;; - -opened*) - os=-openedition - ;; - -os400*) - os=-os400 - ;; - -wince*) - os=-wince - ;; - -osfrose*) - os=-osfrose - ;; - -osf*) - os=-osf - ;; - -utek*) - os=-bsd - ;; - -dynix*) - os=-bsd - ;; - -acis*) - os=-aos - ;; - -atheos*) - os=-atheos - ;; - -syllable*) - os=-syllable - ;; - -386bsd) - os=-bsd - ;; - -ctix* | -uts*) - os=-sysv - ;; - -nova*) - os=-rtmk-nova - ;; - -ns2 ) - os=-nextstep2 - ;; - -nsk*) - os=-nsk - ;; - # Preserve the version number of sinix5. - -sinix5.*) - os=`echo $os | sed -e 's|sinix|sysv|'` - ;; - -sinix*) - os=-sysv4 - ;; - -tpf*) - os=-tpf - ;; - -triton*) - os=-sysv3 - ;; - -oss*) - os=-sysv3 - ;; - -svr4) - os=-sysv4 - ;; - -svr3) - os=-sysv3 - ;; - -sysvr4) - os=-sysv4 - ;; - # This must come after -sysvr4. - -sysv*) - ;; - -ose*) - os=-ose - ;; - -es1800*) - os=-ose - ;; - -xenix) - os=-xenix - ;; - -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) - os=-mint - ;; - -aros*) - os=-aros - ;; - -kaos*) - os=-kaos - ;; - -zvmoe) - os=-zvmoe - ;; - -dicos*) - os=-dicos - ;; - -nacl*) - ;; - -none) - ;; - *) - # Get rid of the `-' at the beginning of $os. - os=`echo $os | sed 's/[^-]*-//'` - echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 - exit 1 - ;; -esac -else - -# Here we handle the default operating systems that come with various machines. 
-# The value should be what the vendor currently ships out the door with their -# machine or put another way, the most popular os provided with the machine. - -# Note that if you're going to try to match "-MANUFACTURER" here (say, -# "-sun"), then you have to tell the case statement up towards the top -# that MANUFACTURER isn't an operating system. Otherwise, code above -# will signal an error saying that MANUFACTURER isn't an operating -# system, and we'll never get to this point. - -case $basic_machine in - score-*) - os=-elf - ;; - spu-*) - os=-elf - ;; - *-acorn) - os=-riscix1.2 - ;; - arm*-rebel) - os=-linux - ;; - arm*-semi) - os=-aout - ;; - c4x-* | tic4x-*) - os=-coff - ;; - tic54x-*) - os=-coff - ;; - tic55x-*) - os=-coff - ;; - tic6x-*) - os=-coff - ;; - # This must come before the *-dec entry. - pdp10-*) - os=-tops20 - ;; - pdp11-*) - os=-none - ;; - *-dec | vax-*) - os=-ultrix4.2 - ;; - m68*-apollo) - os=-domain - ;; - i386-sun) - os=-sunos4.0.2 - ;; - m68000-sun) - os=-sunos3 - ;; - m68*-cisco) - os=-aout - ;; - mep-*) - os=-elf - ;; - mips*-cisco) - os=-elf - ;; - mips*-*) - os=-elf - ;; - or32-*) - os=-coff - ;; - *-tti) # must be before sparc entry or we get the wrong os. 
- os=-sysv3 - ;; - sparc-* | *-sun) - os=-sunos4.1.1 - ;; - *-be) - os=-beos - ;; - *-haiku) - os=-haiku - ;; - *-ibm) - os=-aix - ;; - *-knuth) - os=-mmixware - ;; - *-wec) - os=-proelf - ;; - *-winbond) - os=-proelf - ;; - *-oki) - os=-proelf - ;; - *-hp) - os=-hpux - ;; - *-hitachi) - os=-hiux - ;; - i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) - os=-sysv - ;; - *-cbm) - os=-amigaos - ;; - *-dg) - os=-dgux - ;; - *-dolphin) - os=-sysv3 - ;; - m68k-ccur) - os=-rtu - ;; - m88k-omron*) - os=-luna - ;; - *-next ) - os=-nextstep - ;; - *-sequent) - os=-ptx - ;; - *-crds) - os=-unos - ;; - *-ns) - os=-genix - ;; - i370-*) - os=-mvs - ;; - *-next) - os=-nextstep3 - ;; - *-gould) - os=-sysv - ;; - *-highlevel) - os=-bsd - ;; - *-encore) - os=-bsd - ;; - *-sgi) - os=-irix - ;; - *-siemens) - os=-sysv4 - ;; - *-masscomp) - os=-rtu - ;; - f30[01]-fujitsu | f700-fujitsu) - os=-uxpv - ;; - *-rom68k) - os=-coff - ;; - *-*bug) - os=-coff - ;; - *-apple) - os=-macos - ;; - *-atari*) - os=-mint - ;; - *) - os=-none - ;; -esac -fi - -# Here we handle the case where we know the os, and the CPU type, but not the -# manufacturer. We pick the logical manufacturer. 
-vendor=unknown -case $basic_machine in - *-unknown) - case $os in - -riscix*) - vendor=acorn - ;; - -sunos*) - vendor=sun - ;; - -cnk*|-aix*) - vendor=ibm - ;; - -beos*) - vendor=be - ;; - -hpux*) - vendor=hp - ;; - -mpeix*) - vendor=hp - ;; - -hiux*) - vendor=hitachi - ;; - -unos*) - vendor=crds - ;; - -dgux*) - vendor=dg - ;; - -luna*) - vendor=omron - ;; - -genix*) - vendor=ns - ;; - -mvs* | -opened*) - vendor=ibm - ;; - -os400*) - vendor=ibm - ;; - -ptx*) - vendor=sequent - ;; - -tpf*) - vendor=ibm - ;; - -vxsim* | -vxworks* | -windiss*) - vendor=wrs - ;; - -aux*) - vendor=apple - ;; - -hms*) - vendor=hitachi - ;; - -mpw* | -macos*) - vendor=apple - ;; - -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) - vendor=atari - ;; - -vos*) - vendor=stratus - ;; - esac - basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` - ;; -esac - -echo $basic_machine$os -exit - -# Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "timestamp='" -# time-stamp-format: "%:y-%02m-%02d" -# time-stamp-end: "'" -# End: diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/configure mariadb-5.5-5.5.40/extra/jemalloc/configure --- mariadb-5.5-5.5.39/extra/jemalloc/configure 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/configure 1970-01-01 00:00:00.000000000 +0000 @@ -1,8339 +0,0 @@ -#! /bin/sh -# Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.68. -# -# -# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, -# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software -# Foundation, Inc. -# -# -# This configure script is free software; the Free Software Foundation -# gives unlimited permission to copy, distribute and modify it. -## -------------------- ## -## M4sh Initialization. 
## -## -------------------- ## - -# Be more Bourne compatible -DUALCASE=1; export DUALCASE # for MKS sh -if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : - emulate sh - NULLCMD=: - # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which - # is contrary to our usage. Disable this feature. - alias -g '${1+"$@"}'='"$@"' - setopt NO_GLOB_SUBST -else - case `(set -o) 2>/dev/null` in #( - *posix*) : - set -o posix ;; #( - *) : - ;; -esac -fi - - -as_nl=' -' -export as_nl -# Printing a long string crashes Solaris 7 /usr/bin/printf. -as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo -# Prefer a ksh shell builtin over an external printf program on Solaris, -# but without wasting forks for bash or zsh. -if test -z "$BASH_VERSION$ZSH_VERSION" \ - && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='print -r --' - as_echo_n='print -rn --' -elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='printf %s\n' - as_echo_n='printf %s' -else - if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then - as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' - as_echo_n='/usr/ucb/echo -n' - else - as_echo_body='eval expr "X$1" : "X\\(.*\\)"' - as_echo_n_body='eval - arg=$1; - case $arg in #( - *"$as_nl"*) - expr "X$arg" : "X\\(.*\\)$as_nl"; - arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; - esac; - expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" - ' - export as_echo_n_body - as_echo_n='sh -c $as_echo_n_body as_echo' - fi - export as_echo_body - as_echo='sh -c $as_echo_body as_echo' -fi - -# The user is always right. 
-if test "${PATH_SEPARATOR+set}" != set; then - PATH_SEPARATOR=: - (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { - (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || - PATH_SEPARATOR=';' - } -fi - - -# IFS -# We need space, tab and new line, in precisely that order. Quoting is -# there to prevent editors from complaining about space-tab. -# (If _AS_PATH_WALK were called with IFS unset, it would disable word -# splitting by setting IFS to empty value.) -IFS=" "" $as_nl" - -# Find who we are. Look in the path if we contain no directory separator. -as_myself= -case $0 in #(( - *[\\/]* ) as_myself=$0 ;; - *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break - done -IFS=$as_save_IFS - - ;; -esac -# We did not find ourselves, most probably we were run as `sh COMMAND' -# in which case we are not to be found in the path. -if test "x$as_myself" = x; then - as_myself=$0 -fi -if test ! -f "$as_myself"; then - $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 - exit 1 -fi - -# Unset variables that we do not need and which cause bugs (e.g. in -# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" -# suppresses any "Segmentation fault" message there. '((' could -# trigger a bug in pdksh 5.2.14. -for as_var in BASH_ENV ENV MAIL MAILPATH -do eval test x\${$as_var+set} = xset \ - && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : -done -PS1='$ ' -PS2='> ' -PS4='+ ' - -# NLS nuisances. -LC_ALL=C -export LC_ALL -LANGUAGE=C -export LANGUAGE - -# CDPATH. -(unset CDPATH) >/dev/null 2>&1 && unset CDPATH - -if test "x$CONFIG_SHELL" = x; then - as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : - emulate sh - NULLCMD=: - # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which - # is contrary to our usage. 
Disable this feature. - alias -g '\${1+\"\$@\"}'='\"\$@\"' - setopt NO_GLOB_SUBST -else - case \`(set -o) 2>/dev/null\` in #( - *posix*) : - set -o posix ;; #( - *) : - ;; -esac -fi -" - as_required="as_fn_return () { (exit \$1); } -as_fn_success () { as_fn_return 0; } -as_fn_failure () { as_fn_return 1; } -as_fn_ret_success () { return 0; } -as_fn_ret_failure () { return 1; } - -exitcode=0 -as_fn_success || { exitcode=1; echo as_fn_success failed.; } -as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } -as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } -as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } -if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : - -else - exitcode=1; echo positional parameters were not saved. -fi -test x\$exitcode = x0 || exit 1" - as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO - as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO - eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && - test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 -test \$(( 1 + 1 )) = 2 || exit 1" - if (eval "$as_required") 2>/dev/null; then : - as_have_required=yes -else - as_have_required=no -fi - if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : - -else - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -as_found=false -for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - as_found=: - case $as_dir in #( - /*) - for as_base in sh bash ksh sh5; do - # Try only shells that exist, to save several forks. 
- as_shell=$as_dir/$as_base - if { test -f "$as_shell" || test -f "$as_shell.exe"; } && - { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : - CONFIG_SHELL=$as_shell as_have_required=yes - if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : - break 2 -fi -fi - done;; - esac - as_found=false -done -$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && - { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : - CONFIG_SHELL=$SHELL as_have_required=yes -fi; } -IFS=$as_save_IFS - - - if test "x$CONFIG_SHELL" != x; then : - # We cannot yet assume a decent shell, so we have to provide a - # neutralization value for shells without unset; and this also - # works around shells that cannot unset nonexistent variables. - # Preserve -v and -x to the replacement shell. - BASH_ENV=/dev/null - ENV=/dev/null - (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV - export CONFIG_SHELL - case $- in # (((( - *v*x* | *x*v* ) as_opts=-vx ;; - *v* ) as_opts=-v ;; - *x* ) as_opts=-x ;; - * ) as_opts= ;; - esac - exec "$CONFIG_SHELL" $as_opts "$as_myself" ${1+"$@"} -fi - - if test x$as_have_required = xno; then : - $as_echo "$0: This script requires a shell more modern than all" - $as_echo "$0: the shells that I found on your system." - if test x${ZSH_VERSION+set} = xset ; then - $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" - $as_echo "$0: be upgraded to zsh 4.3.4 or later." - else - $as_echo "$0: Please tell bug-autoconf@gnu.org about your system, -$0: including any error possibly output before this -$0: message. Then install a modern shell, or manually run -$0: the script under such a shell if you do have one." - fi - exit 1 -fi -fi -fi -SHELL=${CONFIG_SHELL-/bin/sh} -export SHELL -# Unset more variables known to interfere with behavior of common tools. 
-CLICOLOR_FORCE= GREP_OPTIONS= -unset CLICOLOR_FORCE GREP_OPTIONS - -## --------------------- ## -## M4sh Shell Functions. ## -## --------------------- ## -# as_fn_unset VAR -# --------------- -# Portably unset VAR. -as_fn_unset () -{ - { eval $1=; unset $1;} -} -as_unset=as_fn_unset - -# as_fn_set_status STATUS -# ----------------------- -# Set $? to STATUS, without forking. -as_fn_set_status () -{ - return $1 -} # as_fn_set_status - -# as_fn_exit STATUS -# ----------------- -# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. -as_fn_exit () -{ - set +e - as_fn_set_status $1 - exit $1 -} # as_fn_exit - -# as_fn_mkdir_p -# ------------- -# Create "$as_dir" as a directory, including parents if necessary. -as_fn_mkdir_p () -{ - - case $as_dir in #( - -*) as_dir=./$as_dir;; - esac - test -d "$as_dir" || eval $as_mkdir_p || { - as_dirs= - while :; do - case $as_dir in #( - *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( - *) as_qdir=$as_dir;; - esac - as_dirs="'$as_qdir' $as_dirs" - as_dir=`$as_dirname -- "$as_dir" || -$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$as_dir" : 'X\(//\)[^/]' \| \ - X"$as_dir" : 'X\(//\)$' \| \ - X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$as_dir" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - test -d "$as_dir" && break - done - test -z "$as_dirs" || eval "mkdir $as_dirs" - } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" - - -} # as_fn_mkdir_p -# as_fn_append VAR VALUE -# ---------------------- -# Append the text in VALUE to the end of the definition contained in VAR. Take -# advantage of any shell optimizations that allow amortized linear growth over -# repeated appends, instead of the typical quadratic growth present in naive -# implementations. 
-if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : - eval 'as_fn_append () - { - eval $1+=\$2 - }' -else - as_fn_append () - { - eval $1=\$$1\$2 - } -fi # as_fn_append - -# as_fn_arith ARG... -# ------------------ -# Perform arithmetic evaluation on the ARGs, and store the result in the -# global $as_val. Take advantage of shells that can avoid forks. The arguments -# must be portable across $(()) and expr. -if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : - eval 'as_fn_arith () - { - as_val=$(( $* )) - }' -else - as_fn_arith () - { - as_val=`expr "$@" || test $? -eq 1` - } -fi # as_fn_arith - - -# as_fn_error STATUS ERROR [LINENO LOG_FD] -# ---------------------------------------- -# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are -# provided, also output the error to LOG_FD, referencing LINENO. Then exit the -# script with STATUS, using 1 if that was 0. -as_fn_error () -{ - as_status=$1; test $as_status -eq 0 && as_status=1 - if test "$4"; then - as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 - fi - $as_echo "$as_me: error: $2" >&2 - as_fn_exit $as_status -} # as_fn_error - -if expr a : '\(a\)' >/dev/null 2>&1 && - test "X`expr 00001 : '.*\(...\)'`" = X001; then - as_expr=expr -else - as_expr=false -fi - -if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then - as_basename=basename -else - as_basename=false -fi - -if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then - as_dirname=dirname -else - as_dirname=false -fi - -as_me=`$as_basename -- "$0" || -$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ - X"$0" : 'X\(//\)$' \| \ - X"$0" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X/"$0" | - sed '/^.*\/\([^/][^/]*\)\/*$/{ - s//\1/ - q - } - /^X\/\(\/\/\)$/{ - s//\1/ - q - } - /^X\/\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - -# Avoid depending upon Character Ranges. 
-as_cr_letters='abcdefghijklmnopqrstuvwxyz' -as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' -as_cr_Letters=$as_cr_letters$as_cr_LETTERS -as_cr_digits='0123456789' -as_cr_alnum=$as_cr_Letters$as_cr_digits - - - as_lineno_1=$LINENO as_lineno_1a=$LINENO - as_lineno_2=$LINENO as_lineno_2a=$LINENO - eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && - test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { - # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-) - sed -n ' - p - /[$]LINENO/= - ' <$as_myself | - sed ' - s/[$]LINENO.*/&-/ - t lineno - b - :lineno - N - :loop - s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ - t loop - s/-\n.*// - ' >$as_me.lineno && - chmod +x "$as_me.lineno" || - { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } - - # Don't try to exec as it changes $[0], causing all sort of problems - # (the dirname of $[0] is not the place where we might find the - # original and so on. Autoconf is especially sensitive to this). - . "./$as_me.lineno" - # Exit status is that of the last command. - exit -} - -ECHO_C= ECHO_N= ECHO_T= -case `echo -n x` in #((((( --n*) - case `echo 'xy\c'` in - *c*) ECHO_T=' ';; # ECHO_T is single tab character. - xy) ECHO_C='\c';; - *) echo `echo ksh88 bug on AIX 6.1` > /dev/null - ECHO_T=' ';; - esac;; -*) - ECHO_N='-n';; -esac - -rm -f conf$$ conf$$.exe conf$$.file -if test -d conf$$.dir; then - rm -f conf$$.dir/conf$$.file -else - rm -f conf$$.dir - mkdir conf$$.dir 2>/dev/null -fi -if (echo >conf$$.file) 2>/dev/null; then - if ln -s conf$$.file conf$$ 2>/dev/null; then - as_ln_s='ln -s' - # ... but there are two gotchas: - # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. - # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. - # In both cases, we have to default to `cp -p'. - ln -s conf$$.file conf$$.dir 2>/dev/null && test ! 
-f conf$$.exe || - as_ln_s='cp -p' - elif ln conf$$.file conf$$ 2>/dev/null; then - as_ln_s=ln - else - as_ln_s='cp -p' - fi -else - as_ln_s='cp -p' -fi -rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file -rmdir conf$$.dir 2>/dev/null - -if mkdir -p . 2>/dev/null; then - as_mkdir_p='mkdir -p "$as_dir"' -else - test -d ./-p && rmdir ./-p - as_mkdir_p=false -fi - -if test -x / >/dev/null 2>&1; then - as_test_x='test -x' -else - if ls -dL / >/dev/null 2>&1; then - as_ls_L_option=L - else - as_ls_L_option= - fi - as_test_x=' - eval sh -c '\'' - if test -d "$1"; then - test -d "$1/."; - else - case $1 in #( - -*)set "./$1";; - esac; - case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #(( - ???[sx]*):;;*)false;;esac;fi - '\'' sh - ' -fi -as_executable_p=$as_test_x - -# Sed expression to map a string onto a valid CPP name. -as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" - -# Sed expression to map a string onto a valid variable name. -as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" - - -test -n "$DJDIR" || exec 7<&0 &1 - -# Name of the host. -# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, -# so uname gets run too. -ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` - -# -# Initializations. -# -ac_default_prefix=/usr/local -ac_clean_files= -ac_config_libobj_dir=. -LIBOBJS= -cross_compiling=no -subdirs= -MFLAGS= -MAKEFLAGS= - -# Identity of this package. -PACKAGE_NAME= -PACKAGE_TARNAME= -PACKAGE_VERSION= -PACKAGE_STRING= -PACKAGE_BUGREPORT= -PACKAGE_URL= - -ac_unique_file="Makefile.in" -# Factoring default headers for most tests. 
-ac_includes_default="\ -#include -#ifdef HAVE_SYS_TYPES_H -# include -#endif -#ifdef HAVE_SYS_STAT_H -# include -#endif -#ifdef STDC_HEADERS -# include -# include -#else -# ifdef HAVE_STDLIB_H -# include -# endif -#endif -#ifdef HAVE_STRING_H -# if !defined STDC_HEADERS && defined HAVE_MEMORY_H -# include -# endif -# include -#endif -#ifdef HAVE_STRINGS_H -# include -#endif -#ifdef HAVE_INTTYPES_H -# include -#endif -#ifdef HAVE_STDINT_H -# include -#endif -#ifdef HAVE_UNISTD_H -# include -#endif" - -ac_subst_vars='LTLIBOBJS -LIBOBJS -cfgoutputs_out -cfgoutputs_in -cfghdrs_out -cfghdrs_in -enable_zone_allocator -enable_tls -enable_lazy_lock -jemalloc_version_gid -jemalloc_version_nrev -jemalloc_version_bugfix -jemalloc_version_minor -jemalloc_version_major -jemalloc_version -enable_xmalloc -enable_valgrind -enable_utrace -enable_fill -enable_dss -enable_munmap -enable_mremap -enable_tcache -enable_prof -enable_stats -enable_debug -install_suffix -enable_experimental -AUTOCONF -LD -AR -RANLIB -INSTALL_DATA -INSTALL_SCRIPT -INSTALL_PROGRAM -enable_autogen -RPATH_EXTRA -CC_MM -MKLIB -LDTARGET -CTARGET -PIC_CFLAGS -SOREV -EXTRA_LDFLAGS -DSO_LDFLAGS -libprefix -exe -a -o -importlib -so -LD_PRELOAD_VAR -RPATH -abi -host_os -host_vendor -host_cpu -host -build_os -build_vendor -build_cpu -build -EGREP -GREP -CPP -OBJEXT -EXEEXT -ac_ct_CC -CPPFLAGS -LDFLAGS -CFLAGS -CC -XSLROOT -XSLTPROC -MANDIR -DATADIR -LIBDIR -INCLUDEDIR -BINDIR -PREFIX -abs_objroot -objroot -abs_srcroot -srcroot -rev -target_alias -host_alias -build_alias -LIBS -ECHO_T -ECHO_N -ECHO_C -DEFS -mandir -localedir -libdir -psdir -pdfdir -dvidir -htmldir -infodir -docdir -oldincludedir -includedir -localstatedir -sharedstatedir -sysconfdir -datadir -datarootdir -libexecdir -sbindir -bindir -program_transform_name -prefix -exec_prefix -PACKAGE_URL -PACKAGE_BUGREPORT -PACKAGE_STRING -PACKAGE_VERSION -PACKAGE_TARNAME -PACKAGE_NAME -PATH_SEPARATOR -SHELL' -ac_subst_files='' -ac_user_opts=' 
-enable_option_checking -with_xslroot -with_rpath -enable_autogen -enable_experimental -with_mangling -with_jemalloc_prefix -with_export -with_private_namespace -with_install_suffix -enable_cc_silence -enable_debug -enable_ivsalloc -enable_stats -enable_prof -enable_prof_libunwind -with_static_libunwind -enable_prof_libgcc -enable_prof_gcc -enable_tcache -enable_mremap -enable_munmap -enable_dss -enable_fill -enable_utrace -enable_valgrind -enable_xmalloc -enable_lazy_lock -enable_tls -enable_zone_allocator -' - ac_precious_vars='build_alias -host_alias -target_alias -CC -CFLAGS -LDFLAGS -LIBS -CPPFLAGS -CPP' - - -# Initialize some variables set by options. -ac_init_help= -ac_init_version=false -ac_unrecognized_opts= -ac_unrecognized_sep= -# The variables have the same names as the options, with -# dashes changed to underlines. -cache_file=/dev/null -exec_prefix=NONE -no_create= -no_recursion= -prefix=NONE -program_prefix=NONE -program_suffix=NONE -program_transform_name=s,x,x, -silent= -site= -srcdir= -verbose= -x_includes=NONE -x_libraries=NONE - -# Installation directory options. -# These are left unexpanded so users can "make install exec_prefix=/foo" -# and all the variables that are supposed to be based on exec_prefix -# by default will actually change. -# Use braces instead of parens because sh, perl, etc. also accept them. -# (The list follows the same order as the GNU Coding Standards.) 
-bindir='${exec_prefix}/bin' -sbindir='${exec_prefix}/sbin' -libexecdir='${exec_prefix}/libexec' -datarootdir='${prefix}/share' -datadir='${datarootdir}' -sysconfdir='${prefix}/etc' -sharedstatedir='${prefix}/com' -localstatedir='${prefix}/var' -includedir='${prefix}/include' -oldincludedir='/usr/include' -docdir='${datarootdir}/doc/${PACKAGE}' -infodir='${datarootdir}/info' -htmldir='${docdir}' -dvidir='${docdir}' -pdfdir='${docdir}' -psdir='${docdir}' -libdir='${exec_prefix}/lib' -localedir='${datarootdir}/locale' -mandir='${datarootdir}/man' - -ac_prev= -ac_dashdash= -for ac_option -do - # If the previous option needs an argument, assign it. - if test -n "$ac_prev"; then - eval $ac_prev=\$ac_option - ac_prev= - continue - fi - - case $ac_option in - *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; - *=) ac_optarg= ;; - *) ac_optarg=yes ;; - esac - - # Accept the important Cygnus configure options, so we can diagnose typos. - - case $ac_dashdash$ac_option in - --) - ac_dashdash=yes ;; - - -bindir | --bindir | --bindi | --bind | --bin | --bi) - ac_prev=bindir ;; - -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) - bindir=$ac_optarg ;; - - -build | --build | --buil | --bui | --bu) - ac_prev=build_alias ;; - -build=* | --build=* | --buil=* | --bui=* | --bu=*) - build_alias=$ac_optarg ;; - - -cache-file | --cache-file | --cache-fil | --cache-fi \ - | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) - ac_prev=cache_file ;; - -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ - | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) - cache_file=$ac_optarg ;; - - --config-cache | -C) - cache_file=config.cache ;; - - -datadir | --datadir | --datadi | --datad) - ac_prev=datadir ;; - -datadir=* | --datadir=* | --datadi=* | --datad=*) - datadir=$ac_optarg ;; - - -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ - | --dataroo | --dataro | --datar) - ac_prev=datarootdir ;; - 
-datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ - | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) - datarootdir=$ac_optarg ;; - - -disable-* | --disable-*) - ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` - # Reject names that are not valid shell variable names. - expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid feature name: $ac_useropt" - ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` - case $ac_user_opts in - *" -"enable_$ac_useropt" -"*) ;; - *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" - ac_unrecognized_sep=', ';; - esac - eval enable_$ac_useropt=no ;; - - -docdir | --docdir | --docdi | --doc | --do) - ac_prev=docdir ;; - -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) - docdir=$ac_optarg ;; - - -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) - ac_prev=dvidir ;; - -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) - dvidir=$ac_optarg ;; - - -enable-* | --enable-*) - ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` - # Reject names that are not valid shell variable names. - expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? 
"invalid feature name: $ac_useropt" - ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` - case $ac_user_opts in - *" -"enable_$ac_useropt" -"*) ;; - *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" - ac_unrecognized_sep=', ';; - esac - eval enable_$ac_useropt=\$ac_optarg ;; - - -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ - | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ - | --exec | --exe | --ex) - ac_prev=exec_prefix ;; - -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ - | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ - | --exec=* | --exe=* | --ex=*) - exec_prefix=$ac_optarg ;; - - -gas | --gas | --ga | --g) - # Obsolete; use --with-gas. - with_gas=yes ;; - - -help | --help | --hel | --he | -h) - ac_init_help=long ;; - -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) - ac_init_help=recursive ;; - -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) - ac_init_help=short ;; - - -host | --host | --hos | --ho) - ac_prev=host_alias ;; - -host=* | --host=* | --hos=* | --ho=*) - host_alias=$ac_optarg ;; - - -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) - ac_prev=htmldir ;; - -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ - | --ht=*) - htmldir=$ac_optarg ;; - - -includedir | --includedir | --includedi | --included | --include \ - | --includ | --inclu | --incl | --inc) - ac_prev=includedir ;; - -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ - | --includ=* | --inclu=* | --incl=* | --inc=*) - includedir=$ac_optarg ;; - - -infodir | --infodir | --infodi | --infod | --info | --inf) - ac_prev=infodir ;; - -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) - infodir=$ac_optarg ;; - - -libdir | --libdir | --libdi | --libd) - ac_prev=libdir ;; - -libdir=* | --libdir=* | --libdi=* | --libd=*) - libdir=$ac_optarg ;; - - -libexecdir 
| --libexecdir | --libexecdi | --libexecd | --libexec \ - | --libexe | --libex | --libe) - ac_prev=libexecdir ;; - -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ - | --libexe=* | --libex=* | --libe=*) - libexecdir=$ac_optarg ;; - - -localedir | --localedir | --localedi | --localed | --locale) - ac_prev=localedir ;; - -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) - localedir=$ac_optarg ;; - - -localstatedir | --localstatedir | --localstatedi | --localstated \ - | --localstate | --localstat | --localsta | --localst | --locals) - ac_prev=localstatedir ;; - -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ - | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) - localstatedir=$ac_optarg ;; - - -mandir | --mandir | --mandi | --mand | --man | --ma | --m) - ac_prev=mandir ;; - -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) - mandir=$ac_optarg ;; - - -nfp | --nfp | --nf) - # Obsolete; use --without-fp. 
- with_fp=no ;; - - -no-create | --no-create | --no-creat | --no-crea | --no-cre \ - | --no-cr | --no-c | -n) - no_create=yes ;; - - -no-recursion | --no-recursion | --no-recursio | --no-recursi \ - | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) - no_recursion=yes ;; - - -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ - | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ - | --oldin | --oldi | --old | --ol | --o) - ac_prev=oldincludedir ;; - -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ - | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ - | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) - oldincludedir=$ac_optarg ;; - - -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) - ac_prev=prefix ;; - -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) - prefix=$ac_optarg ;; - - -program-prefix | --program-prefix | --program-prefi | --program-pref \ - | --program-pre | --program-pr | --program-p) - ac_prev=program_prefix ;; - -program-prefix=* | --program-prefix=* | --program-prefi=* \ - | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) - program_prefix=$ac_optarg ;; - - -program-suffix | --program-suffix | --program-suffi | --program-suff \ - | --program-suf | --program-su | --program-s) - ac_prev=program_suffix ;; - -program-suffix=* | --program-suffix=* | --program-suffi=* \ - | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) - program_suffix=$ac_optarg ;; - - -program-transform-name | --program-transform-name \ - | --program-transform-nam | --program-transform-na \ - | --program-transform-n | --program-transform- \ - | --program-transform | --program-transfor \ - | --program-transfo | --program-transf \ - | --program-trans | --program-tran \ - | --progr-tra | --program-tr | --program-t) - ac_prev=program_transform_name ;; - -program-transform-name=* | --program-transform-name=* \ - | 
--program-transform-nam=* | --program-transform-na=* \ - | --program-transform-n=* | --program-transform-=* \ - | --program-transform=* | --program-transfor=* \ - | --program-transfo=* | --program-transf=* \ - | --program-trans=* | --program-tran=* \ - | --progr-tra=* | --program-tr=* | --program-t=*) - program_transform_name=$ac_optarg ;; - - -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) - ac_prev=pdfdir ;; - -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) - pdfdir=$ac_optarg ;; - - -psdir | --psdir | --psdi | --psd | --ps) - ac_prev=psdir ;; - -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) - psdir=$ac_optarg ;; - - -q | -quiet | --quiet | --quie | --qui | --qu | --q \ - | -silent | --silent | --silen | --sile | --sil) - silent=yes ;; - - -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) - ac_prev=sbindir ;; - -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ - | --sbi=* | --sb=*) - sbindir=$ac_optarg ;; - - -sharedstatedir | --sharedstatedir | --sharedstatedi \ - | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ - | --sharedst | --shareds | --shared | --share | --shar \ - | --sha | --sh) - ac_prev=sharedstatedir ;; - -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ - | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ - | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ - | --sha=* | --sh=*) - sharedstatedir=$ac_optarg ;; - - -site | --site | --sit) - ac_prev=site ;; - -site=* | --site=* | --sit=*) - site=$ac_optarg ;; - - -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) - ac_prev=srcdir ;; - -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) - srcdir=$ac_optarg ;; - - -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ - | --syscon | --sysco | --sysc | --sys | --sy) - ac_prev=sysconfdir ;; - -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ - | --syscon=* | --sysco=* | --sysc=* 
| --sys=* | --sy=*) - sysconfdir=$ac_optarg ;; - - -target | --target | --targe | --targ | --tar | --ta | --t) - ac_prev=target_alias ;; - -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) - target_alias=$ac_optarg ;; - - -v | -verbose | --verbose | --verbos | --verbo | --verb) - verbose=yes ;; - - -version | --version | --versio | --versi | --vers | -V) - ac_init_version=: ;; - - -with-* | --with-*) - ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` - # Reject names that are not valid shell variable names. - expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid package name: $ac_useropt" - ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` - case $ac_user_opts in - *" -"with_$ac_useropt" -"*) ;; - *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" - ac_unrecognized_sep=', ';; - esac - eval with_$ac_useropt=\$ac_optarg ;; - - -without-* | --without-*) - ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` - # Reject names that are not valid shell variable names. - expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid package name: $ac_useropt" - ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` - case $ac_user_opts in - *" -"with_$ac_useropt" -"*) ;; - *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" - ac_unrecognized_sep=', ';; - esac - eval with_$ac_useropt=no ;; - - --x) - # Obsolete; use --with-x. 
- with_x=yes ;; - - -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ - | --x-incl | --x-inc | --x-in | --x-i) - ac_prev=x_includes ;; - -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ - | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) - x_includes=$ac_optarg ;; - - -x-libraries | --x-libraries | --x-librarie | --x-librari \ - | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) - ac_prev=x_libraries ;; - -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ - | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) - x_libraries=$ac_optarg ;; - - -*) as_fn_error $? "unrecognized option: \`$ac_option' -Try \`$0 --help' for more information" - ;; - - *=*) - ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` - # Reject names that are not valid shell variable names. - case $ac_envvar in #( - '' | [0-9]* | *[!_$as_cr_alnum]* ) - as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; - esac - eval $ac_envvar=\$ac_optarg - export $ac_envvar ;; - - *) - # FIXME: should be removed in autoconf 3.0. - $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 - expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && - $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 - : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" - ;; - - esac -done - -if test -n "$ac_prev"; then - ac_option=--`echo $ac_prev | sed 's/_/-/g'` - as_fn_error $? "missing argument to $ac_option" -fi - -if test -n "$ac_unrecognized_opts"; then - case $enable_option_checking in - no) ;; - fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; - *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; - esac -fi - -# Check all directory arguments for consistency. 
-for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ - datadir sysconfdir sharedstatedir localstatedir includedir \ - oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ - libdir localedir mandir -do - eval ac_val=\$$ac_var - # Remove trailing slashes. - case $ac_val in - */ ) - ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` - eval $ac_var=\$ac_val;; - esac - # Be sure to have absolute directory names. - case $ac_val in - [\\/$]* | ?:[\\/]* ) continue;; - NONE | '' ) case $ac_var in *prefix ) continue;; esac;; - esac - as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" -done - -# There might be people who depend on the old broken behavior: `$host' -# used to hold the argument of --host etc. -# FIXME: To remove some day. -build=$build_alias -host=$host_alias -target=$target_alias - -# FIXME: To remove some day. -if test "x$host_alias" != x; then - if test "x$build_alias" = x; then - cross_compiling=maybe - $as_echo "$as_me: WARNING: if you wanted to set the --build type, don't use --host. - If a cross compiler is detected then cross compile mode will be used" >&2 - elif test "x$build_alias" != "x$host_alias"; then - cross_compiling=yes - fi -fi - -ac_tool_prefix= -test -n "$host_alias" && ac_tool_prefix=$host_alias- - -test "$silent" = yes && exec 6>/dev/null - - -ac_pwd=`pwd` && test -n "$ac_pwd" && -ac_ls_di=`ls -di .` && -ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || - as_fn_error $? "working directory cannot be determined" -test "X$ac_ls_di" = "X$ac_pwd_ls_di" || - as_fn_error $? "pwd does not report name of working directory" - - -# Find the source files, if location was not specified. -if test -z "$srcdir"; then - ac_srcdir_defaulted=yes - # Try the directory containing this script, then the parent directory. 
- ac_confdir=`$as_dirname -- "$as_myself" || -$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$as_myself" : 'X\(//\)[^/]' \| \ - X"$as_myself" : 'X\(//\)$' \| \ - X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$as_myself" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - srcdir=$ac_confdir - if test ! -r "$srcdir/$ac_unique_file"; then - srcdir=.. - fi -else - ac_srcdir_defaulted=no -fi -if test ! -r "$srcdir/$ac_unique_file"; then - test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." - as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" -fi -ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" -ac_abs_confdir=`( - cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" - pwd)` -# When building in place, set srcdir=. -if test "$ac_abs_confdir" = "$ac_pwd"; then - srcdir=. -fi -# Remove unnecessary trailing slashes from srcdir. -# Double slashes in file names in object file debugging info -# mess up M-x gdb in Emacs. -case $srcdir in -*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; -esac -for ac_var in $ac_precious_vars; do - eval ac_env_${ac_var}_set=\${${ac_var}+set} - eval ac_env_${ac_var}_value=\$${ac_var} - eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} - eval ac_cv_env_${ac_var}_value=\$${ac_var} -done - -# -# Report the --help message. -# -if test "$ac_init_help" = "long"; then - # Omit some internal or obsolete options to make the list less imposing. - # This message is too long to be a string in the A/UX 3.1 sh. - cat <<_ACEOF -\`configure' configures this package to adapt to many kinds of systems. - -Usage: $0 [OPTION]... [VAR=VALUE]... - -To assign environment variables (e.g., CC, CFLAGS...), specify them as -VAR=VALUE. See below for descriptions of some of the useful variables. 
- -Defaults for the options are specified in brackets. - -Configuration: - -h, --help display this help and exit - --help=short display options specific to this package - --help=recursive display the short help of all the included packages - -V, --version display version information and exit - -q, --quiet, --silent do not print \`checking ...' messages - --cache-file=FILE cache test results in FILE [disabled] - -C, --config-cache alias for \`--cache-file=config.cache' - -n, --no-create do not create output files - --srcdir=DIR find the sources in DIR [configure dir or \`..'] - -Installation directories: - --prefix=PREFIX install architecture-independent files in PREFIX - [$ac_default_prefix] - --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX - [PREFIX] - -By default, \`make install' will install all the files in -\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify -an installation prefix other than \`$ac_default_prefix' using \`--prefix', -for instance \`--prefix=\$HOME'. - -For better control, use the options below. 
- -Fine tuning of the installation directories: - --bindir=DIR user executables [EPREFIX/bin] - --sbindir=DIR system admin executables [EPREFIX/sbin] - --libexecdir=DIR program executables [EPREFIX/libexec] - --sysconfdir=DIR read-only single-machine data [PREFIX/etc] - --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] - --localstatedir=DIR modifiable single-machine data [PREFIX/var] - --libdir=DIR object code libraries [EPREFIX/lib] - --includedir=DIR C header files [PREFIX/include] - --oldincludedir=DIR C header files for non-gcc [/usr/include] - --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] - --datadir=DIR read-only architecture-independent data [DATAROOTDIR] - --infodir=DIR info documentation [DATAROOTDIR/info] - --localedir=DIR locale-dependent data [DATAROOTDIR/locale] - --mandir=DIR man documentation [DATAROOTDIR/man] - --docdir=DIR documentation root [DATAROOTDIR/doc/PACKAGE] - --htmldir=DIR html documentation [DOCDIR] - --dvidir=DIR dvi documentation [DOCDIR] - --pdfdir=DIR pdf documentation [DOCDIR] - --psdir=DIR ps documentation [DOCDIR] -_ACEOF - - cat <<\_ACEOF - -System types: - --build=BUILD configure for building on BUILD [guessed] - --host=HOST cross-compile to build programs to run on HOST [BUILD] -_ACEOF -fi - -if test -n "$ac_init_help"; then - - cat <<\_ACEOF - -Optional Features: - --disable-option-checking ignore unrecognized --enable/--with options - --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) - --enable-FEATURE[=ARG] include FEATURE [ARG=yes] - --enable-autogen Automatically regenerate configure output - --disable-experimental Disable support for the experimental API - --enable-cc-silence Silence irrelevant compiler warnings - --enable-debug Build debugging code (implies --enable-ivsalloc) - --enable-ivsalloc Validate pointers passed through the public API - --disable-stats Disable statistics calculation/reporting - --enable-prof Enable allocation profiling - 
--enable-prof-libunwind Use libunwind for backtracing - --disable-prof-libgcc Do not use libgcc for backtracing - --disable-prof-gcc Do not use gcc intrinsics for backtracing - --disable-tcache Disable per thread caches - --enable-mremap Enable mremap(2) for huge realloc() - --disable-munmap Disable VM deallocation via munmap(2) - --enable-dss Enable allocation from DSS - --disable-fill Disable support for junk/zero filling, quarantine, - and redzones - --enable-utrace Enable utrace(2)-based tracing - --disable-valgrind Disable support for Valgrind - --enable-xmalloc Support xmalloc option - --enable-lazy-lock Enable lazy locking (only lock when multi-threaded) - --disable-tls Disable thread-local storage (__thread keyword) - --disable-zone-allocator - Disable zone allocator for Darwin - -Optional Packages: - --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] - --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) - --with-xslroot= XSL stylesheet root path - --with-rpath= Colon-separated rpath (ELF systems only) - --with-mangling= Mangle symbols in - --with-jemalloc-prefix= - Prefix to prepend to all public APIs - --without-export disable exporting jemalloc public APIs - --with-private-namespace= - Prefix to prepend to all library-private APIs - --with-install-suffix= - Suffix to append to all installed files - --with-static-libunwind= - Path to static libunwind library; use rather than - dynamically linking - -Some influential environment variables: - CC C compiler command - CFLAGS C compiler flags - LDFLAGS linker flags, e.g. -L if you have libraries in a - nonstandard directory - LIBS libraries to pass to the linker, e.g. -l - CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if - you have headers in a nonstandard directory - CPP C preprocessor - -Use these variables to override the choices made by `configure' or to help -it to find libraries and programs with nonstandard names/locations. - -Report bugs to the package provider. -_ACEOF -ac_status=$? 
-fi - -if test "$ac_init_help" = "recursive"; then - # If there are subdirs, report their specific --help. - for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue - test -d "$ac_dir" || - { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || - continue - ac_builddir=. - -case "$ac_dir" in -.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; -*) - ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` - # A ".." for each directory in $ac_dir_suffix. - ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` - case $ac_top_builddir_sub in - "") ac_top_builddir_sub=. ac_top_build_prefix= ;; - *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; - esac ;; -esac -ac_abs_top_builddir=$ac_pwd -ac_abs_builddir=$ac_pwd$ac_dir_suffix -# for backward compatibility: -ac_top_builddir=$ac_top_build_prefix - -case $srcdir in - .) # We are building in place. - ac_srcdir=. - ac_top_srcdir=$ac_top_builddir_sub - ac_abs_top_srcdir=$ac_pwd ;; - [\\/]* | ?:[\\/]* ) # Absolute name. - ac_srcdir=$srcdir$ac_dir_suffix; - ac_top_srcdir=$srcdir - ac_abs_top_srcdir=$srcdir ;; - *) # Relative name. - ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix - ac_top_srcdir=$ac_top_build_prefix$srcdir - ac_abs_top_srcdir=$ac_pwd/$srcdir ;; -esac -ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix - - cd "$ac_dir" || { ac_status=$?; continue; } - # Check for guested configure. - if test -f "$ac_srcdir/configure.gnu"; then - echo && - $SHELL "$ac_srcdir/configure.gnu" --help=recursive - elif test -f "$ac_srcdir/configure"; then - echo && - $SHELL "$ac_srcdir/configure" --help=recursive - else - $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 - fi || ac_status=$? - cd "$ac_pwd" || { ac_status=$?; break; } - done -fi - -test -n "$ac_init_help" && exit $ac_status -if $ac_init_version; then - cat <<\_ACEOF -configure -generated by GNU Autoconf 2.68 - -Copyright (C) 2010 Free Software Foundation, Inc. 
-This configure script is free software; the Free Software Foundation -gives unlimited permission to copy, distribute and modify it. -_ACEOF - exit -fi - -## ------------------------ ## -## Autoconf initialization. ## -## ------------------------ ## - -# ac_fn_c_try_compile LINENO -# -------------------------- -# Try to compile conftest.$ac_ext, and return whether this succeeded. -ac_fn_c_try_compile () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - rm -f conftest.$ac_objext - if { { ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_compile") 2>conftest.err - ac_status=$? - if test -s conftest.err; then - grep -v '^ *+' conftest.err >conftest.er1 - cat conftest.er1 >&5 - mv -f conftest.er1 conftest.err - fi - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then : - ac_retval=0 -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_retval=1 -fi - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - as_fn_set_status $ac_retval - -} # ac_fn_c_try_compile - -# ac_fn_c_try_cpp LINENO -# ---------------------- -# Try to preprocess conftest.$ac_ext, and return whether this succeeded. -ac_fn_c_try_cpp () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - if { { ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err - ac_status=$? 
- if test -s conftest.err; then - grep -v '^ *+' conftest.err >conftest.er1 - cat conftest.er1 >&5 - mv -f conftest.er1 conftest.err - fi - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } > conftest.i && { - test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || - test ! -s conftest.err - }; then : - ac_retval=0 -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_retval=1 -fi - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - as_fn_set_status $ac_retval - -} # ac_fn_c_try_cpp - -# ac_fn_c_try_run LINENO -# ---------------------- -# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes -# that executables *can* be run. -ac_fn_c_try_run () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - if { { ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' - { { case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 - test $ac_status = 0; }; }; then : - ac_retval=0 -else - $as_echo "$as_me: program exited with status $ac_status" >&5 - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_retval=$ac_status -fi - rm -rf conftest.dSYM conftest_ipa8_conftest.oo - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - as_fn_set_status $ac_retval - -} # ac_fn_c_try_run - -# ac_fn_c_compute_int LINENO EXPR VAR INCLUDES -# -------------------------------------------- -# Tries to find the compile-time value of EXPR in a program that includes -# INCLUDES, setting VAR accordingly. Returns whether the value could be -# computed -ac_fn_c_compute_int () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - if test "$cross_compiling" = yes; then - # Depending upon the size, compute the lo and hi bounds. -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$4 -int -main () -{ -static int test_array [1 - 2 * !(($2) >= 0)]; -test_array [0] = 0 - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_lo=0 ac_mid=0 - while :; do - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$4 -int -main () -{ -static int test_array [1 - 2 * !(($2) <= $ac_mid)]; -test_array [0] = 0 - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_hi=$ac_mid; break -else - as_fn_arith $ac_mid + 1 && ac_lo=$as_val - if test $ac_lo -le $ac_mid; then - ac_lo= ac_hi= - break - fi - as_fn_arith 2 '*' $ac_mid + 1 && ac_mid=$as_val -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - done -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$4 -int -main () -{ -static int test_array [1 - 2 * !(($2) < 0)]; -test_array [0] = 0 - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_hi=-1 ac_mid=-1 - while :; do - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -$4 -int -main () -{ -static int test_array [1 - 2 * !(($2) >= $ac_mid)]; -test_array [0] = 0 - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_lo=$ac_mid; break -else - as_fn_arith '(' $ac_mid ')' - 1 && ac_hi=$as_val - if test $ac_mid -le $ac_hi; then - ac_lo= ac_hi= - break - fi - as_fn_arith 2 '*' $ac_mid && ac_mid=$as_val -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - done -else - ac_lo= ac_hi= -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -# Binary search between lo and hi bounds. -while test "x$ac_lo" != "x$ac_hi"; do - as_fn_arith '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo && ac_mid=$as_val - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$4 -int -main () -{ -static int test_array [1 - 2 * !(($2) <= $ac_mid)]; -test_array [0] = 0 - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_hi=$ac_mid -else - as_fn_arith '(' $ac_mid ')' + 1 && ac_lo=$as_val -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -done -case $ac_lo in #(( -?*) eval "$3=\$ac_lo"; ac_retval=0 ;; -'') ac_retval=1 ;; -esac - else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$4 -static long int longval () { return $2; } -static unsigned long int ulongval () { return $2; } -#include -#include -int -main () -{ - - FILE *f = fopen ("conftest.val", "w"); - if (! f) - return 1; - if (($2) < 0) - { - long int i = longval (); - if (i != ($2)) - return 1; - fprintf (f, "%ld", i); - } - else - { - unsigned long int i = ulongval (); - if (i != ($2)) - return 1; - fprintf (f, "%lu", i); - } - /* Do not output a trailing newline, as this causes \r\n confusion - on some platforms. */ - return ferror (f) || fclose (f) != 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_run "$LINENO"; then : - echo >>conftest.val; read $3 &5 -$as_echo_n "checking for $2... 
" >&6; } -if eval \${$3+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$4 -#include <$2> -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - eval "$3=yes" -else - eval "$3=no" -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -eval ac_res=\$$3 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -$as_echo "$ac_res" >&6; } - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - -} # ac_fn_c_check_header_compile - -# ac_fn_c_try_link LINENO -# ----------------------- -# Try to link conftest.$ac_ext, and return whether this succeeded. -ac_fn_c_try_link () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - rm -f conftest.$ac_objext conftest$ac_exeext - if { { ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_link") 2>conftest.err - ac_status=$? - if test -s conftest.err; then - grep -v '^ *+' conftest.err >conftest.er1 - cat conftest.er1 >&5 - mv -f conftest.er1 conftest.err - fi - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest$ac_exeext && { - test "$cross_compiling" = yes || - $as_test_x conftest$ac_exeext - }; then : - ac_retval=0 -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_retval=1 -fi - # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information - # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would - # interfere with the next link command; also delete a directory that is - # left behind by Apple's compiler. We do this before executing the actions. 
- rm -rf conftest.dSYM conftest_ipa8_conftest.oo - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - as_fn_set_status $ac_retval - -} # ac_fn_c_try_link - -# ac_fn_c_check_func LINENO FUNC VAR -# ---------------------------------- -# Tests whether FUNC exists, setting the cache variable VAR accordingly -ac_fn_c_check_func () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 -$as_echo_n "checking for $2... " >&6; } -if eval \${$3+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -/* Define $2 to an innocuous variant, in case declares $2. - For example, HP-UX 11i declares gettimeofday. */ -#define $2 innocuous_$2 - -/* System header to define __stub macros and hopefully few prototypes, - which can conflict with char $2 (); below. - Prefer to if __STDC__ is defined, since - exists even on freestanding compilers. */ - -#ifdef __STDC__ -# include -#else -# include -#endif - -#undef $2 - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif -char $2 (); -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. 
*/ -#if defined __stub_$2 || defined __stub___$2 -choke me -#endif - -int -main () -{ -return $2 (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - eval "$3=yes" -else - eval "$3=no" -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -eval ac_res=\$$3 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -$as_echo "$ac_res" >&6; } - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - -} # ac_fn_c_check_func - -# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES -# ------------------------------------------------------- -# Tests whether HEADER exists, giving a warning if it cannot be compiled using -# the include files in INCLUDES and setting the cache variable VAR -# accordingly. -ac_fn_c_check_header_mongrel () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - if eval \${$3+:} false; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 -$as_echo_n "checking for $2... " >&6; } -if eval \${$3+:} false; then : - $as_echo_n "(cached) " >&6 -fi -eval ac_res=\$$3 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -$as_echo "$ac_res" >&6; } -else - # Is the header compilable? -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 -$as_echo_n "checking $2 usability... " >&6; } -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$4 -#include <$2> -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_header_compiler=yes -else - ac_header_compiler=no -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 -$as_echo "$ac_header_compiler" >&6; } - -# Is the header present? -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 -$as_echo_n "checking $2 presence... " >&6; } -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -#include <$2> -_ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : - ac_header_preproc=yes -else - ac_header_preproc=no -fi -rm -f conftest.err conftest.i conftest.$ac_ext -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 -$as_echo "$ac_header_preproc" >&6; } - -# So? What about this header? -case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( - yes:no: ) - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5 -$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 -$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} - ;; - no:yes:* ) - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 -$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5 -$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 -$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 -$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 -$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} - ;; -esac - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 -$as_echo_n "checking for $2... 
" >&6; } -if eval \${$3+:} false; then : - $as_echo_n "(cached) " >&6 -else - eval "$3=\$ac_header_compiler" -fi -eval ac_res=\$$3 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -$as_echo "$ac_res" >&6; } -fi - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - -} # ac_fn_c_check_header_mongrel - -# ac_fn_c_check_type LINENO TYPE VAR INCLUDES -# ------------------------------------------- -# Tests whether TYPE exists after having included INCLUDES, setting cache -# variable VAR accordingly. -ac_fn_c_check_type () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 -$as_echo_n "checking for $2... " >&6; } -if eval \${$3+:} false; then : - $as_echo_n "(cached) " >&6 -else - eval "$3=no" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$4 -int -main () -{ -if (sizeof ($2)) - return 0; - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$4 -int -main () -{ -if (sizeof (($2))) - return 0; - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - -else - eval "$3=yes" -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -eval ac_res=\$$3 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -$as_echo "$ac_res" >&6; } - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - -} # ac_fn_c_check_type -cat >config.log <<_ACEOF -This file contains any messages produced by compilers while -running configure, to aid debugging if configure makes a mistake. - -It was created by $as_me, which was -generated by GNU Autoconf 2.68. Invocation command line was - - $ $0 $@ - -_ACEOF -exec 5>>config.log -{ -cat <<_ASUNAME -## --------- ## -## Platform. 
## -## --------- ## - -hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` -uname -m = `(uname -m) 2>/dev/null || echo unknown` -uname -r = `(uname -r) 2>/dev/null || echo unknown` -uname -s = `(uname -s) 2>/dev/null || echo unknown` -uname -v = `(uname -v) 2>/dev/null || echo unknown` - -/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` -/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` - -/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` -/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` -/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` -/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` -/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` -/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` -/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` - -_ASUNAME - -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - $as_echo "PATH: $as_dir" - done -IFS=$as_save_IFS - -} >&5 - -cat >&5 <<_ACEOF - - -## ----------- ## -## Core tests. ## -## ----------- ## - -_ACEOF - - -# Keep a trace of the command line. -# Strip out --no-create and --no-recursion so they do not pile up. -# Strip out --silent because we don't want to record it for future runs. -# Also quote any args containing shell meta-characters. -# Make two passes to allow for proper duplicate-argument suppression. 
-ac_configure_args= -ac_configure_args0= -ac_configure_args1= -ac_must_keep_next=false -for ac_pass in 1 2 -do - for ac_arg - do - case $ac_arg in - -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; - -q | -quiet | --quiet | --quie | --qui | --qu | --q \ - | -silent | --silent | --silen | --sile | --sil) - continue ;; - *\'*) - ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; - esac - case $ac_pass in - 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; - 2) - as_fn_append ac_configure_args1 " '$ac_arg'" - if test $ac_must_keep_next = true; then - ac_must_keep_next=false # Got value, back to normal. - else - case $ac_arg in - *=* | --config-cache | -C | -disable-* | --disable-* \ - | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ - | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ - | -with-* | --with-* | -without-* | --without-* | --x) - case "$ac_configure_args0 " in - "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; - esac - ;; - -* ) ac_must_keep_next=true ;; - esac - fi - as_fn_append ac_configure_args " '$ac_arg'" - ;; - esac - done -done -{ ac_configure_args0=; unset ac_configure_args0;} -{ ac_configure_args1=; unset ac_configure_args1;} - -# When interrupted or exit'd, cleanup temporary files, and complete -# config.log. We remove comments because anyway the quotes in there -# would cause problems or look ugly. -# WARNING: Use '\'' to represent an apostrophe within the trap. -# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. -trap 'exit_status=$? - # Save into config.log some information that might help in debugging. - { - echo - - $as_echo "## ---------------- ## -## Cache variables. 
## -## ---------------- ##" - echo - # The following way of writing the cache mishandles newlines in values, -( - for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do - eval ac_val=\$$ac_var - case $ac_val in #( - *${as_nl}*) - case $ac_var in #( - *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 -$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; - esac - case $ac_var in #( - _ | IFS | as_nl) ;; #( - BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( - *) { eval $ac_var=; unset $ac_var;} ;; - esac ;; - esac - done - (set) 2>&1 | - case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( - *${as_nl}ac_space=\ *) - sed -n \ - "s/'\''/'\''\\\\'\'''\''/g; - s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" - ;; #( - *) - sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" - ;; - esac | - sort -) - echo - - $as_echo "## ----------------- ## -## Output variables. ## -## ----------------- ##" - echo - for ac_var in $ac_subst_vars - do - eval ac_val=\$$ac_var - case $ac_val in - *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; - esac - $as_echo "$ac_var='\''$ac_val'\''" - done | sort - echo - - if test -n "$ac_subst_files"; then - $as_echo "## ------------------- ## -## File substitutions. ## -## ------------------- ##" - echo - for ac_var in $ac_subst_files - do - eval ac_val=\$$ac_var - case $ac_val in - *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; - esac - $as_echo "$ac_var='\''$ac_val'\''" - done | sort - echo - fi - - if test -s confdefs.h; then - $as_echo "## ----------- ## -## confdefs.h. 
## -## ----------- ##" - echo - cat confdefs.h - echo - fi - test "$ac_signal" != 0 && - $as_echo "$as_me: caught signal $ac_signal" - $as_echo "$as_me: exit $exit_status" - } >&5 - rm -f core *.core core.conftest.* && - rm -f -r conftest* confdefs* conf$$* $ac_clean_files && - exit $exit_status -' 0 -for ac_signal in 1 2 13 15; do - trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal -done -ac_signal=0 - -# confdefs.h avoids OS command line length limits that DEFS can exceed. -rm -f -r conftest* confdefs.h - -$as_echo "/* confdefs.h */" > confdefs.h - -# Predefined preprocessor variables. - -cat >>confdefs.h <<_ACEOF -#define PACKAGE_NAME "$PACKAGE_NAME" -_ACEOF - -cat >>confdefs.h <<_ACEOF -#define PACKAGE_TARNAME "$PACKAGE_TARNAME" -_ACEOF - -cat >>confdefs.h <<_ACEOF -#define PACKAGE_VERSION "$PACKAGE_VERSION" -_ACEOF - -cat >>confdefs.h <<_ACEOF -#define PACKAGE_STRING "$PACKAGE_STRING" -_ACEOF - -cat >>confdefs.h <<_ACEOF -#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" -_ACEOF - -cat >>confdefs.h <<_ACEOF -#define PACKAGE_URL "$PACKAGE_URL" -_ACEOF - - -# Let the site file select an alternate cache file if it wants to. -# Prefer an explicitly selected file to automatically selected ones. -ac_site_file1=NONE -ac_site_file2=NONE -if test -n "$CONFIG_SITE"; then - # We do not want a PATH search for config.site. 
- case $CONFIG_SITE in #(( - -*) ac_site_file1=./$CONFIG_SITE;; - */*) ac_site_file1=$CONFIG_SITE;; - *) ac_site_file1=./$CONFIG_SITE;; - esac -elif test "x$prefix" != xNONE; then - ac_site_file1=$prefix/share/config.site - ac_site_file2=$prefix/etc/config.site -else - ac_site_file1=$ac_default_prefix/share/config.site - ac_site_file2=$ac_default_prefix/etc/config.site -fi -for ac_site_file in "$ac_site_file1" "$ac_site_file2" -do - test "x$ac_site_file" = xNONE && continue - if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 -$as_echo "$as_me: loading site script $ac_site_file" >&6;} - sed 's/^/| /' "$ac_site_file" >&5 - . "$ac_site_file" \ - || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? "failed to load site script $ac_site_file -See \`config.log' for more details" "$LINENO" 5; } - fi -done - -if test -r "$cache_file"; then - # Some versions of bash will fail to source /dev/null (special files - # actually), so we avoid doing that. DJGPP emulates it as a regular file. - if test /dev/null != "$cache_file" && test -f "$cache_file"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 -$as_echo "$as_me: loading cache $cache_file" >&6;} - case $cache_file in - [\\/]* | ?:[\\/]* ) . "$cache_file";; - *) . "./$cache_file";; - esac - fi -else - { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 -$as_echo "$as_me: creating cache $cache_file" >&6;} - >$cache_file -fi - -# Check that the precious variables saved in the cache have kept the same -# value. 
-ac_cache_corrupted=false -for ac_var in $ac_precious_vars; do - eval ac_old_set=\$ac_cv_env_${ac_var}_set - eval ac_new_set=\$ac_env_${ac_var}_set - eval ac_old_val=\$ac_cv_env_${ac_var}_value - eval ac_new_val=\$ac_env_${ac_var}_value - case $ac_old_set,$ac_new_set in - set,) - { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 -$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} - ac_cache_corrupted=: ;; - ,set) - { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 -$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} - ac_cache_corrupted=: ;; - ,);; - *) - if test "x$ac_old_val" != "x$ac_new_val"; then - # differences in whitespace do not lead to failure. - ac_old_val_w=`echo x $ac_old_val` - ac_new_val_w=`echo x $ac_new_val` - if test "$ac_old_val_w" != "$ac_new_val_w"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 -$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} - ac_cache_corrupted=: - else - { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 -$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} - eval $ac_var=\$ac_old_val - fi - { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 -$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 -$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} - fi;; - esac - # Pass precious variables to config.status. - if test "$ac_new_set" = set; then - case $ac_new_val in - *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; - *) ac_arg=$ac_var=$ac_new_val ;; - esac - case " $ac_configure_args " in - *" '$ac_arg' "*) ;; # Avoid dups. 
Use of quotes ensures accuracy. - *) as_fn_append ac_configure_args " '$ac_arg'" ;; - esac - fi -done -if $ac_cache_corrupted; then - { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 -$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} - as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 -fi -## -------------------- ## -## Main body of script. ## -## -------------------- ## - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - - - - - - - - - -rev=1 - - -srcroot=$srcdir -if test "x${srcroot}" = "x." ; then - srcroot="" -else - srcroot="${srcroot}/" -fi - -abs_srcroot="`cd \"${srcdir}\"; pwd`/" - - -objroot="" - -abs_objroot="`pwd`/" - - -if test "x$prefix" = "xNONE" ; then - prefix="/usr/local" -fi -if test "x$exec_prefix" = "xNONE" ; then - exec_prefix=$prefix -fi -PREFIX=$prefix - -BINDIR=`eval echo $bindir` -BINDIR=`eval echo $BINDIR` - -INCLUDEDIR=`eval echo $includedir` -INCLUDEDIR=`eval echo $INCLUDEDIR` - -LIBDIR=`eval echo $libdir` -LIBDIR=`eval echo $LIBDIR` - -DATADIR=`eval echo $datadir` -DATADIR=`eval echo $DATADIR` - -MANDIR=`eval echo $mandir` -MANDIR=`eval echo $MANDIR` - - -# Extract the first word of "xsltproc", so it can be a program name with args. -set dummy xsltproc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_path_XSLTPROC+:} false; then : - $as_echo_n "(cached) " >&6 -else - case $XSLTPROC in - [\\/]* | ?:[\\/]*) - ac_cv_path_XSLTPROC="$XSLTPROC" # Let the user override the test with a path. 
- ;; - *) - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_path_XSLTPROC="$as_dir/$ac_word$ac_exec_ext" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - - test -z "$ac_cv_path_XSLTPROC" && ac_cv_path_XSLTPROC="false" - ;; -esac -fi -XSLTPROC=$ac_cv_path_XSLTPROC -if test -n "$XSLTPROC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $XSLTPROC" >&5 -$as_echo "$XSLTPROC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - -if test -d "/usr/share/xml/docbook/stylesheet/docbook-xsl" ; then - DEFAULT_XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl" -elif test -d "/usr/share/sgml/docbook/xsl-stylesheets" ; then - DEFAULT_XSLROOT="/usr/share/sgml/docbook/xsl-stylesheets" -else - DEFAULT_XSLROOT="" -fi - -# Check whether --with-xslroot was given. -if test "${with_xslroot+set}" = set; then : - withval=$with_xslroot; -if test "x$with_xslroot" = "xno" ; then - XSLROOT="${DEFAULT_XSLROOT}" -else - XSLROOT="${with_xslroot}" -fi - -else - XSLROOT="${DEFAULT_XSLROOT}" - -fi - - - -CFLAGS=$CFLAGS -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu -if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. -set dummy ${ac_tool_prefix}gcc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... 
" >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_CC="${ac_tool_prefix}gcc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - -fi -if test -z "$ac_cv_prog_CC"; then - ac_ct_CC=$CC - # Extract the first word of "gcc", so it can be a program name with args. -set dummy gcc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$ac_ct_CC"; then - ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_ac_ct_CC="gcc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -ac_ct_CC=$ac_cv_prog_ac_ct_CC -if test -n "$ac_ct_CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 -$as_echo "$ac_ct_CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - if test "x$ac_ct_CC" = x; then - CC="" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - CC=$ac_ct_CC - fi -else - CC="$ac_cv_prog_CC" -fi - -if test -z "$CC"; then - if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. -set dummy ${ac_tool_prefix}cc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_CC="${ac_tool_prefix}cc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - - fi -fi -if test -z "$CC"; then - # Extract the first word of "cc", so it can be a program name with args. -set dummy cc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - ac_prog_rejected=no -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then - ac_prog_rejected=yes - continue - fi - ac_cv_prog_CC="cc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -if test $ac_prog_rejected = yes; then - # We found a bogon in the path, so make sure we never use it. - set dummy $ac_cv_prog_CC - shift - if test $# != 0; then - # We chose a different compiler from the bogus one. - # However, it has the same basename, so the bogon will be chosen - # first if we set CC to just the basename; use the full file name. 
- shift - ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" - fi -fi -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - -fi -if test -z "$CC"; then - if test -n "$ac_tool_prefix"; then - for ac_prog in cl.exe - do - # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. -set dummy $ac_tool_prefix$ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_CC="$ac_tool_prefix$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - - test -n "$CC" && break - done -fi -if test -z "$CC"; then - ac_ct_CC=$CC - for ac_prog in cl.exe -do - # Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$ac_ct_CC"; then - ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
-else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_ac_ct_CC="$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -ac_ct_CC=$ac_cv_prog_ac_ct_CC -if test -n "$ac_ct_CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 -$as_echo "$ac_ct_CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - - test -n "$ac_ct_CC" && break -done - - if test "x$ac_ct_CC" = x; then - CC="" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - CC=$ac_ct_CC - fi -fi - -fi - - -test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? "no acceptable C compiler found in \$PATH -See \`config.log' for more details" "$LINENO" 5; } - -# Provide some information about the compiler. -$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 -set X $ac_compile -ac_compiler=$2 -for ac_option in --version -v -V -qversion; do - { { ac_try="$ac_compiler $ac_option >&5" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_compiler $ac_option >&5") 2>conftest.err - ac_status=$? - if test -s conftest.err; then - sed '10a\ -... rest of stderr output deleted ... 
- 10q' conftest.err >conftest.er1 - cat conftest.er1 >&5 - fi - rm -f conftest.er1 conftest.err - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } -done - -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -ac_clean_files_save=$ac_clean_files -ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" -# Try to create an executable without -o first, disregard a.out. -# It will help us diagnose broken compilers, and finding out an intuition -# of exeext. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 -$as_echo_n "checking whether the C compiler works... " >&6; } -ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` - -# The possible output files: -ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" - -ac_rmfiles= -for ac_file in $ac_files -do - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; - * ) ac_rmfiles="$ac_rmfiles $ac_file";; - esac -done -rm -f $ac_rmfiles - -if { { ac_try="$ac_link_default" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_link_default") 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then : - # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. -# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' -# in a Makefile. We should not override ac_cv_exeext if it was cached, -# so that the user can short-circuit this test for compilers unknown to -# Autoconf. 
-for ac_file in $ac_files '' -do - test -f "$ac_file" || continue - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) - ;; - [ab].out ) - # We found the default executable, but exeext='' is most - # certainly right. - break;; - *.* ) - if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; - then :; else - ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` - fi - # We set ac_cv_exeext here because the later test for it is not - # safe: cross compilers may not add the suffix if given an `-o' - # argument, so we may need to know it at that point already. - # Even if this section looks crufty: it has the advantage of - # actually working. - break;; - * ) - break;; - esac -done -test "$ac_cv_exeext" = no && ac_cv_exeext= - -else - ac_file='' -fi -if test -z "$ac_file"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -$as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error 77 "C compiler cannot create executables -See \`config.log' for more details" "$LINENO" 5; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 -$as_echo_n "checking for C compiler default output file name... " >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 -$as_echo "$ac_file" >&6; } -ac_exeext=$ac_cv_exeext - -rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out -ac_clean_files=$ac_clean_files_save -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 -$as_echo_n "checking for suffix of executables... 
" >&6; } -if { { ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then : - # If both `conftest.exe' and `conftest' are `present' (well, observable) -# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will -# work properly (i.e., refer to `conftest.exe'), while it won't with -# `rm'. -for ac_file in conftest.exe conftest conftest.*; do - test -f "$ac_file" || continue - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; - *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` - break;; - * ) break;; - esac -done -else - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? "cannot compute suffix of executables: cannot compile and link -See \`config.log' for more details" "$LINENO" 5; } -fi -rm -f conftest conftest$ac_cv_exeext -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 -$as_echo "$ac_cv_exeext" >&6; } - -rm -f conftest.$ac_ext -EXEEXT=$ac_cv_exeext -ac_exeext=$EXEEXT -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -int -main () -{ -FILE *f = fopen ("conftest.out", "w"); - return ferror (f) || fclose (f) != 0; - - ; - return 0; -} -_ACEOF -ac_clean_files="$ac_clean_files conftest.out" -# Check that the compiler produces executables we can run. If not, either -# the compiler is broken, or we cross compile. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 -$as_echo_n "checking whether we are cross compiling... 
" >&6; } -if test "$cross_compiling" != yes; then - { { ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } - if { ac_try='./conftest$ac_cv_exeext' - { { case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; }; then - cross_compiling=no - else - if test "$cross_compiling" = maybe; then - cross_compiling=yes - else - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? "cannot run C compiled programs. -If you meant to cross compile, use \`--host'. -See \`config.log' for more details" "$LINENO" 5; } - fi - fi -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 -$as_echo "$cross_compiling" >&6; } - -rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out -ac_clean_files=$ac_clean_files_save -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 -$as_echo_n "checking for suffix of object files... " >&6; } -if ${ac_cv_objext+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.o conftest.obj -if { { ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_compile") 2>&5 - ac_status=$? 
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then : - for ac_file in conftest.o conftest.obj conftest.*; do - test -f "$ac_file" || continue; - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; - *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` - break;; - esac -done -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? "cannot compute suffix of object files: cannot compile -See \`config.log' for more details" "$LINENO" 5; } -fi -rm -f conftest.$ac_cv_objext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 -$as_echo "$ac_cv_objext" >&6; } -OBJEXT=$ac_cv_objext -ac_objext=$OBJEXT -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 -$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } -if ${ac_cv_c_compiler_gnu+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ -#ifndef __GNUC__ - choke me -#endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_compiler_gnu=yes -else - ac_compiler_gnu=no -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -ac_cv_c_compiler_gnu=$ac_compiler_gnu - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 -$as_echo "$ac_cv_c_compiler_gnu" >&6; } -if test $ac_compiler_gnu = yes; then - GCC=yes -else - GCC= -fi -ac_test_CFLAGS=${CFLAGS+set} -ac_save_CFLAGS=$CFLAGS -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 -$as_echo_n "checking whether $CC accepts -g... 
" >&6; } -if ${ac_cv_prog_cc_g+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_save_c_werror_flag=$ac_c_werror_flag - ac_c_werror_flag=yes - ac_cv_prog_cc_g=no - CFLAGS="-g" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_prog_cc_g=yes -else - CFLAGS="" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - -else - ac_c_werror_flag=$ac_save_c_werror_flag - CFLAGS="-g" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_prog_cc_g=yes -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_c_werror_flag=$ac_save_c_werror_flag -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 -$as_echo "$ac_cv_prog_cc_g" >&6; } -if test "$ac_test_CFLAGS" = set; then - CFLAGS=$ac_save_CFLAGS -elif test $ac_cv_prog_cc_g = yes; then - if test "$GCC" = yes; then - CFLAGS="-g -O2" - else - CFLAGS="-g" - fi -else - if test "$GCC" = yes; then - CFLAGS="-O2" - else - CFLAGS= - fi -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 -$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } -if ${ac_cv_prog_cc_c89+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_cv_prog_cc_c89=no -ac_save_CC=$CC -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -#include -#include -#include -/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. 
*/ -struct buf { int x; }; -FILE * (*rcsopen) (struct buf *, struct stat *, int); -static char *e (p, i) - char **p; - int i; -{ - return p[i]; -} -static char *f (char * (*g) (char **, int), char **p, ...) -{ - char *s; - va_list v; - va_start (v,p); - s = g (p, va_arg (v,int)); - va_end (v); - return s; -} - -/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has - function prototypes and stuff, but not '\xHH' hex character constants. - These don't provoke an error unfortunately, instead are silently treated - as 'x'. The following induces an error, until -std is added to get - proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an - array size at least. It's necessary to write '\x00'==0 to get something - that's true only with -std. */ -int osf4_cc_array ['\x00' == 0 ? 1 : -1]; - -/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters - inside strings and character constants. */ -#define FOO(x) 'x' -int xlc6_cc_array[FOO(a) == 'x' ? 
1 : -1]; - -int test (int i, double x); -struct s1 {int (*f) (int a);}; -struct s2 {int (*f) (double a);}; -int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); -int argc; -char **argv; -int -main () -{ -return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; - ; - return 0; -} -_ACEOF -for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ - -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" -do - CC="$ac_save_CC $ac_arg" - if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_prog_cc_c89=$ac_arg -fi -rm -f core conftest.err conftest.$ac_objext - test "x$ac_cv_prog_cc_c89" != "xno" && break -done -rm -f conftest.$ac_ext -CC=$ac_save_CC - -fi -# AC_CACHE_VAL -case "x$ac_cv_prog_cc_c89" in - x) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 -$as_echo "none needed" >&6; } ;; - xno) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 -$as_echo "unsupported" >&6; } ;; - *) - CC="$CC $ac_cv_prog_cc_c89" - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 -$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; -esac -if test "x$ac_cv_prog_cc_c89" != xno; then : - -fi - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - -if test "x$GCC" != "xyes" ; then - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler is MSVC" >&5 -$as_echo_n "checking whether compiler is MSVC... " >&6; } -if ${je_cv_msvc+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -int -main () -{ - -#ifndef _MSC_VER - int fail-1; -#endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - je_cv_msvc=yes -else - je_cv_msvc=no -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_msvc" >&5 -$as_echo "$je_cv_msvc" >&6; } -fi - -if test "x$CFLAGS" = "x" ; then - no_CFLAGS="yes" - if test "x$GCC" = "xyes" ; then - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -std=gnu99" >&5 -$as_echo_n "checking whether compiler supports -std=gnu99... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-std=gnu99" -else - CFLAGS="${CFLAGS} -std=gnu99" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Wall" >&5 -$as_echo_n "checking whether compiler supports -Wall... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-Wall" -else - CFLAGS="${CFLAGS} -Wall" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -pipe" >&5 -$as_echo_n "checking whether compiler supports -pipe... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-pipe" -else - CFLAGS="${CFLAGS} -pipe" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -g3" >&5 -$as_echo_n "checking whether compiler supports -g3... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-g3" -else - CFLAGS="${CFLAGS} -g3" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - elif test "x$je_cv_msvc" = "xyes" ; then - CC="$CC -nologo" - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Zi" >&5 -$as_echo_n "checking whether compiler supports -Zi... 
" >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-Zi" -else - CFLAGS="${CFLAGS} -Zi" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -MT" >&5 -$as_echo_n "checking whether compiler supports -MT... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-MT" -else - CFLAGS="${CFLAGS} -MT" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -W3" >&5 -$as_echo_n "checking whether compiler supports -W3... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-W3" -else - CFLAGS="${CFLAGS} -W3" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - CPPFLAGS="$CPPFLAGS -I${srcroot}/include/msvc_compat" - fi -fi -if test "x$EXTRA_CFLAGS" != "x" ; then - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports $EXTRA_CFLAGS" >&5 -$as_echo_n "checking whether compiler supports $EXTRA_CFLAGS... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="$EXTRA_CFLAGS" -else - CFLAGS="${CFLAGS} $EXTRA_CFLAGS" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - -fi -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 -$as_echo_n "checking how to run the C preprocessor... " >&6; } -# On Suns, sometimes $CPP names a directory. 
-if test -n "$CPP" && test -d "$CPP"; then - CPP= -fi -if test -z "$CPP"; then - if ${ac_cv_prog_CPP+:} false; then : - $as_echo_n "(cached) " >&6 -else - # Double quotes because CPP needs to be expanded - for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" - do - ac_preproc_ok=false -for ac_c_preproc_warn_flag in '' yes -do - # Use a header file that comes with gcc, so configuring glibc - # with a fresh cross-compiler works. - # Prefer to if __STDC__ is defined, since - # exists even on freestanding compilers. - # On the NeXT, cc -E runs the code through the compiler's parser, - # not just through cpp. "Syntax error" is here to catch this case. - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#ifdef __STDC__ -# include -#else -# include -#endif - Syntax error -_ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : - -else - # Broken: fails on valid input. -continue -fi -rm -f conftest.err conftest.i conftest.$ac_ext - - # OK, works on sane cases. Now check whether nonexistent headers - # can be detected and how. - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -_ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : - # Broken: success on invalid input. -continue -else - # Passes both tests. -ac_preproc_ok=: -break -fi -rm -f conftest.err conftest.i conftest.$ac_ext - -done -# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. -rm -f conftest.i conftest.err conftest.$ac_ext -if $ac_preproc_ok; then : - break -fi - - done - ac_cv_prog_CPP=$CPP - -fi - CPP=$ac_cv_prog_CPP -else - ac_cv_prog_CPP=$CPP -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 -$as_echo "$CPP" >&6; } -ac_preproc_ok=false -for ac_c_preproc_warn_flag in '' yes -do - # Use a header file that comes with gcc, so configuring glibc - # with a fresh cross-compiler works. - # Prefer to if __STDC__ is defined, since - # exists even on freestanding compilers. 
- # On the NeXT, cc -E runs the code through the compiler's parser, - # not just through cpp. "Syntax error" is here to catch this case. - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#ifdef __STDC__ -# include -#else -# include -#endif - Syntax error -_ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : - -else - # Broken: fails on valid input. -continue -fi -rm -f conftest.err conftest.i conftest.$ac_ext - - # OK, works on sane cases. Now check whether nonexistent headers - # can be detected and how. - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -_ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : - # Broken: success on invalid input. -continue -else - # Passes both tests. -ac_preproc_ok=: -break -fi -rm -f conftest.err conftest.i conftest.$ac_ext - -done -# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. -rm -f conftest.i conftest.err conftest.$ac_ext -if $ac_preproc_ok; then : - -else - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? "C preprocessor \"$CPP\" fails sanity check -See \`config.log' for more details" "$LINENO" 5; } -fi - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 -$as_echo_n "checking for grep that handles long lines and -e... " >&6; } -if ${ac_cv_path_GREP+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -z "$GREP"; then - ac_path_GREP_found=false - # Loop through the user's path and test for each of PROGNAME-LIST - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_prog in grep ggrep; do - for ac_exec_ext in '' $ac_executable_extensions; do - ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" - { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue -# Check for GNU ac_path_GREP and select it if it is found. - # Check for GNU $ac_path_GREP -case `"$ac_path_GREP" --version 2>&1` in -*GNU*) - ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; -*) - ac_count=0 - $as_echo_n 0123456789 >"conftest.in" - while : - do - cat "conftest.in" "conftest.in" >"conftest.tmp" - mv "conftest.tmp" "conftest.in" - cp "conftest.in" "conftest.nl" - $as_echo 'GREP' >> "conftest.nl" - "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break - diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break - as_fn_arith $ac_count + 1 && ac_count=$as_val - if test $ac_count -gt ${ac_path_GREP_max-0}; then - # Best one so far, save it but keep looking for a better one - ac_cv_path_GREP="$ac_path_GREP" - ac_path_GREP_max=$ac_count - fi - # 10*(2^10) chars as input seems more than enough - test $ac_count -gt 10 && break - done - rm -f conftest.in conftest.tmp conftest.nl conftest.out;; -esac - - $ac_path_GREP_found && break 3 - done - done - done -IFS=$as_save_IFS - if test -z "$ac_cv_path_GREP"; then - as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 - fi -else - ac_cv_path_GREP=$GREP -fi - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 -$as_echo "$ac_cv_path_GREP" >&6; } - GREP="$ac_cv_path_GREP" - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 -$as_echo_n "checking for egrep... 
" >&6; } -if ${ac_cv_path_EGREP+:} false; then : - $as_echo_n "(cached) " >&6 -else - if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 - then ac_cv_path_EGREP="$GREP -E" - else - if test -z "$EGREP"; then - ac_path_EGREP_found=false - # Loop through the user's path and test for each of PROGNAME-LIST - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_prog in egrep; do - for ac_exec_ext in '' $ac_executable_extensions; do - ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" - { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue -# Check for GNU ac_path_EGREP and select it if it is found. - # Check for GNU $ac_path_EGREP -case `"$ac_path_EGREP" --version 2>&1` in -*GNU*) - ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; -*) - ac_count=0 - $as_echo_n 0123456789 >"conftest.in" - while : - do - cat "conftest.in" "conftest.in" >"conftest.tmp" - mv "conftest.tmp" "conftest.in" - cp "conftest.in" "conftest.nl" - $as_echo 'EGREP' >> "conftest.nl" - "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break - diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break - as_fn_arith $ac_count + 1 && ac_count=$as_val - if test $ac_count -gt ${ac_path_EGREP_max-0}; then - # Best one so far, save it but keep looking for a better one - ac_cv_path_EGREP="$ac_path_EGREP" - ac_path_EGREP_max=$ac_count - fi - # 10*(2^10) chars as input seems more than enough - test $ac_count -gt 10 && break - done - rm -f conftest.in conftest.tmp conftest.nl conftest.out;; -esac - - $ac_path_EGREP_found && break 3 - done - done - done -IFS=$as_save_IFS - if test -z "$ac_cv_path_EGREP"; then - as_fn_error $? 
"no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 - fi -else - ac_cv_path_EGREP=$EGREP -fi - - fi -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 -$as_echo "$ac_cv_path_EGREP" >&6; } - EGREP="$ac_cv_path_EGREP" - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 -$as_echo_n "checking for ANSI C header files... " >&6; } -if ${ac_cv_header_stdc+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -#include -#include -#include - -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_header_stdc=yes -else - ac_cv_header_stdc=no -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - -if test $ac_cv_header_stdc = yes; then - # SunOS 4.x string.h does not declare mem*, contrary to ANSI. - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include - -_ACEOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - $EGREP "memchr" >/dev/null 2>&1; then : - -else - ac_cv_header_stdc=no -fi -rm -f conftest* - -fi - -if test $ac_cv_header_stdc = yes; then - # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include - -_ACEOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - $EGREP "free" >/dev/null 2>&1; then : - -else - ac_cv_header_stdc=no -fi -rm -f conftest* - -fi - -if test $ac_cv_header_stdc = yes; then - # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. - if test "$cross_compiling" = yes; then : - : -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -#include -#if ((' ' & 0x0FF) == 0x020) -# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') -# define TOUPPER(c) (ISLOWER(c) ? 
'A' + ((c) - 'a') : (c)) -#else -# define ISLOWER(c) \ - (('a' <= (c) && (c) <= 'i') \ - || ('j' <= (c) && (c) <= 'r') \ - || ('s' <= (c) && (c) <= 'z')) -# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) -#endif - -#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) -int -main () -{ - int i; - for (i = 0; i < 256; i++) - if (XOR (islower (i), ISLOWER (i)) - || toupper (i) != TOUPPER (i)) - return 2; - return 0; -} -_ACEOF -if ac_fn_c_try_run "$LINENO"; then : - -else - ac_cv_header_stdc=no -fi -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ - conftest.$ac_objext conftest.beam conftest.$ac_ext -fi - -fi -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 -$as_echo "$ac_cv_header_stdc" >&6; } -if test $ac_cv_header_stdc = yes; then - -$as_echo "#define STDC_HEADERS 1" >>confdefs.h - -fi - -# On IRIX 5.3, sys/types and inttypes.h are conflicting. -for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ - inttypes.h stdint.h unistd.h -do : - as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` -ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default -" -if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : - cat >>confdefs.h <<_ACEOF -#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 -_ACEOF - -fi - -done - - -# The cast to long int works around a bug in the HP C Compiler -# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects -# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. -# This bug is HP SR number 8606223364. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of void *" >&5 -$as_echo_n "checking size of void *... 
" >&6; } -if ${ac_cv_sizeof_void_p+:} false; then : - $as_echo_n "(cached) " >&6 -else - if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (void *))" "ac_cv_sizeof_void_p" "$ac_includes_default"; then : - -else - if test "$ac_cv_type_void_p" = yes; then - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error 77 "cannot compute sizeof (void *) -See \`config.log' for more details" "$LINENO" 5; } - else - ac_cv_sizeof_void_p=0 - fi -fi - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_void_p" >&5 -$as_echo "$ac_cv_sizeof_void_p" >&6; } - - - -cat >>confdefs.h <<_ACEOF -#define SIZEOF_VOID_P $ac_cv_sizeof_void_p -_ACEOF - - -if test "x${ac_cv_sizeof_void_p}" = "x8" ; then - LG_SIZEOF_PTR=3 -elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then - LG_SIZEOF_PTR=2 -else - as_fn_error $? "Unsupported pointer size: ${ac_cv_sizeof_void_p}" "$LINENO" 5 -fi -cat >>confdefs.h <<_ACEOF -#define LG_SIZEOF_PTR $LG_SIZEOF_PTR -_ACEOF - - -# The cast to long int works around a bug in the HP C Compiler -# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects -# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. -# This bug is HP SR number 8606223364. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of int" >&5 -$as_echo_n "checking size of int... 
" >&6; } -if ${ac_cv_sizeof_int+:} false; then : - $as_echo_n "(cached) " >&6 -else - if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (int))" "ac_cv_sizeof_int" "$ac_includes_default"; then : - -else - if test "$ac_cv_type_int" = yes; then - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error 77 "cannot compute sizeof (int) -See \`config.log' for more details" "$LINENO" 5; } - else - ac_cv_sizeof_int=0 - fi -fi - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_int" >&5 -$as_echo "$ac_cv_sizeof_int" >&6; } - - - -cat >>confdefs.h <<_ACEOF -#define SIZEOF_INT $ac_cv_sizeof_int -_ACEOF - - -if test "x${ac_cv_sizeof_int}" = "x8" ; then - LG_SIZEOF_INT=3 -elif test "x${ac_cv_sizeof_int}" = "x4" ; then - LG_SIZEOF_INT=2 -else - as_fn_error $? "Unsupported int size: ${ac_cv_sizeof_int}" "$LINENO" 5 -fi -cat >>confdefs.h <<_ACEOF -#define LG_SIZEOF_INT $LG_SIZEOF_INT -_ACEOF - - -# The cast to long int works around a bug in the HP C Compiler -# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects -# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. -# This bug is HP SR number 8606223364. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of long" >&5 -$as_echo_n "checking size of long... 
" >&6; } -if ${ac_cv_sizeof_long+:} false; then : - $as_echo_n "(cached) " >&6 -else - if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (long))" "ac_cv_sizeof_long" "$ac_includes_default"; then : - -else - if test "$ac_cv_type_long" = yes; then - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error 77 "cannot compute sizeof (long) -See \`config.log' for more details" "$LINENO" 5; } - else - ac_cv_sizeof_long=0 - fi -fi - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_long" >&5 -$as_echo "$ac_cv_sizeof_long" >&6; } - - - -cat >>confdefs.h <<_ACEOF -#define SIZEOF_LONG $ac_cv_sizeof_long -_ACEOF - - -if test "x${ac_cv_sizeof_long}" = "x8" ; then - LG_SIZEOF_LONG=3 -elif test "x${ac_cv_sizeof_long}" = "x4" ; then - LG_SIZEOF_LONG=2 -else - as_fn_error $? "Unsupported long size: ${ac_cv_sizeof_long}" "$LINENO" 5 -fi -cat >>confdefs.h <<_ACEOF -#define LG_SIZEOF_LONG $LG_SIZEOF_LONG -_ACEOF - - -# The cast to long int works around a bug in the HP C Compiler -# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects -# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. -# This bug is HP SR number 8606223364. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of intmax_t" >&5 -$as_echo_n "checking size of intmax_t... 
" >&6; } -if ${ac_cv_sizeof_intmax_t+:} false; then : - $as_echo_n "(cached) " >&6 -else - if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (intmax_t))" "ac_cv_sizeof_intmax_t" "$ac_includes_default"; then : - -else - if test "$ac_cv_type_intmax_t" = yes; then - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error 77 "cannot compute sizeof (intmax_t) -See \`config.log' for more details" "$LINENO" 5; } - else - ac_cv_sizeof_intmax_t=0 - fi -fi - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_intmax_t" >&5 -$as_echo "$ac_cv_sizeof_intmax_t" >&6; } - - - -cat >>confdefs.h <<_ACEOF -#define SIZEOF_INTMAX_T $ac_cv_sizeof_intmax_t -_ACEOF - - -if test "x${ac_cv_sizeof_intmax_t}" = "x16" ; then - LG_SIZEOF_INTMAX_T=4 -elif test "x${ac_cv_sizeof_intmax_t}" = "x8" ; then - LG_SIZEOF_INTMAX_T=3 -elif test "x${ac_cv_sizeof_intmax_t}" = "x4" ; then - LG_SIZEOF_INTMAX_T=2 -else - as_fn_error $? "Unsupported intmax_t size: ${ac_cv_sizeof_intmax_t}" "$LINENO" 5 -fi -cat >>confdefs.h <<_ACEOF -#define LG_SIZEOF_INTMAX_T $LG_SIZEOF_INTMAX_T -_ACEOF - - -ac_aux_dir= -for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do - if test -f "$ac_dir/install-sh"; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/install-sh -c" - break - elif test -f "$ac_dir/install.sh"; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/install.sh -c" - break - elif test -f "$ac_dir/shtool"; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/shtool install -c" - break - fi -done -if test -z "$ac_aux_dir"; then - as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5 -fi - -# These three variables are undocumented and unsupported, -# and are intended to be withdrawn in a future Autoconf release. -# They can cause serious problems if a builder's source tree is in a directory -# whose full name contains unusual characters. 
-ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. -ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. -ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. - - -# Make sure we can run config.sub. -$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 || - as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5 - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5 -$as_echo_n "checking build system type... " >&6; } -if ${ac_cv_build+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_build_alias=$build_alias -test "x$ac_build_alias" = x && - ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"` -test "x$ac_build_alias" = x && - as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5 -ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` || - as_fn_error $? "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5 - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5 -$as_echo "$ac_cv_build" >&6; } -case $ac_cv_build in -*-*-*) ;; -*) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;; -esac -build=$ac_cv_build -ac_save_IFS=$IFS; IFS='-' -set x $ac_cv_build -shift -build_cpu=$1 -build_vendor=$2 -shift; shift -# Remember, the first character of IFS is used to create $*, -# except with old shells: -build_os=$* -IFS=$ac_save_IFS -case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking host system type" >&5 -$as_echo_n "checking host system type... " >&6; } -if ${ac_cv_host+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test "x$host_alias" = x; then - ac_cv_host=$ac_cv_build -else - ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` || - as_fn_error $? 
"$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5 -fi - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5 -$as_echo "$ac_cv_host" >&6; } -case $ac_cv_host in -*-*-*) ;; -*) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;; -esac -host=$ac_cv_host -ac_save_IFS=$IFS; IFS='-' -set x $ac_cv_host -shift -host_cpu=$1 -host_vendor=$2 -shift; shift -# Remember, the first character of IFS is used to create $*, -# except with old shells: -host_os=$* -IFS=$ac_save_IFS -case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac - - -CPU_SPINWAIT="" -case "${host_cpu}" in - i[345]86) - ;; - i686) - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __asm__ is compilable" >&5 -$as_echo_n "checking whether __asm__ is compilable... " >&6; } -if ${je_cv_asm+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ -__asm__ volatile("pause"); return 0; - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_asm=yes -else - je_cv_asm=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_asm" >&5 -$as_echo "$je_cv_asm" >&6; } - - if test "x${je_cv_asm}" = "xyes" ; then - CPU_SPINWAIT='__asm__ volatile("pause")' - fi - ;; - x86_64) - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __asm__ syntax is compilable" >&5 -$as_echo_n "checking whether __asm__ syntax is compilable... " >&6; } -if ${je_cv_asm+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -int -main () -{ -__asm__ volatile("pause"); return 0; - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_asm=yes -else - je_cv_asm=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_asm" >&5 -$as_echo "$je_cv_asm" >&6; } - - if test "x${je_cv_asm}" = "xyes" ; then - CPU_SPINWAIT='__asm__ volatile("pause")' - fi - ;; - *) - ;; -esac -cat >>confdefs.h <<_ACEOF -#define CPU_SPINWAIT $CPU_SPINWAIT -_ACEOF - - -LD_PRELOAD_VAR="LD_PRELOAD" -so="so" -importlib="${so}" -o="$ac_objext" -a="a" -exe="$ac_exeext" -libprefix="lib" -DSO_LDFLAGS='-shared -Wl,-soname,$(@F)' -RPATH='-Wl,-rpath,$(1)' -SOREV="${so}.${rev}" -PIC_CFLAGS='-fPIC -DPIC' -CTARGET='-o $@' -LDTARGET='-o $@' -EXTRA_LDFLAGS= -MKLIB='ar crus $@' -CC_MM=1 - -default_munmap="1" -JEMALLOC_USABLE_SIZE_CONST="const" -case "${host}" in - *-*-darwin*) - CFLAGS="$CFLAGS" - abi="macho" - $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h - - RPATH="" - LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES" - so="dylib" - importlib="${so}" - force_tls="0" - DSO_LDFLAGS='-shared -Wl,-dylib_install_name,$(@F)' - SOREV="${rev}.${so}" - ;; - *-*-freebsd*) - CFLAGS="$CFLAGS" - abi="elf" - $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h - - force_lazy_lock="1" - ;; - *-*-linux*) - CFLAGS="$CFLAGS" - CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" - abi="elf" - $as_echo "#define JEMALLOC_HAS_ALLOCA_H 1" >>confdefs.h - - $as_echo "#define JEMALLOC_PURGE_MADVISE_DONTNEED " >>confdefs.h - - $as_echo "#define JEMALLOC_THREADED_INIT " >>confdefs.h - - JEMALLOC_USABLE_SIZE_CONST="" - default_munmap="0" - ;; - *-*-netbsd*) - { $as_echo "$as_me:${as_lineno-$LINENO}: checking ABI" >&5 -$as_echo_n "checking ABI... " >&6; } - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -#ifdef __ELF__ -/* ELF */ -#else -#error aout -#endif - -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - CFLAGS="$CFLAGS"; abi="elf" -else - abi="aout" -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $abi" >&5 -$as_echo "$abi" >&6; } - $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h - - ;; - *-*-solaris2*) - CFLAGS="$CFLAGS" - abi="elf" - RPATH='-Wl,-R,$(1)' - CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS" - LIBS="$LIBS -lposix4 -lsocket -lnsl" - ;; - *-ibm-aix*) - if "$LG_SIZEOF_PTR" = "8"; then - LD_PRELOAD_VAR="LDR_PRELOAD64" - else - LD_PRELOAD_VAR="LDR_PRELOAD" - fi - abi="xcoff" - ;; - *-*-mingw*) - abi="pecoff" - force_tls="0" - RPATH="" - so="dll" - if test "x$je_cv_msvc" = "xyes" ; then - importlib="lib" - DSO_LDFLAGS="-LD" - EXTRA_LDFLAGS="-link -DEBUG" - CTARGET='-Fo$@' - LDTARGET='-Fe$@' - MKLIB='lib -nologo -out:$@' - CC_MM= - else - importlib="${so}" - DSO_LDFLAGS="-shared" - fi - a="lib" - libprefix="" - SOREV="${so}" - PIC_CFLAGS="" - ;; - *) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: Unsupported operating system: ${host}" >&5 -$as_echo "Unsupported operating system: ${host}" >&6; } - abi="elf" - ;; -esac -cat >>confdefs.h <<_ACEOF -#define JEMALLOC_USABLE_SIZE_CONST $JEMALLOC_USABLE_SIZE_CONST -_ACEOF - - - - - - - - - - - - - - - - - - - -if test "x$abi" != "xpecoff"; then - LIBS="$LIBS -lm" -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __attribute__ syntax is compilable" >&5 -$as_echo_n "checking whether __attribute__ syntax is compilable... " >&6; } -if ${je_cv_attribute+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -static __attribute__((unused)) void foo(void){} -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_attribute=yes -else - je_cv_attribute=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_attribute" >&5 -$as_echo "$je_cv_attribute" >&6; } - -if test "x${je_cv_attribute}" = "xyes" ; then - $as_echo "#define JEMALLOC_HAVE_ATTR " >>confdefs.h - - if test "x${GCC}" = "xyes" -a "x${abi}" = "xelf"; then - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -fvisibility=hidden" >&5 -$as_echo_n "checking whether compiler supports -fvisibility=hidden... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-fvisibility=hidden" -else - CFLAGS="${CFLAGS} -fvisibility=hidden" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - fi -fi -SAVED_CFLAGS="${CFLAGS}" - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Werror" >&5 -$as_echo_n "checking whether compiler supports -Werror... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-Werror" -else - CFLAGS="${CFLAGS} -Werror" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether tls_model attribute is compilable" >&5 -$as_echo_n "checking whether tls_model attribute is compilable... " >&6; } -if ${je_cv_tls_model+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ -static __thread int - __attribute__((tls_model("initial-exec"))) foo; - foo = 0; - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_tls_model=yes -else - je_cv_tls_model=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_tls_model" >&5 -$as_echo "$je_cv_tls_model" >&6; } - -CFLAGS="${SAVED_CFLAGS}" -if test "x${je_cv_tls_model}" = "xyes" ; then - $as_echo "#define JEMALLOC_TLS_MODEL __attribute__((tls_model(\"initial-exec\")))" >>confdefs.h - -else - $as_echo "#define JEMALLOC_TLS_MODEL " >>confdefs.h - -fi - - -# Check whether --with-rpath was given. -if test "${with_rpath+set}" = set; then : - withval=$with_rpath; if test "x$with_rpath" = "xno" ; then - RPATH_EXTRA= -else - RPATH_EXTRA="`echo $with_rpath | tr \":\" \" \"`" -fi -else - RPATH_EXTRA= - -fi - - - -# Check whether --enable-autogen was given. -if test "${enable_autogen+set}" = set; then : - enableval=$enable_autogen; if test "x$enable_autogen" = "xno" ; then - enable_autogen="0" -else - enable_autogen="1" -fi - -else - enable_autogen="0" - -fi - - - -# Find a good install program. We prefer a C program (faster), -# so one script is as good as another. 
But avoid the broken or -# incompatible versions: -# SysV /etc/install, /usr/sbin/install -# SunOS /usr/etc/install -# IRIX /sbin/install -# AIX /bin/install -# AmigaOS /C/install, which installs bootblocks on floppy discs -# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag -# AFS /usr/afsws/bin/install, which mishandles nonexistent args -# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" -# OS/2's system install, which has a completely different semantic -# ./install, which can be erroneously created by make from ./install.sh. -# Reject install programs that cannot install multiple files. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 -$as_echo_n "checking for a BSD-compatible install... " >&6; } -if test -z "$INSTALL"; then -if ${ac_cv_path_install+:} false; then : - $as_echo_n "(cached) " >&6 -else - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - # Account for people who put trailing slashes in PATH elements. -case $as_dir/ in #(( - ./ | .// | /[cC]/* | \ - /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ - ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \ - /usr/ucb/* ) ;; - *) - # OSF1 and SCO ODT 3.0 have their own names for install. - # Don't use installbsd from OSF since it installs stuff as root - # by default. - for ac_prog in ginstall scoinst install; do - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; }; then - if test $ac_prog = install && - grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then - # AIX install. It has an incompatible calling convention. - : - elif test $ac_prog = install && - grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then - # program-specific install script used by HP pwplus--don't use. 
- : - else - rm -rf conftest.one conftest.two conftest.dir - echo one > conftest.one - echo two > conftest.two - mkdir conftest.dir - if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" && - test -s conftest.one && test -s conftest.two && - test -s conftest.dir/conftest.one && - test -s conftest.dir/conftest.two - then - ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" - break 3 - fi - fi - fi - done - done - ;; -esac - - done -IFS=$as_save_IFS - -rm -rf conftest.one conftest.two conftest.dir - -fi - if test "${ac_cv_path_install+set}" = set; then - INSTALL=$ac_cv_path_install - else - # As a last resort, use the slow shell script. Don't cache a - # value for INSTALL within a source directory, because that will - # break other packages using the cache if that directory is - # removed, or if the value is a relative name. - INSTALL=$ac_install_sh - fi -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5 -$as_echo "$INSTALL" >&6; } - -# Use test -z because SunOS4 sh mishandles braces in ${var-val}. -# It thinks the first close brace ends the variable substitution. -test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' - -test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' - -test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' - -if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. -set dummy ${ac_tool_prefix}ranlib; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_RANLIB+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$RANLIB"; then - ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -RANLIB=$ac_cv_prog_RANLIB -if test -n "$RANLIB"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 -$as_echo "$RANLIB" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - -fi -if test -z "$ac_cv_prog_RANLIB"; then - ac_ct_RANLIB=$RANLIB - # Extract the first word of "ranlib", so it can be a program name with args. -set dummy ranlib; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_RANLIB+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$ac_ct_RANLIB"; then - ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_ac_ct_RANLIB="ranlib" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB -if test -n "$ac_ct_RANLIB"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 -$as_echo "$ac_ct_RANLIB" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - if test "x$ac_ct_RANLIB" = x; then - RANLIB=":" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - RANLIB=$ac_ct_RANLIB - fi -else - RANLIB="$ac_cv_prog_RANLIB" -fi - -# Extract the first word of "ar", so it can be a program name with args. -set dummy ar; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_path_AR+:} false; then : - $as_echo_n "(cached) " >&6 -else - case $AR in - [\\/]* | ?:[\\/]*) - ac_cv_path_AR="$AR" # Let the user override the test with a path. - ;; - *) - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_path_AR="$as_dir/$ac_word$ac_exec_ext" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - - test -z "$ac_cv_path_AR" && ac_cv_path_AR="false" - ;; -esac -fi -AR=$ac_cv_path_AR -if test -n "$AR"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AR" >&5 -$as_echo "$AR" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - -# Extract the first word of "ld", so it can be a program name with args. -set dummy ld; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_path_LD+:} false; then : - $as_echo_n "(cached) " >&6 -else - case $LD in - [\\/]* | ?:[\\/]*) - ac_cv_path_LD="$LD" # Let the user override the test with a path. - ;; - *) - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_path_LD="$as_dir/$ac_word$ac_exec_ext" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - - test -z "$ac_cv_path_LD" && ac_cv_path_LD="false" - ;; -esac -fi -LD=$ac_cv_path_LD -if test -n "$LD"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LD" >&5 -$as_echo "$LD" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - -# Extract the first word of "autoconf", so it can be a program name with args. -set dummy autoconf; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... 
" >&6; } -if ${ac_cv_path_AUTOCONF+:} false; then : - $as_echo_n "(cached) " >&6 -else - case $AUTOCONF in - [\\/]* | ?:[\\/]*) - ac_cv_path_AUTOCONF="$AUTOCONF" # Let the user override the test with a path. - ;; - *) - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_path_AUTOCONF="$as_dir/$ac_word$ac_exec_ext" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - - test -z "$ac_cv_path_AUTOCONF" && ac_cv_path_AUTOCONF="false" - ;; -esac -fi -AUTOCONF=$ac_cv_path_AUTOCONF -if test -n "$AUTOCONF"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AUTOCONF" >&5 -$as_echo "$AUTOCONF" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - - -public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free malloc_usable_size malloc_stats_print mallctl mallctlnametomib mallctlbymib" - -ac_fn_c_check_func "$LINENO" "memalign" "ac_cv_func_memalign" -if test "x$ac_cv_func_memalign" = xyes; then : - $as_echo "#define JEMALLOC_OVERRIDE_MEMALIGN " >>confdefs.h - - public_syms="${public_syms} memalign" -fi - -ac_fn_c_check_func "$LINENO" "valloc" "ac_cv_func_valloc" -if test "x$ac_cv_func_valloc" = xyes; then : - $as_echo "#define JEMALLOC_OVERRIDE_VALLOC " >>confdefs.h - - public_syms="${public_syms} valloc" -fi - - -# Check whether --enable-experimental was given. 
-if test "${enable_experimental+set}" = set; then : - enableval=$enable_experimental; if test "x$enable_experimental" = "xno" ; then - enable_experimental="0" -else - enable_experimental="1" -fi - -else - enable_experimental="1" - -fi - -if test "x$enable_experimental" = "x1" ; then - $as_echo "#define JEMALLOC_EXPERIMENTAL " >>confdefs.h - - public_syms="${public_syms} allocm dallocm nallocm rallocm sallocm" -fi - - - -# Check whether --with-mangling was given. -if test "${with_mangling+set}" = set; then : - withval=$with_mangling; mangling_map="$with_mangling" -else - mangling_map="" -fi - -for nm in `echo ${mangling_map} |tr ',' ' '` ; do - k="`echo ${nm} |tr ':' ' ' |awk '{print $1}'`" - n="je_${k}" - m=`echo ${nm} |tr ':' ' ' |awk '{print $2}'` - cat >>confdefs.h <<_ACEOF -#define ${n} ${m} -_ACEOF - - public_syms=`for sym in ${public_syms}; do echo "${sym}"; done |grep -v "^${k}\$" |tr '\n' ' '` -done - - -# Check whether --with-jemalloc_prefix was given. -if test "${with_jemalloc_prefix+set}" = set; then : - withval=$with_jemalloc_prefix; JEMALLOC_PREFIX="$with_jemalloc_prefix" -else - if test "x$abi" != "xmacho" -a "x$abi" != "xpecoff"; then - JEMALLOC_PREFIX="" -else - JEMALLOC_PREFIX="je_" -fi - -fi - -if test "x$JEMALLOC_PREFIX" != "x" ; then - JEMALLOC_CPREFIX=`echo ${JEMALLOC_PREFIX} | tr "a-z" "A-Z"` - cat >>confdefs.h <<_ACEOF -#define JEMALLOC_PREFIX "$JEMALLOC_PREFIX" -_ACEOF - - cat >>confdefs.h <<_ACEOF -#define JEMALLOC_CPREFIX "$JEMALLOC_CPREFIX" -_ACEOF - -fi -for stem in ${public_syms}; do - n="je_${stem}" - m="${JEMALLOC_PREFIX}${stem}" - cat >>confdefs.h <<_ACEOF -#define ${n} ${m} -_ACEOF - -done - - -# Check whether --with-export was given. -if test "${with_export+set}" = set; then : - withval=$with_export; if test "x$with_export" = "xno"; then - $as_echo "#define JEMALLOC_EXPORT /**/" >>confdefs.h - -fi - -fi - - - -# Check whether --with-private_namespace was given. 
-if test "${with_private_namespace+set}" = set; then : - withval=$with_private_namespace; JEMALLOC_PRIVATE_NAMESPACE="$with_private_namespace" -else - JEMALLOC_PRIVATE_NAMESPACE="" - -fi - -cat >>confdefs.h <<_ACEOF -#define JEMALLOC_PRIVATE_NAMESPACE "$JEMALLOC_PRIVATE_NAMESPACE" -_ACEOF - -if test "x$JEMALLOC_PRIVATE_NAMESPACE" != "x" ; then - cat >>confdefs.h <<_ACEOF -#define JEMALLOC_N(string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix) ${JEMALLOC_PRIVATE_NAMESPACE}##string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix -_ACEOF - -else - cat >>confdefs.h <<_ACEOF -#define JEMALLOC_N(string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix) string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix -_ACEOF - -fi - - -# Check whether --with-install_suffix was given. -if test "${with_install_suffix+set}" = set; then : - withval=$with_install_suffix; INSTALL_SUFFIX="$with_install_suffix" -else - INSTALL_SUFFIX= - -fi - -install_suffix="$INSTALL_SUFFIX" - - -cfgoutputs_in="${srcroot}Makefile.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/html.xsl.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/manpages.xsl.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/jemalloc.xml.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc.h.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal.h.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/jemalloc_test.h.in" - -cfgoutputs_out="Makefile" -cfgoutputs_out="${cfgoutputs_out} doc/html.xsl" -cfgoutputs_out="${cfgoutputs_out} doc/manpages.xsl" -cfgoutputs_out="${cfgoutputs_out} doc/jemalloc${install_suffix}.xml" -cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc${install_suffix}.h" -cfgoutputs_out="${cfgoutputs_out} include/jemalloc/internal/jemalloc_internal.h" -cfgoutputs_out="${cfgoutputs_out} test/jemalloc_test.h" - -cfgoutputs_tup="Makefile" 
-cfgoutputs_tup="${cfgoutputs_tup} doc/html.xsl:doc/html.xsl.in" -cfgoutputs_tup="${cfgoutputs_tup} doc/manpages.xsl:doc/manpages.xsl.in" -cfgoutputs_tup="${cfgoutputs_tup} doc/jemalloc${install_suffix}.xml:doc/jemalloc.xml.in" -cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc${install_suffix}.h:include/jemalloc/jemalloc.h.in" -cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h" -cfgoutputs_tup="${cfgoutputs_tup} test/jemalloc_test.h:test/jemalloc_test.h.in" - -cfghdrs_in="${srcroot}include/jemalloc/jemalloc_defs.h.in" -cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/size_classes.sh" - -cfghdrs_out="include/jemalloc/jemalloc_defs${install_suffix}.h" -cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/size_classes.h" - -cfghdrs_tup="include/jemalloc/jemalloc_defs${install_suffix}.h:include/jemalloc/jemalloc_defs.h.in" - -# Check whether --enable-cc-silence was given. -if test "${enable_cc_silence+set}" = set; then : - enableval=$enable_cc_silence; if test "x$enable_cc_silence" = "xno" ; then - enable_cc_silence="0" -else - enable_cc_silence="1" -fi - -else - enable_cc_silence="0" - -fi - -if test "x$enable_cc_silence" = "x1" ; then - $as_echo "#define JEMALLOC_CC_SILENCE " >>confdefs.h - -fi - -# Check whether --enable-debug was given. -if test "${enable_debug+set}" = set; then : - enableval=$enable_debug; if test "x$enable_debug" = "xno" ; then - enable_debug="0" -else - enable_debug="1" -fi - -else - enable_debug="0" - -fi - -if test "x$enable_debug" = "x1" ; then - $as_echo "#define JEMALLOC_DEBUG " >>confdefs.h - - enable_ivsalloc="1" -fi - - -# Check whether --enable-ivsalloc was given. 
-if test "${enable_ivsalloc+set}" = set; then : - enableval=$enable_ivsalloc; if test "x$enable_ivsalloc" = "xno" ; then - enable_ivsalloc="0" -else - enable_ivsalloc="1" -fi - -else - enable_ivsalloc="0" - -fi - -if test "x$enable_ivsalloc" = "x1" ; then - $as_echo "#define JEMALLOC_IVSALLOC " >>confdefs.h - -fi - -if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then - optimize="no" - echo "$EXTRA_CFLAGS" | grep "\-O" >/dev/null || optimize="yes" - if test "x${optimize}" = "xyes" ; then - if test "x$GCC" = "xyes" ; then - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -O3" >&5 -$as_echo_n "checking whether compiler supports -O3... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-O3" -else - CFLAGS="${CFLAGS} -O3" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -funroll-loops" >&5 -$as_echo_n "checking whether compiler supports -funroll-loops... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-funroll-loops" -else - CFLAGS="${CFLAGS} -funroll-loops" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - elif test "x$je_cv_msvc" = "xyes" ; then - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -O2" >&5 -$as_echo_n "checking whether compiler supports -O2... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-O2" -else - CFLAGS="${CFLAGS} -O2" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - else - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -O" >&5 -$as_echo_n "checking whether compiler supports -O... " >&6; } -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="-O" -else - CFLAGS="${CFLAGS} -O" -fi -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - -int -main () -{ - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - CFLAGS="${TCFLAGS}" - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - fi - fi -fi - -# Check whether --enable-stats was given. 
-if test "${enable_stats+set}" = set; then : - enableval=$enable_stats; if test "x$enable_stats" = "xno" ; then - enable_stats="0" -else - enable_stats="1" -fi - -else - enable_stats="1" - -fi - -if test "x$enable_stats" = "x1" ; then - $as_echo "#define JEMALLOC_STATS " >>confdefs.h - -fi - - -# Check whether --enable-prof was given. -if test "${enable_prof+set}" = set; then : - enableval=$enable_prof; if test "x$enable_prof" = "xno" ; then - enable_prof="0" -else - enable_prof="1" -fi - -else - enable_prof="0" - -fi - -if test "x$enable_prof" = "x1" ; then - backtrace_method="" -else - backtrace_method="N/A" -fi - -# Check whether --enable-prof-libunwind was given. -if test "${enable_prof_libunwind+set}" = set; then : - enableval=$enable_prof_libunwind; if test "x$enable_prof_libunwind" = "xno" ; then - enable_prof_libunwind="0" -else - enable_prof_libunwind="1" -fi - -else - enable_prof_libunwind="0" - -fi - - -# Check whether --with-static_libunwind was given. -if test "${with_static_libunwind+set}" = set; then : - withval=$with_static_libunwind; if test "x$with_static_libunwind" = "xno" ; then - LUNWIND="-lunwind" -else - if test ! -f "$with_static_libunwind" ; then - as_fn_error $? "Static libunwind not found: $with_static_libunwind" "$LINENO" 5 - fi - LUNWIND="$with_static_libunwind" -fi -else - LUNWIND="-lunwind" - -fi - -if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then - for ac_header in libunwind.h -do : - ac_fn_c_check_header_mongrel "$LINENO" "libunwind.h" "ac_cv_header_libunwind_h" "$ac_includes_default" -if test "x$ac_cv_header_libunwind_h" = xyes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_LIBUNWIND_H 1 -_ACEOF - -else - enable_prof_libunwind="0" -fi - -done - - if test "x$LUNWIND" = "x-lunwind" ; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for backtrace in -lunwind" >&5 -$as_echo_n "checking for backtrace in -lunwind... 
" >&6; } -if ${ac_cv_lib_unwind_backtrace+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_check_lib_save_LIBS=$LIBS -LIBS="-lunwind $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif -char backtrace (); -int -main () -{ -return backtrace (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - ac_cv_lib_unwind_backtrace=yes -else - ac_cv_lib_unwind_backtrace=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_unwind_backtrace" >&5 -$as_echo "$ac_cv_lib_unwind_backtrace" >&6; } -if test "x$ac_cv_lib_unwind_backtrace" = xyes; then : - LIBS="$LIBS $LUNWIND" -else - enable_prof_libunwind="0" -fi - - else - LIBS="$LIBS $LUNWIND" - fi - if test "x${enable_prof_libunwind}" = "x1" ; then - backtrace_method="libunwind" - $as_echo "#define JEMALLOC_PROF_LIBUNWIND " >>confdefs.h - - fi -fi - -# Check whether --enable-prof-libgcc was given. 
-if test "${enable_prof_libgcc+set}" = set; then : - enableval=$enable_prof_libgcc; if test "x$enable_prof_libgcc" = "xno" ; then - enable_prof_libgcc="0" -else - enable_prof_libgcc="1" -fi - -else - enable_prof_libgcc="1" - -fi - -if test "x$backtrace_method" = "x" -a "x$enable_prof_libgcc" = "x1" \ - -a "x$GCC" = "xyes" ; then - for ac_header in unwind.h -do : - ac_fn_c_check_header_mongrel "$LINENO" "unwind.h" "ac_cv_header_unwind_h" "$ac_includes_default" -if test "x$ac_cv_header_unwind_h" = xyes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_UNWIND_H 1 -_ACEOF - -else - enable_prof_libgcc="0" -fi - -done - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _Unwind_Backtrace in -lgcc" >&5 -$as_echo_n "checking for _Unwind_Backtrace in -lgcc... " >&6; } -if ${ac_cv_lib_gcc__Unwind_Backtrace+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_check_lib_save_LIBS=$LIBS -LIBS="-lgcc $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif -char _Unwind_Backtrace (); -int -main () -{ -return _Unwind_Backtrace (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - ac_cv_lib_gcc__Unwind_Backtrace=yes -else - ac_cv_lib_gcc__Unwind_Backtrace=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gcc__Unwind_Backtrace" >&5 -$as_echo "$ac_cv_lib_gcc__Unwind_Backtrace" >&6; } -if test "x$ac_cv_lib_gcc__Unwind_Backtrace" = xyes; then : - LIBS="$LIBS -lgcc" -else - enable_prof_libgcc="0" -fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking libgcc-based backtracing reliability on ${host_cpu}" >&5 -$as_echo_n "checking libgcc-based backtracing reliability on ${host_cpu}... 
" >&6; } - case "${host_cpu}" in - i[3456]86) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: unreliable" >&5 -$as_echo "unreliable" >&6; } - enable_prof_libgcc="0"; - ;; - x86_64) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: reliable" >&5 -$as_echo "reliable" >&6; } - ;; - *) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: unreliable" >&5 -$as_echo "unreliable" >&6; } - enable_prof_libgcc="0"; - ;; - esac - if test "x${enable_prof_libgcc}" = "x1" ; then - backtrace_method="libgcc" - $as_echo "#define JEMALLOC_PROF_LIBGCC " >>confdefs.h - - fi -else - enable_prof_libgcc="0" -fi - -# Check whether --enable-prof-gcc was given. -if test "${enable_prof_gcc+set}" = set; then : - enableval=$enable_prof_gcc; if test "x$enable_prof_gcc" = "xno" ; then - enable_prof_gcc="0" -else - enable_prof_gcc="1" -fi - -else - enable_prof_gcc="1" - -fi - -if test "x$backtrace_method" = "x" -a "x$enable_prof_gcc" = "x1" \ - -a "x$GCC" = "xyes" ; then - backtrace_method="gcc intrinsics" - $as_echo "#define JEMALLOC_PROF_GCC " >>confdefs.h - -else - enable_prof_gcc="0" -fi - -if test "x$backtrace_method" = "x" ; then - backtrace_method="none (disabling profiling)" - enable_prof="0" -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking configured backtracing method" >&5 -$as_echo_n "checking configured backtracing method... " >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $backtrace_method" >&5 -$as_echo "$backtrace_method" >&6; } -if test "x$enable_prof" = "x1" ; then - if test "x${force_tls}" = "x0" ; then - as_fn_error $? "Heap profiling requires TLS" "$LINENO" 5; - fi - force_tls="1" - $as_echo "#define JEMALLOC_PROF " >>confdefs.h - -fi - - -# Check whether --enable-tcache was given. 
-if test "${enable_tcache+set}" = set; then : - enableval=$enable_tcache; if test "x$enable_tcache" = "xno" ; then - enable_tcache="0" -else - enable_tcache="1" -fi - -else - enable_tcache="1" - -fi - -if test "x$enable_tcache" = "x1" ; then - $as_echo "#define JEMALLOC_TCACHE " >>confdefs.h - -fi - - -# Check whether --enable-mremap was given. -if test "${enable_mremap+set}" = set; then : - enableval=$enable_mremap; if test "x$enable_mremap" = "xno" ; then - enable_mremap="0" -else - enable_mremap="1" -fi - -else - enable_mremap="0" - -fi - -if test "x$enable_mremap" = "x1" ; then - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether mremap(...MREMAP_FIXED...) is compilable" >&5 -$as_echo_n "checking whether mremap(...MREMAP_FIXED...) is compilable... " >&6; } -if ${je_cv_mremap_fixed+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -#define _GNU_SOURCE -#include - -int -main () -{ - -void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0); - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_mremap_fixed=yes -else - je_cv_mremap_fixed=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_mremap_fixed" >&5 -$as_echo "$je_cv_mremap_fixed" >&6; } - - if test "x${je_cv_mremap_fixed}" = "xno" ; then - enable_mremap="0" - fi -fi -if test "x$enable_mremap" = "x1" ; then - $as_echo "#define JEMALLOC_MREMAP " >>confdefs.h - -fi - - -# Check whether --enable-munmap was given. -if test "${enable_munmap+set}" = set; then : - enableval=$enable_munmap; if test "x$enable_munmap" = "xno" ; then - enable_munmap="0" -else - enable_munmap="1" -fi - -else - enable_munmap="${default_munmap}" - -fi - -if test "x$enable_munmap" = "x1" ; then - $as_echo "#define JEMALLOC_MUNMAP " >>confdefs.h - -fi - - -# Check whether --enable-dss was given. 
-if test "${enable_dss+set}" = set; then : - enableval=$enable_dss; if test "x$enable_dss" = "xno" ; then - enable_dss="0" -else - enable_dss="1" -fi - -else - enable_dss="0" - -fi - -ac_fn_c_check_func "$LINENO" "sbrk" "ac_cv_func_sbrk" -if test "x$ac_cv_func_sbrk" = xyes; then : - have_sbrk="1" -else - have_sbrk="0" -fi - -if test "x$have_sbrk" = "x1" ; then - $as_echo "#define JEMALLOC_HAVE_SBRK " >>confdefs.h - -else - enable_dss="0" -fi - -if test "x$enable_dss" = "x1" ; then - $as_echo "#define JEMALLOC_DSS " >>confdefs.h - -fi - - -# Check whether --enable-fill was given. -if test "${enable_fill+set}" = set; then : - enableval=$enable_fill; if test "x$enable_fill" = "xno" ; then - enable_fill="0" -else - enable_fill="1" -fi - -else - enable_fill="1" - -fi - -if test "x$enable_fill" = "x1" ; then - $as_echo "#define JEMALLOC_FILL " >>confdefs.h - -fi - - -# Check whether --enable-utrace was given. -if test "${enable_utrace+set}" = set; then : - enableval=$enable_utrace; if test "x$enable_utrace" = "xno" ; then - enable_utrace="0" -else - enable_utrace="1" -fi - -else - enable_utrace="0" - -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether utrace(2) is compilable" >&5 -$as_echo_n "checking whether utrace(2) is compilable... " >&6; } -if ${je_cv_utrace+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -#include -#include -#include -#include -#include - -int -main () -{ - - utrace((void *)0, 0); - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_utrace=yes -else - je_cv_utrace=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_utrace" >&5 -$as_echo "$je_cv_utrace" >&6; } - -if test "x${je_cv_utrace}" = "xno" ; then - enable_utrace="0" -fi -if test "x$enable_utrace" = "x1" ; then - $as_echo "#define JEMALLOC_UTRACE " >>confdefs.h - -fi - - -# Check whether --enable-valgrind was given. -if test "${enable_valgrind+set}" = set; then : - enableval=$enable_valgrind; if test "x$enable_valgrind" = "xno" ; then - enable_valgrind="0" -else - enable_valgrind="1" -fi - -else - enable_valgrind="1" - -fi - -if test "x$enable_valgrind" = "x1" ; then - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether valgrind is compilable" >&5 -$as_echo_n "checking whether valgrind is compilable... " >&6; } -if ${je_cv_valgrind+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -#include -#include - -#if !defined(VALGRIND_RESIZEINPLACE_BLOCK) -# error "Incompatible Valgrind version" -#endif - -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_valgrind=yes -else - je_cv_valgrind=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_valgrind" >&5 -$as_echo "$je_cv_valgrind" >&6; } - - if test "x${je_cv_valgrind}" = "xno" ; then - enable_valgrind="0" - fi - if test "x$enable_valgrind" = "x1" ; then - $as_echo "#define JEMALLOC_VALGRIND " >>confdefs.h - - fi -fi - - -# Check whether --enable-xmalloc was given. 
-if test "${enable_xmalloc+set}" = set; then : - enableval=$enable_xmalloc; if test "x$enable_xmalloc" = "xno" ; then - enable_xmalloc="0" -else - enable_xmalloc="1" -fi - -else - enable_xmalloc="0" - -fi - -if test "x$enable_xmalloc" = "x1" ; then - $as_echo "#define JEMALLOC_XMALLOC " >>confdefs.h - -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking STATIC_PAGE_SHIFT" >&5 -$as_echo_n "checking STATIC_PAGE_SHIFT... " >&6; } -if ${je_cv_static_page_shift+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test "$cross_compiling" = yes; then : - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? "cannot run test program while cross compiling -See \`config.log' for more details" "$LINENO" 5; } -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include - -int -main () -{ - - int result; - FILE *f; - -#ifdef _WIN32 - SYSTEM_INFO si; - GetSystemInfo(&si); - result = si.dwPageSize; -#else - result = sysconf(_SC_PAGESIZE); -#endif - if (result == -1) { - return 1; - } - result = ffsl(result) - 1; - - f = fopen("conftest.out", "w"); - if (f == NULL) { - return 1; - } - fprintf(f, "%d\n", result); - fclose(f); - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_run "$LINENO"; then : - je_cv_static_page_shift=`cat conftest.out` -else - je_cv_static_page_shift=undefined -fi -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ - conftest.$ac_objext conftest.beam conftest.$ac_ext -fi - -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_static_page_shift" >&5 -$as_echo "$je_cv_static_page_shift" >&6; } - -if test "x$je_cv_static_page_shift" != "xundefined"; then - cat >>confdefs.h <<_ACEOF -#define STATIC_PAGE_SHIFT $je_cv_static_page_shift -_ACEOF - -else - as_fn_error $? 
"cannot determine value for STATIC_PAGE_SHIFT" "$LINENO" 5 -fi - - -if test -d "${srcroot}.git" ; then - git describe --long --abbrev=40 > ${srcroot}VERSION -fi -jemalloc_version=`cat ${srcroot}VERSION` -jemalloc_version_major=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print $1}'` -jemalloc_version_minor=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print $2}'` -jemalloc_version_bugfix=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print $3}'` -jemalloc_version_nrev=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print $4}'` -jemalloc_version_gid=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print $5}'` - - - - - - - - -if test "x$abi" != "xpecoff" ; then - for ac_header in pthread.h -do : - ac_fn_c_check_header_mongrel "$LINENO" "pthread.h" "ac_cv_header_pthread_h" "$ac_includes_default" -if test "x$ac_cv_header_pthread_h" = xyes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_PTHREAD_H 1 -_ACEOF - -else - as_fn_error $? "pthread.h is missing" "$LINENO" 5 -fi - -done - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5 -$as_echo_n "checking for pthread_create in -lpthread... " >&6; } -if ${ac_cv_lib_pthread_pthread_create+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_check_lib_save_LIBS=$LIBS -LIBS="-lpthread $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif -char pthread_create (); -int -main () -{ -return pthread_create (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - ac_cv_lib_pthread_pthread_create=yes -else - ac_cv_lib_pthread_pthread_create=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_pthread_create" >&5 -$as_echo "$ac_cv_lib_pthread_pthread_create" >&6; } -if test "x$ac_cv_lib_pthread_pthread_create" = xyes; then : - LIBS="$LIBS -lpthread" -else - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing pthread_create" >&5 -$as_echo_n "checking for library containing pthread_create... " >&6; } -if ${ac_cv_search_pthread_create+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_func_search_save_LIBS=$LIBS -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif -char pthread_create (); -int -main () -{ -return pthread_create (); - ; - return 0; -} -_ACEOF -for ac_lib in '' ; do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - if ac_fn_c_try_link "$LINENO"; then : - ac_cv_search_pthread_create=$ac_res -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext - if ${ac_cv_search_pthread_create+:} false; then : - break -fi -done -if ${ac_cv_search_pthread_create+:} false; then : - -else - ac_cv_search_pthread_create=no -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_pthread_create" >&5 -$as_echo "$ac_cv_search_pthread_create" >&6; } -ac_res=$ac_cv_search_pthread_create -if test "$ac_res" != no; then : - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - -else - as_fn_error $? "libpthread is missing" "$LINENO" 5 -fi - -fi - -fi - -CPPFLAGS="$CPPFLAGS -D_REENTRANT" - -ac_fn_c_check_func "$LINENO" "_malloc_thread_cleanup" "ac_cv_func__malloc_thread_cleanup" -if test "x$ac_cv_func__malloc_thread_cleanup" = xyes; then : - have__malloc_thread_cleanup="1" -else - have__malloc_thread_cleanup="0" - -fi - -if test "x$have__malloc_thread_cleanup" = "x1" ; then - $as_echo "#define JEMALLOC_MALLOC_THREAD_CLEANUP " >>confdefs.h - - force_tls="1" -fi - -ac_fn_c_check_func "$LINENO" "_pthread_mutex_init_calloc_cb" "ac_cv_func__pthread_mutex_init_calloc_cb" -if test "x$ac_cv_func__pthread_mutex_init_calloc_cb" = xyes; then : - have__pthread_mutex_init_calloc_cb="1" -else - have__pthread_mutex_init_calloc_cb="0" - -fi - -if test "x$have__pthread_mutex_init_calloc_cb" = "x1" ; then - $as_echo "#define JEMALLOC_MUTEX_INIT_CB 1" >>confdefs.h - -fi - -# Check whether --enable-lazy_lock was given. 
-if test "${enable_lazy_lock+set}" = set; then : - enableval=$enable_lazy_lock; if test "x$enable_lazy_lock" = "xno" ; then - enable_lazy_lock="0" -else - enable_lazy_lock="1" -fi - -else - enable_lazy_lock="0" - -fi - -if test "x$enable_lazy_lock" = "x0" -a "x${force_lazy_lock}" = "x1" ; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&5 -$as_echo "Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&6; } - enable_lazy_lock="1" -fi -if test "x$enable_lazy_lock" = "x1" ; then - if test "x$abi" != "xpecoff" ; then - for ac_header in dlfcn.h -do : - ac_fn_c_check_header_mongrel "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default" -if test "x$ac_cv_header_dlfcn_h" = xyes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_DLFCN_H 1 -_ACEOF - -else - as_fn_error $? "dlfcn.h is missing" "$LINENO" 5 -fi - -done - - ac_fn_c_check_func "$LINENO" "dlsym" "ac_cv_func_dlsym" -if test "x$ac_cv_func_dlsym" = xyes; then : - -else - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlsym in -ldl" >&5 -$as_echo_n "checking for dlsym in -ldl... " >&6; } -if ${ac_cv_lib_dl_dlsym+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_check_lib_save_LIBS=$LIBS -LIBS="-ldl $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif -char dlsym (); -int -main () -{ -return dlsym (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - ac_cv_lib_dl_dlsym=yes -else - ac_cv_lib_dl_dlsym=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlsym" >&5 -$as_echo "$ac_cv_lib_dl_dlsym" >&6; } -if test "x$ac_cv_lib_dl_dlsym" = xyes; then : - LIBS="$LIBS -ldl" -else - as_fn_error $? "libdl is missing" "$LINENO" 5 -fi - - -fi - - fi - $as_echo "#define JEMALLOC_LAZY_LOCK " >>confdefs.h - -fi - - -# Check whether --enable-tls was given. -if test "${enable_tls+set}" = set; then : - enableval=$enable_tls; if test "x$enable_tls" = "xno" ; then - enable_tls="0" -else - enable_tls="1" -fi - -else - enable_tls="1" - -fi - -if test "x${enable_tls}" = "x0" -a "x${force_tls}" = "x1" ; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing TLS to avoid allocator/threading bootstrap issues" >&5 -$as_echo "Forcing TLS to avoid allocator/threading bootstrap issues" >&6; } - enable_tls="1" -fi -if test "x${enable_tls}" = "x1" -a "x${force_tls}" = "x0" ; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing no TLS to avoid allocator/threading bootstrap issues" >&5 -$as_echo "Forcing no TLS to avoid allocator/threading bootstrap issues" >&6; } - enable_tls="0" -fi -if test "x${enable_tls}" = "x1" ; then -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for TLS" >&5 -$as_echo_n "checking for TLS... " >&6; } -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - - __thread int x; - -int -main () -{ - - x = 42; - - return 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - enable_tls="0" -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi - -if test "x${enable_tls}" = "x1" ; then - cat >>confdefs.h <<_ACEOF -#define JEMALLOC_TLS -_ACEOF - -elif test "x${force_tls}" = "x1" ; then - as_fn_error $? "Failed to configure TLS, which is mandatory for correct function" "$LINENO" 5 -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program using ffsl is compilable" >&5 -$as_echo_n "checking whether a program using ffsl is compilable... " >&6; } -if ${je_cv_function_ffsl+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -#include -#include -#include - -int -main () -{ - - { - int rv = ffsl(0x08); - printf("%d\n", rv); - } - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_function_ffsl=yes -else - je_cv_function_ffsl=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_function_ffsl" >&5 -$as_echo "$je_cv_function_ffsl" >&6; } - -if test "x${je_cv_function_ffsl}" != "xyes" ; then - as_fn_error $? "Cannot build without ffsl(3)" "$LINENO" 5 -fi - - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether atomic(9) is compilable" >&5 -$as_echo_n "checking whether atomic(9) is compilable... " >&6; } -if ${je_cv_atomic9+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -#include -#include -#include - -int -main () -{ - - { - uint32_t x32 = 0; - volatile uint32_t *x32p = &x32; - atomic_fetchadd_32(x32p, 1); - } - { - unsigned long xlong = 0; - volatile unsigned long *xlongp = &xlong; - atomic_fetchadd_long(xlongp, 1); - } - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_atomic9=yes -else - je_cv_atomic9=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_atomic9" >&5 -$as_echo "$je_cv_atomic9" >&6; } - -if test "x${je_cv_atomic9}" = "xyes" ; then - $as_echo "#define JEMALLOC_ATOMIC9 1" >>confdefs.h - -fi - - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether Darwin OSAtomic*() is compilable" >&5 -$as_echo_n "checking whether Darwin OSAtomic*() is compilable... " >&6; } -if ${je_cv_osatomic+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -#include -#include - -int -main () -{ - - { - int32_t x32 = 0; - volatile int32_t *x32p = &x32; - OSAtomicAdd32(1, x32p); - } - { - int64_t x64 = 0; - volatile int64_t *x64p = &x64; - OSAtomicAdd64(1, x64p); - } - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_osatomic=yes -else - je_cv_osatomic=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_osatomic" >&5 -$as_echo "$je_cv_osatomic" >&6; } - -if test "x${je_cv_osatomic}" = "xyes" ; then - $as_echo "#define JEMALLOC_OSATOMIC " >>confdefs.h - -fi - - - - -if test "x${je_cv_atomic9}" != "xyes" -a "x${je_cv_osatomic}" != "xyes" ; then - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to force 32-bit __sync_{add,sub}_and_fetch()" >&5 -$as_echo_n "checking whether to force 32-bit __sync_{add,sub}_and_fetch()... 
" >&6; } -if ${je_cv_sync_compare_and_swap_4+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - - #include - -int -main () -{ - - #ifndef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 - { - uint32_t x32 = 0; - __sync_add_and_fetch(&x32, 42); - __sync_sub_and_fetch(&x32, 1); - } - #else - #error __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 is defined, no need to force - #endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_sync_compare_and_swap_4=yes -else - je_cv_sync_compare_and_swap_4=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_sync_compare_and_swap_4" >&5 -$as_echo "$je_cv_sync_compare_and_swap_4" >&6; } - - if test "x${je_cv_sync_compare_and_swap_4}" = "xyes" ; then - $as_echo "#define JE_FORCE_SYNC_COMPARE_AND_SWAP_4 " >>confdefs.h - - fi - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to force 64-bit __sync_{add,sub}_and_fetch()" >&5 -$as_echo_n "checking whether to force 64-bit __sync_{add,sub}_and_fetch()... " >&6; } -if ${je_cv_sync_compare_and_swap_8+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - - #include - -int -main () -{ - - #ifndef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 - { - uint64_t x64 = 0; - __sync_add_and_fetch(&x64, 42); - __sync_sub_and_fetch(&x64, 1); - } - #else - #error __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 is defined, no need to force - #endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_sync_compare_and_swap_8=yes -else - je_cv_sync_compare_and_swap_8=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_sync_compare_and_swap_8" >&5 -$as_echo "$je_cv_sync_compare_and_swap_8" >&6; } - - if test "x${je_cv_sync_compare_and_swap_8}" = "xyes" ; then - $as_echo "#define JE_FORCE_SYNC_COMPARE_AND_SWAP_8 " >>confdefs.h - - fi - -fi - - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether Darwin OSSpin*() is compilable" >&5 -$as_echo_n "checking whether Darwin OSSpin*() is compilable... " >&6; } -if ${je_cv_osspin+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -#include -#include - -int -main () -{ - - OSSpinLock lock = 0; - OSSpinLockLock(&lock); - OSSpinLockUnlock(&lock); - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - je_cv_osspin=yes -else - je_cv_osspin=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_osspin" >&5 -$as_echo "$je_cv_osspin" >&6; } - -if test "x${je_cv_osspin}" = "xyes" ; then - $as_echo "#define JEMALLOC_OSSPIN " >>confdefs.h - -fi - - -# Check whether --enable-zone-allocator was given. 
-if test "${enable_zone_allocator+set}" = set; then : - enableval=$enable_zone_allocator; if test "x$enable_zone_allocator" = "xno" ; then - enable_zone_allocator="0" -else - enable_zone_allocator="1" -fi - -else - if test "x${abi}" = "xmacho"; then - enable_zone_allocator="1" -fi - - -fi - - - -if test "x${enable_zone_allocator}" = "x1" ; then - if test "x${abi}" != "xmacho"; then - as_fn_error $? "--enable-zone-allocator is only supported on Darwin" "$LINENO" 5 - fi - $as_echo "#define JEMALLOC_IVSALLOC " >>confdefs.h - - $as_echo "#define JEMALLOC_ZONE " >>confdefs.h - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking malloc zone version" >&5 -$as_echo_n "checking malloc zone version... " >&6; } - - - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -int -main () -{ -static foo[sizeof(malloc_zone_t) == sizeof(void *) * 14 ? 1 : -1] - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - JEMALLOC_ZONE_VERSION=3 -else - - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -int -main () -{ -static foo[sizeof(malloc_zone_t) == sizeof(void *) * 15 ? 1 : -1] - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - JEMALLOC_ZONE_VERSION=5 -else - - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -int -main () -{ -static foo[sizeof(malloc_zone_t) == sizeof(void *) * 16 ? 1 : -1] - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -int -main () -{ -static foo[sizeof(malloc_introspection_t) == sizeof(void *) * 9 ? 1 : -1] - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - JEMALLOC_ZONE_VERSION=6 -else - - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -int -main () -{ -static foo[sizeof(malloc_introspection_t) == sizeof(void *) * 13 ? 
1 : -1] - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - JEMALLOC_ZONE_VERSION=7 -else - JEMALLOC_ZONE_VERSION= - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -else - - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -int -main () -{ -static foo[sizeof(malloc_zone_t) == sizeof(void *) * 17 ? 1 : -1] - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - JEMALLOC_ZONE_VERSION=8 -else - - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -int -main () -{ -static foo[sizeof(malloc_zone_t) > sizeof(void *) * 17 ? 1 : -1] - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - JEMALLOC_ZONE_VERSION=9 -else - JEMALLOC_ZONE_VERSION= - -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - if test "x${JEMALLOC_ZONE_VERSION}" = "x"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 -$as_echo "unsupported" >&6; } - as_fn_error $? "Unsupported malloc zone version" "$LINENO" 5 - fi - if test "${JEMALLOC_ZONE_VERSION}" = 9; then - JEMALLOC_ZONE_VERSION=8 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: > 8" >&5 -$as_echo "> 8" >&6; } - else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $JEMALLOC_ZONE_VERSION" >&5 -$as_echo "$JEMALLOC_ZONE_VERSION" >&6; } - fi - cat >>confdefs.h <<_ACEOF -#define JEMALLOC_ZONE_VERSION $JEMALLOC_ZONE_VERSION -_ACEOF - -fi - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for stdbool.h that conforms to C99" >&5 -$as_echo_n "checking for stdbool.h that conforms to C99... 
" >&6; } -if ${ac_cv_header_stdbool_h+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -#include -#ifndef bool - "error: bool is not defined" -#endif -#ifndef false - "error: false is not defined" -#endif -#if false - "error: false is not 0" -#endif -#ifndef true - "error: true is not defined" -#endif -#if true != 1 - "error: true is not 1" -#endif -#ifndef __bool_true_false_are_defined - "error: __bool_true_false_are_defined is not defined" -#endif - - struct s { _Bool s: 1; _Bool t; } s; - - char a[true == 1 ? 1 : -1]; - char b[false == 0 ? 1 : -1]; - char c[__bool_true_false_are_defined == 1 ? 1 : -1]; - char d[(bool) 0.5 == true ? 1 : -1]; - /* See body of main program for 'e'. */ - char f[(_Bool) 0.0 == false ? 1 : -1]; - char g[true]; - char h[sizeof (_Bool)]; - char i[sizeof s.t]; - enum { j = false, k = true, l = false * true, m = true * 256 }; - /* The following fails for - HP aC++/ANSI C B3910B A.05.55 [Dec 04 2003]. */ - _Bool n[m]; - char o[sizeof n == m * sizeof n[0] ? 1 : -1]; - char p[-1 - (_Bool) 0 < 0 && -1 - (bool) 0 < 0 ? 1 : -1]; - /* Catch a bug in an HP-UX C compiler. See - http://gcc.gnu.org/ml/gcc-patches/2003-12/msg02303.html - http://lists.gnu.org/archive/html/bug-coreutils/2005-11/msg00161.html - */ - _Bool q = true; - _Bool *pq = &q; - -int -main () -{ - - bool e = &s; - *pq |= q; - *pq |= ! q; - /* Refer to every declared value, to avoid compiler optimizations. 
*/ - return (!a + !b + !c + !d + !e + !f + !g + !h + !i + !!j + !k + !!l - + !m + !n + !o + !p + !q + !pq); - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_header_stdbool_h=yes -else - ac_cv_header_stdbool_h=no -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdbool_h" >&5 -$as_echo "$ac_cv_header_stdbool_h" >&6; } -ac_fn_c_check_type "$LINENO" "_Bool" "ac_cv_type__Bool" "$ac_includes_default" -if test "x$ac_cv_type__Bool" = xyes; then : - -cat >>confdefs.h <<_ACEOF -#define HAVE__BOOL 1 -_ACEOF - - -fi - -if test $ac_cv_header_stdbool_h = yes; then - -$as_echo "#define HAVE_STDBOOL_H 1" >>confdefs.h - -fi - - -ac_config_commands="$ac_config_commands include/jemalloc/internal/size_classes.h" - - - - -ac_config_headers="$ac_config_headers $cfghdrs_tup" - - -ac_config_files="$ac_config_files $cfgoutputs_tup config.stamp bin/jemalloc.sh" - - - -cat >confcache <<\_ACEOF -# This file is a shell script that caches the results of configure -# tests run on this system so they can be shared between configure -# scripts and configure runs, see configure's option --config-cache. -# It is not useful on other systems. If it contains results you don't -# want to keep, you may remove or edit it. -# -# config.status only pays attention to the cache file if you give it -# the --recheck option to rerun configure. -# -# `ac_cv_env_foo' variables (set or unset) will be overridden when -# loading this file, other *unset* `ac_cv_foo' will be assigned the -# following values. - -_ACEOF - -# The following way of writing the cache mishandles newlines in values, -# but we know of no workaround that is simple, portable, and efficient. -# So, we kill variables containing newlines. -# Ultrix sh set writes to stderr and can't be redirected directly, -# and sets the high bit in the cache file unless we assign to the vars. 
-( - for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do - eval ac_val=\$$ac_var - case $ac_val in #( - *${as_nl}*) - case $ac_var in #( - *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 -$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; - esac - case $ac_var in #( - _ | IFS | as_nl) ;; #( - BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( - *) { eval $ac_var=; unset $ac_var;} ;; - esac ;; - esac - done - - (set) 2>&1 | - case $as_nl`(ac_space=' '; set) 2>&1` in #( - *${as_nl}ac_space=\ *) - # `set' does not quote correctly, so add quotes: double-quote - # substitution turns \\\\ into \\, and sed turns \\ into \. - sed -n \ - "s/'/'\\\\''/g; - s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" - ;; #( - *) - # `set' quotes correctly as required by POSIX, so do not add quotes. - sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" - ;; - esac | - sort -) | - sed ' - /^ac_cv_env_/b end - t clear - :clear - s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ - t end - s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ - :end' >>confcache -if diff "$cache_file" confcache >/dev/null 2>&1; then :; else - if test -w "$cache_file"; then - if test "x$cache_file" != "x/dev/null"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 -$as_echo "$as_me: updating cache $cache_file" >&6;} - if test ! -f "$cache_file" || test -h "$cache_file"; then - cat confcache >"$cache_file" - else - case $cache_file in #( - */* | ?:*) - mv -f confcache "$cache_file"$$ && - mv -f "$cache_file"$$ "$cache_file" ;; #( - *) - mv -f confcache "$cache_file" ;; - esac - fi - fi - else - { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 -$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} - fi -fi -rm -f confcache - -test "x$prefix" = xNONE && prefix=$ac_default_prefix -# Let make expand exec_prefix. 
-test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' - -DEFS=-DHAVE_CONFIG_H - -ac_libobjs= -ac_ltlibobjs= -U= -for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue - # 1. Remove the extension, and $U if already installed. - ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' - ac_i=`$as_echo "$ac_i" | sed "$ac_script"` - # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR - # will be set to the directory where LIBOBJS objects are built. - as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" - as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' -done -LIBOBJS=$ac_libobjs - -LTLIBOBJS=$ac_ltlibobjs - - - -: "${CONFIG_STATUS=./config.status}" -ac_write_fail=0 -ac_clean_files_save=$ac_clean_files -ac_clean_files="$ac_clean_files $CONFIG_STATUS" -{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 -$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} -as_write_fail=0 -cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 -#! $SHELL -# Generated by $as_me. -# Run this file to recreate the current configuration. -# Compiler output produced by configure, useful for debugging -# configure, is in config.log if it exists. - -debug=false -ac_cs_recheck=false -ac_cs_silent=false - -SHELL=\${CONFIG_SHELL-$SHELL} -export SHELL -_ASEOF -cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 -## -------------------- ## -## M4sh Initialization. ## -## -------------------- ## - -# Be more Bourne compatible -DUALCASE=1; export DUALCASE # for MKS sh -if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : - emulate sh - NULLCMD=: - # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which - # is contrary to our usage. Disable this feature. - alias -g '${1+"$@"}'='"$@"' - setopt NO_GLOB_SUBST -else - case `(set -o) 2>/dev/null` in #( - *posix*) : - set -o posix ;; #( - *) : - ;; -esac -fi - - -as_nl=' -' -export as_nl -# Printing a long string crashes Solaris 7 /usr/bin/printf. 
-as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo -# Prefer a ksh shell builtin over an external printf program on Solaris, -# but without wasting forks for bash or zsh. -if test -z "$BASH_VERSION$ZSH_VERSION" \ - && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='print -r --' - as_echo_n='print -rn --' -elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='printf %s\n' - as_echo_n='printf %s' -else - if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then - as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' - as_echo_n='/usr/ucb/echo -n' - else - as_echo_body='eval expr "X$1" : "X\\(.*\\)"' - as_echo_n_body='eval - arg=$1; - case $arg in #( - *"$as_nl"*) - expr "X$arg" : "X\\(.*\\)$as_nl"; - arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; - esac; - expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" - ' - export as_echo_n_body - as_echo_n='sh -c $as_echo_n_body as_echo' - fi - export as_echo_body - as_echo='sh -c $as_echo_body as_echo' -fi - -# The user is always right. -if test "${PATH_SEPARATOR+set}" != set; then - PATH_SEPARATOR=: - (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { - (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || - PATH_SEPARATOR=';' - } -fi - - -# IFS -# We need space, tab and new line, in precisely that order. Quoting is -# there to prevent editors from complaining about space-tab. -# (If _AS_PATH_WALK were called with IFS unset, it would disable word -# splitting by setting IFS to empty value.) -IFS=" "" $as_nl" - -# Find who we are. Look in the path if we contain no directory separator. -as_myself= -case $0 in #(( - *[\\/]* ) as_myself=$0 ;; - *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break - done -IFS=$as_save_IFS - - ;; -esac -# We did not find ourselves, most probably we were run as `sh COMMAND' -# in which case we are not to be found in the path. -if test "x$as_myself" = x; then - as_myself=$0 -fi -if test ! -f "$as_myself"; then - $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 - exit 1 -fi - -# Unset variables that we do not need and which cause bugs (e.g. in -# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" -# suppresses any "Segmentation fault" message there. '((' could -# trigger a bug in pdksh 5.2.14. -for as_var in BASH_ENV ENV MAIL MAILPATH -do eval test x\${$as_var+set} = xset \ - && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : -done -PS1='$ ' -PS2='> ' -PS4='+ ' - -# NLS nuisances. -LC_ALL=C -export LC_ALL -LANGUAGE=C -export LANGUAGE - -# CDPATH. -(unset CDPATH) >/dev/null 2>&1 && unset CDPATH - - -# as_fn_error STATUS ERROR [LINENO LOG_FD] -# ---------------------------------------- -# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are -# provided, also output the error to LOG_FD, referencing LINENO. Then exit the -# script with STATUS, using 1 if that was 0. -as_fn_error () -{ - as_status=$1; test $as_status -eq 0 && as_status=1 - if test "$4"; then - as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 - fi - $as_echo "$as_me: error: $2" >&2 - as_fn_exit $as_status -} # as_fn_error - - -# as_fn_set_status STATUS -# ----------------------- -# Set $? to STATUS, without forking. -as_fn_set_status () -{ - return $1 -} # as_fn_set_status - -# as_fn_exit STATUS -# ----------------- -# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. -as_fn_exit () -{ - set +e - as_fn_set_status $1 - exit $1 -} # as_fn_exit - -# as_fn_unset VAR -# --------------- -# Portably unset VAR. 
-as_fn_unset () -{ - { eval $1=; unset $1;} -} -as_unset=as_fn_unset -# as_fn_append VAR VALUE -# ---------------------- -# Append the text in VALUE to the end of the definition contained in VAR. Take -# advantage of any shell optimizations that allow amortized linear growth over -# repeated appends, instead of the typical quadratic growth present in naive -# implementations. -if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : - eval 'as_fn_append () - { - eval $1+=\$2 - }' -else - as_fn_append () - { - eval $1=\$$1\$2 - } -fi # as_fn_append - -# as_fn_arith ARG... -# ------------------ -# Perform arithmetic evaluation on the ARGs, and store the result in the -# global $as_val. Take advantage of shells that can avoid forks. The arguments -# must be portable across $(()) and expr. -if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : - eval 'as_fn_arith () - { - as_val=$(( $* )) - }' -else - as_fn_arith () - { - as_val=`expr "$@" || test $? -eq 1` - } -fi # as_fn_arith - - -if expr a : '\(a\)' >/dev/null 2>&1 && - test "X`expr 00001 : '.*\(...\)'`" = X001; then - as_expr=expr -else - as_expr=false -fi - -if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then - as_basename=basename -else - as_basename=false -fi - -if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then - as_dirname=dirname -else - as_dirname=false -fi - -as_me=`$as_basename -- "$0" || -$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ - X"$0" : 'X\(//\)$' \| \ - X"$0" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X/"$0" | - sed '/^.*\/\([^/][^/]*\)\/*$/{ - s//\1/ - q - } - /^X\/\(\/\/\)$/{ - s//\1/ - q - } - /^X\/\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - -# Avoid depending upon Character Ranges. 
-as_cr_letters='abcdefghijklmnopqrstuvwxyz' -as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' -as_cr_Letters=$as_cr_letters$as_cr_LETTERS -as_cr_digits='0123456789' -as_cr_alnum=$as_cr_Letters$as_cr_digits - -ECHO_C= ECHO_N= ECHO_T= -case `echo -n x` in #((((( --n*) - case `echo 'xy\c'` in - *c*) ECHO_T=' ';; # ECHO_T is single tab character. - xy) ECHO_C='\c';; - *) echo `echo ksh88 bug on AIX 6.1` > /dev/null - ECHO_T=' ';; - esac;; -*) - ECHO_N='-n';; -esac - -rm -f conf$$ conf$$.exe conf$$.file -if test -d conf$$.dir; then - rm -f conf$$.dir/conf$$.file -else - rm -f conf$$.dir - mkdir conf$$.dir 2>/dev/null -fi -if (echo >conf$$.file) 2>/dev/null; then - if ln -s conf$$.file conf$$ 2>/dev/null; then - as_ln_s='ln -s' - # ... but there are two gotchas: - # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. - # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. - # In both cases, we have to default to `cp -p'. - ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || - as_ln_s='cp -p' - elif ln conf$$.file conf$$ 2>/dev/null; then - as_ln_s=ln - else - as_ln_s='cp -p' - fi -else - as_ln_s='cp -p' -fi -rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file -rmdir conf$$.dir 2>/dev/null - - -# as_fn_mkdir_p -# ------------- -# Create "$as_dir" as a directory, including parents if necessary. -as_fn_mkdir_p () -{ - - case $as_dir in #( - -*) as_dir=./$as_dir;; - esac - test -d "$as_dir" || eval $as_mkdir_p || { - as_dirs= - while :; do - case $as_dir in #( - *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( - *) as_qdir=$as_dir;; - esac - as_dirs="'$as_qdir' $as_dirs" - as_dir=`$as_dirname -- "$as_dir" || -$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$as_dir" : 'X\(//\)[^/]' \| \ - X"$as_dir" : 'X\(//\)$' \| \ - X"$as_dir" : 'X\(/\)' \| . 
2>/dev/null || -$as_echo X"$as_dir" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - test -d "$as_dir" && break - done - test -z "$as_dirs" || eval "mkdir $as_dirs" - } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" - - -} # as_fn_mkdir_p -if mkdir -p . 2>/dev/null; then - as_mkdir_p='mkdir -p "$as_dir"' -else - test -d ./-p && rmdir ./-p - as_mkdir_p=false -fi - -if test -x / >/dev/null 2>&1; then - as_test_x='test -x' -else - if ls -dL / >/dev/null 2>&1; then - as_ls_L_option=L - else - as_ls_L_option= - fi - as_test_x=' - eval sh -c '\'' - if test -d "$1"; then - test -d "$1/."; - else - case $1 in #( - -*)set "./$1";; - esac; - case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #(( - ???[sx]*):;;*)false;;esac;fi - '\'' sh - ' -fi -as_executable_p=$as_test_x - -# Sed expression to map a string onto a valid CPP name. -as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" - -# Sed expression to map a string onto a valid variable name. -as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" - - -exec 6>&1 -## ----------------------------------- ## -## Main body of $CONFIG_STATUS script. ## -## ----------------------------------- ## -_ASEOF -test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 - -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -# Save the log message, to keep $0 and so on meaningful, and to -# report actual input values of CONFIG_FILES etc. instead of their -# values after options handling. -ac_log=" -This file was extended by $as_me, which was -generated by GNU Autoconf 2.68. 
Invocation command line was - - CONFIG_FILES = $CONFIG_FILES - CONFIG_HEADERS = $CONFIG_HEADERS - CONFIG_LINKS = $CONFIG_LINKS - CONFIG_COMMANDS = $CONFIG_COMMANDS - $ $0 $@ - -on `(hostname || uname -n) 2>/dev/null | sed 1q` -" - -_ACEOF - -case $ac_config_files in *" -"*) set x $ac_config_files; shift; ac_config_files=$*;; -esac - -case $ac_config_headers in *" -"*) set x $ac_config_headers; shift; ac_config_headers=$*;; -esac - - -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -# Files that config.status was made for. -config_files="$ac_config_files" -config_headers="$ac_config_headers" -config_commands="$ac_config_commands" - -_ACEOF - -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -ac_cs_usage="\ -\`$as_me' instantiates files and other configuration actions -from templates according to the current configuration. Unless the files -and actions are specified as TAGs, all are instantiated by default. - -Usage: $0 [OPTION]... [TAG]... - - -h, --help print this help, then exit - -V, --version print version number and configuration settings, then exit - --config print configuration, then exit - -q, --quiet, --silent - do not print progress messages - -d, --debug don't remove temporary files - --recheck update $as_me by reconfiguring in the same conditions - --file=FILE[:TEMPLATE] - instantiate the configuration file FILE - --header=FILE[:TEMPLATE] - instantiate the configuration header FILE - -Configuration files: -$config_files - -Configuration headers: -$config_headers - -Configuration commands: -$config_commands - -Report bugs to the package provider." - -_ACEOF -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" -ac_cs_version="\\ -config.status -configured by $0, generated by GNU Autoconf 2.68, - with options \\"\$ac_cs_config\\" - -Copyright (C) 2010 Free Software Foundation, Inc. 
-This config.status script is free software; the Free Software Foundation -gives unlimited permission to copy, distribute and modify it." - -ac_pwd='$ac_pwd' -srcdir='$srcdir' -INSTALL='$INSTALL' -test -n "\$AWK" || AWK=awk -_ACEOF - -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -# The default lists apply if the user does not specify any file. -ac_need_defaults=: -while test $# != 0 -do - case $1 in - --*=?*) - ac_option=`expr "X$1" : 'X\([^=]*\)='` - ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` - ac_shift=: - ;; - --*=) - ac_option=`expr "X$1" : 'X\([^=]*\)='` - ac_optarg= - ac_shift=: - ;; - *) - ac_option=$1 - ac_optarg=$2 - ac_shift=shift - ;; - esac - - case $ac_option in - # Handling of the options. - -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) - ac_cs_recheck=: ;; - --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) - $as_echo "$ac_cs_version"; exit ;; - --config | --confi | --conf | --con | --co | --c ) - $as_echo "$ac_cs_config"; exit ;; - --debug | --debu | --deb | --de | --d | -d ) - debug=: ;; - --file | --fil | --fi | --f ) - $ac_shift - case $ac_optarg in - *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; - '') as_fn_error $? "missing file argument" ;; - esac - as_fn_append CONFIG_FILES " '$ac_optarg'" - ac_need_defaults=false;; - --header | --heade | --head | --hea ) - $ac_shift - case $ac_optarg in - *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; - esac - as_fn_append CONFIG_HEADERS " '$ac_optarg'" - ac_need_defaults=false;; - --he | --h) - # Conflict between --help and --header - as_fn_error $? "ambiguous option: \`$1' -Try \`$0 --help' for more information.";; - --help | --hel | -h ) - $as_echo "$ac_cs_usage"; exit ;; - -q | -quiet | --quiet | --quie | --qui | --qu | --q \ - | -silent | --silent | --silen | --sile | --sil | --si | --s) - ac_cs_silent=: ;; - - # This is an error. - -*) as_fn_error $? 
"unrecognized option: \`$1' -Try \`$0 --help' for more information." ;; - - *) as_fn_append ac_config_targets " $1" - ac_need_defaults=false ;; - - esac - shift -done - -ac_configure_extra_args= - -if $ac_cs_silent; then - exec 6>/dev/null - ac_configure_extra_args="$ac_configure_extra_args --silent" -fi - -_ACEOF -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -if \$ac_cs_recheck; then - set X '$SHELL' '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion - shift - \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 - CONFIG_SHELL='$SHELL' - export CONFIG_SHELL - exec "\$@" -fi - -_ACEOF -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -exec 5>>config.log -{ - echo - sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX -## Running $as_me. ## -_ASBOX - $as_echo "$ac_log" -} >&5 - -_ACEOF -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -_ACEOF - -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 - -# Handling of arguments. -for ac_config_target in $ac_config_targets -do - case $ac_config_target in - "include/jemalloc/internal/size_classes.h") CONFIG_COMMANDS="$CONFIG_COMMANDS include/jemalloc/internal/size_classes.h" ;; - "$cfghdrs_tup") CONFIG_HEADERS="$CONFIG_HEADERS $cfghdrs_tup" ;; - "$cfgoutputs_tup") CONFIG_FILES="$CONFIG_FILES $cfgoutputs_tup" ;; - "config.stamp") CONFIG_FILES="$CONFIG_FILES config.stamp" ;; - "bin/jemalloc.sh") CONFIG_FILES="$CONFIG_FILES bin/jemalloc.sh" ;; - - *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; - esac -done - - -# If the user did not use the arguments to specify the items to instantiate, -# then the envvar interface is used. Set only those that are not. -# We use the long form for the default assignment because of an extremely -# bizarre bug on SunOS 4.1.3. 
-if $ac_need_defaults; then - test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files - test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers - test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands -fi - -# Have a temporary directory for convenience. Make it in the build tree -# simply because there is no reason against having it here, and in addition, -# creating and moving files from /tmp can sometimes cause problems. -# Hook for its removal unless debugging. -# Note that there is a small window in which the directory will not be cleaned: -# after its creation but before its name has been assigned to `$tmp'. -$debug || -{ - tmp= ac_tmp= - trap 'exit_status=$? - : "${ac_tmp:=$tmp}" - { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status -' 0 - trap 'as_fn_exit 1' 1 2 13 15 -} -# Create a (secure) tmp directory for tmp files. - -{ - tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && - test -d "$tmp" -} || -{ - tmp=./conf$$-$RANDOM - (umask 077 && mkdir "$tmp") -} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 -ac_tmp=$tmp - -# Set up the scripts for CONFIG_FILES section. -# No need to generate them if there are no CONFIG_FILES. -# This happens for instance with `./config.status config.h'. -if test -n "$CONFIG_FILES"; then - - -ac_cr=`echo X | tr X '\015'` -# On cygwin, bash can eat \r inside `` if the user requested igncr. -# But we know of no other shell where ac_cr would be empty at this -# point, so we can use a bashism as a fallback. -if test "x$ac_cr" = x; then - eval ac_cr=\$\'\\r\' -fi -ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` -if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then - ac_cs_awk_cr='\\r' -else - ac_cs_awk_cr=$ac_cr -fi - -echo 'BEGIN {' >"$ac_tmp/subs1.awk" && -_ACEOF - - -{ - echo "cat >conf$$subs.awk <<_ACEOF" && - echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && - echo "_ACEOF" -} >conf$$subs.sh || - as_fn_error $? 
"could not make $CONFIG_STATUS" "$LINENO" 5 -ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` -ac_delim='%!_!# ' -for ac_last_try in false false false false false :; do - . ./conf$$subs.sh || - as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 - - ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` - if test $ac_delim_n = $ac_delim_num; then - break - elif $ac_last_try; then - as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 - else - ac_delim="$ac_delim!$ac_delim _$ac_delim!! " - fi -done -rm -f conf$$subs.sh - -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && -_ACEOF -sed -n ' -h -s/^/S["/; s/!.*/"]=/ -p -g -s/^[^!]*!// -:repl -t repl -s/'"$ac_delim"'$// -t delim -:nl -h -s/\(.\{148\}\)..*/\1/ -t more1 -s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ -p -n -b repl -:more1 -s/["\\]/\\&/g; s/^/"/; s/$/"\\/ -p -g -s/.\{148\}// -t nl -:delim -h -s/\(.\{148\}\)..*/\1/ -t more2 -s/["\\]/\\&/g; s/^/"/; s/$/"/ -p -b -:more2 -s/["\\]/\\&/g; s/^/"/; s/$/"\\/ -p -g -s/.\{148\}// -t delim -' >$CONFIG_STATUS || ac_write_fail=1 -rm -f conf$$subs.awk -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -_ACAWK -cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && - for (key in S) S_is_set[key] = 1 - FS = "" - -} -{ - line = $ 0 - nfields = split(line, field, "@") - substed = 0 - len = length(field[1]) - for (i = 2; i < nfields; i++) { - key = field[i] - keylen = length(key) - if (S_is_set[key]) { - value = S[key] - line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) - len += length(value) + length(field[++i]) - substed = 1 - } else - len += 1 + keylen - } - - print line -} - -_ACAWK -_ACEOF -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then - sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" -else - cat -fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ - || as_fn_error $? 
"could not setup config files machinery" "$LINENO" 5 -_ACEOF - -# VPATH may cause trouble with some makes, so we remove sole $(srcdir), -# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and -# trailing colons and then remove the whole line if VPATH becomes empty -# (actually we leave an empty line to preserve line numbers). -if test "x$srcdir" = x.; then - ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ -h -s/// -s/^/:/ -s/[ ]*$/:/ -s/:\$(srcdir):/:/g -s/:\${srcdir}:/:/g -s/:@srcdir@:/:/g -s/^:*// -s/:*$// -x -s/\(=[ ]*\).*/\1/ -G -s/\n// -s/^[^=]*=[ ]*$// -}' -fi - -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -fi # test -n "$CONFIG_FILES" - -# Set up the scripts for CONFIG_HEADERS section. -# No need to generate them if there are no CONFIG_HEADERS. -# This happens for instance with `./config.status Makefile'. -if test -n "$CONFIG_HEADERS"; then -cat >"$ac_tmp/defines.awk" <<\_ACAWK || -BEGIN { -_ACEOF - -# Transform confdefs.h into an awk script `defines.awk', embedded as -# here-document in config.status, that substitutes the proper values into -# config.h.in to produce config.h. - -# Create a delimiter string that does not exist in confdefs.h, to ease -# handling of long lines. -ac_delim='%!_!# ' -for ac_last_try in false false :; do - ac_tt=`sed -n "/$ac_delim/p" confdefs.h` - if test -z "$ac_tt"; then - break - elif $ac_last_try; then - as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 - else - ac_delim="$ac_delim!$ac_delim _$ac_delim!! " - fi -done - -# For the awk script, D is an array of macro values keyed by name, -# likewise P contains macro parameters if any. Preserve backslash -# newline sequences. 
- -ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* -sed -n ' -s/.\{148\}/&'"$ac_delim"'/g -t rset -:rset -s/^[ ]*#[ ]*define[ ][ ]*/ / -t def -d -:def -s/\\$// -t bsnl -s/["\\]/\\&/g -s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ -D["\1"]=" \3"/p -s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p -d -:bsnl -s/["\\]/\\&/g -s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ -D["\1"]=" \3\\\\\\n"\\/p -t cont -s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p -t cont -d -:cont -n -s/.\{148\}/&'"$ac_delim"'/g -t clear -:clear -s/\\$// -t bsnlc -s/["\\]/\\&/g; s/^/"/; s/$/"/p -d -:bsnlc -s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p -b cont -' >$CONFIG_STATUS || ac_write_fail=1 - -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 - for (key in D) D_is_set[key] = 1 - FS = "" -} -/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { - line = \$ 0 - split(line, arg, " ") - if (arg[1] == "#") { - defundef = arg[2] - mac1 = arg[3] - } else { - defundef = substr(arg[1], 2) - mac1 = arg[2] - } - split(mac1, mac2, "(") #) - macro = mac2[1] - prefix = substr(line, 1, index(line, defundef) - 1) - if (D_is_set[macro]) { - # Preserve the white space surrounding the "#". - print prefix "define", macro P[macro] D[macro] - next - } else { - # Replace #undef with comments. This is necessary, for example, - # in the case of _POSIX_SOURCE, which is predefined and required - # on some systems where configure will not decide to define it. - if (defundef == "undef") { - print "/*", prefix defundef, macro, "*/" - next - } - } -} -{ print } -_ACAWK -_ACEOF -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 - as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 -fi # test -n "$CONFIG_HEADERS" - - -eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :C $CONFIG_COMMANDS" -shift -for ac_tag -do - case $ac_tag in - :[FHLC]) ac_mode=$ac_tag; continue;; - esac - case $ac_mode$ac_tag in - :[FHL]*:*);; - :L* | :C*:*) as_fn_error $? 
"invalid tag \`$ac_tag'" "$LINENO" 5;; - :[FH]-) ac_tag=-:-;; - :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; - esac - ac_save_IFS=$IFS - IFS=: - set x $ac_tag - IFS=$ac_save_IFS - shift - ac_file=$1 - shift - - case $ac_mode in - :L) ac_source=$1;; - :[FH]) - ac_file_inputs= - for ac_f - do - case $ac_f in - -) ac_f="$ac_tmp/stdin";; - *) # Look for the file first in the build tree, then in the source tree - # (if the path is not absolute). The absolute path cannot be DOS-style, - # because $ac_f cannot contain `:'. - test -f "$ac_f" || - case $ac_f in - [\\/$]*) false;; - *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; - esac || - as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; - esac - case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac - as_fn_append ac_file_inputs " '$ac_f'" - done - - # Let's still pretend it is `configure' which instantiates (i.e., don't - # use $as_me), people would be surprised to read: - # /* config.h. Generated by config.status. */ - configure_input='Generated from '` - $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' - `' by configure.' - if test x"$ac_file" != x-; then - configure_input="$ac_file. $configure_input" - { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 -$as_echo "$as_me: creating $ac_file" >&6;} - fi - # Neutralize special characters interpreted by sed in replacement strings. - case $configure_input in #( - *\&* | *\|* | *\\* ) - ac_sed_conf_input=`$as_echo "$configure_input" | - sed 's/[\\\\&|]/\\\\&/g'`;; #( - *) ac_sed_conf_input=$configure_input;; - esac - - case $ac_tag in - *:-:* | *:-) cat >"$ac_tmp/stdin" \ - || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; - esac - ;; - esac - - ac_dir=`$as_dirname -- "$ac_file" || -$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$ac_file" : 'X\(//\)[^/]' \| \ - X"$ac_file" : 'X\(//\)$' \| \ - X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || -$as_echo X"$ac_file" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - as_dir="$ac_dir"; as_fn_mkdir_p - ac_builddir=. - -case "$ac_dir" in -.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; -*) - ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` - # A ".." for each directory in $ac_dir_suffix. - ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` - case $ac_top_builddir_sub in - "") ac_top_builddir_sub=. ac_top_build_prefix= ;; - *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; - esac ;; -esac -ac_abs_top_builddir=$ac_pwd -ac_abs_builddir=$ac_pwd$ac_dir_suffix -# for backward compatibility: -ac_top_builddir=$ac_top_build_prefix - -case $srcdir in - .) # We are building in place. - ac_srcdir=. - ac_top_srcdir=$ac_top_builddir_sub - ac_abs_top_srcdir=$ac_pwd ;; - [\\/]* | ?:[\\/]* ) # Absolute name. - ac_srcdir=$srcdir$ac_dir_suffix; - ac_top_srcdir=$srcdir - ac_abs_top_srcdir=$srcdir ;; - *) # Relative name. - ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix - ac_top_srcdir=$ac_top_build_prefix$srcdir - ac_abs_top_srcdir=$ac_pwd/$srcdir ;; -esac -ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix - - - case $ac_mode in - :F) - # - # CONFIG_FILE - # - - case $INSTALL in - [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; - *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; - esac -_ACEOF - -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -# If the template does not know about datarootdir, expand it. -# FIXME: This hack should be removed a few years after 2.60. 
-ac_datarootdir_hack=; ac_datarootdir_seen= -ac_sed_dataroot=' -/datarootdir/ { - p - q -} -/@datadir@/p -/@docdir@/p -/@infodir@/p -/@localedir@/p -/@mandir@/p' -case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in -*datarootdir*) ac_datarootdir_seen=yes;; -*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 -$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} -_ACEOF -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 - ac_datarootdir_hack=' - s&@datadir@&$datadir&g - s&@docdir@&$docdir&g - s&@infodir@&$infodir&g - s&@localedir@&$localedir&g - s&@mandir@&$mandir&g - s&\\\${datarootdir}&$datarootdir&g' ;; -esac -_ACEOF - -# Neutralize VPATH when `$srcdir' = `.'. -# Shell code in configure.ac might set extrasub. -# FIXME: do we really want to maintain this feature? -cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -ac_sed_extra="$ac_vpsub -$extrasub -_ACEOF -cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 -:t -/@[a-zA-Z_][a-zA-Z_0-9]*@/!b -s|@configure_input@|$ac_sed_conf_input|;t t -s&@top_builddir@&$ac_top_builddir_sub&;t t -s&@top_build_prefix@&$ac_top_build_prefix&;t t -s&@srcdir@&$ac_srcdir&;t t -s&@abs_srcdir@&$ac_abs_srcdir&;t t -s&@top_srcdir@&$ac_top_srcdir&;t t -s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t -s&@builddir@&$ac_builddir&;t t -s&@abs_builddir@&$ac_abs_builddir&;t t -s&@abs_top_builddir@&$ac_abs_top_builddir&;t t -s&@INSTALL@&$ac_INSTALL&;t t -$ac_datarootdir_hack -" -eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ - >$ac_tmp/out || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 - -test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && - { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && - { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ - "$ac_tmp/out"`; test -z "$ac_out"; } && - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' -which seems to be undefined. Please make sure it is defined" >&5 -$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' -which seems to be undefined. Please make sure it is defined" >&2;} - - rm -f "$ac_tmp/stdin" - case $ac_file in - -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; - *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; - esac \ - || as_fn_error $? "could not create $ac_file" "$LINENO" 5 - ;; - :H) - # - # CONFIG_HEADER - # - if test x"$ac_file" != x-; then - { - $as_echo "/* $configure_input */" \ - && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" - } >"$ac_tmp/config.h" \ - || as_fn_error $? "could not create $ac_file" "$LINENO" 5 - if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then - { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 -$as_echo "$as_me: $ac_file is unchanged" >&6;} - else - rm -f "$ac_file" - mv "$ac_tmp/config.h" "$ac_file" \ - || as_fn_error $? "could not create $ac_file" "$LINENO" 5 - fi - else - $as_echo "/* $configure_input */" \ - && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ - || as_fn_error $? 
"could not create -" "$LINENO" 5 - fi - ;; - - :C) { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 -$as_echo "$as_me: executing $ac_file commands" >&6;} - ;; - esac - - - case $ac_file$ac_mode in - "include/jemalloc/internal/size_classes.h":C) - mkdir -p "include/jemalloc/internal" - "${srcdir}/include/jemalloc/internal/size_classes.sh" > "${objroot}include/jemalloc/internal/size_classes.h" - ;; - - esac -done # for ac_tag - - -as_fn_exit 0 -_ACEOF -ac_clean_files=$ac_clean_files_save - -test $ac_write_fail = 0 || - as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 - - -# configure is writing to config.log, and then calls config.status. -# config.status does its own redirection, appending to config.log. -# Unfortunately, on DOS this fails, as config.log is still kept open -# by configure, so config.status won't be able to write to it; its -# output is simply discarded. So we exec the FD to /dev/null, -# effectively closing config.log, so it can be properly (re)opened and -# appended to by config.status. When coming back to configure, we -# need to make the FD available again. -if test "$no_create" != yes; then - ac_cs_success=: - ac_config_status_args= - test "$silent" = yes && - ac_config_status_args="$ac_config_status_args --quiet" - exec 5>/dev/null - $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false - exec 5>>config.log - # Use ||, not &&, to avoid exiting from the if with $? = 1, which - # would make configure fail if this is the last instruction. 
- $ac_cs_success || as_fn_exit 1 -fi -if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 -$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} -fi - - -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ===============================================================================" >&5 -$as_echo "===============================================================================" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: jemalloc version : ${jemalloc_version}" >&5 -$as_echo "jemalloc version : ${jemalloc_version}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: library revision : ${rev}" >&5 -$as_echo "library revision : ${rev}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5 -$as_echo "" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: CC : ${CC}" >&5 -$as_echo "CC : ${CC}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: CPPFLAGS : ${CPPFLAGS}" >&5 -$as_echo "CPPFLAGS : ${CPPFLAGS}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: CFLAGS : ${CFLAGS}" >&5 -$as_echo "CFLAGS : ${CFLAGS}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: LDFLAGS : ${LDFLAGS}" >&5 -$as_echo "LDFLAGS : ${LDFLAGS}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: LIBS : ${LIBS}" >&5 -$as_echo "LIBS : ${LIBS}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: RPATH_EXTRA : ${RPATH_EXTRA}" >&5 -$as_echo "RPATH_EXTRA : ${RPATH_EXTRA}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5 -$as_echo "" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: XSLTPROC : ${XSLTPROC}" >&5 -$as_echo "XSLTPROC : ${XSLTPROC}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: XSLROOT : ${XSLROOT}" >&5 -$as_echo "XSLROOT : ${XSLROOT}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5 -$as_echo "" >&6; } -{ $as_echo 
"$as_me:${as_lineno-$LINENO}: result: PREFIX : ${PREFIX}" >&5 -$as_echo "PREFIX : ${PREFIX}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: BINDIR : ${BINDIR}" >&5 -$as_echo "BINDIR : ${BINDIR}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: INCLUDEDIR : ${INCLUDEDIR}" >&5 -$as_echo "INCLUDEDIR : ${INCLUDEDIR}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: LIBDIR : ${LIBDIR}" >&5 -$as_echo "LIBDIR : ${LIBDIR}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: DATADIR : ${DATADIR}" >&5 -$as_echo "DATADIR : ${DATADIR}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: MANDIR : ${MANDIR}" >&5 -$as_echo "MANDIR : ${MANDIR}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5 -$as_echo "" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: srcroot : ${srcroot}" >&5 -$as_echo "srcroot : ${srcroot}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: abs_srcroot : ${abs_srcroot}" >&5 -$as_echo "abs_srcroot : ${abs_srcroot}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: objroot : ${objroot}" >&5 -$as_echo "objroot : ${objroot}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: abs_objroot : ${abs_objroot}" >&5 -$as_echo "abs_objroot : ${abs_objroot}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5 -$as_echo "" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: JEMALLOC_PREFIX : ${JEMALLOC_PREFIX}" >&5 -$as_echo "JEMALLOC_PREFIX : ${JEMALLOC_PREFIX}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: JEMALLOC_PRIVATE_NAMESPACE" >&5 -$as_echo "JEMALLOC_PRIVATE_NAMESPACE" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: : ${JEMALLOC_PRIVATE_NAMESPACE}" >&5 -$as_echo " : ${JEMALLOC_PRIVATE_NAMESPACE}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: install_suffix : ${install_suffix}" >&5 -$as_echo "install_suffix : ${install_suffix}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: autogen : ${enable_autogen}" >&5 -$as_echo "autogen : 
${enable_autogen}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: experimental : ${enable_experimental}" >&5 -$as_echo "experimental : ${enable_experimental}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: cc-silence : ${enable_cc_silence}" >&5 -$as_echo "cc-silence : ${enable_cc_silence}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: debug : ${enable_debug}" >&5 -$as_echo "debug : ${enable_debug}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: stats : ${enable_stats}" >&5 -$as_echo "stats : ${enable_stats}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: prof : ${enable_prof}" >&5 -$as_echo "prof : ${enable_prof}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: prof-libunwind : ${enable_prof_libunwind}" >&5 -$as_echo "prof-libunwind : ${enable_prof_libunwind}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: prof-libgcc : ${enable_prof_libgcc}" >&5 -$as_echo "prof-libgcc : ${enable_prof_libgcc}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: prof-gcc : ${enable_prof_gcc}" >&5 -$as_echo "prof-gcc : ${enable_prof_gcc}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: tcache : ${enable_tcache}" >&5 -$as_echo "tcache : ${enable_tcache}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: fill : ${enable_fill}" >&5 -$as_echo "fill : ${enable_fill}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: utrace : ${enable_utrace}" >&5 -$as_echo "utrace : ${enable_utrace}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: valgrind : ${enable_valgrind}" >&5 -$as_echo "valgrind : ${enable_valgrind}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: xmalloc : ${enable_xmalloc}" >&5 -$as_echo "xmalloc : ${enable_xmalloc}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: mremap : ${enable_mremap}" >&5 -$as_echo "mremap : ${enable_mremap}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: munmap : ${enable_munmap}" >&5 -$as_echo "munmap : ${enable_munmap}" 
>&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: dss : ${enable_dss}" >&5 -$as_echo "dss : ${enable_dss}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: lazy_lock : ${enable_lazy_lock}" >&5 -$as_echo "lazy_lock : ${enable_lazy_lock}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: tls : ${enable_tls}" >&5 -$as_echo "tls : ${enable_tls}" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ===============================================================================" >&5 -$as_echo "===============================================================================" >&6; } diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/configure.ac mariadb-5.5-5.5.40/extra/jemalloc/configure.ac --- mariadb-5.5-5.5.39/extra/jemalloc/configure.ac 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/configure.ac 1970-01-01 00:00:00.000000000 +0000 @@ -1,1333 +0,0 @@ -dnl Process this file with autoconf to produce a configure script. -AC_INIT([Makefile.in]) - -dnl ============================================================================ -dnl Custom macro definitions. - -dnl JE_CFLAGS_APPEND(cflag) -AC_DEFUN([JE_CFLAGS_APPEND], -[ -AC_MSG_CHECKING([whether compiler supports $1]) -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="$1" -else - CFLAGS="${CFLAGS} $1" -fi -AC_COMPILE_IFELSE([AC_LANG_PROGRAM( -[[ -]], [[ - return 0; -]])], - AC_MSG_RESULT([yes]), - AC_MSG_RESULT([no]) - [CFLAGS="${TCFLAGS}"] -) -]) - -dnl JE_COMPILABLE(label, hcode, mcode, rvar) -dnl -dnl Use AC_LINK_IFELSE() rather than AC_COMPILE_IFELSE() so that linker errors -dnl cause failure. -AC_DEFUN([JE_COMPILABLE], -[ -AC_CACHE_CHECK([whether $1 is compilable], - [$4], - [AC_LINK_IFELSE([AC_LANG_PROGRAM([$2], - [$3])], - [$4=yes], - [$4=no])]) -]) - -dnl ============================================================================ - -dnl Library revision. -rev=1 -AC_SUBST([rev]) - -srcroot=$srcdir -if test "x${srcroot}" = "x." 
; then - srcroot="" -else - srcroot="${srcroot}/" -fi -AC_SUBST([srcroot]) -abs_srcroot="`cd \"${srcdir}\"; pwd`/" -AC_SUBST([abs_srcroot]) - -objroot="" -AC_SUBST([objroot]) -abs_objroot="`pwd`/" -AC_SUBST([abs_objroot]) - -dnl Munge install path variables. -if test "x$prefix" = "xNONE" ; then - prefix="/usr/local" -fi -if test "x$exec_prefix" = "xNONE" ; then - exec_prefix=$prefix -fi -PREFIX=$prefix -AC_SUBST([PREFIX]) -BINDIR=`eval echo $bindir` -BINDIR=`eval echo $BINDIR` -AC_SUBST([BINDIR]) -INCLUDEDIR=`eval echo $includedir` -INCLUDEDIR=`eval echo $INCLUDEDIR` -AC_SUBST([INCLUDEDIR]) -LIBDIR=`eval echo $libdir` -LIBDIR=`eval echo $LIBDIR` -AC_SUBST([LIBDIR]) -DATADIR=`eval echo $datadir` -DATADIR=`eval echo $DATADIR` -AC_SUBST([DATADIR]) -MANDIR=`eval echo $mandir` -MANDIR=`eval echo $MANDIR` -AC_SUBST([MANDIR]) - -dnl Support for building documentation. -AC_PATH_PROG([XSLTPROC], [xsltproc], [false], [$PATH]) -if test -d "/usr/share/xml/docbook/stylesheet/docbook-xsl" ; then - DEFAULT_XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl" -elif test -d "/usr/share/sgml/docbook/xsl-stylesheets" ; then - DEFAULT_XSLROOT="/usr/share/sgml/docbook/xsl-stylesheets" -else - dnl Documentation building will fail if this default gets used. - DEFAULT_XSLROOT="" -fi -AC_ARG_WITH([xslroot], - [AS_HELP_STRING([--with-xslroot=], [XSL stylesheet root path])], [ -if test "x$with_xslroot" = "xno" ; then - XSLROOT="${DEFAULT_XSLROOT}" -else - XSLROOT="${with_xslroot}" -fi -], - XSLROOT="${DEFAULT_XSLROOT}" -) -AC_SUBST([XSLROOT]) - -dnl If CFLAGS isn't defined, set CFLAGS to something reasonable. Otherwise, -dnl just prevent autoconf from molesting CFLAGS. 
-CFLAGS=$CFLAGS -AC_PROG_CC -if test "x$GCC" != "xyes" ; then - AC_CACHE_CHECK([whether compiler is MSVC], - [je_cv_msvc], - [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], - [ -#ifndef _MSC_VER - int fail[-1]; -#endif -])], - [je_cv_msvc=yes], - [je_cv_msvc=no])]) -fi - -if test "x$CFLAGS" = "x" ; then - no_CFLAGS="yes" - if test "x$GCC" = "xyes" ; then - JE_CFLAGS_APPEND([-std=gnu99]) - JE_CFLAGS_APPEND([-Wall]) - JE_CFLAGS_APPEND([-pipe]) - JE_CFLAGS_APPEND([-g3]) - elif test "x$je_cv_msvc" = "xyes" ; then - CC="$CC -nologo" - JE_CFLAGS_APPEND([-Zi]) - JE_CFLAGS_APPEND([-MT]) - JE_CFLAGS_APPEND([-W3]) - CPPFLAGS="$CPPFLAGS -I${srcroot}/include/msvc_compat" - fi -fi -dnl Append EXTRA_CFLAGS to CFLAGS, if defined. -if test "x$EXTRA_CFLAGS" != "x" ; then - JE_CFLAGS_APPEND([$EXTRA_CFLAGS]) -fi -AC_PROG_CPP - -AC_CHECK_SIZEOF([void *]) -if test "x${ac_cv_sizeof_void_p}" = "x8" ; then - LG_SIZEOF_PTR=3 -elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then - LG_SIZEOF_PTR=2 -else - AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}]) -fi -AC_DEFINE_UNQUOTED([LG_SIZEOF_PTR], [$LG_SIZEOF_PTR]) - -AC_CHECK_SIZEOF([int]) -if test "x${ac_cv_sizeof_int}" = "x8" ; then - LG_SIZEOF_INT=3 -elif test "x${ac_cv_sizeof_int}" = "x4" ; then - LG_SIZEOF_INT=2 -else - AC_MSG_ERROR([Unsupported int size: ${ac_cv_sizeof_int}]) -fi -AC_DEFINE_UNQUOTED([LG_SIZEOF_INT], [$LG_SIZEOF_INT]) - -AC_CHECK_SIZEOF([long]) -if test "x${ac_cv_sizeof_long}" = "x8" ; then - LG_SIZEOF_LONG=3 -elif test "x${ac_cv_sizeof_long}" = "x4" ; then - LG_SIZEOF_LONG=2 -else - AC_MSG_ERROR([Unsupported long size: ${ac_cv_sizeof_long}]) -fi -AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG]) - -AC_CHECK_SIZEOF([intmax_t]) -if test "x${ac_cv_sizeof_intmax_t}" = "x16" ; then - LG_SIZEOF_INTMAX_T=4 -elif test "x${ac_cv_sizeof_intmax_t}" = "x8" ; then - LG_SIZEOF_INTMAX_T=3 -elif test "x${ac_cv_sizeof_intmax_t}" = "x4" ; then - LG_SIZEOF_INTMAX_T=2 -else - AC_MSG_ERROR([Unsupported intmax_t size: 
${ac_cv_sizeof_intmax_t}]) -fi -AC_DEFINE_UNQUOTED([LG_SIZEOF_INTMAX_T], [$LG_SIZEOF_INTMAX_T]) - -AC_CANONICAL_HOST -dnl CPU-specific settings. -CPU_SPINWAIT="" -case "${host_cpu}" in - i[[345]]86) - ;; - i686) - JE_COMPILABLE([__asm__], [], [[__asm__ volatile("pause"); return 0;]], - [je_cv_asm]) - if test "x${je_cv_asm}" = "xyes" ; then - CPU_SPINWAIT='__asm__ volatile("pause")' - fi - ;; - x86_64) - JE_COMPILABLE([__asm__ syntax], [], - [[__asm__ volatile("pause"); return 0;]], [je_cv_asm]) - if test "x${je_cv_asm}" = "xyes" ; then - CPU_SPINWAIT='__asm__ volatile("pause")' - fi - ;; - *) - ;; -esac -AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) - -LD_PRELOAD_VAR="LD_PRELOAD" -so="so" -importlib="${so}" -o="$ac_objext" -a="a" -exe="$ac_exeext" -libprefix="lib" -DSO_LDFLAGS='-shared -Wl,-soname,$(@F)' -RPATH='-Wl,-rpath,$(1)' -SOREV="${so}.${rev}" -PIC_CFLAGS='-fPIC -DPIC' -CTARGET='-o $@' -LDTARGET='-o $@' -EXTRA_LDFLAGS= -MKLIB='ar crus $@' -CC_MM=1 - -dnl Platform-specific settings. abi and RPATH can probably be determined -dnl programmatically, but doing so is error-prone, which makes it generally -dnl not worth the trouble. -dnl -dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the -dnl definitions need to be seen before any headers are included, which is a pain -dnl to make happen otherwise. 
-default_munmap="1" -JEMALLOC_USABLE_SIZE_CONST="const" -case "${host}" in - *-*-darwin*) - CFLAGS="$CFLAGS" - abi="macho" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) - RPATH="" - LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES" - so="dylib" - importlib="${so}" - force_tls="0" - DSO_LDFLAGS='-shared -Wl,-dylib_install_name,$(@F)' - SOREV="${rev}.${so}" - ;; - *-*-freebsd*) - CFLAGS="$CFLAGS" - abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) - force_lazy_lock="1" - ;; - *-*-linux*) - CFLAGS="$CFLAGS" - CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" - abi="elf" - AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) - AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) - AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) - JEMALLOC_USABLE_SIZE_CONST="" - default_munmap="0" - ;; - *-*-netbsd*) - AC_MSG_CHECKING([ABI]) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM( -[[#ifdef __ELF__ -/* ELF */ -#else -#error aout -#endif -]])], - [CFLAGS="$CFLAGS"; abi="elf"], - [abi="aout"]) - AC_MSG_RESULT([$abi]) - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) - ;; - *-*-solaris2*) - CFLAGS="$CFLAGS" - abi="elf" - RPATH='-Wl,-R,$(1)' - dnl Solaris needs this for sigwait(). 
- CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS" - LIBS="$LIBS -lposix4 -lsocket -lnsl" - ;; - *-ibm-aix*) - if "$LG_SIZEOF_PTR" = "8"; then - dnl 64bit AIX - LD_PRELOAD_VAR="LDR_PRELOAD64" - else - dnl 32bit AIX - LD_PRELOAD_VAR="LDR_PRELOAD" - fi - abi="xcoff" - ;; - *-*-mingw*) - abi="pecoff" - force_tls="0" - RPATH="" - so="dll" - if test "x$je_cv_msvc" = "xyes" ; then - importlib="lib" - DSO_LDFLAGS="-LD" - EXTRA_LDFLAGS="-link -DEBUG" - CTARGET='-Fo$@' - LDTARGET='-Fe$@' - MKLIB='lib -nologo -out:$@' - CC_MM= - else - importlib="${so}" - DSO_LDFLAGS="-shared" - fi - a="lib" - libprefix="" - SOREV="${so}" - PIC_CFLAGS="" - ;; - *) - AC_MSG_RESULT([Unsupported operating system: ${host}]) - abi="elf" - ;; -esac -AC_DEFINE_UNQUOTED([JEMALLOC_USABLE_SIZE_CONST], [$JEMALLOC_USABLE_SIZE_CONST]) -AC_SUBST([abi]) -AC_SUBST([RPATH]) -AC_SUBST([LD_PRELOAD_VAR]) -AC_SUBST([so]) -AC_SUBST([importlib]) -AC_SUBST([o]) -AC_SUBST([a]) -AC_SUBST([exe]) -AC_SUBST([libprefix]) -AC_SUBST([DSO_LDFLAGS]) -AC_SUBST([EXTRA_LDFLAGS]) -AC_SUBST([SOREV]) -AC_SUBST([PIC_CFLAGS]) -AC_SUBST([CTARGET]) -AC_SUBST([LDTARGET]) -AC_SUBST([MKLIB]) -AC_SUBST([CC_MM]) - -if test "x$abi" != "xpecoff"; then - dnl Heap profiling uses the log(3) function. - LIBS="$LIBS -lm" -fi - -JE_COMPILABLE([__attribute__ syntax], - [static __attribute__((unused)) void foo(void){}], - [], - [je_cv_attribute]) -if test "x${je_cv_attribute}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ]) - if test "x${GCC}" = "xyes" -a "x${abi}" = "xelf"; then - JE_CFLAGS_APPEND([-fvisibility=hidden]) - fi -fi -dnl Check for tls_model attribute support (clang 3.0 still lacks support). 
-SAVED_CFLAGS="${CFLAGS}" -JE_CFLAGS_APPEND([-Werror]) -JE_COMPILABLE([tls_model attribute], [], - [static __thread int - __attribute__((tls_model("initial-exec"))) foo; - foo = 0;], - [je_cv_tls_model]) -CFLAGS="${SAVED_CFLAGS}" -if test "x${je_cv_tls_model}" = "xyes" ; then - AC_DEFINE([JEMALLOC_TLS_MODEL], - [__attribute__((tls_model("initial-exec")))]) -else - AC_DEFINE([JEMALLOC_TLS_MODEL], [ ]) -fi - -dnl Support optional additions to rpath. -AC_ARG_WITH([rpath], - [AS_HELP_STRING([--with-rpath=], [Colon-separated rpath (ELF systems only)])], -if test "x$with_rpath" = "xno" ; then - RPATH_EXTRA= -else - RPATH_EXTRA="`echo $with_rpath | tr \":\" \" \"`" -fi, - RPATH_EXTRA= -) -AC_SUBST([RPATH_EXTRA]) - -dnl Disable rules that do automatic regeneration of configure output by default. -AC_ARG_ENABLE([autogen], - [AS_HELP_STRING([--enable-autogen], [Automatically regenerate configure output])], -if test "x$enable_autogen" = "xno" ; then - enable_autogen="0" -else - enable_autogen="1" -fi -, -enable_autogen="0" -) -AC_SUBST([enable_autogen]) - -AC_PROG_INSTALL -AC_PROG_RANLIB -AC_PATH_PROG([AR], [ar], [false], [$PATH]) -AC_PATH_PROG([LD], [ld], [false], [$PATH]) -AC_PATH_PROG([AUTOCONF], [autoconf], [false], [$PATH]) - -public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free malloc_usable_size malloc_stats_print mallctl mallctlnametomib mallctlbymib" - -dnl Check for allocator-related functions that should be wrapped. -AC_CHECK_FUNC([memalign], - [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ]) - public_syms="${public_syms} memalign"]) -AC_CHECK_FUNC([valloc], - [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ]) - public_syms="${public_syms} valloc"]) - -dnl Support the experimental API by default. 
-AC_ARG_ENABLE([experimental], - [AS_HELP_STRING([--disable-experimental], - [Disable support for the experimental API])], -[if test "x$enable_experimental" = "xno" ; then - enable_experimental="0" -else - enable_experimental="1" -fi -], -[enable_experimental="1"] -) -if test "x$enable_experimental" = "x1" ; then - AC_DEFINE([JEMALLOC_EXPERIMENTAL], [ ]) - public_syms="${public_syms} allocm dallocm nallocm rallocm sallocm" -fi -AC_SUBST([enable_experimental]) - -dnl Perform no name mangling by default. -AC_ARG_WITH([mangling], - [AS_HELP_STRING([--with-mangling=], [Mangle symbols in ])], - [mangling_map="$with_mangling"], [mangling_map=""]) -for nm in `echo ${mangling_map} |tr ',' ' '` ; do - k="`echo ${nm} |tr ':' ' ' |awk '{print $1}'`" - n="je_${k}" - m=`echo ${nm} |tr ':' ' ' |awk '{print $2}'` - AC_DEFINE_UNQUOTED([${n}], [${m}]) - dnl Remove key from public_syms so that it isn't redefined later. - public_syms=`for sym in ${public_syms}; do echo "${sym}"; done |grep -v "^${k}\$" |tr '\n' ' '` -done - -dnl Do not prefix public APIs by default. -AC_ARG_WITH([jemalloc_prefix], - [AS_HELP_STRING([--with-jemalloc-prefix=], [Prefix to prepend to all public APIs])], - [JEMALLOC_PREFIX="$with_jemalloc_prefix"], - [if test "x$abi" != "xmacho" -a "x$abi" != "xpecoff"; then - JEMALLOC_PREFIX="" -else - JEMALLOC_PREFIX="je_" -fi] -) -if test "x$JEMALLOC_PREFIX" != "x" ; then - JEMALLOC_CPREFIX=`echo ${JEMALLOC_PREFIX} | tr "a-z" "A-Z"` - AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"]) - AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"]) -fi -dnl Generate macros to rename public symbols. All public symbols are prefixed -dnl with je_ in the source code, so these macro definitions are needed even if -dnl --with-jemalloc-prefix wasn't specified. 
-for stem in ${public_syms}; do - n="je_${stem}" - m="${JEMALLOC_PREFIX}${stem}" - AC_DEFINE_UNQUOTED([${n}], [${m}]) -done - -AC_ARG_WITH([export], - [AS_HELP_STRING([--without-export], [disable exporting jemalloc public APIs])], - [if test "x$with_export" = "xno"; then - AC_DEFINE([JEMALLOC_EXPORT],[]) -fi] -) - -dnl Do not mangle library-private APIs by default. -AC_ARG_WITH([private_namespace], - [AS_HELP_STRING([--with-private-namespace=], [Prefix to prepend to all library-private APIs])], - [JEMALLOC_PRIVATE_NAMESPACE="$with_private_namespace"], - [JEMALLOC_PRIVATE_NAMESPACE=""] -) -AC_DEFINE_UNQUOTED([JEMALLOC_PRIVATE_NAMESPACE], ["$JEMALLOC_PRIVATE_NAMESPACE"]) -if test "x$JEMALLOC_PRIVATE_NAMESPACE" != "x" ; then - AC_DEFINE_UNQUOTED([JEMALLOC_N(string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix)], [${JEMALLOC_PRIVATE_NAMESPACE}##string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix]) -else - AC_DEFINE_UNQUOTED([JEMALLOC_N(string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix)], [string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix]) -fi - -dnl Do not add suffix to installed files by default. 
-AC_ARG_WITH([install_suffix], - [AS_HELP_STRING([--with-install-suffix=], [Suffix to append to all installed files])], - [INSTALL_SUFFIX="$with_install_suffix"], - [INSTALL_SUFFIX=] -) -install_suffix="$INSTALL_SUFFIX" -AC_SUBST([install_suffix]) - -cfgoutputs_in="${srcroot}Makefile.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/html.xsl.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/manpages.xsl.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/jemalloc.xml.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc.h.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal.h.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/jemalloc_test.h.in" - -cfgoutputs_out="Makefile" -cfgoutputs_out="${cfgoutputs_out} doc/html.xsl" -cfgoutputs_out="${cfgoutputs_out} doc/manpages.xsl" -cfgoutputs_out="${cfgoutputs_out} doc/jemalloc${install_suffix}.xml" -cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc${install_suffix}.h" -cfgoutputs_out="${cfgoutputs_out} include/jemalloc/internal/jemalloc_internal.h" -cfgoutputs_out="${cfgoutputs_out} test/jemalloc_test.h" - -cfgoutputs_tup="Makefile" -cfgoutputs_tup="${cfgoutputs_tup} doc/html.xsl:doc/html.xsl.in" -cfgoutputs_tup="${cfgoutputs_tup} doc/manpages.xsl:doc/manpages.xsl.in" -cfgoutputs_tup="${cfgoutputs_tup} doc/jemalloc${install_suffix}.xml:doc/jemalloc.xml.in" -cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc${install_suffix}.h:include/jemalloc/jemalloc.h.in" -cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h" -cfgoutputs_tup="${cfgoutputs_tup} test/jemalloc_test.h:test/jemalloc_test.h.in" - -cfghdrs_in="${srcroot}include/jemalloc/jemalloc_defs.h.in" -cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/size_classes.sh" - -cfghdrs_out="include/jemalloc/jemalloc_defs${install_suffix}.h" -cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/size_classes.h" - 
-cfghdrs_tup="include/jemalloc/jemalloc_defs${install_suffix}.h:include/jemalloc/jemalloc_defs.h.in" - -dnl Do not silence irrelevant compiler warnings by default, since enabling this -dnl option incurs a performance penalty. -AC_ARG_ENABLE([cc-silence], - [AS_HELP_STRING([--enable-cc-silence], - [Silence irrelevant compiler warnings])], -[if test "x$enable_cc_silence" = "xno" ; then - enable_cc_silence="0" -else - enable_cc_silence="1" -fi -], -[enable_cc_silence="0"] -) -if test "x$enable_cc_silence" = "x1" ; then - AC_DEFINE([JEMALLOC_CC_SILENCE], [ ]) -fi - -dnl Do not compile with debugging by default. -AC_ARG_ENABLE([debug], - [AS_HELP_STRING([--enable-debug], [Build debugging code (implies --enable-ivsalloc)])], -[if test "x$enable_debug" = "xno" ; then - enable_debug="0" -else - enable_debug="1" -fi -], -[enable_debug="0"] -) -if test "x$enable_debug" = "x1" ; then - AC_DEFINE([JEMALLOC_DEBUG], [ ]) - enable_ivsalloc="1" -fi -AC_SUBST([enable_debug]) - -dnl Do not validate pointers by default. -AC_ARG_ENABLE([ivsalloc], - [AS_HELP_STRING([--enable-ivsalloc], [Validate pointers passed through the public API])], -[if test "x$enable_ivsalloc" = "xno" ; then - enable_ivsalloc="0" -else - enable_ivsalloc="1" -fi -], -[enable_ivsalloc="0"] -) -if test "x$enable_ivsalloc" = "x1" ; then - AC_DEFINE([JEMALLOC_IVSALLOC], [ ]) -fi - -dnl Only optimize if not debugging. -if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then - dnl Make sure that an optimization flag was not specified in EXTRA_CFLAGS. - optimize="no" - echo "$EXTRA_CFLAGS" | grep "\-O" >/dev/null || optimize="yes" - if test "x${optimize}" = "xyes" ; then - if test "x$GCC" = "xyes" ; then - JE_CFLAGS_APPEND([-O3]) - JE_CFLAGS_APPEND([-funroll-loops]) - elif test "x$je_cv_msvc" = "xyes" ; then - JE_CFLAGS_APPEND([-O2]) - else - JE_CFLAGS_APPEND([-O]) - fi - fi -fi - -dnl Enable statistics calculation by default. 
-AC_ARG_ENABLE([stats], - [AS_HELP_STRING([--disable-stats], - [Disable statistics calculation/reporting])], -[if test "x$enable_stats" = "xno" ; then - enable_stats="0" -else - enable_stats="1" -fi -], -[enable_stats="1"] -) -if test "x$enable_stats" = "x1" ; then - AC_DEFINE([JEMALLOC_STATS], [ ]) -fi -AC_SUBST([enable_stats]) - -dnl Do not enable profiling by default. -AC_ARG_ENABLE([prof], - [AS_HELP_STRING([--enable-prof], [Enable allocation profiling])], -[if test "x$enable_prof" = "xno" ; then - enable_prof="0" -else - enable_prof="1" -fi -], -[enable_prof="0"] -) -if test "x$enable_prof" = "x1" ; then - backtrace_method="" -else - backtrace_method="N/A" -fi - -AC_ARG_ENABLE([prof-libunwind], - [AS_HELP_STRING([--enable-prof-libunwind], [Use libunwind for backtracing])], -[if test "x$enable_prof_libunwind" = "xno" ; then - enable_prof_libunwind="0" -else - enable_prof_libunwind="1" -fi -], -[enable_prof_libunwind="0"] -) -AC_ARG_WITH([static_libunwind], - [AS_HELP_STRING([--with-static-libunwind=], - [Path to static libunwind library; use rather than dynamically linking])], -if test "x$with_static_libunwind" = "xno" ; then - LUNWIND="-lunwind" -else - if test ! 
-f "$with_static_libunwind" ; then - AC_MSG_ERROR([Static libunwind not found: $with_static_libunwind]) - fi - LUNWIND="$with_static_libunwind" -fi, - LUNWIND="-lunwind" -) -if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then - AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"]) - if test "x$LUNWIND" = "x-lunwind" ; then - AC_CHECK_LIB([unwind], [backtrace], [LIBS="$LIBS $LUNWIND"], - [enable_prof_libunwind="0"]) - else - LIBS="$LIBS $LUNWIND" - fi - if test "x${enable_prof_libunwind}" = "x1" ; then - backtrace_method="libunwind" - AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ]) - fi -fi - -AC_ARG_ENABLE([prof-libgcc], - [AS_HELP_STRING([--disable-prof-libgcc], - [Do not use libgcc for backtracing])], -[if test "x$enable_prof_libgcc" = "xno" ; then - enable_prof_libgcc="0" -else - enable_prof_libgcc="1" -fi -], -[enable_prof_libgcc="1"] -) -if test "x$backtrace_method" = "x" -a "x$enable_prof_libgcc" = "x1" \ - -a "x$GCC" = "xyes" ; then - AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"]) - AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [LIBS="$LIBS -lgcc"], [enable_prof_libgcc="0"]) - dnl The following is conservative, in that it only has entries for CPUs on - dnl which jemalloc has been tested. 
- AC_MSG_CHECKING([libgcc-based backtracing reliability on ${host_cpu}]) - case "${host_cpu}" in - i[[3456]]86) - AC_MSG_RESULT([unreliable]) - enable_prof_libgcc="0"; - ;; - x86_64) - AC_MSG_RESULT([reliable]) - ;; - *) - AC_MSG_RESULT([unreliable]) - enable_prof_libgcc="0"; - ;; - esac - if test "x${enable_prof_libgcc}" = "x1" ; then - backtrace_method="libgcc" - AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ]) - fi -else - enable_prof_libgcc="0" -fi - -AC_ARG_ENABLE([prof-gcc], - [AS_HELP_STRING([--disable-prof-gcc], - [Do not use gcc intrinsics for backtracing])], -[if test "x$enable_prof_gcc" = "xno" ; then - enable_prof_gcc="0" -else - enable_prof_gcc="1" -fi -], -[enable_prof_gcc="1"] -) -if test "x$backtrace_method" = "x" -a "x$enable_prof_gcc" = "x1" \ - -a "x$GCC" = "xyes" ; then - backtrace_method="gcc intrinsics" - AC_DEFINE([JEMALLOC_PROF_GCC], [ ]) -else - enable_prof_gcc="0" -fi - -if test "x$backtrace_method" = "x" ; then - backtrace_method="none (disabling profiling)" - enable_prof="0" -fi -AC_MSG_CHECKING([configured backtracing method]) -AC_MSG_RESULT([$backtrace_method]) -if test "x$enable_prof" = "x1" ; then - if test "x${force_tls}" = "x0" ; then - AC_MSG_ERROR([Heap profiling requires TLS]); - fi - force_tls="1" - AC_DEFINE([JEMALLOC_PROF], [ ]) -fi -AC_SUBST([enable_prof]) - -dnl Enable thread-specific caching by default. -AC_ARG_ENABLE([tcache], - [AS_HELP_STRING([--disable-tcache], [Disable per thread caches])], -[if test "x$enable_tcache" = "xno" ; then - enable_tcache="0" -else - enable_tcache="1" -fi -], -[enable_tcache="1"] -) -if test "x$enable_tcache" = "x1" ; then - AC_DEFINE([JEMALLOC_TCACHE], [ ]) -fi -AC_SUBST([enable_tcache]) - -dnl Disable mremap() for huge realloc() by default. 
-AC_ARG_ENABLE([mremap], - [AS_HELP_STRING([--enable-mremap], [Enable mremap(2) for huge realloc()])], -[if test "x$enable_mremap" = "xno" ; then - enable_mremap="0" -else - enable_mremap="1" -fi -], -[enable_mremap="0"] -) -if test "x$enable_mremap" = "x1" ; then - JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [ -#define _GNU_SOURCE -#include -], [ -void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0); -], [je_cv_mremap_fixed]) - if test "x${je_cv_mremap_fixed}" = "xno" ; then - enable_mremap="0" - fi -fi -if test "x$enable_mremap" = "x1" ; then - AC_DEFINE([JEMALLOC_MREMAP], [ ]) -fi -AC_SUBST([enable_mremap]) - -dnl Enable VM deallocation via munmap() by default. -AC_ARG_ENABLE([munmap], - [AS_HELP_STRING([--disable-munmap], [Disable VM deallocation via munmap(2)])], -[if test "x$enable_munmap" = "xno" ; then - enable_munmap="0" -else - enable_munmap="1" -fi -], -[enable_munmap="${default_munmap}"] -) -if test "x$enable_munmap" = "x1" ; then - AC_DEFINE([JEMALLOC_MUNMAP], [ ]) -fi -AC_SUBST([enable_munmap]) - -dnl Do not enable allocation from DSS by default. -AC_ARG_ENABLE([dss], - [AS_HELP_STRING([--enable-dss], [Enable allocation from DSS])], -[if test "x$enable_dss" = "xno" ; then - enable_dss="0" -else - enable_dss="1" -fi -], -[enable_dss="0"] -) -dnl Check whether the BSD/SUSv1 sbrk() exists. If not, disable DSS support. -AC_CHECK_FUNC([sbrk], [have_sbrk="1"], [have_sbrk="0"]) -if test "x$have_sbrk" = "x1" ; then - AC_DEFINE([JEMALLOC_HAVE_SBRK], [ ]) -else - enable_dss="0" -fi - -if test "x$enable_dss" = "x1" ; then - AC_DEFINE([JEMALLOC_DSS], [ ]) -fi -AC_SUBST([enable_dss]) - -dnl Support the junk/zero filling option by default. 
-AC_ARG_ENABLE([fill], - [AS_HELP_STRING([--disable-fill], - [Disable support for junk/zero filling, quarantine, and redzones])], -[if test "x$enable_fill" = "xno" ; then - enable_fill="0" -else - enable_fill="1" -fi -], -[enable_fill="1"] -) -if test "x$enable_fill" = "x1" ; then - AC_DEFINE([JEMALLOC_FILL], [ ]) -fi -AC_SUBST([enable_fill]) - -dnl Disable utrace(2)-based tracing by default. -AC_ARG_ENABLE([utrace], - [AS_HELP_STRING([--enable-utrace], [Enable utrace(2)-based tracing])], -[if test "x$enable_utrace" = "xno" ; then - enable_utrace="0" -else - enable_utrace="1" -fi -], -[enable_utrace="0"] -) -JE_COMPILABLE([utrace(2)], [ -#include -#include -#include -#include -#include -], [ - utrace((void *)0, 0); -], [je_cv_utrace]) -if test "x${je_cv_utrace}" = "xno" ; then - enable_utrace="0" -fi -if test "x$enable_utrace" = "x1" ; then - AC_DEFINE([JEMALLOC_UTRACE], [ ]) -fi -AC_SUBST([enable_utrace]) - -dnl Support Valgrind by default. -AC_ARG_ENABLE([valgrind], - [AS_HELP_STRING([--disable-valgrind], [Disable support for Valgrind])], -[if test "x$enable_valgrind" = "xno" ; then - enable_valgrind="0" -else - enable_valgrind="1" -fi -], -[enable_valgrind="1"] -) -if test "x$enable_valgrind" = "x1" ; then - JE_COMPILABLE([valgrind], [ -#include -#include - -#if !defined(VALGRIND_RESIZEINPLACE_BLOCK) -# error "Incompatible Valgrind version" -#endif -], [], [je_cv_valgrind]) - if test "x${je_cv_valgrind}" = "xno" ; then - enable_valgrind="0" - fi - if test "x$enable_valgrind" = "x1" ; then - AC_DEFINE([JEMALLOC_VALGRIND], [ ]) - fi -fi -AC_SUBST([enable_valgrind]) - -dnl Do not support the xmalloc option by default. 
-AC_ARG_ENABLE([xmalloc], - [AS_HELP_STRING([--enable-xmalloc], [Support xmalloc option])], -[if test "x$enable_xmalloc" = "xno" ; then - enable_xmalloc="0" -else - enable_xmalloc="1" -fi -], -[enable_xmalloc="0"] -) -if test "x$enable_xmalloc" = "x1" ; then - AC_DEFINE([JEMALLOC_XMALLOC], [ ]) -fi -AC_SUBST([enable_xmalloc]) - -AC_CACHE_CHECK([STATIC_PAGE_SHIFT], - [je_cv_static_page_shift], - AC_RUN_IFELSE([AC_LANG_PROGRAM( -[[ -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include -]], -[[ - int result; - FILE *f; - -#ifdef _WIN32 - SYSTEM_INFO si; - GetSystemInfo(&si); - result = si.dwPageSize; -#else - result = sysconf(_SC_PAGESIZE); -#endif - if (result == -1) { - return 1; - } - result = ffsl(result) - 1; - - f = fopen("conftest.out", "w"); - if (f == NULL) { - return 1; - } - fprintf(f, "%d\n", result); - fclose(f); - - return 0; -]])], - [je_cv_static_page_shift=`cat conftest.out`], - [je_cv_static_page_shift=undefined])) - -if test "x$je_cv_static_page_shift" != "xundefined"; then - AC_DEFINE_UNQUOTED([STATIC_PAGE_SHIFT], [$je_cv_static_page_shift]) -else - AC_MSG_ERROR([cannot determine value for STATIC_PAGE_SHIFT]) -fi - -dnl ============================================================================ -dnl jemalloc configuration. -dnl - -dnl Set VERSION if source directory has an embedded git repository. 
-if test -d "${srcroot}.git" ; then - git describe --long --abbrev=40 > ${srcroot}VERSION -fi -jemalloc_version=`cat ${srcroot}VERSION` -jemalloc_version_major=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]1}'` -jemalloc_version_minor=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]2}'` -jemalloc_version_bugfix=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]3}'` -jemalloc_version_nrev=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]4}'` -jemalloc_version_gid=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]5}'` -AC_SUBST([jemalloc_version]) -AC_SUBST([jemalloc_version_major]) -AC_SUBST([jemalloc_version_minor]) -AC_SUBST([jemalloc_version_bugfix]) -AC_SUBST([jemalloc_version_nrev]) -AC_SUBST([jemalloc_version_gid]) - -dnl ============================================================================ -dnl Configure pthreads. - -if test "x$abi" != "xpecoff" ; then - AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])]) - dnl Some systems may embed pthreads functionality in libc; check for libpthread - dnl first, but try libc too before failing. - AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"], - [AC_SEARCH_LIBS([pthread_create], , , - AC_MSG_ERROR([libpthread is missing]))]) -fi - -CPPFLAGS="$CPPFLAGS -D_REENTRANT" - -dnl Check whether the BSD-specific _malloc_thread_cleanup() exists. If so, use -dnl it rather than pthreads TSD cleanup functions to support cleanup during -dnl thread exit, in order to avoid pthreads library recursion during -dnl bootstrapping. -AC_CHECK_FUNC([_malloc_thread_cleanup], - [have__malloc_thread_cleanup="1"], - [have__malloc_thread_cleanup="0"] - ) -if test "x$have__malloc_thread_cleanup" = "x1" ; then - AC_DEFINE([JEMALLOC_MALLOC_THREAD_CLEANUP], [ ]) - force_tls="1" -fi - -dnl Check whether the BSD-specific _pthread_mutex_init_calloc_cb() exists. 
If -dnl so, mutex initialization causes allocation, and we need to implement this -dnl callback function in order to prevent recursive allocation. -AC_CHECK_FUNC([_pthread_mutex_init_calloc_cb], - [have__pthread_mutex_init_calloc_cb="1"], - [have__pthread_mutex_init_calloc_cb="0"] - ) -if test "x$have__pthread_mutex_init_calloc_cb" = "x1" ; then - AC_DEFINE([JEMALLOC_MUTEX_INIT_CB]) -fi - -dnl Disable lazy locking by default. -AC_ARG_ENABLE([lazy_lock], - [AS_HELP_STRING([--enable-lazy-lock], - [Enable lazy locking (only lock when multi-threaded)])], -[if test "x$enable_lazy_lock" = "xno" ; then - enable_lazy_lock="0" -else - enable_lazy_lock="1" -fi -], -[enable_lazy_lock="0"] -) -if test "x$enable_lazy_lock" = "x0" -a "x${force_lazy_lock}" = "x1" ; then - AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues]) - enable_lazy_lock="1" -fi -if test "x$enable_lazy_lock" = "x1" ; then - if test "x$abi" != "xpecoff" ; then - AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])]) - AC_CHECK_FUNC([dlsym], [], - [AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"], - [AC_MSG_ERROR([libdl is missing])]) - ]) - fi - AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ]) -fi -AC_SUBST([enable_lazy_lock]) - -AC_ARG_ENABLE([tls], - [AS_HELP_STRING([--disable-tls], [Disable thread-local storage (__thread keyword)])], -if test "x$enable_tls" = "xno" ; then - enable_tls="0" -else - enable_tls="1" -fi -, -enable_tls="1" -) -if test "x${enable_tls}" = "x0" -a "x${force_tls}" = "x1" ; then - AC_MSG_RESULT([Forcing TLS to avoid allocator/threading bootstrap issues]) - enable_tls="1" -fi -if test "x${enable_tls}" = "x1" -a "x${force_tls}" = "x0" ; then - AC_MSG_RESULT([Forcing no TLS to avoid allocator/threading bootstrap issues]) - enable_tls="0" -fi -if test "x${enable_tls}" = "x1" ; then -AC_MSG_CHECKING([for TLS]) -AC_COMPILE_IFELSE([AC_LANG_PROGRAM( -[[ - __thread int x; -]], [[ - x = 42; - - return 0; -]])], - AC_MSG_RESULT([yes]), - AC_MSG_RESULT([no]) - 
enable_tls="0") -fi -AC_SUBST([enable_tls]) -if test "x${enable_tls}" = "x1" ; then - AC_DEFINE_UNQUOTED([JEMALLOC_TLS], [ ]) -elif test "x${force_tls}" = "x1" ; then - AC_MSG_ERROR([Failed to configure TLS, which is mandatory for correct function]) -fi - -dnl ============================================================================ -dnl Check for ffsl(3), and fail if not found. This function exists on all -dnl platforms that jemalloc currently has a chance of functioning on without -dnl modification. -JE_COMPILABLE([a program using ffsl], [ -#include -#include -#include -], [ - { - int rv = ffsl(0x08); - printf("%d\n", rv); - } -], [je_cv_function_ffsl]) -if test "x${je_cv_function_ffsl}" != "xyes" ; then - AC_MSG_ERROR([Cannot build without ffsl(3)]) -fi - -dnl ============================================================================ -dnl Check for atomic(9) operations as provided on FreeBSD. - -JE_COMPILABLE([atomic(9)], [ -#include -#include -#include -], [ - { - uint32_t x32 = 0; - volatile uint32_t *x32p = &x32; - atomic_fetchadd_32(x32p, 1); - } - { - unsigned long xlong = 0; - volatile unsigned long *xlongp = &xlong; - atomic_fetchadd_long(xlongp, 1); - } -], [je_cv_atomic9]) -if test "x${je_cv_atomic9}" = "xyes" ; then - AC_DEFINE([JEMALLOC_ATOMIC9]) -fi - -dnl ============================================================================ -dnl Check for atomic(3) operations as provided on Darwin. - -JE_COMPILABLE([Darwin OSAtomic*()], [ -#include -#include -], [ - { - int32_t x32 = 0; - volatile int32_t *x32p = &x32; - OSAtomicAdd32(1, x32p); - } - { - int64_t x64 = 0; - volatile int64_t *x64p = &x64; - OSAtomicAdd64(1, x64p); - } -], [je_cv_osatomic]) -if test "x${je_cv_osatomic}" = "xyes" ; then - AC_DEFINE([JEMALLOC_OSATOMIC], [ ]) -fi - -dnl ============================================================================ -dnl Check whether __sync_{add,sub}_and_fetch() are available despite -dnl __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n macros being undefined. 
- -AC_DEFUN([JE_SYNC_COMPARE_AND_SWAP_CHECK],[ - AC_CACHE_CHECK([whether to force $1-bit __sync_{add,sub}_and_fetch()], - [je_cv_sync_compare_and_swap_$2], - [AC_LINK_IFELSE([AC_LANG_PROGRAM([ - #include - ], - [ - #ifndef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_$2 - { - uint$1_t x$1 = 0; - __sync_add_and_fetch(&x$1, 42); - __sync_sub_and_fetch(&x$1, 1); - } - #else - #error __GCC_HAVE_SYNC_COMPARE_AND_SWAP_$2 is defined, no need to force - #endif - ])], - [je_cv_sync_compare_and_swap_$2=yes], - [je_cv_sync_compare_and_swap_$2=no])]) - - if test "x${je_cv_sync_compare_and_swap_$2}" = "xyes" ; then - AC_DEFINE([JE_FORCE_SYNC_COMPARE_AND_SWAP_$2], [ ]) - fi -]) - -if test "x${je_cv_atomic9}" != "xyes" -a "x${je_cv_osatomic}" != "xyes" ; then - JE_SYNC_COMPARE_AND_SWAP_CHECK(32, 4) - JE_SYNC_COMPARE_AND_SWAP_CHECK(64, 8) -fi - -dnl ============================================================================ -dnl Check for spinlock(3) operations as provided on Darwin. - -JE_COMPILABLE([Darwin OSSpin*()], [ -#include -#include -], [ - OSSpinLock lock = 0; - OSSpinLockLock(&lock); - OSSpinLockUnlock(&lock); -], [je_cv_osspin]) -if test "x${je_cv_osspin}" = "xyes" ; then - AC_DEFINE([JEMALLOC_OSSPIN], [ ]) -fi - -dnl ============================================================================ -dnl Darwin-related configuration. 
- -AC_ARG_ENABLE([zone-allocator], - [AS_HELP_STRING([--disable-zone-allocator], - [Disable zone allocator for Darwin])], -[if test "x$enable_zone_allocator" = "xno" ; then - enable_zone_allocator="0" -else - enable_zone_allocator="1" -fi -], -[if test "x${abi}" = "xmacho"; then - enable_zone_allocator="1" -fi -] -) -AC_SUBST([enable_zone_allocator]) - -if test "x${enable_zone_allocator}" = "x1" ; then - if test "x${abi}" != "xmacho"; then - AC_MSG_ERROR([--enable-zone-allocator is only supported on Darwin]) - fi - AC_DEFINE([JEMALLOC_IVSALLOC], [ ]) - AC_DEFINE([JEMALLOC_ZONE], [ ]) - - dnl The szone version jumped from 3 to 6 between the OS X 10.5.x and 10.6 - dnl releases. malloc_zone_t and malloc_introspection_t have new fields in - dnl 10.6, which is the only source-level indication of the change. - AC_MSG_CHECKING([malloc zone version]) - AC_DEFUN([JE_ZONE_PROGRAM], - [AC_LANG_PROGRAM( - [#include ], - [static foo[[sizeof($1) $2 sizeof(void *) * $3 ? 1 : -1]]] - )]) - - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,14)],[JEMALLOC_ZONE_VERSION=3],[ - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,15)],[JEMALLOC_ZONE_VERSION=5],[ - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,16)],[ - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_introspection_t,==,9)],[JEMALLOC_ZONE_VERSION=6],[ - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_introspection_t,==,13)],[JEMALLOC_ZONE_VERSION=7],[JEMALLOC_ZONE_VERSION=] - )])],[ - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,17)],[JEMALLOC_ZONE_VERSION=8],[ - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,>,17)],[JEMALLOC_ZONE_VERSION=9],[JEMALLOC_ZONE_VERSION=] - )])])])]) - if test "x${JEMALLOC_ZONE_VERSION}" = "x"; then - AC_MSG_RESULT([unsupported]) - AC_MSG_ERROR([Unsupported malloc zone version]) - fi - if test "${JEMALLOC_ZONE_VERSION}" = 9; then - JEMALLOC_ZONE_VERSION=8 - AC_MSG_RESULT([> 8]) - else - AC_MSG_RESULT([$JEMALLOC_ZONE_VERSION]) - fi - AC_DEFINE_UNQUOTED(JEMALLOC_ZONE_VERSION, 
[$JEMALLOC_ZONE_VERSION]) -fi - -dnl ============================================================================ -dnl Check for typedefs, structures, and compiler characteristics. -AC_HEADER_STDBOOL - -AC_CONFIG_COMMANDS([include/jemalloc/internal/size_classes.h], [ - mkdir -p "include/jemalloc/internal" - "${srcdir}/include/jemalloc/internal/size_classes.sh" > "${objroot}include/jemalloc/internal/size_classes.h" -]) - -dnl Process .in files. -AC_SUBST([cfghdrs_in]) -AC_SUBST([cfghdrs_out]) -AC_CONFIG_HEADERS([$cfghdrs_tup]) - -dnl ============================================================================ -dnl Generate outputs. -AC_CONFIG_FILES([$cfgoutputs_tup config.stamp bin/jemalloc.sh]) -AC_SUBST([cfgoutputs_in]) -AC_SUBST([cfgoutputs_out]) -AC_OUTPUT - -dnl ============================================================================ -dnl Print out the results of configuration. -AC_MSG_RESULT([===============================================================================]) -AC_MSG_RESULT([jemalloc version : ${jemalloc_version}]) -AC_MSG_RESULT([library revision : ${rev}]) -AC_MSG_RESULT([]) -AC_MSG_RESULT([CC : ${CC}]) -AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}]) -AC_MSG_RESULT([CFLAGS : ${CFLAGS}]) -AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}]) -AC_MSG_RESULT([LIBS : ${LIBS}]) -AC_MSG_RESULT([RPATH_EXTRA : ${RPATH_EXTRA}]) -AC_MSG_RESULT([]) -AC_MSG_RESULT([XSLTPROC : ${XSLTPROC}]) -AC_MSG_RESULT([XSLROOT : ${XSLROOT}]) -AC_MSG_RESULT([]) -AC_MSG_RESULT([PREFIX : ${PREFIX}]) -AC_MSG_RESULT([BINDIR : ${BINDIR}]) -AC_MSG_RESULT([INCLUDEDIR : ${INCLUDEDIR}]) -AC_MSG_RESULT([LIBDIR : ${LIBDIR}]) -AC_MSG_RESULT([DATADIR : ${DATADIR}]) -AC_MSG_RESULT([MANDIR : ${MANDIR}]) -AC_MSG_RESULT([]) -AC_MSG_RESULT([srcroot : ${srcroot}]) -AC_MSG_RESULT([abs_srcroot : ${abs_srcroot}]) -AC_MSG_RESULT([objroot : ${objroot}]) -AC_MSG_RESULT([abs_objroot : ${abs_objroot}]) -AC_MSG_RESULT([]) -AC_MSG_RESULT([JEMALLOC_PREFIX : ${JEMALLOC_PREFIX}]) 
-AC_MSG_RESULT([JEMALLOC_PRIVATE_NAMESPACE]) -AC_MSG_RESULT([ : ${JEMALLOC_PRIVATE_NAMESPACE}]) -AC_MSG_RESULT([install_suffix : ${install_suffix}]) -AC_MSG_RESULT([autogen : ${enable_autogen}]) -AC_MSG_RESULT([experimental : ${enable_experimental}]) -AC_MSG_RESULT([cc-silence : ${enable_cc_silence}]) -AC_MSG_RESULT([debug : ${enable_debug}]) -AC_MSG_RESULT([stats : ${enable_stats}]) -AC_MSG_RESULT([prof : ${enable_prof}]) -AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}]) -AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}]) -AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) -AC_MSG_RESULT([tcache : ${enable_tcache}]) -AC_MSG_RESULT([fill : ${enable_fill}]) -AC_MSG_RESULT([utrace : ${enable_utrace}]) -AC_MSG_RESULT([valgrind : ${enable_valgrind}]) -AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) -AC_MSG_RESULT([mremap : ${enable_mremap}]) -AC_MSG_RESULT([munmap : ${enable_munmap}]) -AC_MSG_RESULT([dss : ${enable_dss}]) -AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) -AC_MSG_RESULT([tls : ${enable_tls}]) -AC_MSG_RESULT([===============================================================================]) diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/COPYING mariadb-5.5-5.5.40/extra/jemalloc/COPYING --- mariadb-5.5-5.5.39/extra/jemalloc/COPYING 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/COPYING 1970-01-01 00:00:00.000000000 +0000 @@ -1,27 +0,0 @@ -Unless otherwise specified, files in the jemalloc source distribution are -subject to the following license: --------------------------------------------------------------------------------- -Copyright (C) 2002-2013 Jason Evans . -All rights reserved. -Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2013 Facebook, Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: -1. 
Redistributions of source code must retain the above copyright notice(s), - this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice(s), - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS -OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO -EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF -ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
--------------------------------------------------------------------------------- diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/doc/html.xsl.in mariadb-5.5-5.5.40/extra/jemalloc/doc/html.xsl.in --- mariadb-5.5-5.5.39/extra/jemalloc/doc/html.xsl.in 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/doc/html.xsl.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,4 +0,0 @@ - - - - diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/doc/jemalloc.3 mariadb-5.5-5.5.40/extra/jemalloc/doc/jemalloc.3 --- mariadb-5.5-5.5.39/extra/jemalloc/doc/jemalloc.3 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/doc/jemalloc.3 1970-01-01 00:00:00.000000000 +0000 @@ -1,1482 +0,0 @@ -'\" t -.\" Title: JEMALLOC -.\" Author: Jason Evans -.\" Generator: DocBook XSL Stylesheets v1.76.1 -.\" Date: 03/06/2013 -.\" Manual: User Manual -.\" Source: jemalloc 3.3.1-0-g9ef9d9e8c271cdf14f664b871a8f98c827714784 -.\" Language: English -.\" -.TH "JEMALLOC" "3" "03/06/2013" "jemalloc 3.3.1-0-g9ef9d9e8c271" "User Manual" -.\" ----------------------------------------------------------------- -.\" * Define some portability stuff -.\" ----------------------------------------------------------------- -.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.\" http://bugs.debian.org/507673 -.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html -.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.ie \n(.g .ds Aq \(aq -.el .ds Aq ' -.\" ----------------------------------------------------------------- -.\" * set default formatting -.\" ----------------------------------------------------------------- -.\" disable hyphenation -.nh -.\" disable justification (adjust text to left margin only) -.ad l -.\" ----------------------------------------------------------------- -.\" * MAIN CONTENT STARTS HERE * -.\" ----------------------------------------------------------------- -.SH "NAME" -jemalloc \- general purpose memory allocation 
functions -.SH "LIBRARY" -.PP -This manual describes jemalloc 3\&.3\&.1\-0\-g9ef9d9e8c271cdf14f664b871a8f98c827714784\&. More information can be found at the -\m[blue]\fBjemalloc website\fR\m[]\&\s-2\u[1]\d\s+2\&. -.SH "SYNOPSIS" -.sp -.ft B -.nf -#include -#include -.fi -.ft -.SS "Standard API" -.HP \w'void\ *malloc('u -.BI "void *malloc(size_t\ " "size" ");" -.HP \w'void\ *calloc('u -.BI "void *calloc(size_t\ " "number" ", size_t\ " "size" ");" -.HP \w'int\ posix_memalign('u -.BI "int posix_memalign(void\ **" "ptr" ", size_t\ " "alignment" ", size_t\ " "size" ");" -.HP \w'void\ *aligned_alloc('u -.BI "void *aligned_alloc(size_t\ " "alignment" ", size_t\ " "size" ");" -.HP \w'void\ *realloc('u -.BI "void *realloc(void\ *" "ptr" ", size_t\ " "size" ");" -.HP \w'void\ free('u -.BI "void free(void\ *" "ptr" ");" -.SS "Non\-standard API" -.HP \w'size_t\ malloc_usable_size('u -.BI "size_t malloc_usable_size(const\ void\ *" "ptr" ");" -.HP \w'void\ malloc_stats_print('u -.BI "void malloc_stats_print(void\ " "(*write_cb)" "\ (void\ *,\ const\ char\ *), void\ *" "cbopaque" ", const\ char\ *" "opts" ");" -.HP \w'int\ mallctl('u -.BI "int mallctl(const\ char\ *" "name" ", void\ *" "oldp" ", size_t\ *" "oldlenp" ", void\ *" "newp" ", size_t\ " "newlen" ");" -.HP \w'int\ mallctlnametomib('u -.BI "int mallctlnametomib(const\ char\ *" "name" ", size_t\ *" "mibp" ", size_t\ *" "miblenp" ");" -.HP \w'int\ mallctlbymib('u -.BI "int mallctlbymib(const\ size_t\ *" "mib" ", size_t\ " "miblen" ", void\ *" "oldp" ", size_t\ *" "oldlenp" ", void\ *" "newp" ", size_t\ " "newlen" ");" -.HP \w'void\ (*malloc_message)('u -.BI "void (*malloc_message)(void\ *" "cbopaque" ", const\ char\ *" "s" ");" -.PP -const char *\fImalloc_conf\fR; -.SS "Experimental API" -.HP \w'int\ allocm('u -.BI "int allocm(void\ **" "ptr" ", size_t\ *" "rsize" ", size_t\ " "size" ", int\ " "flags" ");" -.HP \w'int\ rallocm('u -.BI "int rallocm(void\ **" "ptr" ", size_t\ *" "rsize" ", size_t\ " "size" ", size_t\ " 
"extra" ", int\ " "flags" ");" -.HP \w'int\ sallocm('u -.BI "int sallocm(const\ void\ *" "ptr" ", size_t\ *" "rsize" ", int\ " "flags" ");" -.HP \w'int\ dallocm('u -.BI "int dallocm(void\ *" "ptr" ", int\ " "flags" ");" -.HP \w'int\ nallocm('u -.BI "int nallocm(size_t\ *" "rsize" ", size_t\ " "size" ", int\ " "flags" ");" -.SH "DESCRIPTION" -.SS "Standard API" -.PP -The -\fBmalloc\fR\fB\fR -function allocates -\fIsize\fR -bytes of uninitialized memory\&. The allocated space is suitably aligned (after possible pointer coercion) for storage of any type of object\&. -.PP -The -\fBcalloc\fR\fB\fR -function allocates space for -\fInumber\fR -objects, each -\fIsize\fR -bytes in length\&. The result is identical to calling -\fBmalloc\fR\fB\fR -with an argument of -\fInumber\fR -* -\fIsize\fR, with the exception that the allocated memory is explicitly initialized to zero bytes\&. -.PP -The -\fBposix_memalign\fR\fB\fR -function allocates -\fIsize\fR -bytes of memory such that the allocation\*(Aqs base address is an even multiple of -\fIalignment\fR, and returns the allocation in the value pointed to by -\fIptr\fR\&. The requested -\fIalignment\fR -must be a power of 2 at least as large as -sizeof(\fBvoid *\fR)\&. -.PP -The -\fBaligned_alloc\fR\fB\fR -function allocates -\fIsize\fR -bytes of memory such that the allocation\*(Aqs base address is an even multiple of -\fIalignment\fR\&. The requested -\fIalignment\fR -must be a power of 2\&. Behavior is undefined if -\fIsize\fR -is not an integral multiple of -\fIalignment\fR\&. -.PP -The -\fBrealloc\fR\fB\fR -function changes the size of the previously allocated memory referenced by -\fIptr\fR -to -\fIsize\fR -bytes\&. The contents of the memory are unchanged up to the lesser of the new and old sizes\&. If the new size is larger, the contents of the newly allocated portion of the memory are undefined\&. Upon success, the memory referenced by -\fIptr\fR -is freed and a pointer to the newly allocated memory is returned\&. 
Note that -\fBrealloc\fR\fB\fR -may move the memory allocation, resulting in a different return value than -\fIptr\fR\&. If -\fIptr\fR -is -\fBNULL\fR, the -\fBrealloc\fR\fB\fR -function behaves identically to -\fBmalloc\fR\fB\fR -for the specified size\&. -.PP -The -\fBfree\fR\fB\fR -function causes the allocated memory referenced by -\fIptr\fR -to be made available for future allocations\&. If -\fIptr\fR -is -\fBNULL\fR, no action occurs\&. -.SS "Non\-standard API" -.PP -The -\fBmalloc_usable_size\fR\fB\fR -function returns the usable size of the allocation pointed to by -\fIptr\fR\&. The return value may be larger than the size that was requested during allocation\&. The -\fBmalloc_usable_size\fR\fB\fR -function is not a mechanism for in\-place -\fBrealloc\fR\fB\fR; rather it is provided solely as a tool for introspection purposes\&. Any discrepancy between the requested allocation size and the size reported by -\fBmalloc_usable_size\fR\fB\fR -should not be depended on, since such behavior is entirely implementation\-dependent\&. -.PP -The -\fBmalloc_stats_print\fR\fB\fR -function writes human\-readable summary statistics via the -\fIwrite_cb\fR -callback function pointer and -\fIcbopaque\fR -data passed to -\fIwrite_cb\fR, or -\fBmalloc_message\fR\fB\fR -if -\fIwrite_cb\fR -is -\fBNULL\fR\&. This function can be called repeatedly\&. General information that never changes during execution can be omitted by specifying "g" as a character within the -\fIopts\fR -string\&. Note that -\fBmalloc_message\fR\fB\fR -uses the -\fBmallctl*\fR\fB\fR -functions internally, so inconsistent statistics can be reported if multiple threads use these functions simultaneously\&. If -\fB\-\-enable\-stats\fR -is specified during configuration, \(lqm\(rq and \(lqa\(rq can be specified to omit merged arena and per arena statistics, respectively; \(lqb\(rq and \(lql\(rq can be specified to omit per size class statistics for bins and large objects, respectively\&. 
Unrecognized characters are silently ignored\&. Note that thread caching may prevent some statistics from being completely up to date, since extra locking would be required to merge counters that track thread cache operations\&. -.PP -The -\fBmallctl\fR\fB\fR -function provides a general interface for introspecting the memory allocator, as well as setting modifiable parameters and triggering actions\&. The period\-separated -\fIname\fR -argument specifies a location in a tree\-structured namespace; see the -MALLCTL NAMESPACE -section for documentation on the tree contents\&. To read a value, pass a pointer via -\fIoldp\fR -to adequate space to contain the value, and a pointer to its length via -\fIoldlenp\fR; otherwise pass -\fBNULL\fR -and -\fBNULL\fR\&. Similarly, to write a value, pass a pointer to the value via -\fInewp\fR, and its length via -\fInewlen\fR; otherwise pass -\fBNULL\fR -and -\fB0\fR\&. -.PP -The -\fBmallctlnametomib\fR\fB\fR -function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace, by translating a name to a \(lqManagement Information Base\(rq (MIB) that can be passed repeatedly to -\fBmallctlbymib\fR\fB\fR\&. Upon successful return from -\fBmallctlnametomib\fR\fB\fR, -\fImibp\fR -contains an array of -\fI*miblenp\fR -integers, where -\fI*miblenp\fR -is the lesser of the number of components in -\fIname\fR -and the input value of -\fI*miblenp\fR\&. Thus it is possible to pass a -\fI*miblenp\fR -that is smaller than the number of period\-separated name components, which results in a partial MIB that can be used as the basis for constructing a complete MIB\&. For name components that are integers (e\&.g\&. the 2 in -"arenas\&.bin\&.2\&.size"), the corresponding MIB component will always be that integer\&. 
Therefore, it is legitimate to construct code like the following: -.sp -.if n \{\ -.RS 4 -.\} -.nf -unsigned nbins, i; - -int mib[4]; -size_t len, miblen; - -len = sizeof(nbins); -mallctl("arenas\&.nbins", &nbins, &len, NULL, 0); - -miblen = 4; -mallnametomib("arenas\&.bin\&.0\&.size", mib, &miblen); -for (i = 0; i < nbins; i++) { - size_t bin_size; - - mib[2] = i; - len = sizeof(bin_size); - mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0); - /* Do something with bin_size\&.\&.\&. */ -} -.fi -.if n \{\ -.RE -.\} -.SS "Experimental API" -.PP -The experimental API is subject to change or removal without regard for backward compatibility\&. If -\fB\-\-disable\-experimental\fR -is specified during configuration, the experimental API is omitted\&. -.PP -The -\fBallocm\fR\fB\fR, -\fBrallocm\fR\fB\fR, -\fBsallocm\fR\fB\fR, -\fBdallocm\fR\fB\fR, and -\fBnallocm\fR\fB\fR -functions all have a -\fIflags\fR -argument that can be used to specify options\&. The functions only check the options that are contextually relevant\&. Use bitwise or (|) operations to specify one or more of the following: -.PP -\fBALLOCM_LG_ALIGN(\fR\fB\fIla\fR\fR\fB) \fR -.RS 4 -Align the memory allocation to start at an address that is a multiple of -(1 << \fIla\fR)\&. This macro does not validate that -\fIla\fR -is within the valid range\&. -.RE -.PP -\fBALLOCM_ALIGN(\fR\fB\fIa\fR\fR\fB) \fR -.RS 4 -Align the memory allocation to start at an address that is a multiple of -\fIa\fR, where -\fIa\fR -is a power of two\&. This macro does not validate that -\fIa\fR -is a power of 2\&. -.RE -.PP -\fBALLOCM_ZERO\fR -.RS 4 -Initialize newly allocated memory to contain zero bytes\&. In the growing reallocation case, the real size prior to reallocation defines the boundary between untouched bytes and those that are initialized to contain zero bytes\&. If this option is absent, newly allocated memory is uninitialized\&. 
-.RE -.PP -\fBALLOCM_NO_MOVE\fR -.RS 4 -For reallocation, fail rather than moving the object\&. This constraint can apply to both growth and shrinkage\&. -.RE -.PP -\fBALLOCM_ARENA(\fR\fB\fIa\fR\fR\fB) \fR -.RS 4 -Use the arena specified by the index -\fIa\fR\&. This macro does not validate that -\fIa\fR -specifies an arena in the valid range\&. -.RE -.PP -The -\fBallocm\fR\fB\fR -function allocates at least -\fIsize\fR -bytes of memory, sets -\fI*ptr\fR -to the base address of the allocation, and sets -\fI*rsize\fR -to the real size of the allocation if -\fIrsize\fR -is not -\fBNULL\fR\&. Behavior is undefined if -\fIsize\fR -is -\fB0\fR\&. -.PP -The -\fBrallocm\fR\fB\fR -function resizes the allocation at -\fI*ptr\fR -to be at least -\fIsize\fR -bytes, sets -\fI*ptr\fR -to the base address of the allocation if it moved, and sets -\fI*rsize\fR -to the real size of the allocation if -\fIrsize\fR -is not -\fBNULL\fR\&. If -\fIextra\fR -is non\-zero, an attempt is made to resize the allocation to be at least -\fIsize\fR + \fIextra\fR) -bytes, though inability to allocate the extra byte(s) will not by itself result in failure\&. Behavior is undefined if -\fIsize\fR -is -\fB0\fR, or if -(\fIsize\fR + \fIextra\fR > \fBSIZE_T_MAX\fR)\&. -.PP -The -\fBsallocm\fR\fB\fR -function sets -\fI*rsize\fR -to the real size of the allocation\&. -.PP -The -\fBdallocm\fR\fB\fR -function causes the memory referenced by -\fIptr\fR -to be made available for future allocations\&. -.PP -The -\fBnallocm\fR\fB\fR -function allocates no memory, but it performs the same size computation as the -\fBallocm\fR\fB\fR -function, and if -\fIrsize\fR -is not -\fBNULL\fR -it sets -\fI*rsize\fR -to the real size of the allocation that would result from the equivalent -\fBallocm\fR\fB\fR -function call\&. Behavior is undefined if -\fIsize\fR -is -\fB0\fR\&. 
-.SH "TUNING" -.PP -Once, when the first call is made to one of the memory allocation routines, the allocator initializes its internals based in part on various options that can be specified at compile\- or run\-time\&. -.PP -The string pointed to by the global variable -\fImalloc_conf\fR, the \(lqname\(rq of the file referenced by the symbolic link named -/etc/malloc\&.conf, and the value of the environment variable -\fBMALLOC_CONF\fR, will be interpreted, in that order, from left to right as options\&. -.PP -An options string is a comma\-separated list of option:value pairs\&. There is one key corresponding to each -"opt\&.*" -mallctl (see the -MALLCTL NAMESPACE -section for options documentation)\&. For example, -abort:true,narenas:1 -sets the -"opt\&.abort" -and -"opt\&.narenas" -options\&. Some options have boolean values (true/false), others have integer values (base 8, 10, or 16, depending on prefix), and yet others have raw string values\&. -.SH "IMPLEMENTATION NOTES" -.PP -Traditionally, allocators have used -\fBsbrk\fR(2) -to obtain memory, which is suboptimal for several reasons, including race conditions, increased fragmentation, and artificial limitations on maximum usable memory\&. If -\fB\-\-enable\-dss\fR -is specified during configuration, this allocator uses both -\fBmmap\fR(2) -and -\fBsbrk\fR(2), in that order of preference; otherwise only -\fBmmap\fR(2) -is used\&. -.PP -This allocator uses multiple arenas in order to reduce lock contention for threaded programs on multi\-processor systems\&. This works well with regard to threading scalability, but incurs some costs\&. There is a small fixed per\-arena overhead, and additionally, arenas manage memory completely independently of each other, which means a small fixed increase in overall memory fragmentation\&. These overheads are not generally an issue, given the number of arenas normally used\&. 
Note that using substantially more arenas than the default is not likely to improve performance, mainly due to reduced cache performance\&. However, it may make sense to reduce the number of arenas if an application does not make much use of the allocation functions\&. -.PP -In addition to multiple arenas, unless -\fB\-\-disable\-tcache\fR -is specified during configuration, this allocator supports thread\-specific caching for small and large objects, in order to make it possible to completely avoid synchronization for most allocation requests\&. Such caching allows very fast allocation in the common case, but it increases memory usage and fragmentation, since a bounded number of objects can remain allocated in each thread cache\&. -.PP -Memory is conceptually broken into equal\-sized chunks, where the chunk size is a power of two that is greater than the page size\&. Chunks are always aligned to multiples of the chunk size\&. This alignment makes it possible to find metadata for user objects very quickly\&. -.PP -User objects are broken into three categories according to size: small, large, and huge\&. Small objects are smaller than one page\&. Large objects are smaller than the chunk size\&. Huge objects are a multiple of the chunk size\&. Small and large objects are managed by arenas; huge objects are managed separately in a single data structure that is shared by all threads\&. Huge objects are used by applications infrequently enough that this single data structure is not a scalability issue\&. -.PP -Each chunk that is managed by an arena tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one large object)\&. The combination of chunk alignment and chunk page maps makes it possible to determine all metadata regarding small and large allocations in constant time\&. -.PP -Small objects are managed in groups by page runs\&. Each run maintains a frontier and free list to track which regions are in use\&. 
Allocation requests that are no more than half the quantum (8 or 16, depending on architecture) are rounded up to the nearest power of two that is at least -sizeof(\fBdouble\fR)\&. All other small object size classes are multiples of the quantum, spaced such that internal fragmentation is limited to approximately 25% for all but the smallest size classes\&. Allocation requests that are larger than the maximum small size class, but small enough to fit in an arena\-managed chunk (see the -"opt\&.lg_chunk" -option), are rounded up to the nearest run size\&. Allocation requests that are too large to fit in an arena\-managed chunk are rounded up to the nearest multiple of the chunk size\&. -.PP -Allocations are packed tightly together, which can be an issue for multi\-threaded applications\&. If you need to assure that allocations do not suffer from cacheline sharing, round your allocation requests up to the nearest multiple of the cacheline size, or specify cacheline alignment when allocating\&. -.PP -Assuming 4 MiB chunks, 4 KiB pages, and a 16\-byte quantum on a 64\-bit system, the size classes in each category are as shown in -Table 1\&. -.sp -.it 1 an-trap -.nr an-no-space-flag 1 -.nr an-break-flag 1 -.br -.B Table\ \&1.\ \&Size classes -.TS -allbox tab(:); -lB rB lB. -T{ -Category -T}:T{ -Spacing -T}:T{ -Size -T} -.T& -l r l -^ r l -^ r l -^ r l -^ r l -^ r l -^ r l -l r l -l r l. -T{ -Small -T}:T{ -lg -T}:T{ -[8] -T} -:T{ -16 -T}:T{ -[16, 32, 48, \&.\&.\&., 128] -T} -:T{ -32 -T}:T{ -[160, 192, 224, 256] -T} -:T{ -64 -T}:T{ -[320, 384, 448, 512] -T} -:T{ -128 -T}:T{ -[640, 768, 896, 1024] -T} -:T{ -256 -T}:T{ -[1280, 1536, 1792, 2048] -T} -:T{ -512 -T}:T{ -[2560, 3072, 3584] -T} -T{ -Large -T}:T{ -4 KiB -T}:T{ -[4 KiB, 8 KiB, 12 KiB, \&.\&.\&., 4072 KiB] -T} -T{ -Huge -T}:T{ -4 MiB -T}:T{ -[4 MiB, 8 MiB, 12 MiB, \&.\&.\&.] 
-T} -.TE -.sp 1 -.SH "MALLCTL NAMESPACE" -.PP -The following names are defined in the namespace accessible via the -\fBmallctl*\fR\fB\fR -functions\&. Value types are specified in parentheses, their readable/writable statuses are encoded as -rw, -r\-, -\-w, or -\-\-, and required build configuration flags follow, if any\&. A name element encoded as - -or - -indicates an integer component, where the integer varies from 0 to some upper value that must be determined via introspection\&. In the case of -"stats\&.arenas\&.\&.*", - -equal to -"arenas\&.narenas" -can be used to access the summation of statistics from all arenas\&. Take special note of the -"epoch" -mallctl, which controls refreshing of cached dynamic statistics\&. -.PP -"version" (\fBconst char *\fR) r\- -.RS 4 -Return the jemalloc version string\&. -.RE -.PP -"epoch" (\fBuint64_t\fR) rw -.RS 4 -If a value is passed in, refresh the data from which the -\fBmallctl*\fR\fB\fR -functions report values, and increment the epoch\&. Return the current epoch\&. This is useful for detecting whether another thread caused a refresh\&. -.RE -.PP -"config\&.debug" (\fBbool\fR) r\- -.RS 4 -\fB\-\-enable\-debug\fR -was specified during build configuration\&. -.RE -.PP -"config\&.dss" (\fBbool\fR) r\- -.RS 4 -\fB\-\-enable\-dss\fR -was specified during build configuration\&. -.RE -.PP -"config\&.fill" (\fBbool\fR) r\- -.RS 4 -\fB\-\-enable\-fill\fR -was specified during build configuration\&. -.RE -.PP -"config\&.lazy_lock" (\fBbool\fR) r\- -.RS 4 -\fB\-\-enable\-lazy\-lock\fR -was specified during build configuration\&. -.RE -.PP -"config\&.mremap" (\fBbool\fR) r\- -.RS 4 -\fB\-\-enable\-mremap\fR -was specified during build configuration\&. -.RE -.PP -"config\&.munmap" (\fBbool\fR) r\- -.RS 4 -\fB\-\-enable\-munmap\fR -was specified during build configuration\&. -.RE -.PP -"config\&.prof" (\fBbool\fR) r\- -.RS 4 -\fB\-\-enable\-prof\fR -was specified during build configuration\&. 
-.RE -.PP -"config\&.prof_libgcc" (\fBbool\fR) r\- -.RS 4 -\fB\-\-disable\-prof\-libgcc\fR -was not specified during build configuration\&. -.RE -.PP -"config\&.prof_libunwind" (\fBbool\fR) r\- -.RS 4 -\fB\-\-enable\-prof\-libunwind\fR -was specified during build configuration\&. -.RE -.PP -"config\&.stats" (\fBbool\fR) r\- -.RS 4 -\fB\-\-enable\-stats\fR -was specified during build configuration\&. -.RE -.PP -"config\&.tcache" (\fBbool\fR) r\- -.RS 4 -\fB\-\-disable\-tcache\fR -was not specified during build configuration\&. -.RE -.PP -"config\&.tls" (\fBbool\fR) r\- -.RS 4 -\fB\-\-disable\-tls\fR -was not specified during build configuration\&. -.RE -.PP -"config\&.utrace" (\fBbool\fR) r\- -.RS 4 -\fB\-\-enable\-utrace\fR -was specified during build configuration\&. -.RE -.PP -"config\&.valgrind" (\fBbool\fR) r\- -.RS 4 -\fB\-\-enable\-valgrind\fR -was specified during build configuration\&. -.RE -.PP -"config\&.xmalloc" (\fBbool\fR) r\- -.RS 4 -\fB\-\-enable\-xmalloc\fR -was specified during build configuration\&. -.RE -.PP -"opt\&.abort" (\fBbool\fR) r\- -.RS 4 -Abort\-on\-warning enabled/disabled\&. If true, most warnings are fatal\&. The process will call -\fBabort\fR(3) -in these cases\&. This option is disabled by default unless -\fB\-\-enable\-debug\fR -is specified during configuration, in which case it is enabled by default\&. -.RE -.PP -"opt\&.lg_chunk" (\fBsize_t\fR) r\- -.RS 4 -Virtual memory chunk size (log base 2)\&. If a chunk size outside the supported size range is specified, the size is silently clipped to the minimum/maximum supported size\&. The default chunk size is 4 MiB (2^22)\&. -.RE -.PP -"opt\&.dss" (\fBconst char *\fR) r\- -.RS 4 -dss (\fBsbrk\fR(2)) allocation precedence as related to -\fBmmap\fR(2) -allocation\&. The following settings are supported: \(lqdisabled\(rq, \(lqprimary\(rq, and \(lqsecondary\(rq (default)\&. 
-.RE -.PP -"opt\&.narenas" (\fBsize_t\fR) r\- -.RS 4 -Maximum number of arenas to use for automatic multiplexing of threads and arenas\&. The default is four times the number of CPUs, or one if there is a single CPU\&. -.RE -.PP -"opt\&.lg_dirty_mult" (\fBssize_t\fR) r\- -.RS 4 -Per\-arena minimum ratio (log base 2) of active to dirty pages\&. Some dirty unused pages may be allowed to accumulate, within the limit set by the ratio (or one chunk worth of dirty pages, whichever is greater), before informing the kernel about some of those pages via -\fBmadvise\fR(2) -or a similar system call\&. This provides the kernel with sufficient information to recycle dirty pages if physical memory becomes scarce and the pages remain unused\&. The default minimum ratio is 8:1 (2^3:1); an option value of \-1 will disable dirty page purging\&. -.RE -.PP -"opt\&.stats_print" (\fBbool\fR) r\- -.RS 4 -Enable/disable statistics printing at exit\&. If enabled, the -\fBmalloc_stats_print\fR\fB\fR -function is called at program exit via an -\fBatexit\fR(3) -function\&. If -\fB\-\-enable\-stats\fR -is specified during configuration, this has the potential to cause deadlock for a multi\-threaded process that exits while one or more threads are executing in the memory allocation functions\&. Therefore, this option should only be used with care; it is primarily intended as a performance tuning aid during application development\&. This option is disabled by default\&. -.RE -.PP -"opt\&.junk" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR] -.RS 4 -Junk filling enabled/disabled\&. If enabled, each byte of uninitialized allocated memory will be initialized to -0xa5\&. All deallocated memory will be initialized to -0x5a\&. This is intended for debugging and will impact performance negatively\&. This option is disabled by default unless -\fB\-\-enable\-debug\fR -is specified during configuration, in which case it is enabled by default unless running inside -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2\&. 
-.RE -.PP -"opt\&.quarantine" (\fBsize_t\fR) r\- [\fB\-\-enable\-fill\fR] -.RS 4 -Per thread quarantine size in bytes\&. If non\-zero, each thread maintains a FIFO object quarantine that stores up to the specified number of bytes of memory\&. The quarantined memory is not freed until it is released from quarantine, though it is immediately junk\-filled if the -"opt\&.junk" -option is enabled\&. This feature is of particular use in combination with -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2, which can detect attempts to access quarantined objects\&. This is intended for debugging and will impact performance negatively\&. The default quarantine size is 0 unless running inside Valgrind, in which case the default is 16 MiB\&. -.RE -.PP -"opt\&.redzone" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR] -.RS 4 -Redzones enabled/disabled\&. If enabled, small allocations have redzones before and after them\&. Furthermore, if the -"opt\&.junk" -option is enabled, the redzones are checked for corruption during deallocation\&. However, the primary intended purpose of this feature is to be used in combination with -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2, which needs redzones in order to do effective buffer overflow/underflow detection\&. This option is intended for debugging and will impact performance negatively\&. This option is disabled by default unless running inside Valgrind\&. -.RE -.PP -"opt\&.zero" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR] -.RS 4 -Zero filling enabled/disabled\&. If enabled, each byte of uninitialized allocated memory will be initialized to 0\&. Note that this initialization only happens once for each byte, so -\fBrealloc\fR\fB\fR -and -\fBrallocm\fR\fB\fR -calls do not zero memory that was previously allocated\&. This is intended for debugging and will impact performance negatively\&. This option is disabled by default\&. 
-.RE -.PP -"opt\&.utrace" (\fBbool\fR) r\- [\fB\-\-enable\-utrace\fR] -.RS 4 -Allocation tracing based on -\fButrace\fR(2) -enabled/disabled\&. This option is disabled by default\&. -.RE -.PP -"opt\&.valgrind" (\fBbool\fR) r\- [\fB\-\-enable\-valgrind\fR] -.RS 4 -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2 -support enabled/disabled\&. This option is vestigal because jemalloc auto\-detects whether it is running inside Valgrind\&. This option is disabled by default, unless running inside Valgrind\&. -.RE -.PP -"opt\&.xmalloc" (\fBbool\fR) r\- [\fB\-\-enable\-xmalloc\fR] -.RS 4 -Abort\-on\-out\-of\-memory enabled/disabled\&. If enabled, rather than returning failure for any allocation function, display a diagnostic message on -\fBSTDERR_FILENO\fR -and cause the program to drop core (using -\fBabort\fR(3))\&. If an application is designed to depend on this behavior, set the option at compile time by including the following in the source code: -.sp -.if n \{\ -.RS 4 -.\} -.nf -malloc_conf = "xmalloc:true"; -.fi -.if n \{\ -.RE -.\} -.sp -This option is disabled by default\&. -.RE -.PP -"opt\&.tcache" (\fBbool\fR) r\- [\fB\-\-enable\-tcache\fR] -.RS 4 -Thread\-specific caching enabled/disabled\&. When there are multiple threads, each thread uses a thread\-specific cache for objects up to a certain size\&. Thread\-specific caching allows many allocations to be satisfied without performing any thread synchronization, at the cost of increased memory use\&. See the -"opt\&.lg_tcache_max" -option for related tuning information\&. This option is enabled by default unless running inside -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2\&. -.RE -.PP -"opt\&.lg_tcache_max" (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR] -.RS 4 -Maximum size class (log base 2) to cache in the thread\-specific cache\&. At a minimum, all small size classes are cached, and at a maximum all large size classes are cached\&. The default maximum is 32 KiB (2^15)\&. 
-.RE -.PP -"opt\&.prof" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] -.RS 4 -Memory profiling enabled/disabled\&. If enabled, profile memory allocation activity\&. See the -"opt\&.prof_active" -option for on\-the\-fly activation/deactivation\&. See the -"opt\&.lg_prof_sample" -option for probabilistic sampling control\&. See the -"opt\&.prof_accum" -option for control of cumulative sample reporting\&. See the -"opt\&.lg_prof_interval" -option for information on interval\-triggered profile dumping, the -"opt\&.prof_gdump" -option for information on high\-water\-triggered profile dumping, and the -"opt\&.prof_final" -option for final profile dumping\&. Profile output is compatible with the included -\fBpprof\fR -Perl script, which originates from the -\m[blue]\fBgperftools package\fR\m[]\&\s-2\u[3]\d\s+2\&. -.RE -.PP -"opt\&.prof_prefix" (\fBconst char *\fR) r\- [\fB\-\-enable\-prof\fR] -.RS 4 -Filename prefix for profile dumps\&. If the prefix is set to the empty string, no automatic dumps will occur; this is primarily useful for disabling the automatic final heap dump (which also disables leak reporting, if enabled)\&. The default prefix is -jeprof\&. -.RE -.PP -"opt\&.prof_active" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] -.RS 4 -Profiling activated/deactivated\&. This is a secondary control mechanism that makes it possible to start the application with profiling enabled (see the -"opt\&.prof" -option) but inactive, then toggle profiling at any time during program execution with the -"prof\&.active" -mallctl\&. This option is enabled by default\&. -.RE -.PP -"opt\&.lg_prof_sample" (\fBssize_t\fR) r\- [\fB\-\-enable\-prof\fR] -.RS 4 -Average interval (log base 2) between allocation samples, as measured in bytes of allocation activity\&. Increasing the sampling interval decreases profile fidelity, but also decreases the computational overhead\&. The default sample interval is 512 KiB (2^19 B)\&. 
-.RE -.PP -"opt\&.prof_accum" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] -.RS 4 -Reporting of cumulative object/byte counts in profile dumps enabled/disabled\&. If this option is enabled, every unique backtrace must be stored for the duration of execution\&. Depending on the application, this can impose a large memory overhead, and the cumulative counts are not always of interest\&. This option is disabled by default\&. -.RE -.PP -"opt\&.lg_prof_interval" (\fBssize_t\fR) r\- [\fB\-\-enable\-prof\fR] -.RS 4 -Average interval (log base 2) between memory profile dumps, as measured in bytes of allocation activity\&. The actual interval between dumps may be sporadic because decentralized allocation counters are used to avoid synchronization bottlenecks\&. Profiles are dumped to files named according to the pattern -\&.\&.\&.i\&.heap, where - -is controlled by the -"opt\&.prof_prefix" -option\&. By default, interval\-triggered profile dumping is disabled (encoded as \-1)\&. -.RE -.PP -"opt\&.prof_gdump" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] -.RS 4 -Trigger a memory profile dump every time the total virtual memory exceeds the previous maximum\&. Profiles are dumped to files named according to the pattern -\&.\&.\&.u\&.heap, where - -is controlled by the -"opt\&.prof_prefix" -option\&. This option is disabled by default\&. -.RE -.PP -"opt\&.prof_final" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] -.RS 4 -Use an -\fBatexit\fR(3) -function to dump final memory usage to a file named according to the pattern -\&.\&.\&.f\&.heap, where - -is controlled by the -"opt\&.prof_prefix" -option\&. This option is enabled by default\&. -.RE -.PP -"opt\&.prof_leak" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] -.RS 4 -Leak reporting enabled/disabled\&. If enabled, use an -\fBatexit\fR(3) -function to report memory leaks detected by allocation sampling\&. See the -"opt\&.prof" -option for information on analyzing heap profile output\&. This option is disabled by default\&. 
-.RE -.PP -"thread\&.arena" (\fBunsigned\fR) rw -.RS 4 -Get or set the arena associated with the calling thread\&. If the specified arena was not initialized beforehand (see the -"arenas\&.initialized" -mallctl), it will be automatically initialized as a side effect of calling this interface\&. -.RE -.PP -"thread\&.allocated" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Get the total number of bytes ever allocated by the calling thread\&. This counter has the potential to wrap around; it is up to the application to appropriately interpret the counter in such cases\&. -.RE -.PP -"thread\&.allocatedp" (\fBuint64_t *\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Get a pointer to the the value that is returned by the -"thread\&.allocated" -mallctl\&. This is useful for avoiding the overhead of repeated -\fBmallctl*\fR\fB\fR -calls\&. -.RE -.PP -"thread\&.deallocated" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Get the total number of bytes ever deallocated by the calling thread\&. This counter has the potential to wrap around; it is up to the application to appropriately interpret the counter in such cases\&. -.RE -.PP -"thread\&.deallocatedp" (\fBuint64_t *\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Get a pointer to the the value that is returned by the -"thread\&.deallocated" -mallctl\&. This is useful for avoiding the overhead of repeated -\fBmallctl*\fR\fB\fR -calls\&. -.RE -.PP -"thread\&.tcache\&.enabled" (\fBbool\fR) rw [\fB\-\-enable\-tcache\fR] -.RS 4 -Enable/disable calling thread\*(Aqs tcache\&. The tcache is implicitly flushed as a side effect of becoming disabled (see -"thread\&.tcache\&.flush")\&. -.RE -.PP -"thread\&.tcache\&.flush" (\fBvoid\fR) \-\- [\fB\-\-enable\-tcache\fR] -.RS 4 -Flush calling thread\*(Aqs tcache\&. This interface releases all cached objects and internal data structures associated with the calling thread\*(Aqs thread\-specific cache\&. 
Ordinarily, this interface need not be called, since automatic periodic incremental garbage collection occurs, and the thread cache is automatically discarded when a thread exits\&. However, garbage collection is triggered by allocation activity, so it is possible for a thread that stops allocating/deallocating to retain its cache indefinitely, in which case the developer may find manual flushing useful\&. -.RE -.PP -"arena\&.\&.purge" (\fBunsigned\fR) \-\- -.RS 4 -Purge unused dirty pages for arena , or for all arenas if equals -"arenas\&.narenas"\&. -.RE -.PP -"arena\&.\&.dss" (\fBconst char *\fR) rw -.RS 4 -Set the precedence of dss allocation as related to mmap allocation for arena , or for all arenas if equals -"arenas\&.narenas"\&. See -"opt\&.dss" -for supported settings\&. -.RE -.PP -"arenas\&.narenas" (\fBunsigned\fR) r\- -.RS 4 -Current limit on number of arenas\&. -.RE -.PP -"arenas\&.initialized" (\fBbool *\fR) r\- -.RS 4 -An array of -"arenas\&.narenas" -booleans\&. Each boolean indicates whether the corresponding arena is initialized\&. -.RE -.PP -"arenas\&.quantum" (\fBsize_t\fR) r\- -.RS 4 -Quantum size\&. -.RE -.PP -"arenas\&.page" (\fBsize_t\fR) r\- -.RS 4 -Page size\&. -.RE -.PP -"arenas\&.tcache_max" (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR] -.RS 4 -Maximum thread\-cached size class\&. -.RE -.PP -"arenas\&.nbins" (\fBunsigned\fR) r\- -.RS 4 -Number of bin size classes\&. -.RE -.PP -"arenas\&.nhbins" (\fBunsigned\fR) r\- [\fB\-\-enable\-tcache\fR] -.RS 4 -Total number of thread cache bin size classes\&. -.RE -.PP -"arenas\&.bin\&.\&.size" (\fBsize_t\fR) r\- -.RS 4 -Maximum size supported by size class\&. -.RE -.PP -"arenas\&.bin\&.\&.nregs" (\fBuint32_t\fR) r\- -.RS 4 -Number of regions per page run\&. -.RE -.PP -"arenas\&.bin\&.\&.run_size" (\fBsize_t\fR) r\- -.RS 4 -Number of bytes per page run\&. -.RE -.PP -"arenas\&.nlruns" (\fBsize_t\fR) r\- -.RS 4 -Total number of large size classes\&. 
-.RE -.PP -"arenas\&.lrun\&.\&.size" (\fBsize_t\fR) r\- -.RS 4 -Maximum size supported by this large size class\&. -.RE -.PP -"arenas\&.purge" (\fBunsigned\fR) \-w -.RS 4 -Purge unused dirty pages for the specified arena, or for all arenas if none is specified\&. -.RE -.PP -"arenas\&.extend" (\fBunsigned\fR) r\- -.RS 4 -Extend the array of arenas by appending a new arena, and returning the new arena index\&. -.RE -.PP -"prof\&.active" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] -.RS 4 -Control whether sampling is currently active\&. See the -"opt\&.prof_active" -option for additional information\&. -.RE -.PP -"prof\&.dump" (\fBconst char *\fR) \-w [\fB\-\-enable\-prof\fR] -.RS 4 -Dump a memory profile to the specified file, or if NULL is specified, to a file according to the pattern -\&.\&.\&.m\&.heap, where - -is controlled by the -"opt\&.prof_prefix" -option\&. -.RE -.PP -"prof\&.interval" (\fBuint64_t\fR) r\- [\fB\-\-enable\-prof\fR] -.RS 4 -Average number of bytes allocated between inverval\-based profile dumps\&. See the -"opt\&.lg_prof_interval" -option for additional information\&. -.RE -.PP -"stats\&.cactive" (\fBsize_t *\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Pointer to a counter that contains an approximate count of the current number of bytes in active pages\&. The estimate may be high, but never low, because each arena rounds up to the nearest multiple of the chunk size when computing its contribution to the counter\&. Note that the -"epoch" -mallctl has no bearing on this counter\&. Furthermore, counter consistency is maintained via atomic operations, so it is necessary to use an atomic operation in order to guarantee a consistent read when dereferencing the pointer\&. -.RE -.PP -"stats\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Total number of bytes allocated by the application\&. -.RE -.PP -"stats\&.active" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Total number of bytes in active pages allocated by the application\&. 
This is a multiple of the page size, and greater than or equal to -"stats\&.allocated"\&. This does not include -"stats\&.arenas\&.\&.pdirty" -and pages entirely devoted to allocator metadata\&. -.RE -.PP -"stats\&.mapped" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Total number of bytes in chunks mapped on behalf of the application\&. This is a multiple of the chunk size, and is at least as large as -"stats\&.active"\&. This does not include inactive chunks\&. -.RE -.PP -"stats\&.chunks\&.current" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Total number of chunks actively mapped on behalf of the application\&. This does not include inactive chunks\&. -.RE -.PP -"stats\&.chunks\&.total" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Cumulative number of chunks allocated\&. -.RE -.PP -"stats\&.chunks\&.high" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Maximum number of active chunks at any time thus far\&. -.RE -.PP -"stats\&.huge\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Number of bytes currently allocated by huge objects\&. -.RE -.PP -"stats\&.huge\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Cumulative number of huge allocation requests\&. -.RE -.PP -"stats\&.huge\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Cumulative number of huge deallocation requests\&. -.RE -.PP -"stats\&.arenas\&.\&.dss" (\fBconst char *\fR) r\- -.RS 4 -dss (\fBsbrk\fR(2)) allocation precedence as related to -\fBmmap\fR(2) -allocation\&. See -"opt\&.dss" -for details\&. -.RE -.PP -"stats\&.arenas\&.\&.nthreads" (\fBunsigned\fR) r\- -.RS 4 -Number of threads currently assigned to arena\&. -.RE -.PP -"stats\&.arenas\&.\&.pactive" (\fBsize_t\fR) r\- -.RS 4 -Number of pages in active runs\&. 
-.RE -.PP -"stats\&.arenas\&.\&.pdirty" (\fBsize_t\fR) r\- -.RS 4 -Number of pages within unused runs that are potentially dirty, and for which -\fBmadvise\fR\fB\fI\&.\&.\&.\fR\fR\fB \fR\fB\fI\fBMADV_DONTNEED\fR\fR\fR -or similar has not been called\&. -.RE -.PP -"stats\&.arenas\&.\&.mapped" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Number of mapped bytes\&. -.RE -.PP -"stats\&.arenas\&.\&.npurge" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Number of dirty page purge sweeps performed\&. -.RE -.PP -"stats\&.arenas\&.\&.nmadvise" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Number of -\fBmadvise\fR\fB\fI\&.\&.\&.\fR\fR\fB \fR\fB\fI\fBMADV_DONTNEED\fR\fR\fR -or similar calls made to purge dirty pages\&. -.RE -.PP -"stats\&.arenas\&.\&.npurged" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Number of pages purged\&. -.RE -.PP -"stats\&.arenas\&.\&.small\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Number of bytes currently allocated by small objects\&. -.RE -.PP -"stats\&.arenas\&.\&.small\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Cumulative number of allocation requests served by small bins\&. -.RE -.PP -"stats\&.arenas\&.\&.small\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Cumulative number of small objects returned to bins\&. -.RE -.PP -"stats\&.arenas\&.\&.small\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Cumulative number of small allocation requests\&. -.RE -.PP -"stats\&.arenas\&.\&.large\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Number of bytes currently allocated by large objects\&. -.RE -.PP -"stats\&.arenas\&.\&.large\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Cumulative number of large allocation requests served directly by the arena\&. 
-.RE -.PP -"stats\&.arenas\&.\&.large\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Cumulative number of large deallocation requests served directly by the arena\&. -.RE -.PP -"stats\&.arenas\&.\&.large\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Cumulative number of large allocation requests\&. -.RE -.PP -"stats\&.arenas\&.\&.bins\&.\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Current number of bytes allocated by bin\&. -.RE -.PP -"stats\&.arenas\&.\&.bins\&.\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Cumulative number of allocations served by bin\&. -.RE -.PP -"stats\&.arenas\&.\&.bins\&.\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Cumulative number of allocations returned to bin\&. -.RE -.PP -"stats\&.arenas\&.\&.bins\&.\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Cumulative number of allocation requests\&. -.RE -.PP -"stats\&.arenas\&.\&.bins\&.\&.nfills" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR \fB\-\-enable\-tcache\fR] -.RS 4 -Cumulative number of tcache fills\&. -.RE -.PP -"stats\&.arenas\&.\&.bins\&.\&.nflushes" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR \fB\-\-enable\-tcache\fR] -.RS 4 -Cumulative number of tcache flushes\&. -.RE -.PP -"stats\&.arenas\&.\&.bins\&.\&.nruns" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Cumulative number of runs created\&. -.RE -.PP -"stats\&.arenas\&.\&.bins\&.\&.nreruns" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Cumulative number of times the current run from which to allocate changed\&. -.RE -.PP -"stats\&.arenas\&.\&.bins\&.\&.curruns" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Current number of runs\&. -.RE -.PP -"stats\&.arenas\&.\&.lruns\&.\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Cumulative number of allocation requests for this size class served directly by the arena\&. 
-.RE -.PP -"stats\&.arenas\&.\&.lruns\&.\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Cumulative number of deallocation requests for this size class served directly by the arena\&. -.RE -.PP -"stats\&.arenas\&.\&.lruns\&.\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Cumulative number of allocation requests for this size class\&. -.RE -.PP -"stats\&.arenas\&.\&.lruns\&.\&.curruns" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] -.RS 4 -Current number of runs for this size class\&. -.RE -.SH "DEBUGGING MALLOC PROBLEMS" -.PP -When debugging, it is a good idea to configure/build jemalloc with the -\fB\-\-enable\-debug\fR -and -\fB\-\-enable\-fill\fR -options, and recompile the program with suitable options and symbols for debugger support\&. When so configured, jemalloc incorporates a wide variety of run\-time assertions that catch application errors such as double\-free, write\-after\-free, etc\&. -.PP -Programs often accidentally depend on \(lquninitialized\(rq memory actually being filled with zero bytes\&. Junk filling (see the -"opt\&.junk" -option) tends to expose such bugs in the form of obviously incorrect results and/or coredumps\&. Conversely, zero filling (see the -"opt\&.zero" -option) eliminates the symptoms of such bugs\&. Between these two options, it is usually possible to quickly detect, diagnose, and eliminate such bugs\&. -.PP -This implementation does not provide much detail about the problems it detects, because the performance impact for storing such information would be prohibitive\&. However, jemalloc does integrate with the most excellent -\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2 -tool if the -\fB\-\-enable\-valgrind\fR -configuration option is enabled\&. -.SH "DIAGNOSTIC MESSAGES" -.PP -If any of the memory allocation/deallocation functions detect an error or warning condition, a message will be printed to file descriptor -\fBSTDERR_FILENO\fR\&. Errors will result in the process dumping core\&. 
If the -"opt\&.abort" -option is set, most warnings are treated as errors\&. -.PP -The -\fImalloc_message\fR -variable allows the programmer to override the function which emits the text strings forming the errors and warnings if for some reason the -\fBSTDERR_FILENO\fR -file descriptor is not suitable for this\&. -\fBmalloc_message\fR\fB\fR -takes the -\fIcbopaque\fR -pointer argument that is -\fBNULL\fR -unless overridden by the arguments in a call to -\fBmalloc_stats_print\fR\fB\fR, followed by a string pointer\&. Please note that doing anything which tries to allocate memory in this function is likely to result in a crash or deadlock\&. -.PP -All messages are prefixed by \(lq:\(rq\&. -.SH "RETURN VALUES" -.SS "Standard API" -.PP -The -\fBmalloc\fR\fB\fR -and -\fBcalloc\fR\fB\fR -functions return a pointer to the allocated memory if successful; otherwise a -\fBNULL\fR -pointer is returned and -\fIerrno\fR -is set to -ENOMEM\&. -.PP -The -\fBposix_memalign\fR\fB\fR -function returns the value 0 if successful; otherwise it returns an error value\&. The -\fBposix_memalign\fR\fB\fR -function will fail if: -.PP -EINVAL -.RS 4 -The -\fIalignment\fR -parameter is not a power of 2 at least as large as -sizeof(\fBvoid *\fR)\&. -.RE -.PP -ENOMEM -.RS 4 -Memory allocation error\&. -.RE -.PP -The -\fBaligned_alloc\fR\fB\fR -function returns a pointer to the allocated memory if successful; otherwise a -\fBNULL\fR -pointer is returned and -\fIerrno\fR -is set\&. The -\fBaligned_alloc\fR\fB\fR -function will fail if: -.PP -EINVAL -.RS 4 -The -\fIalignment\fR -parameter is not a power of 2\&. -.RE -.PP -ENOMEM -.RS 4 -Memory allocation error\&. -.RE -.PP -The -\fBrealloc\fR\fB\fR -function returns a pointer, possibly identical to -\fIptr\fR, to the allocated memory if successful; otherwise a -\fBNULL\fR -pointer is returned, and -\fIerrno\fR -is set to -ENOMEM -if the error was the result of an allocation failure\&. 
The -\fBrealloc\fR\fB\fR -function always leaves the original buffer intact when an error occurs\&. -.PP -The -\fBfree\fR\fB\fR -function returns no value\&. -.SS "Non\-standard API" -.PP -The -\fBmalloc_usable_size\fR\fB\fR -function returns the usable size of the allocation pointed to by -\fIptr\fR\&. -.PP -The -\fBmallctl\fR\fB\fR, -\fBmallctlnametomib\fR\fB\fR, and -\fBmallctlbymib\fR\fB\fR -functions return 0 on success; otherwise they return an error value\&. The functions will fail if: -.PP -EINVAL -.RS 4 -\fInewp\fR -is not -\fBNULL\fR, and -\fInewlen\fR -is too large or too small\&. Alternatively, -\fI*oldlenp\fR -is too large or too small; in this case as much data as possible are read despite the error\&. -.RE -.PP -ENOMEM -.RS 4 -\fI*oldlenp\fR -is too short to hold the requested value\&. -.RE -.PP -ENOENT -.RS 4 -\fIname\fR -or -\fImib\fR -specifies an unknown/invalid value\&. -.RE -.PP -EPERM -.RS 4 -Attempt to read or write void value, or attempt to write read\-only value\&. -.RE -.PP -EAGAIN -.RS 4 -A memory allocation failure occurred\&. -.RE -.PP -EFAULT -.RS 4 -An interface with side effects failed in some way not directly related to -\fBmallctl*\fR\fB\fR -read/write processing\&. -.RE -.SS "Experimental API" -.PP -The -\fBallocm\fR\fB\fR, -\fBrallocm\fR\fB\fR, -\fBsallocm\fR\fB\fR, -\fBdallocm\fR\fB\fR, and -\fBnallocm\fR\fB\fR -functions return -\fBALLOCM_SUCCESS\fR -on success; otherwise they return an error value\&. The -\fBallocm\fR\fB\fR, -\fBrallocm\fR\fB\fR, and -\fBnallocm\fR\fB\fR -functions will fail if: -.PP -ALLOCM_ERR_OOM -.RS 4 -Out of memory\&. Insufficient contiguous memory was available to service the allocation request\&. The -\fBallocm\fR\fB\fR -function additionally sets -\fI*ptr\fR -to -\fBNULL\fR, whereas the -\fBrallocm\fR\fB\fR -function leaves -\fB*ptr\fR -unmodified\&. 
-.RE -The -\fBrallocm\fR\fB\fR -function will also fail if: -.PP -ALLOCM_ERR_NOT_MOVED -.RS 4 -\fBALLOCM_NO_MOVE\fR -was specified, but the reallocation request could not be serviced without moving the object\&. -.RE -.SH "ENVIRONMENT" -.PP -The following environment variable affects the execution of the allocation functions: -.PP -\fBMALLOC_CONF\fR -.RS 4 -If the environment variable -\fBMALLOC_CONF\fR -is set, the characters it contains will be interpreted as options\&. -.RE -.SH "EXAMPLES" -.PP -To dump core whenever a problem occurs: -.sp -.if n \{\ -.RS 4 -.\} -.nf -ln \-s \*(Aqabort:true\*(Aq /etc/malloc\&.conf -.fi -.if n \{\ -.RE -.\} -.PP -To specify in the source a chunk size that is 16 MiB: -.sp -.if n \{\ -.RS 4 -.\} -.nf -malloc_conf = "lg_chunk:24"; -.fi -.if n \{\ -.RE -.\} -.SH "SEE ALSO" -.PP -\fBmadvise\fR(2), -\fBmmap\fR(2), -\fBsbrk\fR(2), -\fButrace\fR(2), -\fBalloca\fR(3), -\fBatexit\fR(3), -\fBgetpagesize\fR(3) -.SH "STANDARDS" -.PP -The -\fBmalloc\fR\fB\fR, -\fBcalloc\fR\fB\fR, -\fBrealloc\fR\fB\fR, and -\fBfree\fR\fB\fR -functions conform to ISO/IEC 9899:1990 (\(lqISO C90\(rq)\&. -.PP -The -\fBposix_memalign\fR\fB\fR -function conforms to IEEE Std 1003\&.1\-2001 (\(lqPOSIX\&.1\(rq)\&. -.SH "AUTHOR" -.PP -\fBJason Evans\fR -.RS 4 -.RE -.SH "NOTES" -.IP " 1." 4 -jemalloc website -.RS 4 -\%http://www.canonware.com/jemalloc/ -.RE -.IP " 2." 4 -Valgrind -.RS 4 -\%http://valgrind.org/ -.RE -.IP " 3." 4 -gperftools package -.RS 4 -\%http://code.google.com/p/gperftools/ -.RE diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/doc/jemalloc.html mariadb-5.5-5.5.40/extra/jemalloc/doc/jemalloc.html --- mariadb-5.5-5.5.39/extra/jemalloc/doc/jemalloc.html 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/doc/jemalloc.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,1417 +0,0 @@ -JEMALLOC

Name

jemalloc — general purpose memory allocation functions

LIBRARY

This manual describes jemalloc 3.3.1-0-g9ef9d9e8c271cdf14f664b871a8f98c827714784. More information - can be found at the jemalloc website.

SYNOPSIS

#include <stdlib.h>
-#include <jemalloc/jemalloc.h>

Standard API

void *malloc(size_t size);
 
void *calloc(size_t number,
 size_t size);
 
int posix_memalign(void **ptr,
 size_t alignment,
 size_t size);
 
void *aligned_alloc(size_t alignment,
 size_t size);
 
void *realloc(void *ptr,
 size_t size);
 
void free(void *ptr);
 

Non-standard API

size_t malloc_usable_size(const void *ptr);
 
void malloc_stats_print(void (*write_cb) - (void *, const char *) - ,
 void *cbopaque,
 const char *opts);
 
int mallctl(const char *name,
 void *oldp,
 size_t *oldlenp,
 void *newp,
 size_t newlen);
 
int mallctlnametomib(const char *name,
 size_t *mibp,
 size_t *miblenp);
 
int mallctlbymib(const size_t *mib,
 size_t miblen,
 void *oldp,
 size_t *oldlenp,
 void *newp,
 size_t newlen);
 
void (*malloc_message)(void *cbopaque,
 const char *s);
 

const char *malloc_conf;

Experimental API

int allocm(void **ptr,
 size_t *rsize,
 size_t size,
 int flags);
 
int rallocm(void **ptr,
 size_t *rsize,
 size_t size,
 size_t extra,
 int flags);
 
int sallocm(const void *ptr,
 size_t *rsize,
 int flags);
 
int dallocm(void *ptr,
 int flags);
 
int nallocm(size_t *rsize,
 size_t size,
 int flags);
 

DESCRIPTION

Standard API

The malloc() function allocates - size bytes of uninitialized memory. The allocated - space is suitably aligned (after possible pointer coercion) for storage - of any type of object.

The calloc() function allocates - space for number objects, each - size bytes in length. The result is identical to - calling malloc() with an argument of - number * size, with the - exception that the allocated memory is explicitly initialized to zero - bytes.

The posix_memalign() function - allocates size bytes of memory such that the - allocation's base address is an even multiple of - alignment, and returns the allocation in the value - pointed to by ptr. The requested - alignment must be a power of 2 at least as large - as sizeof(void *).

The aligned_alloc() function - allocates size bytes of memory such that the - allocation's base address is an even multiple of - alignment. The requested - alignment must be a power of 2. Behavior is - undefined if size is not an integral multiple of - alignment.

The realloc() function changes the - size of the previously allocated memory referenced by - ptr to size bytes. The - contents of the memory are unchanged up to the lesser of the new and old - sizes. If the new size is larger, the contents of the newly allocated - portion of the memory are undefined. Upon success, the memory referenced - by ptr is freed and a pointer to the newly - allocated memory is returned. Note that - realloc() may move the memory allocation, - resulting in a different return value than ptr. - If ptr is NULL, the - realloc() function behaves identically to - malloc() for the specified size.

The free() function causes the - allocated memory referenced by ptr to be made - available for future allocations. If ptr is - NULL, no action occurs.

Non-standard API

The malloc_usable_size() function - returns the usable size of the allocation pointed to by - ptr. The return value may be larger than the size - that was requested during allocation. The - malloc_usable_size() function is not a - mechanism for in-place realloc(); rather - it is provided solely as a tool for introspection purposes. Any - discrepancy between the requested allocation size and the size reported - by malloc_usable_size() should not be - depended on, since such behavior is entirely implementation-dependent. -

The malloc_stats_print() function - writes human-readable summary statistics via the - write_cb callback function pointer and - cbopaque data passed to - write_cb, or - malloc_message() if - write_cb is NULL. This - function can be called repeatedly. General information that never - changes during execution can be omitted by specifying "g" as a character - within the opts string. Note that - malloc_message() uses the - mallctl*() functions internally, so - inconsistent statistics can be reported if multiple threads use these - functions simultaneously. If --enable-stats is - specified during configuration, “m” and “a” can - be specified to omit merged arena and per arena statistics, respectively; - “b” and “l” can be specified to omit per size - class statistics for bins and large objects, respectively. Unrecognized - characters are silently ignored. Note that thread caching may prevent - some statistics from being completely up to date, since extra locking - would be required to merge counters that track thread cache operations. -

The mallctl() function provides a - general interface for introspecting the memory allocator, as well as - setting modifiable parameters and triggering actions. The - period-separated name argument specifies a - location in a tree-structured namespace; see the MALLCTL NAMESPACE section for - documentation on the tree contents. To read a value, pass a pointer via - oldp to adequate space to contain the value, and a - pointer to its length via oldlenp; otherwise pass - NULL and NULL. Similarly, to - write a value, pass a pointer to the value via - newp, and its length via - newlen; otherwise pass NULL - and 0.

The mallctlnametomib() function - provides a way to avoid repeated name lookups for applications that - repeatedly query the same portion of the namespace, by translating a name - to a “Management Information Base” (MIB) that can be passed - repeatedly to mallctlbymib(). Upon - successful return from mallctlnametomib(), - mibp contains an array of - *miblenp integers, where - *miblenp is the lesser of the number of components - in name and the input value of - *miblenp. Thus it is possible to pass a - *miblenp that is smaller than the number of - period-separated name components, which results in a partial MIB that can - be used as the basis for constructing a complete MIB. For name - components that are integers (e.g. the 2 in - - "arenas.bin.2.size" - ), - the corresponding MIB component will always be that integer. Therefore, - it is legitimate to construct code like the following:

-unsigned nbins, i;
-
-int mib[4];
-size_t len, miblen;
-
-len = sizeof(nbins);
-mallctl("arenas.nbins", &nbins, &len, NULL, 0);
-
-miblen = 4;
-mallnametomib("arenas.bin.0.size", mib, &miblen);
-for (i = 0; i < nbins; i++) {
-	size_t bin_size;
-
-	mib[2] = i;
-	len = sizeof(bin_size);
-	mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0);
-	/* Do something with bin_size... */
-}

Experimental API

The experimental API is subject to change or removal without regard - for backward compatibility. If --disable-experimental - is specified during configuration, the experimental API is - omitted.

The allocm(), - rallocm(), - sallocm(), - dallocm(), and - nallocm() functions all have a - flags argument that can be used to specify - options. The functions only check the options that are contextually - relevant. Use bitwise or (|) operations to - specify one or more of the following: -

ALLOCM_LG_ALIGN(la) -

Align the memory allocation to start at an address - that is a multiple of (1 << - la). This macro does not validate - that la is within the valid - range.

ALLOCM_ALIGN(a) -

Align the memory allocation to start at an address - that is a multiple of a, where - a is a power of two. This macro does not - validate that a is a power of 2. -

ALLOCM_ZERO

Initialize newly allocated memory to contain zero - bytes. In the growing reallocation case, the real size prior to - reallocation defines the boundary between untouched bytes and those - that are initialized to contain zero bytes. If this option is - absent, newly allocated memory is uninitialized.

ALLOCM_NO_MOVE

For reallocation, fail rather than moving the - object. This constraint can apply to both growth and - shrinkage.

ALLOCM_ARENA(a) -

Use the arena specified by the index - a. This macro does not validate that - a specifies an arena in the valid - range.

-

The allocm() function allocates at - least size bytes of memory, sets - *ptr to the base address of the allocation, and - sets *rsize to the real size of the allocation if - rsize is not NULL. Behavior - is undefined if size is - 0.

The rallocm() function resizes the - allocation at *ptr to be at least - size bytes, sets *ptr to - the base address of the allocation if it moved, and sets - *rsize to the real size of the allocation if - rsize is not NULL. If - extra is non-zero, an attempt is made to resize - the allocation to be at least size + - extra) bytes, though inability to allocate - the extra byte(s) will not by itself result in failure. Behavior is - undefined if size is 0, or if - (size + - extra > - SIZE_T_MAX).

The sallocm() function sets - *rsize to the real size of the allocation.

The dallocm() function causes the - memory referenced by ptr to be made available for - future allocations.

The nallocm() function allocates no - memory, but it performs the same size computation as the - allocm() function, and if - rsize is not NULL it sets - *rsize to the real size of the allocation that - would result from the equivalent allocm() - function call. Behavior is undefined if - size is 0.

TUNING

Once, when the first call is made to one of the memory allocation - routines, the allocator initializes its internals based in part on various - options that can be specified at compile- or run-time.

The string pointed to by the global variable - malloc_conf, the “name” of the file - referenced by the symbolic link named /etc/malloc.conf, and the value of the - environment variable MALLOC_CONF, will be interpreted, in - that order, from left to right as options.

An options string is a comma-separated list of option:value pairs. - There is one key corresponding to each - "opt.*" - mallctl (see the MALLCTL NAMESPACE section for options - documentation). For example, abort:true,narenas:1 sets - the - "opt.abort" - and - "opt.narenas" - options. Some - options have boolean values (true/false), others have integer values (base - 8, 10, or 16, depending on prefix), and yet others have raw string - values.

IMPLEMENTATION NOTES

Traditionally, allocators have used - sbrk(2) to obtain memory, which is - suboptimal for several reasons, including race conditions, increased - fragmentation, and artificial limitations on maximum usable memory. If - --enable-dss is specified during configuration, this - allocator uses both mmap(2) and - sbrk(2), in that order of preference; - otherwise only mmap(2) is used.

This allocator uses multiple arenas in order to reduce lock - contention for threaded programs on multi-processor systems. This works - well with regard to threading scalability, but incurs some costs. There is - a small fixed per-arena overhead, and additionally, arenas manage memory - completely independently of each other, which means a small fixed increase - in overall memory fragmentation. These overheads are not generally an - issue, given the number of arenas normally used. Note that using - substantially more arenas than the default is not likely to improve - performance, mainly due to reduced cache performance. However, it may make - sense to reduce the number of arenas if an application does not make much - use of the allocation functions.

In addition to multiple arenas, unless - --disable-tcache is specified during configuration, this - allocator supports thread-specific caching for small and large objects, in - order to make it possible to completely avoid synchronization for most - allocation requests. Such caching allows very fast allocation in the - common case, but it increases memory usage and fragmentation, since a - bounded number of objects can remain allocated in each thread cache.

Memory is conceptually broken into equal-sized chunks, where the - chunk size is a power of two that is greater than the page size. Chunks - are always aligned to multiples of the chunk size. This alignment makes it - possible to find metadata for user objects very quickly.

User objects are broken into three categories according to size: - small, large, and huge. Small objects are smaller than one page. Large - objects are smaller than the chunk size. Huge objects are a multiple of - the chunk size. Small and large objects are managed by arenas; huge - objects are managed separately in a single data structure that is shared by - all threads. Huge objects are used by applications infrequently enough - that this single data structure is not a scalability issue.

Each chunk that is managed by an arena tracks its contents as runs of - contiguous pages (unused, backing a set of small objects, or backing one - large object). The combination of chunk alignment and chunk page maps - makes it possible to determine all metadata regarding small and large - allocations in constant time.

Small objects are managed in groups by page runs. Each run maintains - a frontier and free list to track which regions are in use. Allocation - requests that are no more than half the quantum (8 or 16, depending on - architecture) are rounded up to the nearest power of two that is at least - sizeof(double). All other small - object size classes are multiples of the quantum, spaced such that internal - fragmentation is limited to approximately 25% for all but the smallest size - classes. Allocation requests that are larger than the maximum small size - class, but small enough to fit in an arena-managed chunk (see the - "opt.lg_chunk" - option), are - rounded up to the nearest run size. Allocation requests that are too large - to fit in an arena-managed chunk are rounded up to the nearest multiple of - the chunk size.

Allocations are packed tightly together, which can be an issue for - multi-threaded applications. If you need to assure that allocations do not - suffer from cacheline sharing, round your allocation requests up to the - nearest multiple of the cacheline size, or specify cacheline alignment when - allocating.

Assuming 4 MiB chunks, 4 KiB pages, and a 16-byte quantum on a 64-bit - system, the size classes in each category are as shown in Table 1.

Table 1. Size classes

CategorySpacingSize
Smalllg[8]
16[16, 32, 48, ..., 128]
32[160, 192, 224, 256]
64[320, 384, 448, 512]
128[640, 768, 896, 1024]
256[1280, 1536, 1792, 2048]
512[2560, 3072, 3584]
Large4 KiB[4 KiB, 8 KiB, 12 KiB, ..., 4072 KiB]
Huge4 MiB[4 MiB, 8 MiB, 12 MiB, ...]

MALLCTL NAMESPACE

The following names are defined in the namespace accessible via the - mallctl*() functions. Value types are - specified in parentheses, their readable/writable statuses are encoded as - rw, r-, -w, or - --, and required build configuration flags follow, if - any. A name element encoded as <i> or - <j> indicates an integer component, where the - integer varies from 0 to some upper value that must be determined via - introspection. In the case of - "stats.arenas.<i>.*" - , - <i> equal to - "arenas.narenas" - can be - used to access the summation of statistics from all arenas. Take special - note of the - "epoch" - mallctl, - which controls refreshing of cached dynamic statistics.

- - "version" - - (const char *) - r- -

Return the jemalloc version string.

- - "epoch" - - (uint64_t) - rw -

If a value is passed in, refresh the data from which - the mallctl*() functions report values, - and increment the epoch. Return the current epoch. This is useful for - detecting whether another thread caused a refresh.

- - "config.debug" - - (bool) - r- -

--enable-debug was specified during - build configuration.

- - "config.dss" - - (bool) - r- -

--enable-dss was specified during - build configuration.

- - "config.fill" - - (bool) - r- -

--enable-fill was specified during - build configuration.

- - "config.lazy_lock" - - (bool) - r- -

--enable-lazy-lock was specified - during build configuration.

- - "config.mremap" - - (bool) - r- -

--enable-mremap was specified during - build configuration.

- - "config.munmap" - - (bool) - r- -

--enable-munmap was specified during - build configuration.

- - "config.prof" - - (bool) - r- -

--enable-prof was specified during - build configuration.

- - "config.prof_libgcc" - - (bool) - r- -

--disable-prof-libgcc was not - specified during build configuration.

- - "config.prof_libunwind" - - (bool) - r- -

--enable-prof-libunwind was specified - during build configuration.

- - "config.stats" - - (bool) - r- -

--enable-stats was specified during - build configuration.

- - "config.tcache" - - (bool) - r- -

--disable-tcache was not specified - during build configuration.

- - "config.tls" - - (bool) - r- -

--disable-tls was not specified during - build configuration.

- - "config.utrace" - - (bool) - r- -

--enable-utrace was specified during - build configuration.

- - "config.valgrind" - - (bool) - r- -

--enable-valgrind was specified during - build configuration.

- - "config.xmalloc" - - (bool) - r- -

--enable-xmalloc was specified during - build configuration.

- - "opt.abort" - - (bool) - r- -

Abort-on-warning enabled/disabled. If true, most - warnings are fatal. The process will call - abort(3) in these cases. This option is - disabled by default unless --enable-debug is - specified during configuration, in which case it is enabled by default. -

- - "opt.lg_chunk" - - (size_t) - r- -

Virtual memory chunk size (log base 2). If a chunk - size outside the supported size range is specified, the size is - silently clipped to the minimum/maximum supported size. The default - chunk size is 4 MiB (2^22). -

- - "opt.dss" - - (const char *) - r- -

dss (sbrk(2)) allocation precedence as - related to mmap(2) allocation. The following - settings are supported: “disabled”, “primary”, - and “secondary” (default).

- - "opt.narenas" - - (size_t) - r- -

Maximum number of arenas to use for automatic - multiplexing of threads and arenas. The default is four times the - number of CPUs, or one if there is a single CPU.

- - "opt.lg_dirty_mult" - - (ssize_t) - r- -

Per-arena minimum ratio (log base 2) of active to dirty - pages. Some dirty unused pages may be allowed to accumulate, within - the limit set by the ratio (or one chunk worth of dirty pages, - whichever is greater), before informing the kernel about some of those - pages via madvise(2) or a similar system call. This - provides the kernel with sufficient information to recycle dirty pages - if physical memory becomes scarce and the pages remain unused. The - default minimum ratio is 8:1 (2^3:1); an option value of -1 will - disable dirty page purging.

- - "opt.stats_print" - - (bool) - r- -

Enable/disable statistics printing at exit. If - enabled, the malloc_stats_print() - function is called at program exit via an - atexit(3) function. If - --enable-stats is specified during configuration, this - has the potential to cause deadlock for a multi-threaded process that - exits while one or more threads are executing in the memory allocation - functions. Therefore, this option should only be used with care; it is - primarily intended as a performance tuning aid during application - development. This option is disabled by default.

- - "opt.junk" - - (bool) - r- - [--enable-fill] -

Junk filling enabled/disabled. If enabled, each byte - of uninitialized allocated memory will be initialized to - 0xa5. All deallocated memory will be initialized to - 0x5a. This is intended for debugging and will - impact performance negatively. This option is disabled by default - unless --enable-debug is specified during - configuration, in which case it is enabled by default unless running - inside Valgrind.

- - "opt.quarantine" - - (size_t) - r- - [--enable-fill] -

Per thread quarantine size in bytes. If non-zero, each - thread maintains a FIFO object quarantine that stores up to the - specified number of bytes of memory. The quarantined memory is not - freed until it is released from quarantine, though it is immediately - junk-filled if the - "opt.junk" - option is - enabled. This feature is of particular use in combination with Valgrind, which can detect attempts - to access quarantined objects. This is intended for debugging and will - impact performance negatively. The default quarantine size is 0 unless - running inside Valgrind, in which case the default is 16 - MiB.

- - "opt.redzone" - - (bool) - r- - [--enable-fill] -

Redzones enabled/disabled. If enabled, small - allocations have redzones before and after them. Furthermore, if the - - "opt.junk" - option is - enabled, the redzones are checked for corruption during deallocation. - However, the primary intended purpose of this feature is to be used in - combination with Valgrind, - which needs redzones in order to do effective buffer overflow/underflow - detection. This option is intended for debugging and will impact - performance negatively. This option is disabled by - default unless running inside Valgrind.

- - "opt.zero" - - (bool) - r- - [--enable-fill] -

Zero filling enabled/disabled. If enabled, each byte - of uninitialized allocated memory will be initialized to 0. Note that - this initialization only happens once for each byte, so - realloc() and - rallocm() calls do not zero memory that - was previously allocated. This is intended for debugging and will - impact performance negatively. This option is disabled by default. -

- - "opt.utrace" - - (bool) - r- - [--enable-utrace] -

Allocation tracing based on - utrace(2) enabled/disabled. This option - is disabled by default.

- - "opt.valgrind" - - (bool) - r- - [--enable-valgrind] -

Valgrind - support enabled/disabled. This option is vestigal because jemalloc - auto-detects whether it is running inside Valgrind. This option is - disabled by default, unless running inside Valgrind.

- - "opt.xmalloc" - - (bool) - r- - [--enable-xmalloc] -

Abort-on-out-of-memory enabled/disabled. If enabled, - rather than returning failure for any allocation function, display a - diagnostic message on STDERR_FILENO and cause the - program to drop core (using - abort(3)). If an application is - designed to depend on this behavior, set the option at compile time by - including the following in the source code: -

-malloc_conf = "xmalloc:true";

- This option is disabled by default.

- - "opt.tcache" - - (bool) - r- - [--enable-tcache] -

Thread-specific caching enabled/disabled. When there - are multiple threads, each thread uses a thread-specific cache for - objects up to a certain size. Thread-specific caching allows many - allocations to be satisfied without performing any thread - synchronization, at the cost of increased memory use. See the - - "opt.lg_tcache_max" - - option for related tuning information. This option is enabled by - default unless running inside Valgrind.

- - "opt.lg_tcache_max" - - (size_t) - r- - [--enable-tcache] -

Maximum size class (log base 2) to cache in the - thread-specific cache. At a minimum, all small size classes are - cached, and at a maximum all large size classes are cached. The - default maximum is 32 KiB (2^15).

- - "opt.prof" - - (bool) - r- - [--enable-prof] -

Memory profiling enabled/disabled. If enabled, profile - memory allocation activity. See the - "opt.prof_active" - - option for on-the-fly activation/deactivation. See the - "opt.lg_prof_sample" - - option for probabilistic sampling control. See the - "opt.prof_accum" - - option for control of cumulative sample reporting. See the - "opt.lg_prof_interval" - - option for information on interval-triggered profile dumping, the - "opt.prof_gdump" - - option for information on high-water-triggered profile dumping, and the - - "opt.prof_final" - - option for final profile dumping. Profile output is compatible with - the included pprof Perl script, which originates - from the gperftools - package.

- - "opt.prof_prefix" - - (const char *) - r- - [--enable-prof] -

Filename prefix for profile dumps. If the prefix is - set to the empty string, no automatic dumps will occur; this is - primarily useful for disabling the automatic final heap dump (which - also disables leak reporting, if enabled). The default prefix is - jeprof.

- - "opt.prof_active" - - (bool) - r- - [--enable-prof] -

Profiling activated/deactivated. This is a secondary - control mechanism that makes it possible to start the application with - profiling enabled (see the - "opt.prof" - option) but - inactive, then toggle profiling at any time during program execution - with the - "prof.active" - mallctl. - This option is enabled by default.

- - "opt.lg_prof_sample" - - (ssize_t) - r- - [--enable-prof] -

Average interval (log base 2) between allocation - samples, as measured in bytes of allocation activity. Increasing the - sampling interval decreases profile fidelity, but also decreases the - computational overhead. The default sample interval is 512 KiB (2^19 - B).

- - "opt.prof_accum" - - (bool) - r- - [--enable-prof] -

Reporting of cumulative object/byte counts in profile - dumps enabled/disabled. If this option is enabled, every unique - backtrace must be stored for the duration of execution. Depending on - the application, this can impose a large memory overhead, and the - cumulative counts are not always of interest. This option is disabled - by default.

- - "opt.lg_prof_interval" - - (ssize_t) - r- - [--enable-prof] -

Average interval (log base 2) between memory profile - dumps, as measured in bytes of allocation activity. The actual - interval between dumps may be sporadic because decentralized allocation - counters are used to avoid synchronization bottlenecks. Profiles are - dumped to files named according to the pattern - <prefix>.<pid>.<seq>.i<iseq>.heap, - where <prefix> is controlled by the - - "opt.prof_prefix" - - option. By default, interval-triggered profile dumping is disabled - (encoded as -1). -

- - "opt.prof_gdump" - - (bool) - r- - [--enable-prof] -

Trigger a memory profile dump every time the total - virtual memory exceeds the previous maximum. Profiles are dumped to - files named according to the pattern - <prefix>.<pid>.<seq>.u<useq>.heap, - where <prefix> is controlled by the - "opt.prof_prefix" - - option. This option is disabled by default.

- - "opt.prof_final" - - (bool) - r- - [--enable-prof] -

Use an - atexit(3) function to dump final memory - usage to a file named according to the pattern - <prefix>.<pid>.<seq>.f.heap, - where <prefix> is controlled by the - "opt.prof_prefix" - - option. This option is enabled by default.

- - "opt.prof_leak" - - (bool) - r- - [--enable-prof] -

Leak reporting enabled/disabled. If enabled, use an - atexit(3) function to report memory leaks - detected by allocation sampling. See the - - "opt.prof" - option for - information on analyzing heap profile output. This option is disabled - by default.

- - "thread.arena" - - (unsigned) - rw -

Get or set the arena associated with the calling - thread. If the specified arena was not initialized beforehand (see the - - "arenas.initialized" - - mallctl), it will be automatically initialized as a side effect of - calling this interface.

- - "thread.allocated" - - (uint64_t) - r- - [--enable-stats] -

Get the total number of bytes ever allocated by the - calling thread. This counter has the potential to wrap around; it is - up to the application to appropriately interpret the counter in such - cases.

- - "thread.allocatedp" - - (uint64_t *) - r- - [--enable-stats] -

Get a pointer to the the value that is returned by the - - "thread.allocated" - - mallctl. This is useful for avoiding the overhead of repeated - mallctl*() calls.

- - "thread.deallocated" - - (uint64_t) - r- - [--enable-stats] -

Get the total number of bytes ever deallocated by the - calling thread. This counter has the potential to wrap around; it is - up to the application to appropriately interpret the counter in such - cases.

- - "thread.deallocatedp" - - (uint64_t *) - r- - [--enable-stats] -

Get a pointer to the the value that is returned by the - - "thread.deallocated" - - mallctl. This is useful for avoiding the overhead of repeated - mallctl*() calls.

- - "thread.tcache.enabled" - - (bool) - rw - [--enable-tcache] -

Enable/disable calling thread's tcache. The tcache is - implicitly flushed as a side effect of becoming - disabled (see - "thread.tcache.flush" - ). -

- - "thread.tcache.flush" - - (void) - -- - [--enable-tcache] -

Flush calling thread's tcache. This interface releases - all cached objects and internal data structures associated with the - calling thread's thread-specific cache. Ordinarily, this interface - need not be called, since automatic periodic incremental garbage - collection occurs, and the thread cache is automatically discarded when - a thread exits. However, garbage collection is triggered by allocation - activity, so it is possible for a thread that stops - allocating/deallocating to retain its cache indefinitely, in which case - the developer may find manual flushing useful.

- - "arena.<i>.purge" - - (unsigned) - -- -

Purge unused dirty pages for arena <i>, or for - all arenas if <i> equals - "arenas.narenas" - . -

- - "arena.<i>.dss" - - (const char *) - rw -

Set the precedence of dss allocation as related to mmap - allocation for arena <i>, or for all arenas if <i> equals - - "arenas.narenas" - . See - - "opt.dss" - for supported - settings. -

- - "arenas.narenas" - - (unsigned) - r- -

Current limit on number of arenas.

- - "arenas.initialized" - - (bool *) - r- -

An array of - "arenas.narenas" - - booleans. Each boolean indicates whether the corresponding arena is - initialized.

- - "arenas.quantum" - - (size_t) - r- -

Quantum size.

- - "arenas.page" - - (size_t) - r- -

Page size.

- - "arenas.tcache_max" - - (size_t) - r- - [--enable-tcache] -

Maximum thread-cached size class.

- - "arenas.nbins" - - (unsigned) - r- -

Number of bin size classes.

- - "arenas.nhbins" - - (unsigned) - r- - [--enable-tcache] -

Total number of thread cache bin size - classes.

- - "arenas.bin.<i>.size" - - (size_t) - r- -

Maximum size supported by size class.

- - "arenas.bin.<i>.nregs" - - (uint32_t) - r- -

Number of regions per page run.

- - "arenas.bin.<i>.run_size" - - (size_t) - r- -

Number of bytes per page run.

- - "arenas.nlruns" - - (size_t) - r- -

Total number of large size classes.

- - "arenas.lrun.<i>.size" - - (size_t) - r- -

Maximum size supported by this large size - class.

- - "arenas.purge" - - (unsigned) - -w -

Purge unused dirty pages for the specified arena, or - for all arenas if none is specified.

- - "arenas.extend" - - (unsigned) - r- -

Extend the array of arenas by appending a new arena, - and returning the new arena index.

- - "prof.active" - - (bool) - rw - [--enable-prof] -

Control whether sampling is currently active. See the - - "opt.prof_active" - - option for additional information. -

- - "prof.dump" - - (const char *) - -w - [--enable-prof] -

Dump a memory profile to the specified file, or if NULL - is specified, to a file according to the pattern - <prefix>.<pid>.<seq>.m<mseq>.heap, - where <prefix> is controlled by the - - "opt.prof_prefix" - - option.

- - "prof.interval" - - (uint64_t) - r- - [--enable-prof] -

Average number of bytes allocated between - interval-based profile dumps. See the - - "opt.lg_prof_interval" - - option for additional information.

- - "stats.cactive" - - (size_t *) - r- - [--enable-stats] -

Pointer to a counter that contains an approximate count - of the current number of bytes in active pages. The estimate may be - high, but never low, because each arena rounds up to the nearest - multiple of the chunk size when computing its contribution to the - counter. Note that the - "epoch" - mallctl has no bearing - on this counter. Furthermore, counter consistency is maintained via - atomic operations, so it is necessary to use an atomic operation in - order to guarantee a consistent read when dereferencing the pointer. -

- - "stats.allocated" - - (size_t) - r- - [--enable-stats] -

Total number of bytes allocated by the - application.

- - "stats.active" - - (size_t) - r- - [--enable-stats] -

Total number of bytes in active pages allocated by the - application. This is a multiple of the page size, and greater than or - equal to - "stats.allocated" - . - This does not include - - "stats.arenas.<i>.pdirty" - and pages - entirely devoted to allocator metadata.

- - "stats.mapped" - - (size_t) - r- - [--enable-stats] -

Total number of bytes in chunks mapped on behalf of the - application. This is a multiple of the chunk size, and is at least as - large as - "stats.active" - . This - does not include inactive chunks.

- - "stats.chunks.current" - - (size_t) - r- - [--enable-stats] -

Total number of chunks actively mapped on behalf of the - application. This does not include inactive chunks. -

- - "stats.chunks.total" - - (uint64_t) - r- - [--enable-stats] -

Cumulative number of chunks allocated.

- - "stats.chunks.high" - - (size_t) - r- - [--enable-stats] -

Maximum number of active chunks at any time thus far. -

- - "stats.huge.allocated" - - (size_t) - r- - [--enable-stats] -

Number of bytes currently allocated by huge objects. -

- - "stats.huge.nmalloc" - - (uint64_t) - r- - [--enable-stats] -

Cumulative number of huge allocation requests. -

- - "stats.huge.ndalloc" - - (uint64_t) - r- - [--enable-stats] -

Cumulative number of huge deallocation requests. -

- - "stats.arenas.<i>.dss" - - (const char *) - r- -

dss (sbrk(2)) allocation precedence as - related to mmap(2) allocation. See - "opt.dss" - for details. -

- - "stats.arenas.<i>.nthreads" - - (unsigned) - r- -

Number of threads currently assigned to - arena.

- - "stats.arenas.<i>.pactive" - - (size_t) - r- -

Number of pages in active runs.

- - "stats.arenas.<i>.pdirty" - - (size_t) - r- -

Number of pages within unused runs that are potentially - dirty, and for which madvise(..., - MADV_DONTNEED) or - similar has not been called.

- - "stats.arenas.<i>.mapped" - - (size_t) - r- - [--enable-stats] -

Number of mapped bytes.

- - "stats.arenas.<i>.npurge" - - (uint64_t) - r- - [--enable-stats] -

Number of dirty page purge sweeps performed. -

- - "stats.arenas.<i>.nmadvise" - - (uint64_t) - r- - [--enable-stats] -

Number of madvise(..., - MADV_DONTNEED) or - similar calls made to purge dirty pages.

- - "stats.arenas.<i>.npurged" - - (uint64_t) - r- - [--enable-stats] -

Number of pages purged.

- - "stats.arenas.<i>.small.allocated" - - (size_t) - r- - [--enable-stats] -

Number of bytes currently allocated by small objects. -

- - "stats.arenas.<i>.small.nmalloc" - - (uint64_t) - r- - [--enable-stats] -

Cumulative number of allocation requests served by - small bins.

- - "stats.arenas.<i>.small.ndalloc" - - (uint64_t) - r- - [--enable-stats] -

Cumulative number of small objects returned to bins. -

- - "stats.arenas.<i>.small.nrequests" - - (uint64_t) - r- - [--enable-stats] -

Cumulative number of small allocation requests. -

- - "stats.arenas.<i>.large.allocated" - - (size_t) - r- - [--enable-stats] -

Number of bytes currently allocated by large objects. -

- - "stats.arenas.<i>.large.nmalloc" - - (uint64_t) - r- - [--enable-stats] -

Cumulative number of large allocation requests served - directly by the arena.

- - "stats.arenas.<i>.large.ndalloc" - - (uint64_t) - r- - [--enable-stats] -

Cumulative number of large deallocation requests served - directly by the arena.

- - "stats.arenas.<i>.large.nrequests" - - (uint64_t) - r- - [--enable-stats] -

Cumulative number of large allocation requests. -

- - "stats.arenas.<i>.bins.<j>.allocated" - - (size_t) - r- - [--enable-stats] -

Current number of bytes allocated by - bin.

- - "stats.arenas.<i>.bins.<j>.nmalloc" - - (uint64_t) - r- - [--enable-stats] -

Cumulative number of allocations served by bin. -

- - "stats.arenas.<i>.bins.<j>.ndalloc" - - (uint64_t) - r- - [--enable-stats] -

Cumulative number of allocations returned to bin. -

- - "stats.arenas.<i>.bins.<j>.nrequests" - - (uint64_t) - r- - [--enable-stats] -

Cumulative number of allocation - requests.

- - "stats.arenas.<i>.bins.<j>.nfills" - - (uint64_t) - r- - [--enable-stats --enable-tcache] -

Cumulative number of tcache fills.

- - "stats.arenas.<i>.bins.<j>.nflushes" - - (uint64_t) - r- - [--enable-stats --enable-tcache] -

Cumulative number of tcache flushes.

- - "stats.arenas.<i>.bins.<j>.nruns" - - (uint64_t) - r- - [--enable-stats] -

Cumulative number of runs created.

- - "stats.arenas.<i>.bins.<j>.nreruns" - - (uint64_t) - r- - [--enable-stats] -

Cumulative number of times the current run from which - to allocate changed.

- - "stats.arenas.<i>.bins.<j>.curruns" - - (size_t) - r- - [--enable-stats] -

Current number of runs.

- - "stats.arenas.<i>.lruns.<j>.nmalloc" - - (uint64_t) - r- - [--enable-stats] -

Cumulative number of allocation requests for this size - class served directly by the arena.

- - "stats.arenas.<i>.lruns.<j>.ndalloc" - - (uint64_t) - r- - [--enable-stats] -

Cumulative number of deallocation requests for this - size class served directly by the arena.

- - "stats.arenas.<i>.lruns.<j>.nrequests" - - (uint64_t) - r- - [--enable-stats] -

Cumulative number of allocation requests for this size - class.

- - "stats.arenas.<i>.lruns.<j>.curruns" - - (size_t) - r- - [--enable-stats] -

Current number of runs for this size class. -

DEBUGGING MALLOC PROBLEMS

When debugging, it is a good idea to configure/build jemalloc with - the --enable-debug and --enable-fill - options, and recompile the program with suitable options and symbols for - debugger support. When so configured, jemalloc incorporates a wide variety - of run-time assertions that catch application errors such as double-free, - write-after-free, etc.

Programs often accidentally depend on “uninitialized” - memory actually being filled with zero bytes. Junk filling - (see the - "opt.junk" - - option) tends to expose such bugs in the form of obviously incorrect - results and/or coredumps. Conversely, zero - filling (see the - "opt.zero" - option) eliminates - the symptoms of such bugs. Between these two options, it is usually - possible to quickly detect, diagnose, and eliminate such bugs.

This implementation does not provide much detail about the problems - it detects, because the performance impact for storing such information - would be prohibitive. However, jemalloc does integrate with the most - excellent Valgrind tool if the - --enable-valgrind configuration option is enabled.

DIAGNOSTIC MESSAGES

If any of the memory allocation/deallocation functions detect an - error or warning condition, a message will be printed to file descriptor - STDERR_FILENO. Errors will result in the process - dumping core. If the - "opt.abort" - option is set, most - warnings are treated as errors.

The malloc_message variable allows the programmer - to override the function which emits the text strings forming the errors - and warnings if for some reason the STDERR_FILENO file - descriptor is not suitable for this. - malloc_message() takes the - cbopaque pointer argument that is - NULL unless overridden by the arguments in a call to - malloc_stats_print(), followed by a string - pointer. Please note that doing anything which tries to allocate memory in - this function is likely to result in a crash or deadlock.

All messages are prefixed by - “<jemalloc>: ”.

RETURN VALUES

Standard API

The malloc() and - calloc() functions return a pointer to the - allocated memory if successful; otherwise a NULL - pointer is returned and errno is set to - ENOMEM.

The posix_memalign() function - returns the value 0 if successful; otherwise it returns an error value. - The posix_memalign() function will fail - if: -

EINVAL

The alignment parameter is - not a power of 2 at least as large as - sizeof(void *). -

ENOMEM

Memory allocation error.

-

The aligned_alloc() function returns - a pointer to the allocated memory if successful; otherwise a - NULL pointer is returned and - errno is set. The - aligned_alloc() function will fail if: -

EINVAL

The alignment parameter is - not a power of 2. -

ENOMEM

Memory allocation error.

-

The realloc() function returns a - pointer, possibly identical to ptr, to the - allocated memory if successful; otherwise a NULL - pointer is returned, and errno is set to - ENOMEM if the error was the result of an - allocation failure. The realloc() - function always leaves the original buffer intact when an error occurs. -

The free() function returns no - value.

Non-standard API

The malloc_usable_size() function - returns the usable size of the allocation pointed to by - ptr.

The mallctl(), - mallctlnametomib(), and - mallctlbymib() functions return 0 on - success; otherwise they return an error value. The functions will fail - if: -

EINVAL

newp is not - NULL, and newlen is too - large or too small. Alternatively, *oldlenp - is too large or too small; in this case as much data as possible - are read despite the error.

ENOMEM

*oldlenp is too short to - hold the requested value.

ENOENT

name or - mib specifies an unknown/invalid - value.

EPERM

Attempt to read or write void value, or attempt to - write read-only value.

EAGAIN

A memory allocation failure - occurred.

EFAULT

An interface with side effects failed in some way - not directly related to mallctl*() - read/write processing.

-

Experimental API

The allocm(), - rallocm(), - sallocm(), - dallocm(), and - nallocm() functions return - ALLOCM_SUCCESS on success; otherwise they return an - error value. The allocm(), - rallocm(), and - nallocm() functions will fail if: -

ALLOCM_ERR_OOM

Out of memory. Insufficient contiguous memory was - available to service the allocation request. The - allocm() function additionally sets - *ptr to NULL, whereas - the rallocm() function leaves - *ptr unmodified.

- The rallocm() function will also - fail if: -

ALLOCM_ERR_NOT_MOVED

ALLOCM_NO_MOVE was specified, - but the reallocation request could not be serviced without moving - the object.

-

ENVIRONMENT

The following environment variable affects the execution of the - allocation functions: -

MALLOC_CONF

If the environment variable - MALLOC_CONF is set, the characters it contains - will be interpreted as options.

-

EXAMPLES

To dump core whenever a problem occurs: -

ln -s 'abort:true' /etc/malloc.conf

-

To specify in the source a chunk size that is 16 MiB: -

-malloc_conf = "lg_chunk:24";

SEE ALSO

madvise(2), - mmap(2), - sbrk(2), - utrace(2), - alloca(3), - atexit(3), - getpagesize(3)

STANDARDS

The malloc(), - calloc(), - realloc(), and - free() functions conform to ISO/IEC - 9899:1990 (“ISO C90”).

The posix_memalign() function conforms - to IEEE Std 1003.1-2001 (“POSIX.1”).

diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/doc/jemalloc.xml.in mariadb-5.5-5.5.40/extra/jemalloc/doc/jemalloc.xml.in --- mariadb-5.5-5.5.39/extra/jemalloc/doc/jemalloc.xml.in 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/doc/jemalloc.xml.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,2176 +0,0 @@ - - - - - - - User Manual - jemalloc - @jemalloc_version@ - - - Jason - Evans - Author - - - - - JEMALLOC - 3 - - - jemalloc - jemalloc - - general purpose memory allocation functions - - - LIBRARY - This manual describes jemalloc @jemalloc_version@. More information - can be found at the jemalloc website. - - - SYNOPSIS - - #include <stdlib.h> -#include <jemalloc/jemalloc.h> - - Standard API - - void *malloc - size_t size - - - void *calloc - size_t number - size_t size - - - int posix_memalign - void **ptr - size_t alignment - size_t size - - - void *aligned_alloc - size_t alignment - size_t size - - - void *realloc - void *ptr - size_t size - - - void free - void *ptr - - - - Non-standard API - - size_t malloc_usable_size - const void *ptr - - - void malloc_stats_print - void (*write_cb) - void *, const char * - - void *cbopaque - const char *opts - - - int mallctl - const char *name - void *oldp - size_t *oldlenp - void *newp - size_t newlen - - - int mallctlnametomib - const char *name - size_t *mibp - size_t *miblenp - - - int mallctlbymib - const size_t *mib - size_t miblen - void *oldp - size_t *oldlenp - void *newp - size_t newlen - - - void (*malloc_message) - void *cbopaque - const char *s - - const char *malloc_conf; - - - Experimental API - - int allocm - void **ptr - size_t *rsize - size_t size - int flags - - - int rallocm - void **ptr - size_t *rsize - size_t size - size_t extra - int flags - - - int sallocm - const void *ptr - size_t *rsize - int flags - - - int dallocm - void *ptr - int flags - - - int nallocm - size_t *rsize - size_t size - int flags - - - - - - DESCRIPTION - - Standard API - - The malloc function allocates - 
size bytes of uninitialized memory. The allocated - space is suitably aligned (after possible pointer coercion) for storage - of any type of object. - - The calloc function allocates - space for number objects, each - size bytes in length. The result is identical to - calling malloc with an argument of - number * size, with the - exception that the allocated memory is explicitly initialized to zero - bytes. - - The posix_memalign function - allocates size bytes of memory such that the - allocation's base address is an even multiple of - alignment, and returns the allocation in the value - pointed to by ptr. The requested - alignment must be a power of 2 at least as large - as sizeof(void *). - - The aligned_alloc function - allocates size bytes of memory such that the - allocation's base address is an even multiple of - alignment. The requested - alignment must be a power of 2. Behavior is - undefined if size is not an integral multiple of - alignment. - - The realloc function changes the - size of the previously allocated memory referenced by - ptr to size bytes. The - contents of the memory are unchanged up to the lesser of the new and old - sizes. If the new size is larger, the contents of the newly allocated - portion of the memory are undefined. Upon success, the memory referenced - by ptr is freed and a pointer to the newly - allocated memory is returned. Note that - realloc may move the memory allocation, - resulting in a different return value than ptr. - If ptr is NULL, the - realloc function behaves identically to - malloc for the specified size. - - The free function causes the - allocated memory referenced by ptr to be made - available for future allocations. If ptr is - NULL, no action occurs. - - - Non-standard API - - The malloc_usable_size function - returns the usable size of the allocation pointed to by - ptr. The return value may be larger than the size - that was requested during allocation. 
The - malloc_usable_size function is not a - mechanism for in-place realloc; rather - it is provided solely as a tool for introspection purposes. Any - discrepancy between the requested allocation size and the size reported - by malloc_usable_size should not be - depended on, since such behavior is entirely implementation-dependent. - - - The malloc_stats_print function - writes human-readable summary statistics via the - write_cb callback function pointer and - cbopaque data passed to - write_cb, or - malloc_message if - write_cb is NULL. This - function can be called repeatedly. General information that never - changes during execution can be omitted by specifying "g" as a character - within the opts string. Note that - malloc_message uses the - mallctl* functions internally, so - inconsistent statistics can be reported if multiple threads use these - functions simultaneously. If is - specified during configuration, “m” and “a” can - be specified to omit merged arena and per arena statistics, respectively; - “b” and “l” can be specified to omit per size - class statistics for bins and large objects, respectively. Unrecognized - characters are silently ignored. Note that thread caching may prevent - some statistics from being completely up to date, since extra locking - would be required to merge counters that track thread cache operations. - - - The mallctl function provides a - general interface for introspecting the memory allocator, as well as - setting modifiable parameters and triggering actions. The - period-separated name argument specifies a - location in a tree-structured namespace; see the section for - documentation on the tree contents. To read a value, pass a pointer via - oldp to adequate space to contain the value, and a - pointer to its length via oldlenp; otherwise pass - NULL and NULL. Similarly, to - write a value, pass a pointer to the value via - newp, and its length via - newlen; otherwise pass NULL - and 0. 
- - The mallctlnametomib function - provides a way to avoid repeated name lookups for applications that - repeatedly query the same portion of the namespace, by translating a name - to a “Management Information Base” (MIB) that can be passed - repeatedly to mallctlbymib. Upon - successful return from mallctlnametomib, - mibp contains an array of - *miblenp integers, where - *miblenp is the lesser of the number of components - in name and the input value of - *miblenp. Thus it is possible to pass a - *miblenp that is smaller than the number of - period-separated name components, which results in a partial MIB that can - be used as the basis for constructing a complete MIB. For name - components that are integers (e.g. the 2 in - arenas.bin.2.size), - the corresponding MIB component will always be that integer. Therefore, - it is legitimate to construct code like the following: - - - Experimental API - The experimental API is subject to change or removal without regard - for backward compatibility. If - is specified during configuration, the experimental API is - omitted. - - The allocm, - rallocm, - sallocm, - dallocm, and - nallocm functions all have a - flags argument that can be used to specify - options. The functions only check the options that are contextually - relevant. Use bitwise or (|) operations to - specify one or more of the following: - - - ALLOCM_LG_ALIGN(la) - - - Align the memory allocation to start at an address - that is a multiple of (1 << - la). This macro does not validate - that la is within the valid - range. - - - ALLOCM_ALIGN(a) - - - Align the memory allocation to start at an address - that is a multiple of a, where - a is a power of two. This macro does not - validate that a is a power of 2. - - - - ALLOCM_ZERO - - Initialize newly allocated memory to contain zero - bytes. 
In the growing reallocation case, the real size prior to - reallocation defines the boundary between untouched bytes and those - that are initialized to contain zero bytes. If this option is - absent, newly allocated memory is uninitialized. - - - ALLOCM_NO_MOVE - - For reallocation, fail rather than moving the - object. This constraint can apply to both growth and - shrinkage. - - - ALLOCM_ARENA(a) - - - Use the arena specified by the index - a. This macro does not validate that - a specifies an arena in the valid - range. - - - - - The allocm function allocates at - least size bytes of memory, sets - *ptr to the base address of the allocation, and - sets *rsize to the real size of the allocation if - rsize is not NULL. Behavior - is undefined if size is - 0. - - The rallocm function resizes the - allocation at *ptr to be at least - size bytes, sets *ptr to - the base address of the allocation if it moved, and sets - *rsize to the real size of the allocation if - rsize is not NULL. If - extra is non-zero, an attempt is made to resize - the allocation to be at least size + - extra) bytes, though inability to allocate - the extra byte(s) will not by itself result in failure. Behavior is - undefined if size is 0, or if - (size + - extra > - SIZE_T_MAX). - - The sallocm function sets - *rsize to the real size of the allocation. - - The dallocm function causes the - memory referenced by ptr to be made available for - future allocations. - - The nallocm function allocates no - memory, but it performs the same size computation as the - allocm function, and if - rsize is not NULL it sets - *rsize to the real size of the allocation that - would result from the equivalent allocm - function call. Behavior is undefined if - size is 0. - - - - TUNING - Once, when the first call is made to one of the memory allocation - routines, the allocator initializes its internals based in part on various - options that can be specified at compile- or run-time. 
- - The string pointed to by the global variable - malloc_conf, the “name” of the file - referenced by the symbolic link named /etc/malloc.conf, and the value of the - environment variable MALLOC_CONF, will be interpreted, in - that order, from left to right as options. - - An options string is a comma-separated list of option:value pairs. - There is one key corresponding to each opt.* mallctl (see the section for options - documentation). For example, abort:true,narenas:1 sets - the opt.abort and opt.narenas options. Some - options have boolean values (true/false), others have integer values (base - 8, 10, or 16, depending on prefix), and yet others have raw string - values. - - - IMPLEMENTATION NOTES - Traditionally, allocators have used - sbrk - 2 to obtain memory, which is - suboptimal for several reasons, including race conditions, increased - fragmentation, and artificial limitations on maximum usable memory. If - is specified during configuration, this - allocator uses both mmap - 2 and - sbrk - 2, in that order of preference; - otherwise only mmap - 2 is used. - - This allocator uses multiple arenas in order to reduce lock - contention for threaded programs on multi-processor systems. This works - well with regard to threading scalability, but incurs some costs. There is - a small fixed per-arena overhead, and additionally, arenas manage memory - completely independently of each other, which means a small fixed increase - in overall memory fragmentation. These overheads are not generally an - issue, given the number of arenas normally used. Note that using - substantially more arenas than the default is not likely to improve - performance, mainly due to reduced cache performance. However, it may make - sense to reduce the number of arenas if an application does not make much - use of the allocation functions. 
- - In addition to multiple arenas, unless - is specified during configuration, this - allocator supports thread-specific caching for small and large objects, in - order to make it possible to completely avoid synchronization for most - allocation requests. Such caching allows very fast allocation in the - common case, but it increases memory usage and fragmentation, since a - bounded number of objects can remain allocated in each thread cache. - - Memory is conceptually broken into equal-sized chunks, where the - chunk size is a power of two that is greater than the page size. Chunks - are always aligned to multiples of the chunk size. This alignment makes it - possible to find metadata for user objects very quickly. - - User objects are broken into three categories according to size: - small, large, and huge. Small objects are smaller than one page. Large - objects are smaller than the chunk size. Huge objects are a multiple of - the chunk size. Small and large objects are managed by arenas; huge - objects are managed separately in a single data structure that is shared by - all threads. Huge objects are used by applications infrequently enough - that this single data structure is not a scalability issue. - - Each chunk that is managed by an arena tracks its contents as runs of - contiguous pages (unused, backing a set of small objects, or backing one - large object). The combination of chunk alignment and chunk page maps - makes it possible to determine all metadata regarding small and large - allocations in constant time. - - Small objects are managed in groups by page runs. Each run maintains - a frontier and free list to track which regions are in use. Allocation - requests that are no more than half the quantum (8 or 16, depending on - architecture) are rounded up to the nearest power of two that is at least - sizeof(double). 
All other small - object size classes are multiples of the quantum, spaced such that internal - fragmentation is limited to approximately 25% for all but the smallest size - classes. Allocation requests that are larger than the maximum small size - class, but small enough to fit in an arena-managed chunk (see the opt.lg_chunk option), are - rounded up to the nearest run size. Allocation requests that are too large - to fit in an arena-managed chunk are rounded up to the nearest multiple of - the chunk size. - - Allocations are packed tightly together, which can be an issue for - multi-threaded applications. If you need to assure that allocations do not - suffer from cacheline sharing, round your allocation requests up to the - nearest multiple of the cacheline size, or specify cacheline alignment when - allocating. - - Assuming 4 MiB chunks, 4 KiB pages, and a 16-byte quantum on a 64-bit - system, the size classes in each category are as shown in . - - - Size classes - - - - - - - Category - Spacing - Size - - - - - Small - lg - [8] - - - 16 - [16, 32, 48, ..., 128] - - - 32 - [160, 192, 224, 256] - - - 64 - [320, 384, 448, 512] - - - 128 - [640, 768, 896, 1024] - - - 256 - [1280, 1536, 1792, 2048] - - - 512 - [2560, 3072, 3584] - - - Large - 4 KiB - [4 KiB, 8 KiB, 12 KiB, ..., 4072 KiB] - - - Huge - 4 MiB - [4 MiB, 8 MiB, 12 MiB, ...] - - - -
-
- - MALLCTL NAMESPACE - The following names are defined in the namespace accessible via the - mallctl* functions. Value types are - specified in parentheses, their readable/writable statuses are encoded as - rw, r-, -w, or - --, and required build configuration flags follow, if - any. A name element encoded as <i> or - <j> indicates an integer component, where the - integer varies from 0 to some upper value that must be determined via - introspection. In the case of stats.arenas.<i>.*, - <i> equal to arenas.narenas can be - used to access the summation of statistics from all arenas. Take special - note of the epoch mallctl, - which controls refreshing of cached dynamic statistics. - - - - - version - (const char *) - r- - - Return the jemalloc version string. - - - - - epoch - (uint64_t) - rw - - If a value is passed in, refresh the data from which - the mallctl* functions report values, - and increment the epoch. Return the current epoch. This is useful for - detecting whether another thread caused a refresh. - - - - - config.debug - (bool) - r- - - was specified during - build configuration. - - - - - config.dss - (bool) - r- - - was specified during - build configuration. - - - - - config.fill - (bool) - r- - - was specified during - build configuration. - - - - - config.lazy_lock - (bool) - r- - - was specified - during build configuration. - - - - - config.mremap - (bool) - r- - - was specified during - build configuration. - - - - - config.munmap - (bool) - r- - - was specified during - build configuration. - - - - - config.prof - (bool) - r- - - was specified during - build configuration. - - - - - config.prof_libgcc - (bool) - r- - - was not - specified during build configuration. - - - - - config.prof_libunwind - (bool) - r- - - was specified - during build configuration. - - - - - config.stats - (bool) - r- - - was specified during - build configuration. - - - - - config.tcache - (bool) - r- - - was not specified - during build configuration. 
- - - - - config.tls - (bool) - r- - - was not specified during - build configuration. - - - - - config.utrace - (bool) - r- - - was specified during - build configuration. - - - - - config.valgrind - (bool) - r- - - was specified during - build configuration. - - - - - config.xmalloc - (bool) - r- - - was specified during - build configuration. - - - - - opt.abort - (bool) - r- - - Abort-on-warning enabled/disabled. If true, most - warnings are fatal. The process will call - abort - 3 in these cases. This option is - disabled by default unless is - specified during configuration, in which case it is enabled by default. - - - - - - opt.lg_chunk - (size_t) - r- - - Virtual memory chunk size (log base 2). If a chunk - size outside the supported size range is specified, the size is - silently clipped to the minimum/maximum supported size. The default - chunk size is 4 MiB (2^22). - - - - - - opt.dss - (const char *) - r- - - dss (sbrk - 2) allocation precedence as - related to mmap - 2 allocation. The following - settings are supported: “disabled”, “primary”, - and “secondary” (default). - - - - - opt.narenas - (size_t) - r- - - Maximum number of arenas to use for automatic - multiplexing of threads and arenas. The default is four times the - number of CPUs, or one if there is a single CPU. - - - - - opt.lg_dirty_mult - (ssize_t) - r- - - Per-arena minimum ratio (log base 2) of active to dirty - pages. Some dirty unused pages may be allowed to accumulate, within - the limit set by the ratio (or one chunk worth of dirty pages, - whichever is greater), before informing the kernel about some of those - pages via madvise - 2 or a similar system call. This - provides the kernel with sufficient information to recycle dirty pages - if physical memory becomes scarce and the pages remain unused. The - default minimum ratio is 8:1 (2^3:1); an option value of -1 will - disable dirty page purging. 
- - - - - opt.stats_print - (bool) - r- - - Enable/disable statistics printing at exit. If - enabled, the malloc_stats_print - function is called at program exit via an - atexit - 3 function. If - is specified during configuration, this - has the potential to cause deadlock for a multi-threaded process that - exits while one or more threads are executing in the memory allocation - functions. Therefore, this option should only be used with care; it is - primarily intended as a performance tuning aid during application - development. This option is disabled by default. - - - - - opt.junk - (bool) - r- - [] - - Junk filling enabled/disabled. If enabled, each byte - of uninitialized allocated memory will be initialized to - 0xa5. All deallocated memory will be initialized to - 0x5a. This is intended for debugging and will - impact performance negatively. This option is disabled by default - unless is specified during - configuration, in which case it is enabled by default unless running - inside Valgrind. - - - - - opt.quarantine - (size_t) - r- - [] - - Per thread quarantine size in bytes. If non-zero, each - thread maintains a FIFO object quarantine that stores up to the - specified number of bytes of memory. The quarantined memory is not - freed until it is released from quarantine, though it is immediately - junk-filled if the opt.junk option is - enabled. This feature is of particular use in combination with Valgrind, which can detect attempts - to access quarantined objects. This is intended for debugging and will - impact performance negatively. The default quarantine size is 0 unless - running inside Valgrind, in which case the default is 16 - MiB. - - - - - opt.redzone - (bool) - r- - [] - - Redzones enabled/disabled. If enabled, small - allocations have redzones before and after them. Furthermore, if the - opt.junk option is - enabled, the redzones are checked for corruption during deallocation. 
- However, the primary intended purpose of this feature is to be used in - combination with Valgrind, - which needs redzones in order to do effective buffer overflow/underflow - detection. This option is intended for debugging and will impact - performance negatively. This option is disabled by - default unless running inside Valgrind. - - - - - opt.zero - (bool) - r- - [] - - Zero filling enabled/disabled. If enabled, each byte - of uninitialized allocated memory will be initialized to 0. Note that - this initialization only happens once for each byte, so - realloc and - rallocm calls do not zero memory that - was previously allocated. This is intended for debugging and will - impact performance negatively. This option is disabled by default. - - - - - - opt.utrace - (bool) - r- - [] - - Allocation tracing based on - utrace - 2 enabled/disabled. This option - is disabled by default. - - - - - opt.valgrind - (bool) - r- - [] - - Valgrind - support enabled/disabled. This option is vestigal because jemalloc - auto-detects whether it is running inside Valgrind. This option is - disabled by default, unless running inside Valgrind. - - - - - opt.xmalloc - (bool) - r- - [] - - Abort-on-out-of-memory enabled/disabled. If enabled, - rather than returning failure for any allocation function, display a - diagnostic message on STDERR_FILENO and cause the - program to drop core (using - abort - 3). If an application is - designed to depend on this behavior, set the option at compile time by - including the following in the source code: - - This option is disabled by default. - - - - - opt.tcache - (bool) - r- - [] - - Thread-specific caching enabled/disabled. When there - are multiple threads, each thread uses a thread-specific cache for - objects up to a certain size. Thread-specific caching allows many - allocations to be satisfied without performing any thread - synchronization, at the cost of increased memory use. 
See the - opt.lg_tcache_max - option for related tuning information. This option is enabled by - default unless running inside Valgrind. - - - - - opt.lg_tcache_max - (size_t) - r- - [] - - Maximum size class (log base 2) to cache in the - thread-specific cache. At a minimum, all small size classes are - cached, and at a maximum all large size classes are cached. The - default maximum is 32 KiB (2^15). - - - - - opt.prof - (bool) - r- - [] - - Memory profiling enabled/disabled. If enabled, profile - memory allocation activity. See the opt.prof_active - option for on-the-fly activation/deactivation. See the opt.lg_prof_sample - option for probabilistic sampling control. See the opt.prof_accum - option for control of cumulative sample reporting. See the opt.lg_prof_interval - option for information on interval-triggered profile dumping, the opt.prof_gdump - option for information on high-water-triggered profile dumping, and the - opt.prof_final - option for final profile dumping. Profile output is compatible with - the included pprof Perl script, which originates - from the gperftools - package. - - - - - opt.prof_prefix - (const char *) - r- - [] - - Filename prefix for profile dumps. If the prefix is - set to the empty string, no automatic dumps will occur; this is - primarily useful for disabling the automatic final heap dump (which - also disables leak reporting, if enabled). The default prefix is - jeprof. - - - - - opt.prof_active - (bool) - r- - [] - - Profiling activated/deactivated. This is a secondary - control mechanism that makes it possible to start the application with - profiling enabled (see the opt.prof option) but - inactive, then toggle profiling at any time during program execution - with the prof.active mallctl. - This option is enabled by default. - - - - - opt.lg_prof_sample - (ssize_t) - r- - [] - - Average interval (log base 2) between allocation - samples, as measured in bytes of allocation activity. 
Increasing the - sampling interval decreases profile fidelity, but also decreases the - computational overhead. The default sample interval is 512 KiB (2^19 - B). - - - - - opt.prof_accum - (bool) - r- - [] - - Reporting of cumulative object/byte counts in profile - dumps enabled/disabled. If this option is enabled, every unique - backtrace must be stored for the duration of execution. Depending on - the application, this can impose a large memory overhead, and the - cumulative counts are not always of interest. This option is disabled - by default. - - - - - opt.lg_prof_interval - (ssize_t) - r- - [] - - Average interval (log base 2) between memory profile - dumps, as measured in bytes of allocation activity. The actual - interval between dumps may be sporadic because decentralized allocation - counters are used to avoid synchronization bottlenecks. Profiles are - dumped to files named according to the pattern - <prefix>.<pid>.<seq>.i<iseq>.heap, - where <prefix> is controlled by the - opt.prof_prefix - option. By default, interval-triggered profile dumping is disabled - (encoded as -1). - - - - - - opt.prof_gdump - (bool) - r- - [] - - Trigger a memory profile dump every time the total - virtual memory exceeds the previous maximum. Profiles are dumped to - files named according to the pattern - <prefix>.<pid>.<seq>.u<useq>.heap, - where <prefix> is controlled by the opt.prof_prefix - option. This option is disabled by default. - - - - - opt.prof_final - (bool) - r- - [] - - Use an - atexit - 3 function to dump final memory - usage to a file named according to the pattern - <prefix>.<pid>.<seq>.f.heap, - where <prefix> is controlled by the opt.prof_prefix - option. This option is enabled by default. - - - - - opt.prof_leak - (bool) - r- - [] - - Leak reporting enabled/disabled. If enabled, use an - atexit - 3 function to report memory leaks - detected by allocation sampling. See the - opt.prof option for - information on analyzing heap profile output. 
This option is disabled - by default. - - - - - thread.arena - (unsigned) - rw - - Get or set the arena associated with the calling - thread. If the specified arena was not initialized beforehand (see the - arenas.initialized - mallctl), it will be automatically initialized as a side effect of - calling this interface. - - - - - thread.allocated - (uint64_t) - r- - [] - - Get the total number of bytes ever allocated by the - calling thread. This counter has the potential to wrap around; it is - up to the application to appropriately interpret the counter in such - cases. - - - - - thread.allocatedp - (uint64_t *) - r- - [] - - Get a pointer to the the value that is returned by the - thread.allocated - mallctl. This is useful for avoiding the overhead of repeated - mallctl* calls. - - - - - thread.deallocated - (uint64_t) - r- - [] - - Get the total number of bytes ever deallocated by the - calling thread. This counter has the potential to wrap around; it is - up to the application to appropriately interpret the counter in such - cases. - - - - - thread.deallocatedp - (uint64_t *) - r- - [] - - Get a pointer to the the value that is returned by the - thread.deallocated - mallctl. This is useful for avoiding the overhead of repeated - mallctl* calls. - - - - - thread.tcache.enabled - (bool) - rw - [] - - Enable/disable calling thread's tcache. The tcache is - implicitly flushed as a side effect of becoming - disabled (see thread.tcache.flush). - - - - - - thread.tcache.flush - (void) - -- - [] - - Flush calling thread's tcache. This interface releases - all cached objects and internal data structures associated with the - calling thread's thread-specific cache. Ordinarily, this interface - need not be called, since automatic periodic incremental garbage - collection occurs, and the thread cache is automatically discarded when - a thread exits. 
However, garbage collection is triggered by allocation - activity, so it is possible for a thread that stops - allocating/deallocating to retain its cache indefinitely, in which case - the developer may find manual flushing useful. - - - - - arena.<i>.purge - (unsigned) - -- - - Purge unused dirty pages for arena <i>, or for - all arenas if <i> equals arenas.narenas. - - - - - - arena.<i>.dss - (const char *) - rw - - Set the precedence of dss allocation as related to mmap - allocation for arena <i>, or for all arenas if <i> equals - arenas.narenas. See - opt.dss for supported - settings. - - - - - - arenas.narenas - (unsigned) - r- - - Current limit on number of arenas. - - - - - arenas.initialized - (bool *) - r- - - An array of arenas.narenas - booleans. Each boolean indicates whether the corresponding arena is - initialized. - - - - - arenas.quantum - (size_t) - r- - - Quantum size. - - - - - arenas.page - (size_t) - r- - - Page size. - - - - - arenas.tcache_max - (size_t) - r- - [] - - Maximum thread-cached size class. - - - - - arenas.nbins - (unsigned) - r- - - Number of bin size classes. - - - - - arenas.nhbins - (unsigned) - r- - [] - - Total number of thread cache bin size - classes. - - - - - arenas.bin.<i>.size - (size_t) - r- - - Maximum size supported by size class. - - - - - arenas.bin.<i>.nregs - (uint32_t) - r- - - Number of regions per page run. - - - - - arenas.bin.<i>.run_size - (size_t) - r- - - Number of bytes per page run. - - - - - arenas.nlruns - (size_t) - r- - - Total number of large size classes. - - - - - arenas.lrun.<i>.size - (size_t) - r- - - Maximum size supported by this large size - class. - - - - - arenas.purge - (unsigned) - -w - - Purge unused dirty pages for the specified arena, or - for all arenas if none is specified. - - - - - arenas.extend - (unsigned) - r- - - Extend the array of arenas by appending a new arena, - and returning the new arena index. 
- - - - - prof.active - (bool) - rw - [] - - Control whether sampling is currently active. See the - opt.prof_active - option for additional information. - - - - - - prof.dump - (const char *) - -w - [] - - Dump a memory profile to the specified file, or if NULL - is specified, to a file according to the pattern - <prefix>.<pid>.<seq>.m<mseq>.heap, - where <prefix> is controlled by the - opt.prof_prefix - option. - - - - - prof.interval - (uint64_t) - r- - [] - - Average number of bytes allocated between - inverval-based profile dumps. See the - opt.lg_prof_interval - option for additional information. - - - - - stats.cactive - (size_t *) - r- - [] - - Pointer to a counter that contains an approximate count - of the current number of bytes in active pages. The estimate may be - high, but never low, because each arena rounds up to the nearest - multiple of the chunk size when computing its contribution to the - counter. Note that the epoch mallctl has no bearing - on this counter. Furthermore, counter consistency is maintained via - atomic operations, so it is necessary to use an atomic operation in - order to guarantee a consistent read when dereferencing the pointer. - - - - - - stats.allocated - (size_t) - r- - [] - - Total number of bytes allocated by the - application. - - - - - stats.active - (size_t) - r- - [] - - Total number of bytes in active pages allocated by the - application. This is a multiple of the page size, and greater than or - equal to stats.allocated. - This does not include - stats.arenas.<i>.pdirty and pages - entirely devoted to allocator metadata. - - - - - stats.mapped - (size_t) - r- - [] - - Total number of bytes in chunks mapped on behalf of the - application. This is a multiple of the chunk size, and is at least as - large as stats.active. This - does not include inactive chunks. - - - - - stats.chunks.current - (size_t) - r- - [] - - Total number of chunks actively mapped on behalf of the - application. 
This does not include inactive chunks. - - - - - - stats.chunks.total - (uint64_t) - r- - [] - - Cumulative number of chunks allocated. - - - - - stats.chunks.high - (size_t) - r- - [] - - Maximum number of active chunks at any time thus far. - - - - - - stats.huge.allocated - (size_t) - r- - [] - - Number of bytes currently allocated by huge objects. - - - - - - stats.huge.nmalloc - (uint64_t) - r- - [] - - Cumulative number of huge allocation requests. - - - - - - stats.huge.ndalloc - (uint64_t) - r- - [] - - Cumulative number of huge deallocation requests. - - - - - - stats.arenas.<i>.dss - (const char *) - r- - - dss (sbrk - 2) allocation precedence as - related to mmap - 2 allocation. See opt.dss for details. - - - - - - stats.arenas.<i>.nthreads - (unsigned) - r- - - Number of threads currently assigned to - arena. - - - - - stats.arenas.<i>.pactive - (size_t) - r- - - Number of pages in active runs. - - - - - stats.arenas.<i>.pdirty - (size_t) - r- - - Number of pages within unused runs that are potentially - dirty, and for which madvise... - MADV_DONTNEED or - similar has not been called. - - - - - stats.arenas.<i>.mapped - (size_t) - r- - [] - - Number of mapped bytes. - - - - - stats.arenas.<i>.npurge - (uint64_t) - r- - [] - - Number of dirty page purge sweeps performed. - - - - - - stats.arenas.<i>.nmadvise - (uint64_t) - r- - [] - - Number of madvise... - MADV_DONTNEED or - similar calls made to purge dirty pages. - - - - - stats.arenas.<i>.npurged - (uint64_t) - r- - [] - - Number of pages purged. - - - - - stats.arenas.<i>.small.allocated - (size_t) - r- - [] - - Number of bytes currently allocated by small objects. - - - - - - stats.arenas.<i>.small.nmalloc - (uint64_t) - r- - [] - - Cumulative number of allocation requests served by - small bins. - - - - - stats.arenas.<i>.small.ndalloc - (uint64_t) - r- - [] - - Cumulative number of small objects returned to bins. 
- - - - - - stats.arenas.<i>.small.nrequests - (uint64_t) - r- - [] - - Cumulative number of small allocation requests. - - - - - - stats.arenas.<i>.large.allocated - (size_t) - r- - [] - - Number of bytes currently allocated by large objects. - - - - - - stats.arenas.<i>.large.nmalloc - (uint64_t) - r- - [] - - Cumulative number of large allocation requests served - directly by the arena. - - - - - stats.arenas.<i>.large.ndalloc - (uint64_t) - r- - [] - - Cumulative number of large deallocation requests served - directly by the arena. - - - - - stats.arenas.<i>.large.nrequests - (uint64_t) - r- - [] - - Cumulative number of large allocation requests. - - - - - - stats.arenas.<i>.bins.<j>.allocated - (size_t) - r- - [] - - Current number of bytes allocated by - bin. - - - - - stats.arenas.<i>.bins.<j>.nmalloc - (uint64_t) - r- - [] - - Cumulative number of allocations served by bin. - - - - - - stats.arenas.<i>.bins.<j>.ndalloc - (uint64_t) - r- - [] - - Cumulative number of allocations returned to bin. - - - - - - stats.arenas.<i>.bins.<j>.nrequests - (uint64_t) - r- - [] - - Cumulative number of allocation - requests. - - - - - stats.arenas.<i>.bins.<j>.nfills - (uint64_t) - r- - [ ] - - Cumulative number of tcache fills. - - - - - stats.arenas.<i>.bins.<j>.nflushes - (uint64_t) - r- - [ ] - - Cumulative number of tcache flushes. - - - - - stats.arenas.<i>.bins.<j>.nruns - (uint64_t) - r- - [] - - Cumulative number of runs created. - - - - - stats.arenas.<i>.bins.<j>.nreruns - (uint64_t) - r- - [] - - Cumulative number of times the current run from which - to allocate changed. - - - - - stats.arenas.<i>.bins.<j>.curruns - (size_t) - r- - [] - - Current number of runs. - - - - - stats.arenas.<i>.lruns.<j>.nmalloc - (uint64_t) - r- - [] - - Cumulative number of allocation requests for this size - class served directly by the arena. 
- - - - - stats.arenas.<i>.lruns.<j>.ndalloc - (uint64_t) - r- - [] - - Cumulative number of deallocation requests for this - size class served directly by the arena. - - - - - stats.arenas.<i>.lruns.<j>.nrequests - (uint64_t) - r- - [] - - Cumulative number of allocation requests for this size - class. - - - - - stats.arenas.<i>.lruns.<j>.curruns - (size_t) - r- - [] - - Current number of runs for this size class. - - - - - - DEBUGGING MALLOC PROBLEMS - When debugging, it is a good idea to configure/build jemalloc with - the and - options, and recompile the program with suitable options and symbols for - debugger support. When so configured, jemalloc incorporates a wide variety - of run-time assertions that catch application errors such as double-free, - write-after-free, etc. - - Programs often accidentally depend on “uninitialized” - memory actually being filled with zero bytes. Junk filling - (see the opt.junk - option) tends to expose such bugs in the form of obviously incorrect - results and/or coredumps. Conversely, zero - filling (see the opt.zero option) eliminates - the symptoms of such bugs. Between these two options, it is usually - possible to quickly detect, diagnose, and eliminate such bugs. - - This implementation does not provide much detail about the problems - it detects, because the performance impact for storing such information - would be prohibitive. However, jemalloc does integrate with the most - excellent Valgrind tool if the - configuration option is enabled. - - - DIAGNOSTIC MESSAGES - If any of the memory allocation/deallocation functions detect an - error or warning condition, a message will be printed to file descriptor - STDERR_FILENO. Errors will result in the process - dumping core. If the opt.abort option is set, most - warnings are treated as errors. 
- - The malloc_message variable allows the programmer - to override the function which emits the text strings forming the errors - and warnings if for some reason the STDERR_FILENO file - descriptor is not suitable for this. - malloc_message takes the - cbopaque pointer argument that is - NULL unless overridden by the arguments in a call to - malloc_stats_print, followed by a string - pointer. Please note that doing anything which tries to allocate memory in - this function is likely to result in a crash or deadlock. - - All messages are prefixed by - “<jemalloc>: ”. - - - RETURN VALUES - - Standard API - The malloc and - calloc functions return a pointer to the - allocated memory if successful; otherwise a NULL - pointer is returned and errno is set to - ENOMEM. - - The posix_memalign function - returns the value 0 if successful; otherwise it returns an error value. - The posix_memalign function will fail - if: - - - EINVAL - - The alignment parameter is - not a power of 2 at least as large as - sizeof(void *). - - - - ENOMEM - - Memory allocation error. - - - - - The aligned_alloc function returns - a pointer to the allocated memory if successful; otherwise a - NULL pointer is returned and - errno is set. The - aligned_alloc function will fail if: - - - EINVAL - - The alignment parameter is - not a power of 2. - - - - ENOMEM - - Memory allocation error. - - - - - The realloc function returns a - pointer, possibly identical to ptr, to the - allocated memory if successful; otherwise a NULL - pointer is returned, and errno is set to - ENOMEM if the error was the result of an - allocation failure. The realloc - function always leaves the original buffer intact when an error occurs. - - - The free function returns no - value. - - - Non-standard API - The malloc_usable_size function - returns the usable size of the allocation pointed to by - ptr. 
- - The mallctl, - mallctlnametomib, and - mallctlbymib functions return 0 on - success; otherwise they return an error value. The functions will fail - if: - - - EINVAL - - newp is not - NULL, and newlen is too - large or too small. Alternatively, *oldlenp - is too large or too small; in this case as much data as possible - are read despite the error. - - - ENOMEM - - *oldlenp is too short to - hold the requested value. - - - ENOENT - - name or - mib specifies an unknown/invalid - value. - - - EPERM - - Attempt to read or write void value, or attempt to - write read-only value. - - - EAGAIN - - A memory allocation failure - occurred. - - - EFAULT - - An interface with side effects failed in some way - not directly related to mallctl* - read/write processing. - - - - - - Experimental API - The allocm, - rallocm, - sallocm, - dallocm, and - nallocm functions return - ALLOCM_SUCCESS on success; otherwise they return an - error value. The allocm, - rallocm, and - nallocm functions will fail if: - - - ALLOCM_ERR_OOM - - Out of memory. Insufficient contiguous memory was - available to service the allocation request. The - allocm function additionally sets - *ptr to NULL, whereas - the rallocm function leaves - *ptr unmodified. - - - The rallocm function will also - fail if: - - - ALLOCM_ERR_NOT_MOVED - - ALLOCM_NO_MOVE was specified, - but the reallocation request could not be serviced without moving - the object. - - - - - - - ENVIRONMENT - The following environment variable affects the execution of the - allocation functions: - - - MALLOC_CONF - - If the environment variable - MALLOC_CONF is set, the characters it contains - will be interpreted as options. 
- - - - - - EXAMPLES - To dump core whenever a problem occurs: - ln -s 'abort:true' /etc/malloc.conf - - To specify in the source a chunk size that is 16 MiB: - - - - SEE ALSO - madvise - 2, - mmap - 2, - sbrk - 2, - utrace - 2, - alloca - 3, - atexit - 3, - getpagesize - 3 - - - STANDARDS - The malloc, - calloc, - realloc, and - free functions conform to ISO/IEC - 9899:1990 (“ISO C90”). - - The posix_memalign function conforms - to IEEE Std 1003.1-2001 (“POSIX.1”). - -
diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/doc/manpages.xsl.in mariadb-5.5-5.5.40/extra/jemalloc/doc/manpages.xsl.in --- mariadb-5.5-5.5.39/extra/jemalloc/doc/manpages.xsl.in 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/doc/manpages.xsl.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,4 +0,0 @@ - - - - diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/doc/stylesheet.xsl mariadb-5.5-5.5.40/extra/jemalloc/doc/stylesheet.xsl --- mariadb-5.5-5.5.39/extra/jemalloc/doc/stylesheet.xsl 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/doc/stylesheet.xsl 1970-01-01 00:00:00.000000000 +0000 @@ -1,7 +0,0 @@ - - ansi - - - "" - - diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/arena.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/arena.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/arena.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/arena.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,1022 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* - * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized - * as small as possible such that this setting is still honored, without - * violating other constraints. The goal is to make runs as small as possible - * without exceeding a per run external fragmentation threshold. - * - * We use binary fixed point math for overhead computations, where the binary - * point is implicitly RUN_BFP bits to the left. - * - * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be - * honored for some/all object sizes, since when heap profiling is enabled - * there is one pointer of header overhead per object (plus a constant). 
This - * constraint is relaxed (ignored) for runs that are so small that the - * per-region overhead is greater than: - * - * (RUN_MAX_OVRHD / (reg_interval << (3+RUN_BFP)) - */ -#define RUN_BFP 12 -/* \/ Implicit binary fixed point. */ -#define RUN_MAX_OVRHD 0x0000003dU -#define RUN_MAX_OVRHD_RELAX 0x00001800U - -/* Maximum number of regions in one run. */ -#define LG_RUN_MAXREGS 11 -#define RUN_MAXREGS (1U << LG_RUN_MAXREGS) - -/* - * Minimum redzone size. Redzones may be larger than this if necessary to - * preserve region alignment. - */ -#define REDZONE_MINSIZE 16 - -/* - * The minimum ratio of active:dirty pages per arena is computed as: - * - * (nactive >> opt_lg_dirty_mult) >= ndirty - * - * So, supposing that opt_lg_dirty_mult is 3, there can be no less than 8 times - * as many active pages as dirty pages. - */ -#define LG_DIRTY_MULT_DEFAULT 3 - -typedef struct arena_chunk_map_s arena_chunk_map_t; -typedef struct arena_chunk_s arena_chunk_t; -typedef struct arena_run_s arena_run_t; -typedef struct arena_bin_info_s arena_bin_info_t; -typedef struct arena_bin_s arena_bin_t; -typedef struct arena_s arena_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -/* Each element of the chunk map corresponds to one page within the chunk. */ -struct arena_chunk_map_s { -#ifndef JEMALLOC_PROF - /* - * Overlay prof_ctx in order to allow it to be referenced by dead code. - * Such antics aren't warranted for per arena data structures, but - * chunk map overhead accounts for a percentage of memory, rather than - * being just a fixed cost. - */ - union { -#endif - union { - /* - * Linkage for run trees. There are two disjoint uses: - * - * 1) arena_t's runs_avail tree. - * 2) arena_run_t conceptually uses this linkage for in-use - * non-full runs, rather than directly embedding linkage. - */ - rb_node(arena_chunk_map_t) rb_link; - /* - * List of runs currently in purgatory. 
arena_chunk_purge() - * temporarily allocates runs that contain dirty pages while - * purging, so that other threads cannot use the runs while the - * purging thread is operating without the arena lock held. - */ - ql_elm(arena_chunk_map_t) ql_link; - } u; - - /* Profile counters, used for large object runs. */ - prof_ctx_t *prof_ctx; -#ifndef JEMALLOC_PROF - }; /* union { ... }; */ -#endif - - /* - * Run address (or size) and various flags are stored together. The bit - * layout looks like (assuming 32-bit system): - * - * ???????? ???????? ????nnnn nnnndula - * - * ? : Unallocated: Run address for first/last pages, unset for internal - * pages. - * Small: Run page offset. - * Large: Run size for first page, unset for trailing pages. - * n : binind for small size class, BININD_INVALID for large size class. - * d : dirty? - * u : unzeroed? - * l : large? - * a : allocated? - * - * Following are example bit patterns for the three types of runs. - * - * p : run page offset - * s : run size - * n : binind for size class; large objects set these to BININD_INVALID - * except for promoted allocations (see prof_promote) - * x : don't care - * - : 0 - * + : 1 - * [DULA] : bit set - * [dula] : bit unset - * - * Unallocated (clean): - * ssssssss ssssssss ssss++++ ++++du-a - * xxxxxxxx xxxxxxxx xxxxxxxx xxxx-Uxx - * ssssssss ssssssss ssss++++ ++++dU-a - * - * Unallocated (dirty): - * ssssssss ssssssss ssss++++ ++++D--a - * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * ssssssss ssssssss ssss++++ ++++D--a - * - * Small: - * pppppppp pppppppp ppppnnnn nnnnd--A - * pppppppp pppppppp ppppnnnn nnnn---A - * pppppppp pppppppp ppppnnnn nnnnd--A - * - * Large: - * ssssssss ssssssss ssss++++ ++++D-LA - * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * -------- -------- ----++++ ++++D-LA - * - * Large (sampled, size <= PAGE): - * ssssssss ssssssss ssssnnnn nnnnD-LA - * - * Large (not sampled, size == PAGE): - * ssssssss ssssssss ssss++++ ++++D-LA - */ - size_t bits; -#define CHUNK_MAP_BININD_SHIFT 4 
-#define BININD_INVALID ((size_t)0xffU) -/* CHUNK_MAP_BININD_MASK == (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) */ -#define CHUNK_MAP_BININD_MASK ((size_t)0xff0U) -#define CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK -#define CHUNK_MAP_FLAGS_MASK ((size_t)0xcU) -#define CHUNK_MAP_DIRTY ((size_t)0x8U) -#define CHUNK_MAP_UNZEROED ((size_t)0x4U) -#define CHUNK_MAP_LARGE ((size_t)0x2U) -#define CHUNK_MAP_ALLOCATED ((size_t)0x1U) -#define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED -}; -typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t; -typedef rb_tree(arena_chunk_map_t) arena_run_tree_t; - -/* Arena chunk header. */ -struct arena_chunk_s { - /* Arena that owns the chunk. */ - arena_t *arena; - - /* Linkage for tree of arena chunks that contain dirty runs. */ - rb_node(arena_chunk_t) dirty_link; - - /* Number of dirty pages. */ - size_t ndirty; - - /* Number of available runs. */ - size_t nruns_avail; - - /* - * Number of available run adjacencies. Clean and dirty available runs - * are not coalesced, which causes virtual memory fragmentation. The - * ratio of (nruns_avail-nruns_adjac):nruns_adjac is used for tracking - * this fragmentation. - * */ - size_t nruns_adjac; - - /* - * Map of pages within chunk that keeps track of free/large/small. The - * first map_bias entries are omitted, since the chunk header does not - * need to be tracked in the map. This omission saves a header page - * for common chunk sizes (e.g. 4 MiB). - */ - arena_chunk_map_t map[1]; /* Dynamically sized. */ -}; -typedef rb_tree(arena_chunk_t) arena_chunk_tree_t; - -struct arena_run_s { - /* Bin this run is associated with. */ - arena_bin_t *bin; - - /* Index of next region that has never been allocated, or nregs. */ - uint32_t nextind; - - /* Number of free regions in run. 
*/ - unsigned nfree; -}; - -/* - * Read-only information associated with each element of arena_t's bins array - * is stored separately, partly to reduce memory usage (only one copy, rather - * than one per arena), but mainly to avoid false cacheline sharing. - * - * Each run has the following layout: - * - * /--------------------\ - * | arena_run_t header | - * | ... | - * bitmap_offset | bitmap | - * | ... | - * ctx0_offset | ctx map | - * | ... | - * |--------------------| - * | redzone | - * reg0_offset | region 0 | - * | redzone | - * |--------------------| \ - * | redzone | | - * | region 1 | > reg_interval - * | redzone | / - * |--------------------| - * | ... | - * | ... | - * | ... | - * |--------------------| - * | redzone | - * | region nregs-1 | - * | redzone | - * |--------------------| - * | alignment pad? | - * \--------------------/ - * - * reg_interval has at least the same minimum alignment as reg_size; this - * preserves the alignment constraint that sa2u() depends on. Alignment pad is - * either 0 or redzone_size; it is present only if needed to align reg0_offset. - */ -struct arena_bin_info_s { - /* Size of regions in a run for this bin's size class. */ - size_t reg_size; - - /* Redzone size. */ - size_t redzone_size; - - /* Interval between regions (reg_size + (redzone_size << 1)). */ - size_t reg_interval; - - /* Total size of a run for this bin's size class. */ - size_t run_size; - - /* Total number of regions in a run for this bin's size class. */ - uint32_t nregs; - - /* - * Offset of first bitmap_t element in a run header for this bin's size - * class. - */ - uint32_t bitmap_offset; - - /* - * Metadata used to manipulate bitmaps for runs associated with this - * bin. - */ - bitmap_info_t bitmap_info; - - /* - * Offset of first (prof_ctx_t *) in a run header for this bin's size - * class, or 0 if (config_prof == false || opt_prof == false). - */ - uint32_t ctx0_offset; - - /* Offset of first region in a run for this bin's size class. 
*/ - uint32_t reg0_offset; -}; - -struct arena_bin_s { - /* - * All operations on runcur, runs, and stats require that lock be - * locked. Run allocation/deallocation are protected by the arena lock, - * which may be acquired while holding one or more bin locks, but not - * vise versa. - */ - malloc_mutex_t lock; - - /* - * Current run being used to service allocations of this bin's size - * class. - */ - arena_run_t *runcur; - - /* - * Tree of non-full runs. This tree is used when looking for an - * existing run when runcur is no longer usable. We choose the - * non-full run that is lowest in memory; this policy tends to keep - * objects packed well, and it can also help reduce the number of - * almost-empty chunks. - */ - arena_run_tree_t runs; - - /* Bin statistics. */ - malloc_bin_stats_t stats; -}; - -struct arena_s { - /* This arena's index within the arenas array. */ - unsigned ind; - - /* - * Number of threads currently assigned to this arena. This field is - * protected by arenas_lock. - */ - unsigned nthreads; - - /* - * There are three classes of arena operations from a locking - * perspective: - * 1) Thread asssignment (modifies nthreads) is protected by - * arenas_lock. - * 2) Bin-related operations are protected by bin locks. - * 3) Chunk- and run-related operations are protected by this mutex. - */ - malloc_mutex_t lock; - - arena_stats_t stats; - /* - * List of tcaches for extant threads associated with this arena. - * Stats from these are merged incrementally, and at exit. - */ - ql_head(tcache_t) tcache_ql; - - uint64_t prof_accumbytes; - - dss_prec_t dss_prec; - - /* Tree of dirty-page-containing chunks this arena manages. */ - arena_chunk_tree_t chunks_dirty; - - /* - * In order to avoid rapid chunk allocation/deallocation when an arena - * oscillates right on the cusp of needing a new chunk, cache the most - * recently freed chunk. The spare is left in the arena's chunk trees - * until it is deleted. 
- * - * There is one spare chunk per arena, rather than one spare total, in - * order to avoid interactions between multiple threads that could make - * a single spare inadequate. - */ - arena_chunk_t *spare; - - /* Number of pages in active runs. */ - size_t nactive; - - /* - * Current count of pages within unused runs that are potentially - * dirty, and for which madvise(... MADV_DONTNEED) has not been called. - * By tracking this, we can institute a limit on how much dirty unused - * memory is mapped for each arena. - */ - size_t ndirty; - - /* - * Approximate number of pages being purged. It is possible for - * multiple threads to purge dirty pages concurrently, and they use - * npurgatory to indicate the total number of pages all threads are - * attempting to purge. - */ - size_t npurgatory; - - /* - * Size/address-ordered trees of this arena's available runs. The trees - * are used for first-best-fit run allocation. - */ - arena_avail_tree_t runs_avail; - - /* bins is used to store trees of free regions. */ - arena_bin_t bins[NBINS]; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern ssize_t opt_lg_dirty_mult; -/* - * small_size2bin is a compact lookup table that rounds request sizes up to - * size classes. In order to reduce cache footprint, the table is compressed, - * and all accesses are via the SMALL_SIZE2BIN macro. - */ -extern uint8_t const small_size2bin[]; -#define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN]) - -extern arena_bin_info_t arena_bin_info[NBINS]; - -/* Number of large size classes. 
*/ -#define nlclasses (chunk_npages - map_bias) - -void arena_purge_all(arena_t *arena); -void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, - size_t binind, uint64_t prof_accumbytes); -void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, - bool zero); -void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); -void *arena_malloc_small(arena_t *arena, size_t size, bool zero); -void *arena_malloc_large(arena_t *arena, size_t size, bool zero); -void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); -void arena_prof_promoted(const void *ptr, size_t size); -void arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, - arena_chunk_map_t *mapelm); -void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind, arena_chunk_map_t *mapelm); -void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind); -void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, - void *ptr); -void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); -void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, - size_t extra, bool zero); -void *arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, - bool try_tcache_dalloc); -dss_prec_t arena_dss_prec_get(arena_t *arena); -void arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); -void arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, - size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats); -bool arena_new(arena_t *arena, unsigned ind); -void arena_boot(void); -void arena_prefork(arena_t *arena); -void arena_postfork_parent(arena_t *arena); -void arena_postfork_child(arena_t *arena); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - 
-#ifndef JEMALLOC_ENABLE_INLINE -arena_chunk_map_t *arena_mapp_get(arena_chunk_t *chunk, size_t pageind); -size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, - size_t pageind); -size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind); -void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, - size_t size, size_t flags); -void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, - size_t size); -void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, - size_t size, size_t flags); -void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - size_t binind); -void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, - size_t runind, size_t binind, size_t flags); -void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, - size_t unzeroed); -bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); -bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); -bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); -size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); -size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); -unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, - const void *ptr); -prof_ctx_t *arena_prof_ctx_get(const void *ptr); -void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); -void *arena_malloc(arena_t *arena, 
size_t size, bool zero, bool try_tcache); -size_t arena_salloc(const void *ptr, bool demote); -void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, - bool try_tcache); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) -# ifdef JEMALLOC_ARENA_INLINE_A -JEMALLOC_ALWAYS_INLINE arena_chunk_map_t * -arena_mapp_get(arena_chunk_t *chunk, size_t pageind) -{ - - assert(pageind >= map_bias); - assert(pageind < chunk_npages); - - return (&chunk->map[pageind-map_bias]); -} - -JEMALLOC_ALWAYS_INLINE size_t * -arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind) -{ - - return (&arena_mapp_get(chunk, pageind)->bits); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_get(arena_chunk_t *chunk, size_t pageind) -{ - - return (*arena_mapbitsp_get(chunk, pageind)); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); - return (mapbits & ~PAGE_MASK); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == - (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)); - return (mapbits & ~PAGE_MASK); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == - CHUNK_MAP_ALLOCATED); - return (mapbits >> LG_PAGE); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - size_t binind; - - mapbits = arena_mapbits_get(chunk, pageind); - binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; - assert(binind < NBINS || binind == BININD_INVALID); 
- return (binind); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - return (mapbits & CHUNK_MAP_DIRTY); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - return (mapbits & CHUNK_MAP_UNZEROED); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - return (mapbits & CHUNK_MAP_LARGE); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - return (mapbits & CHUNK_MAP_ALLOCATED); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, - size_t flags) -{ - size_t *mapbitsp; - - mapbitsp = arena_mapbitsp_get(chunk, pageind); - assert((size & PAGE_MASK) == 0); - assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); - assert((flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == flags); - *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags; -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, - size_t size) -{ - size_t *mapbitsp; - - mapbitsp = arena_mapbitsp_get(chunk, pageind); - assert((size & PAGE_MASK) == 0); - assert((*mapbitsp & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); - *mapbitsp = size | (*mapbitsp & PAGE_MASK); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, - size_t flags) -{ - size_t *mapbitsp; - size_t unzeroed; - - mapbitsp = arena_mapbitsp_get(chunk, pageind); - assert((size & PAGE_MASK) == 0); - assert((flags & CHUNK_MAP_DIRTY) == flags); - unzeroed = *mapbitsp & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. 
*/ - *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags | unzeroed | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - size_t binind) -{ - size_t *mapbitsp; - - assert(binind <= BININD_INVALID); - mapbitsp = arena_mapbitsp_get(chunk, pageind); - assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE); - *mapbitsp = (*mapbitsp & ~CHUNK_MAP_BININD_MASK) | (binind << - CHUNK_MAP_BININD_SHIFT); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, - size_t binind, size_t flags) -{ - size_t *mapbitsp; - size_t unzeroed; - - assert(binind < BININD_INVALID); - mapbitsp = arena_mapbitsp_get(chunk, pageind); - assert(pageind - runind >= map_bias); - assert((flags & CHUNK_MAP_DIRTY) == flags); - unzeroed = *mapbitsp & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */ - *mapbitsp = (runind << LG_PAGE) | (binind << CHUNK_MAP_BININD_SHIFT) | - flags | unzeroed | CHUNK_MAP_ALLOCATED; -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, - size_t unzeroed) -{ - size_t *mapbitsp; - - mapbitsp = arena_mapbitsp_get(chunk, pageind); - *mapbitsp = (*mapbitsp & ~CHUNK_MAP_UNZEROED) | unzeroed; -} - -JEMALLOC_INLINE bool -arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) -{ - - cassert(config_prof); - assert(prof_interval != 0); - - arena->prof_accumbytes += accumbytes; - if (arena->prof_accumbytes >= prof_interval) { - arena->prof_accumbytes -= prof_interval; - return (true); - } - return (false); -} - -JEMALLOC_INLINE bool -arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) -{ - - cassert(config_prof); - - if (prof_interval == 0) - return (false); - return (arena_prof_accum_impl(arena, accumbytes)); -} - -JEMALLOC_INLINE bool -arena_prof_accum(arena_t *arena, uint64_t accumbytes) -{ - - cassert(config_prof); - - if (prof_interval == 0) - return (false); - - { - bool 
ret; - - malloc_mutex_lock(&arena->lock); - ret = arena_prof_accum_impl(arena, accumbytes); - malloc_mutex_unlock(&arena->lock); - return (ret); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_ptr_small_binind_get(const void *ptr, size_t mapbits) -{ - size_t binind; - - binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; - - if (config_debug) { - arena_chunk_t *chunk; - arena_t *arena; - size_t pageind; - size_t actual_mapbits; - arena_run_t *run; - arena_bin_t *bin; - size_t actual_binind; - arena_bin_info_t *bin_info; - - assert(binind != BININD_INVALID); - assert(binind < NBINS); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = chunk->arena; - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - actual_mapbits = arena_mapbits_get(chunk, pageind); - assert(mapbits == actual_mapbits); - assert(arena_mapbits_large_get(chunk, pageind) == 0); - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (actual_mapbits >> LG_PAGE)) << LG_PAGE)); - bin = run->bin; - actual_binind = bin - arena->bins; - assert(binind == actual_binind); - bin_info = &arena_bin_info[actual_binind]; - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval - == 0); - } - - return (binind); -} -# endif /* JEMALLOC_ARENA_INLINE_A */ - -# ifdef JEMALLOC_ARENA_INLINE_B -JEMALLOC_INLINE size_t -arena_bin_index(arena_t *arena, arena_bin_t *bin) -{ - size_t binind = bin - arena->bins; - assert(binind < NBINS); - return (binind); -} - -JEMALLOC_INLINE unsigned -arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) -{ - unsigned shift, diff, regind; - size_t interval; - - /* - * Freeing a pointer lower than region zero can cause assertion - * failure. - */ - assert((uintptr_t)ptr >= (uintptr_t)run + - (uintptr_t)bin_info->reg0_offset); - - /* - * Avoid doing division with a variable divisor if possible. 
Using - * actual division here can reduce allocator throughput by over 20%! - */ - diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - - bin_info->reg0_offset); - - /* Rescale (factor powers of 2 out of the numerator and denominator). */ - interval = bin_info->reg_interval; - shift = ffs(interval) - 1; - diff >>= shift; - interval >>= shift; - - if (interval == 1) { - /* The divisor was a power of 2. */ - regind = diff; - } else { - /* - * To divide by a number D that is not a power of two we - * multiply by (2^21 / D) and then right shift by 21 positions. - * - * X / D - * - * becomes - * - * (X * interval_invs[D - 3]) >> SIZE_INV_SHIFT - * - * We can omit the first three elements, because we never - * divide by 0, and 1 and 2 are both powers of two, which are - * handled above. - */ -#define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS) -#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) - static const unsigned interval_invs[] = { - SIZE_INV(3), - SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), - SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), - SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15), - SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19), - SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23), - SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27), - SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) - }; - - if (interval <= ((sizeof(interval_invs) / sizeof(unsigned)) + - 2)) { - regind = (diff * interval_invs[interval - 3]) >> - SIZE_INV_SHIFT; - } else - regind = diff / interval; -#undef SIZE_INV -#undef SIZE_INV_SHIFT - } - assert(diff == regind * interval); - assert(regind < bin_info->nregs); - - return (regind); -} - -JEMALLOC_INLINE prof_ctx_t * -arena_prof_ctx_get(const void *ptr) -{ - prof_ctx_t *ret; - arena_chunk_t *chunk; - size_t pageind, mapbits; - - cassert(config_prof); - assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = 
((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - if (prof_promote) - ret = (prof_ctx_t *)(uintptr_t)1U; - else { - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << - LG_PAGE)); - size_t binind = arena_ptr_small_binind_get(ptr, - mapbits); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - unsigned regind; - - regind = arena_run_regind(run, bin_info, ptr); - ret = *(prof_ctx_t **)((uintptr_t)run + - bin_info->ctx0_offset + (regind * - sizeof(prof_ctx_t *))); - } - } else - ret = arena_mapp_get(chunk, pageind)->prof_ctx; - - return (ret); -} - -JEMALLOC_INLINE void -arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) -{ - arena_chunk_t *chunk; - size_t pageind, mapbits; - - cassert(config_prof); - assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - if (prof_promote == false) { - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << - LG_PAGE)); - size_t binind; - arena_bin_info_t *bin_info; - unsigned regind; - - binind = arena_ptr_small_binind_get(ptr, mapbits); - bin_info = &arena_bin_info[binind]; - regind = arena_run_regind(run, bin_info, ptr); - - *((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset - + (regind * sizeof(prof_ctx_t *)))) = ctx; - } else - assert((uintptr_t)ctx == (uintptr_t)1U); - } else - arena_mapp_get(chunk, pageind)->prof_ctx = ctx; -} - -JEMALLOC_ALWAYS_INLINE void * -arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) -{ - tcache_t *tcache; - - assert(size != 0); - assert(size <= arena_maxclass); - - if (size <= 
SMALL_MAXCLASS) { - if (try_tcache && (tcache = tcache_get(true)) != NULL) - return (tcache_alloc_small(tcache, size, zero)); - else { - return (arena_malloc_small(choose_arena(arena), size, - zero)); - } - } else { - /* - * Initialize tcache after checking size in order to avoid - * infinite recursion during tcache initialization. - */ - if (try_tcache && size <= tcache_maxclass && (tcache = - tcache_get(true)) != NULL) - return (tcache_alloc_large(tcache, size, zero)); - else { - return (arena_malloc_large(choose_arena(arena), size, - zero)); - } - } -} - -/* Return the size of the allocation pointed to by ptr. */ -JEMALLOC_ALWAYS_INLINE size_t -arena_salloc(const void *ptr, bool demote) -{ - size_t ret; - arena_chunk_t *chunk; - size_t pageind, binind; - - assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - binind = arena_mapbits_binind_get(chunk, pageind); - if (binind == BININD_INVALID || (config_prof && demote == false && - prof_promote && arena_mapbits_large_get(chunk, pageind) != 0)) { - /* - * Large allocation. In the common case (demote == true), and - * as this is an inline function, most callers will only end up - * looking at binind to determine that ptr is a small - * allocation. - */ - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - ret = arena_mapbits_large_size_get(chunk, pageind); - assert(ret != 0); - assert(pageind + (ret>>LG_PAGE) <= chunk_npages); - assert(ret == PAGE || arena_mapbits_large_size_get(chunk, - pageind+(ret>>LG_PAGE)-1) == 0); - assert(binind == arena_mapbits_binind_get(chunk, - pageind+(ret>>LG_PAGE)-1)); - assert(arena_mapbits_dirty_get(chunk, pageind) == - arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1)); - } else { - /* - * Small allocation (possibly promoted to a large object due to - * prof_promote). 
- */ - assert(arena_mapbits_large_get(chunk, pageind) != 0 || - arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, - pageind)) == binind); - ret = arena_bin_info[binind].reg_size; - } - - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void -arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) -{ - size_t pageind, mapbits; - tcache_t *tcache; - - assert(arena != NULL); - assert(chunk->arena == arena); - assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = arena_mapbits_get(chunk, pageind); - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - /* Small allocation. */ - if (try_tcache && (tcache = tcache_get(false)) != NULL) { - size_t binind; - - binind = arena_ptr_small_binind_get(ptr, mapbits); - tcache_dalloc_small(tcache, ptr, binind); - } else - arena_dalloc_small(arena, chunk, ptr, pageind); - } else { - size_t size = arena_mapbits_large_size_get(chunk, pageind); - - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - - if (try_tcache && size <= tcache_maxclass && (tcache = - tcache_get(false)) != NULL) { - tcache_dalloc_large(tcache, ptr, size); - } else - arena_dalloc_large(arena, chunk, ptr); - } -} -# endif /* JEMALLOC_ARENA_INLINE_B */ -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/atomic.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/atomic.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/atomic.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/atomic.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,304 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ 
-/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#define atomic_read_uint64(p) atomic_add_uint64(p, 0) -#define atomic_read_uint32(p) atomic_add_uint32(p, 0) -#define atomic_read_z(p) atomic_add_z(p, 0) -#define atomic_read_u(p) atomic_add_u(p, 0) - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); -uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); -uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); -uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); -size_t atomic_add_z(size_t *p, size_t x); -size_t atomic_sub_z(size_t *p, size_t x); -unsigned atomic_add_u(unsigned *p, unsigned x); -unsigned atomic_sub_u(unsigned *p, unsigned x); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) -/******************************************************************************/ -/* 64-bit operations. 
*/ -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - return (__sync_add_and_fetch(p, x)); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - return (__sync_sub_and_fetch(p, x)); -} -#elif (defined(_MSC_VER)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - return (InterlockedExchangeAdd64(p, x)); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - return (InterlockedExchangeAdd64(p, -((int64_t)x))); -} -#elif (defined(JEMALLOC_OSATOMIC)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); -} -# elif (defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - x = (uint64_t)(-(int64_t)x); - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (x); -} -# elif (defined(JEMALLOC_ATOMIC9)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - /* - * atomic_fetchadd_64() doesn't exist, but we only ever use this - * function on LP64 systems, so atomic_fetchadd_long() will do. 
- */ - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return (atomic_fetchadd_long(p, (unsigned long)x) + x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x); -} -# elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - return (__sync_add_and_fetch(p, x)); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - return (__sync_sub_and_fetch(p, x)); -} -# else -# error "Missing implementation for 64-bit atomic operations" -# endif -#endif - -/******************************************************************************/ -/* 32-bit operations. */ -#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (__sync_add_and_fetch(p, x)); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (__sync_sub_and_fetch(p, x)); -} -#elif (defined(_MSC_VER)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (InterlockedExchangeAdd(p, x)); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (InterlockedExchangeAdd(p, -((int32_t)x))); -} -#elif (defined(JEMALLOC_OSATOMIC)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); -} -#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. 
*/ - ); - - return (x); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - x = (uint32_t)(-(int32_t)x); - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (x); -} -#elif (defined(JEMALLOC_ATOMIC9)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (atomic_fetchadd_32(p, x) + x); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x); -} -#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (__sync_add_and_fetch(p, x)); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (__sync_sub_and_fetch(p, x)); -} -#else -# error "Missing implementation for 32-bit atomic operations" -#endif - -/******************************************************************************/ -/* size_t operations. */ -JEMALLOC_INLINE size_t -atomic_add_z(size_t *p, size_t x) -{ - -#if (LG_SIZEOF_PTR == 3) - return ((size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); -#elif (LG_SIZEOF_PTR == 2) - return ((size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); -#endif -} - -JEMALLOC_INLINE size_t -atomic_sub_z(size_t *p, size_t x) -{ - -#if (LG_SIZEOF_PTR == 3) - return ((size_t)atomic_add_uint64((uint64_t *)p, - (uint64_t)-((int64_t)x))); -#elif (LG_SIZEOF_PTR == 2) - return ((size_t)atomic_add_uint32((uint32_t *)p, - (uint32_t)-((int32_t)x))); -#endif -} - -/******************************************************************************/ -/* unsigned operations. 
*/ -JEMALLOC_INLINE unsigned -atomic_add_u(unsigned *p, unsigned x) -{ - -#if (LG_SIZEOF_INT == 3) - return ((unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); -#elif (LG_SIZEOF_INT == 2) - return ((unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); -#endif -} - -JEMALLOC_INLINE unsigned -atomic_sub_u(unsigned *p, unsigned x) -{ - -#if (LG_SIZEOF_INT == 3) - return ((unsigned)atomic_add_uint64((uint64_t *)p, - (uint64_t)-((int64_t)x))); -#elif (LG_SIZEOF_INT == 2) - return ((unsigned)atomic_add_uint32((uint32_t *)p, - (uint32_t)-((int32_t)x))); -#endif -} -/******************************************************************************/ -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/base.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/base.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/base.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/base.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,26 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void *base_alloc(size_t size); -void *base_calloc(size_t number, size_t size); -extent_node_t *base_node_alloc(void); -void base_node_dealloc(extent_node_t *node); -bool base_boot(void); -void base_prefork(void); -void base_postfork_parent(void); -void base_postfork_child(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* 
JEMALLOC_H_INLINES */ -/******************************************************************************/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/bitmap.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/bitmap.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/bitmap.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/bitmap.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,184 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ -#define LG_BITMAP_MAXBITS LG_RUN_MAXREGS - -typedef struct bitmap_level_s bitmap_level_t; -typedef struct bitmap_info_s bitmap_info_t; -typedef unsigned long bitmap_t; -#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG - -/* Number of bits per group. */ -#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) -#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS) -#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) - -/* Maximum number of levels possible. */ -#define BITMAP_MAX_LEVELS \ - (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ - + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct bitmap_level_s { - /* Offset of this level's groups within the array of groups. */ - size_t group_offset; -}; - -struct bitmap_info_s { - /* Logical number of bits in bitmap (stored at bottom level). */ - size_t nbits; - - /* Number of levels necessary for nbits. */ - unsigned nlevels; - - /* - * Only the first (nlevels+1) elements are used, and levels are ordered - * bottom to top (e.g. the bottom level is stored in levels[0]). 
- */ - bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); -size_t bitmap_info_ngroups(const bitmap_info_t *binfo); -size_t bitmap_size(size_t nbits); -void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo); -bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo); -void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) -JEMALLOC_INLINE bool -bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ - unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1; - bitmap_t rg = bitmap[rgoff]; - /* The bitmap is full iff the root group is 0. 
*/ - return (rg == 0); -} - -JEMALLOC_INLINE bool -bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ - size_t goff; - bitmap_t g; - - assert(bit < binfo->nbits); - goff = bit >> LG_BITMAP_GROUP_NBITS; - g = bitmap[goff]; - return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))); -} - -JEMALLOC_INLINE void -bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ - size_t goff; - bitmap_t *gp; - bitmap_t g; - - assert(bit < binfo->nbits); - assert(bitmap_get(bitmap, binfo, bit) == false); - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[goff]; - g = *gp; - assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - assert(bitmap_get(bitmap, binfo, bit)); - /* Propagate group state transitions up the tree. */ - if (g == 0) { - unsigned i; - for (i = 1; i < binfo->nlevels; i++) { - bit = goff; - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[binfo->levels[i].group_offset + goff]; - g = *gp; - assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - if (g != 0) - break; - } - } -} - -/* sfu: set first unset. 
*/ -JEMALLOC_INLINE size_t -bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ - size_t bit; - bitmap_t g; - unsigned i; - - assert(bitmap_full(bitmap, binfo) == false); - - i = binfo->nlevels - 1; - g = bitmap[binfo->levels[i].group_offset]; - bit = ffsl(g) - 1; - while (i > 0) { - i--; - g = bitmap[binfo->levels[i].group_offset + bit]; - bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1); - } - - bitmap_set(bitmap, binfo, bit); - return (bit); -} - -JEMALLOC_INLINE void -bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ - size_t goff; - bitmap_t *gp; - bitmap_t g; - bool propagate; - - assert(bit < binfo->nbits); - assert(bitmap_get(bitmap, binfo, bit)); - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[goff]; - g = *gp; - propagate = (g == 0); - assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - assert(bitmap_get(bitmap, binfo, bit) == false); - /* Propagate group state transitions up the tree. 
*/ - if (propagate) { - unsigned i; - for (i = 1; i < binfo->nlevels; i++) { - bit = goff; - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[binfo->levels[i].group_offset + goff]; - g = *gp; - propagate = (g == 0); - assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) - == 0); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - if (propagate == false) - break; - } - } -} - -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/chunk_dss.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/chunk_dss.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/chunk_dss.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/chunk_dss.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,38 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef enum { - dss_prec_disabled = 0, - dss_prec_primary = 1, - dss_prec_secondary = 2, - - dss_prec_limit = 3 -} dss_prec_t ; -#define DSS_PREC_DEFAULT dss_prec_secondary -#define DSS_DEFAULT "secondary" - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -extern const char *dss_prec_names[]; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -dss_prec_t chunk_dss_prec_get(void); -bool chunk_dss_prec_set(dss_prec_t dss_prec); -void *chunk_alloc_dss(size_t size, size_t alignment, bool *zero); -bool chunk_in_dss(void *chunk); -bool chunk_dss_boot(void); -void chunk_dss_prefork(void); -void chunk_dss_postfork_parent(void); -void chunk_dss_postfork_child(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ 
-#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/chunk.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/chunk.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/chunk.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/chunk.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,63 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* - * Size and alignment of memory chunks that are allocated by the OS's virtual - * memory system. - */ -#define LG_CHUNK_DEFAULT 22 - -/* Return the chunk address for allocation address a. */ -#define CHUNK_ADDR2BASE(a) \ - ((void *)((uintptr_t)(a) & ~chunksize_mask)) - -/* Return the chunk offset of address a. */ -#define CHUNK_ADDR2OFFSET(a) \ - ((size_t)((uintptr_t)(a) & chunksize_mask)) - -/* Return the smallest chunk multiple that is >= s. */ -#define CHUNK_CEILING(s) \ - (((s) + chunksize_mask) & ~chunksize_mask) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern size_t opt_lg_chunk; -extern const char *opt_dss; - -/* Protects stats_chunks; currently not used for any other purpose. */ -extern malloc_mutex_t chunks_mtx; -/* Chunk statistics. */ -extern chunk_stats_t stats_chunks; - -extern rtree_t *chunks_rtree; - -extern size_t chunksize; -extern size_t chunksize_mask; /* (chunksize - 1). */ -extern size_t chunk_npages; -extern size_t map_bias; /* Number of arena chunk header pages. */ -extern size_t arena_maxclass; /* Max size class for arenas. 
*/ - -void *chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, - dss_prec_t dss_prec); -void chunk_unmap(void *chunk, size_t size); -void chunk_dealloc(void *chunk, size_t size, bool unmap); -bool chunk_boot(void); -void chunk_prefork(void); -void chunk_postfork_parent(void); -void chunk_postfork_child(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - -#include "jemalloc/internal/chunk_dss.h" -#include "jemalloc/internal/chunk_mmap.h" diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/chunk_mmap.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/chunk_mmap.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/chunk_mmap.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/chunk_mmap.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,22 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -bool pages_purge(void *addr, size_t length); - -void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero); -bool chunk_dealloc_mmap(void *chunk, size_t size); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/ckh.h 
mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/ckh.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/ckh.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/ckh.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,88 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct ckh_s ckh_t; -typedef struct ckhc_s ckhc_t; - -/* Typedefs to allow easy function pointer passing. */ -typedef void ckh_hash_t (const void *, size_t[2]); -typedef bool ckh_keycomp_t (const void *, const void *); - -/* Maintain counters used to get an idea of performance. */ -/* #define CKH_COUNT */ -/* Print counter values in ckh_delete() (requires CKH_COUNT). */ -/* #define CKH_VERBOSE */ - -/* - * There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit - * one bucket per L1 cache line. - */ -#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -/* Hash table cell. */ -struct ckhc_s { - const void *key; - const void *data; -}; - -struct ckh_s { -#ifdef CKH_COUNT - /* Counters used to get an idea of performance. */ - uint64_t ngrows; - uint64_t nshrinks; - uint64_t nshrinkfails; - uint64_t ninserts; - uint64_t nrelocs; -#endif - - /* Used for pseudo-random number generation. */ -#define CKH_A 1103515241 -#define CKH_C 12347 - uint32_t prng_state; - - /* Total number of items. */ - size_t count; - - /* - * Minimum and current number of hash table buckets. There are - * 2^LG_CKH_BUCKET_CELLS cells per bucket. - */ - unsigned lg_minbuckets; - unsigned lg_curbuckets; - - /* Hash and comparison functions. */ - ckh_hash_t *hash; - ckh_keycomp_t *keycomp; - - /* Hash table with 2^lg_curbuckets buckets. 
*/ - ckhc_t *tab; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -bool ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, - ckh_keycomp_t *keycomp); -void ckh_delete(ckh_t *ckh); -size_t ckh_count(ckh_t *ckh); -bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); -bool ckh_insert(ckh_t *ckh, const void *key, const void *data); -bool ckh_remove(ckh_t *ckh, const void *searchkey, void **key, - void **data); -bool ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data); -void ckh_string_hash(const void *key, size_t r_hash[2]); -bool ckh_string_keycomp(const void *k1, const void *k2); -void ckh_pointer_hash(const void *key, size_t r_hash[2]); -bool ckh_pointer_keycomp(const void *k1, const void *k2); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/ctl.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/ctl.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/ctl.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/ctl.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,117 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct ctl_node_s ctl_node_t; -typedef struct ctl_named_node_s ctl_named_node_t; -typedef struct ctl_indexed_node_s ctl_indexed_node_t; -typedef struct ctl_arena_stats_s ctl_arena_stats_t; -typedef struct ctl_stats_s ctl_stats_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct ctl_node_s { - bool 
named; -}; - -struct ctl_named_node_s { - struct ctl_node_s node; - const char *name; - /* If (nchildren == 0), this is a terminal node. */ - unsigned nchildren; - const ctl_node_t *children; - int (*ctl)(const size_t *, size_t, void *, size_t *, - void *, size_t); -}; - -struct ctl_indexed_node_s { - struct ctl_node_s node; - const ctl_named_node_t *(*index)(const size_t *, size_t, size_t); -}; - -struct ctl_arena_stats_s { - bool initialized; - unsigned nthreads; - const char *dss; - size_t pactive; - size_t pdirty; - arena_stats_t astats; - - /* Aggregate stats for small size classes, based on bin stats. */ - size_t allocated_small; - uint64_t nmalloc_small; - uint64_t ndalloc_small; - uint64_t nrequests_small; - - malloc_bin_stats_t bstats[NBINS]; - malloc_large_stats_t *lstats; /* nlclasses elements. */ -}; - -struct ctl_stats_s { - size_t allocated; - size_t active; - size_t mapped; - struct { - size_t current; /* stats_chunks.curchunks */ - uint64_t total; /* stats_chunks.nchunks */ - size_t high; /* stats_chunks.highchunks */ - } chunks; - struct { - size_t allocated; /* huge_allocated */ - uint64_t nmalloc; /* huge_nmalloc */ - uint64_t ndalloc; /* huge_ndalloc */ - } huge; - unsigned narenas; - ctl_arena_stats_t *arenas; /* (narenas + 1) elements. 
*/ -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -int ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen); -int ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp); - -int ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen); -bool ctl_boot(void); -void ctl_prefork(void); -void ctl_postfork_parent(void); -void ctl_postfork_child(void); - -#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ - if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ - != 0) { \ - malloc_printf( \ - ": Failure in xmallctl(\"%s\", ...)\n", \ - name); \ - abort(); \ - } \ -} while (0) - -#define xmallctlnametomib(name, mibp, miblenp) do { \ - if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ - malloc_printf(": Failure in " \ - "xmallctlnametomib(\"%s\", ...)\n", name); \ - abort(); \ - } \ -} while (0) - -#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ - if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, \ - newlen) != 0) { \ - malloc_write( \ - ": Failure in xmallctlbymib()\n"); \ - abort(); \ - } \ -} while (0) - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/extent.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/extent.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/extent.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/extent.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,46 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - 
-typedef struct extent_node_s extent_node_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -/* Tree of extents. */ -struct extent_node_s { - /* Linkage for the size/address-ordered tree. */ - rb_node(extent_node_t) link_szad; - - /* Linkage for the address-ordered tree. */ - rb_node(extent_node_t) link_ad; - - /* Profile counters, used for huge objects. */ - prof_ctx_t *prof_ctx; - - /* Pointer to the extent that this tree node is responsible for. */ - void *addr; - - /* Total region size. */ - size_t size; - - /* True if zero-filled; used by chunk recycling code. */ - bool zeroed; -}; -typedef rb_tree(extent_node_t) extent_tree_t; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t) - -rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/hash.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/hash.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/hash.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/hash.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,331 +0,0 @@ -/* - * The following hash function is based on MurmurHash3, placed into the public - * domain by Austin Appleby. See http://code.google.com/p/smhasher/ for - * details. 
- */ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void hash(const void *key, size_t len, const uint32_t seed, - size_t r_hash[2]); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_)) -/******************************************************************************/ -/* Internal implementation. */ -JEMALLOC_INLINE uint32_t -hash_rotl_32(uint32_t x, int8_t r) -{ - - return (x << r) | (x >> (32 - r)); -} - -JEMALLOC_INLINE uint64_t -hash_rotl_64(uint64_t x, int8_t r) -{ - return (x << r) | (x >> (64 - r)); -} - -JEMALLOC_INLINE uint32_t -hash_get_block_32(const uint32_t *p, int i) -{ - - return p[i]; -} - -JEMALLOC_INLINE uint64_t -hash_get_block_64(const uint64_t *p, int i) -{ - - return p[i]; -} - -JEMALLOC_INLINE uint32_t -hash_fmix_32(uint32_t h) -{ - - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - - return h; -} - -JEMALLOC_INLINE uint64_t -hash_fmix_64(uint64_t k) -{ - - k ^= k >> 33; - k *= QU(0xff51afd7ed558ccdLLU); - k ^= k >> 33; - k *= QU(0xc4ceb9fe1a85ec53LLU); - k ^= k >> 33; - - return k; -} - -JEMALLOC_INLINE uint32_t -hash_x86_32(const void *key, int len, uint32_t seed) -{ - const uint8_t *data = (const uint8_t *) key; - const int nblocks = len / 4; - - uint32_t h1 = seed; - - const uint32_t c1 = 0xcc9e2d51; - const uint32_t c2 = 0x1b873593; - - /* body */ - { - const uint32_t *blocks = (const uint32_t *) (data + nblocks*4); - int i; - - for (i = -nblocks; i; i++) { - 
uint32_t k1 = hash_get_block_32(blocks, i); - - k1 *= c1; - k1 = hash_rotl_32(k1, 15); - k1 *= c2; - - h1 ^= k1; - h1 = hash_rotl_32(h1, 13); - h1 = h1*5 + 0xe6546b64; - } - } - - /* tail */ - { - const uint8_t *tail = (const uint8_t *) (data + nblocks*4); - - uint32_t k1 = 0; - - switch (len & 3) { - case 3: k1 ^= tail[2] << 16; - case 2: k1 ^= tail[1] << 8; - case 1: k1 ^= tail[0]; k1 *= c1; k1 = hash_rotl_32(k1, 15); - k1 *= c2; h1 ^= k1; - } - } - - /* finalization */ - h1 ^= len; - - h1 = hash_fmix_32(h1); - - return h1; -} - -UNUSED JEMALLOC_INLINE void -hash_x86_128(const void *key, const int len, uint32_t seed, - uint64_t r_out[2]) -{ - const uint8_t * data = (const uint8_t *) key; - const int nblocks = len / 16; - - uint32_t h1 = seed; - uint32_t h2 = seed; - uint32_t h3 = seed; - uint32_t h4 = seed; - - const uint32_t c1 = 0x239b961b; - const uint32_t c2 = 0xab0e9789; - const uint32_t c3 = 0x38b34ae5; - const uint32_t c4 = 0xa1e38b93; - - /* body */ - { - const uint32_t *blocks = (const uint32_t *) (data + nblocks*16); - int i; - - for (i = -nblocks; i; i++) { - uint32_t k1 = hash_get_block_32(blocks, i*4 + 0); - uint32_t k2 = hash_get_block_32(blocks, i*4 + 1); - uint32_t k3 = hash_get_block_32(blocks, i*4 + 2); - uint32_t k4 = hash_get_block_32(blocks, i*4 + 3); - - k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; - - h1 = hash_rotl_32(h1, 19); h1 += h2; - h1 = h1*5 + 0x561ccd1b; - - k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; - - h2 = hash_rotl_32(h2, 17); h2 += h3; - h2 = h2*5 + 0x0bcaa747; - - k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; - - h3 = hash_rotl_32(h3, 15); h3 += h4; - h3 = h3*5 + 0x96cd1c35; - - k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; - - h4 = hash_rotl_32(h4, 13); h4 += h1; - h4 = h4*5 + 0x32ac3b17; - } - } - - /* tail */ - { - const uint8_t *tail = (const uint8_t *) (data + nblocks*16); - uint32_t k1 = 0; - uint32_t k2 = 0; - uint32_t k3 = 0; - uint32_t k4 = 0; - - switch (len & 15) { - 
case 15: k4 ^= tail[14] << 16; - case 14: k4 ^= tail[13] << 8; - case 13: k4 ^= tail[12] << 0; - k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; - - case 12: k3 ^= tail[11] << 24; - case 11: k3 ^= tail[10] << 16; - case 10: k3 ^= tail[ 9] << 8; - case 9: k3 ^= tail[ 8] << 0; - k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; - - case 8: k2 ^= tail[ 7] << 24; - case 7: k2 ^= tail[ 6] << 16; - case 6: k2 ^= tail[ 5] << 8; - case 5: k2 ^= tail[ 4] << 0; - k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; - - case 4: k1 ^= tail[ 3] << 24; - case 3: k1 ^= tail[ 2] << 16; - case 2: k1 ^= tail[ 1] << 8; - case 1: k1 ^= tail[ 0] << 0; - k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; - } - } - - /* finalization */ - h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; - - h1 += h2; h1 += h3; h1 += h4; - h2 += h1; h3 += h1; h4 += h1; - - h1 = hash_fmix_32(h1); - h2 = hash_fmix_32(h2); - h3 = hash_fmix_32(h3); - h4 = hash_fmix_32(h4); - - h1 += h2; h1 += h3; h1 += h4; - h2 += h1; h3 += h1; h4 += h1; - - r_out[0] = (((uint64_t) h2) << 32) | h1; - r_out[1] = (((uint64_t) h4) << 32) | h3; -} - -UNUSED JEMALLOC_INLINE void -hash_x64_128(const void *key, const int len, const uint32_t seed, - uint64_t r_out[2]) -{ - const uint8_t *data = (const uint8_t *) key; - const int nblocks = len / 16; - - uint64_t h1 = seed; - uint64_t h2 = seed; - - const uint64_t c1 = QU(0x87c37b91114253d5LLU); - const uint64_t c2 = QU(0x4cf5ad432745937fLLU); - - /* body */ - { - const uint64_t *blocks = (const uint64_t *) (data); - int i; - - for (i = 0; i < nblocks; i++) { - uint64_t k1 = hash_get_block_64(blocks, i*2 + 0); - uint64_t k2 = hash_get_block_64(blocks, i*2 + 1); - - k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; - - h1 = hash_rotl_64(h1, 27); h1 += h2; - h1 = h1*5 + 0x52dce729; - - k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; - - h2 = hash_rotl_64(h2, 31); h2 += h1; - h2 = h2*5 + 0x38495ab5; - } - } - - /* tail */ - { - const uint8_t *tail = 
(const uint8_t*)(data + nblocks*16); - uint64_t k1 = 0; - uint64_t k2 = 0; - - switch (len & 15) { - case 15: k2 ^= ((uint64_t)(tail[14])) << 48; - case 14: k2 ^= ((uint64_t)(tail[13])) << 40; - case 13: k2 ^= ((uint64_t)(tail[12])) << 32; - case 12: k2 ^= ((uint64_t)(tail[11])) << 24; - case 11: k2 ^= ((uint64_t)(tail[10])) << 16; - case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; - case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0; - k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; - - case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; - case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; - case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; - case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; - case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; - case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; - case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; - case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0; - k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; - } - } - - /* finalization */ - h1 ^= len; h2 ^= len; - - h1 += h2; - h2 += h1; - - h1 = hash_fmix_64(h1); - h2 = hash_fmix_64(h2); - - h1 += h2; - h2 += h1; - - r_out[0] = h1; - r_out[1] = h2; -} - - -/******************************************************************************/ -/* API. 
*/ -JEMALLOC_INLINE void -hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) -{ -#if (LG_SIZEOF_PTR == 3) - hash_x64_128(key, len, seed, (uint64_t *)r_hash); -#else - uint64_t hashes[2]; - hash_x86_128(key, len, seed, hashes); - r_hash[0] = (size_t)hashes[0]; - r_hash[1] = (size_t)hashes[1]; -#endif -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/huge.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/huge.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/huge.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/huge.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,40 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -/* Huge allocation statistics. */ -extern uint64_t huge_nmalloc; -extern uint64_t huge_ndalloc; -extern size_t huge_allocated; - -/* Protects chunk-related data structures. 
*/ -extern malloc_mutex_t huge_mtx; - -void *huge_malloc(size_t size, bool zero); -void *huge_palloc(size_t size, size_t alignment, bool zero); -void *huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, - size_t extra); -void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero, bool try_tcache_dalloc); -void huge_dalloc(void *ptr, bool unmap); -size_t huge_salloc(const void *ptr); -prof_ctx_t *huge_prof_ctx_get(const void *ptr); -void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); -bool huge_boot(void); -void huge_prefork(void); -void huge_postfork_parent(void); -void huge_postfork_child(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,1026 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_H -#define JEMALLOC_INTERNAL_H -#include -#ifdef _WIN32 -# include -# define ENOENT ERROR_PATH_NOT_FOUND -# define EINVAL ERROR_BAD_ARGUMENTS -# define EAGAIN ERROR_OUTOFMEMORY -# define EPERM ERROR_WRITE_FAULT -# define EFAULT ERROR_INVALID_ADDRESS -# define ENOMEM ERROR_NOT_ENOUGH_MEMORY -# undef ERANGE -# define ERANGE ERROR_INVALID_DATA -#else -# include -# include -# include -# if !defined(SYS_write) && defined(__NR_write) -# define SYS_write __NR_write -# endif -# include -# include -# include -#endif -#include - -#include -#ifndef SIZE_T_MAX -# define SIZE_T_MAX SIZE_MAX -#endif -#include 
-#include -#include -#include -#include -#include -#ifndef offsetof -# define offsetof(type, member) ((size_t)&(((type *)NULL)->member)) -#endif -#include -#include -#include -#include -#ifdef _MSC_VER -# include -typedef intptr_t ssize_t; -# define PATH_MAX 1024 -# define STDERR_FILENO 2 -# define __func__ __FUNCTION__ -/* Disable warnings about deprecated system functions */ -# pragma warning(disable: 4996) -#else -# include -#endif -#include - -#define JEMALLOC_NO_DEMANGLE -#include "../jemalloc@install_suffix@.h" - -#ifdef JEMALLOC_UTRACE -#include -#endif - -#ifdef JEMALLOC_VALGRIND -#include -#include -#endif - -#include "jemalloc/internal/private_namespace.h" - -#ifdef JEMALLOC_CC_SILENCE -#define UNUSED JEMALLOC_ATTR(unused) -#else -#define UNUSED -#endif - -static const bool config_debug = -#ifdef JEMALLOC_DEBUG - true -#else - false -#endif - ; -static const bool config_dss = -#ifdef JEMALLOC_DSS - true -#else - false -#endif - ; -static const bool config_fill = -#ifdef JEMALLOC_FILL - true -#else - false -#endif - ; -static const bool config_lazy_lock = -#ifdef JEMALLOC_LAZY_LOCK - true -#else - false -#endif - ; -static const bool config_prof = -#ifdef JEMALLOC_PROF - true -#else - false -#endif - ; -static const bool config_prof_libgcc = -#ifdef JEMALLOC_PROF_LIBGCC - true -#else - false -#endif - ; -static const bool config_prof_libunwind = -#ifdef JEMALLOC_PROF_LIBUNWIND - true -#else - false -#endif - ; -static const bool config_mremap = -#ifdef JEMALLOC_MREMAP - true -#else - false -#endif - ; -static const bool config_munmap = -#ifdef JEMALLOC_MUNMAP - true -#else - false -#endif - ; -static const bool config_stats = -#ifdef JEMALLOC_STATS - true -#else - false -#endif - ; -static const bool config_tcache = -#ifdef JEMALLOC_TCACHE - true -#else - false -#endif - ; -static const bool config_tls = -#ifdef JEMALLOC_TLS - true -#else - false -#endif - ; -static const bool config_utrace = -#ifdef JEMALLOC_UTRACE - true -#else - false -#endif - ; 
-static const bool config_valgrind = -#ifdef JEMALLOC_VALGRIND - true -#else - false -#endif - ; -static const bool config_xmalloc = -#ifdef JEMALLOC_XMALLOC - true -#else - false -#endif - ; -static const bool config_ivsalloc = -#ifdef JEMALLOC_IVSALLOC - true -#else - false -#endif - ; - -#ifdef JEMALLOC_ATOMIC9 -#include -#endif - -#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) -#include -#endif - -#ifdef JEMALLOC_ZONE -#include -#include -#include -#include -#endif - -#define RB_COMPACT -#include "jemalloc/internal/rb.h" -#include "jemalloc/internal/qr.h" -#include "jemalloc/internal/ql.h" - -/* - * jemalloc can conceptually be broken into components (arena, tcache, etc.), - * but there are circular dependencies that cannot be broken without - * substantial performance degradation. In order to reduce the effect on - * visual code flow, read the header files in multiple passes, with one of the - * following cpp variables defined during each pass: - * - * JEMALLOC_H_TYPES : Preprocessor-defined constants and psuedo-opaque data - * types. - * JEMALLOC_H_STRUCTS : Data structures. - * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. - * JEMALLOC_H_INLINES : Inline functions. - */ -/******************************************************************************/ -#define JEMALLOC_H_TYPES - -#define ALLOCM_LG_ALIGN_MASK ((int)0x3f) - -#define ZU(z) ((size_t)z) -#define QU(q) ((uint64_t)q) - -#ifndef __DECONST -# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) -#endif - -#ifdef JEMALLOC_DEBUG - /* Disable inlining to make debugging easier. 
*/ -# define JEMALLOC_ALWAYS_INLINE -# define JEMALLOC_INLINE -# define inline -#else -# define JEMALLOC_ENABLE_INLINE -# ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_ALWAYS_INLINE \ - static inline JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline) -# else -# define JEMALLOC_ALWAYS_INLINE static inline -# endif -# define JEMALLOC_INLINE static inline -# ifdef _MSC_VER -# define inline _inline -# endif -#endif - -/* Smallest size class to support. */ -#define LG_TINY_MIN 3 -#define TINY_MIN (1U << LG_TINY_MIN) - -/* - * Minimum alignment of allocations is 2^LG_QUANTUM bytes (ignoring tiny size - * classes). - */ -#ifndef LG_QUANTUM -# if (defined(__i386__) || defined(_M_IX86)) -# define LG_QUANTUM 4 -# endif -# ifdef __ia64__ -# define LG_QUANTUM 4 -# endif -# ifdef __alpha__ -# define LG_QUANTUM 4 -# endif -# ifdef __sparc64__ -# define LG_QUANTUM 4 -# endif -# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) -# define LG_QUANTUM 4 -# endif -# ifdef __arm__ -# define LG_QUANTUM 3 -# endif -# ifdef __hppa__ -# define LG_QUANTUM 4 -# endif -# ifdef __mips__ -# define LG_QUANTUM 3 -# endif -# ifdef __powerpc__ -# define LG_QUANTUM 4 -# endif -# ifdef __s390__ -# define LG_QUANTUM 4 -# endif -# ifdef __SH4__ -# define LG_QUANTUM 4 -# endif -# ifdef __tile__ -# define LG_QUANTUM 4 -# endif -# ifndef LG_QUANTUM -# error "No LG_QUANTUM definition for architecture; specify via CPPFLAGS" -# endif -#endif - -#define QUANTUM ((size_t)(1U << LG_QUANTUM)) -#define QUANTUM_MASK (QUANTUM - 1) - -/* Return the smallest quantum multiple that is >= a. */ -#define QUANTUM_CEILING(a) \ - (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) - -#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) -#define LONG_MASK (LONG - 1) - -/* Return the smallest long multiple that is >= a. */ -#define LONG_CEILING(a) \ - (((a) + LONG_MASK) & ~LONG_MASK) - -#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) -#define PTR_MASK (SIZEOF_PTR - 1) - -/* Return the smallest (void *) multiple that is >= a. 
*/ -#define PTR_CEILING(a) \ - (((a) + PTR_MASK) & ~PTR_MASK) - -/* - * Maximum size of L1 cache line. This is used to avoid cache line aliasing. - * In addition, this controls the spacing of cacheline-spaced size classes. - * - * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can - * only handle raw constants. - */ -#define LG_CACHELINE 6 -#define CACHELINE 64 -#define CACHELINE_MASK (CACHELINE - 1) - -/* Return the smallest cacheline multiple that is >= s. */ -#define CACHELINE_CEILING(s) \ - (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) - -/* Page size. STATIC_PAGE_SHIFT is determined by the configure script. */ -#ifdef PAGE_MASK -# undef PAGE_MASK -#endif -#define LG_PAGE STATIC_PAGE_SHIFT -#define PAGE ((size_t)(1U << STATIC_PAGE_SHIFT)) -#define PAGE_MASK ((size_t)(PAGE - 1)) - -/* Return the smallest pagesize multiple that is >= s. */ -#define PAGE_CEILING(s) \ - (((s) + PAGE_MASK) & ~PAGE_MASK) - -/* Return the nearest aligned address at or below a. */ -#define ALIGNMENT_ADDR2BASE(a, alignment) \ - ((void *)((uintptr_t)(a) & (-(alignment)))) - -/* Return the offset between a and the nearest aligned address at or below a. */ -#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ - ((size_t)((uintptr_t)(a) & (alignment - 1))) - -/* Return the smallest alignment multiple that is >= s. */ -#define ALIGNMENT_CEILING(s, alignment) \ - (((s) + (alignment - 1)) & (-(alignment))) - -/* Declare a variable length array */ -#if __STDC_VERSION__ < 199901L -# ifdef _MSC_VER -# include -# define alloca _alloca -# else -# ifdef JEMALLOC_HAS_ALLOCA_H -# include -# else -# include -# endif -# endif -# define VARIABLE_ARRAY(type, name, count) \ - type *name = alloca(sizeof(type) * count) -#else -# define VARIABLE_ARRAY(type, name, count) type name[count] -#endif - -#ifdef JEMALLOC_VALGRIND -/* - * The JEMALLOC_VALGRIND_*() macros must be macros rather than functions - * so that when Valgrind reports errors, there are no extra stack frames - * in the backtraces. 
- * - * The size that is reported to valgrind must be consistent through a chain of - * malloc..realloc..realloc calls. Request size isn't recorded anywhere in - * jemalloc, so it is critical that all callers of these macros provide usize - * rather than request size. As a result, buffer overflow detection is - * technically weakened for the standard API, though it is generally accepted - * practice to consider any extra bytes reported by malloc_usable_size() as - * usable space. - */ -#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \ - if (config_valgrind && opt_valgrind && cond) \ - VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \ -} while (0) -#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ - old_rzsize, zero) do { \ - if (config_valgrind && opt_valgrind) { \ - size_t rzsize = p2rz(ptr); \ - \ - if (ptr == old_ptr) { \ - VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ - usize, rzsize); \ - if (zero && old_usize < usize) { \ - VALGRIND_MAKE_MEM_DEFINED( \ - (void *)((uintptr_t)ptr + \ - old_usize), usize - old_usize); \ - } \ - } else { \ - if (old_ptr != NULL) { \ - VALGRIND_FREELIKE_BLOCK(old_ptr, \ - old_rzsize); \ - } \ - if (ptr != NULL) { \ - size_t copy_size = (old_usize < usize) \ - ? 
old_usize : usize; \ - size_t tail_size = usize - copy_size; \ - VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \ - rzsize, false); \ - if (copy_size > 0) { \ - VALGRIND_MAKE_MEM_DEFINED(ptr, \ - copy_size); \ - } \ - if (zero && tail_size > 0) { \ - VALGRIND_MAKE_MEM_DEFINED( \ - (void *)((uintptr_t)ptr + \ - copy_size), tail_size); \ - } \ - } \ - } \ - } \ -} while (0) -#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \ - if (config_valgrind && opt_valgrind) \ - VALGRIND_FREELIKE_BLOCK(ptr, rzsize); \ -} while (0) -#else -#define RUNNING_ON_VALGRIND ((unsigned)0) -#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \ - do {} while (0) -#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) \ - do {} while (0) -#define VALGRIND_FREELIKE_BLOCK(addr, rzB) do {} while (0) -#define VALGRIND_MAKE_MEM_NOACCESS(_qzz_addr, _qzz_len) do {} while (0) -#define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr, _qzz_len) do {} while (0) -#define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr, _qzz_len) do {} while (0) -#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0) -#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ - old_rzsize, zero) do {} while (0) -#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0) -#endif - -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/tsd.h" -#include "jemalloc/internal/mb.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/arena.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#include 
"jemalloc/internal/quarantine.h" -#include "jemalloc/internal/prof.h" - -#undef JEMALLOC_H_TYPES -/******************************************************************************/ -#define JEMALLOC_H_STRUCTS - -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/tsd.h" -#include "jemalloc/internal/mb.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/arena.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/quarantine.h" -#include "jemalloc/internal/prof.h" - -typedef struct { - uint64_t allocated; - uint64_t deallocated; -} thread_allocated_t; -/* - * The JEMALLOC_CONCAT() wrapper is necessary to pass {0, 0} via a cpp macro - * argument. - */ -#define THREAD_ALLOCATED_INITIALIZER JEMALLOC_CONCAT({0, 0}) - -#undef JEMALLOC_H_STRUCTS -/******************************************************************************/ -#define JEMALLOC_H_EXTERNS - -extern bool opt_abort; -extern bool opt_junk; -extern size_t opt_quarantine; -extern bool opt_redzone; -extern bool opt_utrace; -extern bool opt_valgrind; -extern bool opt_xmalloc; -extern bool opt_zero; -extern size_t opt_narenas; - -/* Number of CPUs. */ -extern unsigned ncpus; - -/* Protects arenas initialization (arenas, arenas_total). */ -extern malloc_mutex_t arenas_lock; -/* - * Arenas that are used to service external requests. Not all elements of the - * arenas array are necessarily used; arenas are created lazily as needed. 
- * - * arenas[0..narenas_auto) are used for automatic multiplexing of threads and - * arenas. arenas[narenas_auto..narenas_total) are only used if the application - * takes some action to create them and allocate from them. - */ -extern arena_t **arenas; -extern unsigned narenas_total; -extern unsigned narenas_auto; /* Read-only after initialization. */ - -arena_t *arenas_extend(unsigned ind); -void arenas_cleanup(void *arg); -arena_t *choose_arena_hard(void); -void jemalloc_prefork(void); -void jemalloc_postfork_parent(void); -void jemalloc_postfork_child(void); - -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/tsd.h" -#include "jemalloc/internal/mb.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/arena.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/quarantine.h" -#include "jemalloc/internal/prof.h" - -#undef JEMALLOC_H_EXTERNS -/******************************************************************************/ -#define JEMALLOC_H_INLINES - -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/tsd.h" -#include "jemalloc/internal/mb.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/base.h" -#include 
"jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" - -#ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), arenas, arena_t *) - -size_t s2u(size_t size); -size_t sa2u(size_t size, size_t alignment); -unsigned narenas_total_get(void); -arena_t *choose_arena(arena_t *arena); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -/* - * Map of pthread_self() --> arenas[???], used for selecting an arena to use - * for allocations. - */ -malloc_tsd_externs(arenas, arena_t *) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, arenas, arena_t *, NULL, - arenas_cleanup) - -/* - * Compute usable size that would result from allocating an object with the - * specified size. - */ -JEMALLOC_ALWAYS_INLINE size_t -s2u(size_t size) -{ - - if (size <= SMALL_MAXCLASS) - return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size); - if (size <= arena_maxclass) - return (PAGE_CEILING(size)); - return (CHUNK_CEILING(size)); -} - -/* - * Compute usable size that would result from allocating an object with the - * specified size and alignment. - */ -JEMALLOC_ALWAYS_INLINE size_t -sa2u(size_t size, size_t alignment) -{ - size_t usize; - - assert(alignment != 0 && ((alignment - 1) & alignment) == 0); - - /* - * Round size up to the nearest multiple of alignment. - * - * This done, we can take advantage of the fact that for each small - * size class, every object is aligned at the smallest power of two - * that is non-zero in the base two representation of the size. For - * example: - * - * Size | Base 2 | Minimum alignment - * -----+----------+------------------ - * 96 | 1100000 | 32 - * 144 | 10100000 | 32 - * 192 | 11000000 | 64 - */ - usize = ALIGNMENT_CEILING(size, alignment); - /* - * (usize < size) protects against the combination of maximal - * alignment and size greater than maximal alignment. - */ - if (usize < size) { - /* size_t overflow. 
*/ - return (0); - } - - if (usize <= arena_maxclass && alignment <= PAGE) { - if (usize <= SMALL_MAXCLASS) - return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size); - return (PAGE_CEILING(usize)); - } else { - size_t run_size; - - /* - * We can't achieve subpage alignment, so round up alignment - * permanently; it makes later calculations simpler. - */ - alignment = PAGE_CEILING(alignment); - usize = PAGE_CEILING(size); - /* - * (usize < size) protects against very large sizes within - * PAGE of SIZE_T_MAX. - * - * (usize + alignment < usize) protects against the - * combination of maximal alignment and usize large enough - * to cause overflow. This is similar to the first overflow - * check above, but it needs to be repeated due to the new - * usize value, which may now be *equal* to maximal - * alignment, whereas before we only detected overflow if the - * original size was *greater* than maximal alignment. - */ - if (usize < size || usize + alignment < usize) { - /* size_t overflow. */ - return (0); - } - - /* - * Calculate the size of the over-size run that arena_palloc() - * would need to allocate in order to guarantee the alignment. - * If the run wouldn't fit within a chunk, round up to a huge - * allocation size. - */ - run_size = usize + alignment - PAGE; - if (run_size <= arena_maxclass) - return (PAGE_CEILING(usize)); - return (CHUNK_CEILING(usize)); - } -} - -JEMALLOC_INLINE unsigned -narenas_total_get(void) -{ - unsigned narenas; - - malloc_mutex_lock(&arenas_lock); - narenas = narenas_total; - malloc_mutex_unlock(&arenas_lock); - - return (narenas); -} - -/* Choose an arena based on a per-thread value. 
*/ -JEMALLOC_INLINE arena_t * -choose_arena(arena_t *arena) -{ - arena_t *ret; - - if (arena != NULL) - return (arena); - - if ((ret = *arenas_tsd_get()) == NULL) { - ret = choose_arena_hard(); - assert(ret != NULL); - } - - return (ret); -} -#endif - -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/rtree.h" -/* - * Include arena.h twice in order to resolve circular dependencies with - * tcache.h. - */ -#define JEMALLOC_ARENA_INLINE_A -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_INLINE_A -#include "jemalloc/internal/tcache.h" -#define JEMALLOC_ARENA_INLINE_B -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_INLINE_B -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/quarantine.h" - -#ifndef JEMALLOC_ENABLE_INLINE -void *imallocx(size_t size, bool try_tcache, arena_t *arena); -void *imalloc(size_t size); -void *icallocx(size_t size, bool try_tcache, arena_t *arena); -void *icalloc(size_t size); -void *ipallocx(size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t *arena); -void *ipalloc(size_t usize, size_t alignment, bool zero); -size_t isalloc(const void *ptr, bool demote); -size_t ivsalloc(const void *ptr, bool demote); -size_t u2rz(size_t usize); -size_t p2rz(const void *ptr); -void idallocx(void *ptr, bool try_tcache); -void idalloc(void *ptr); -void iqallocx(void *ptr, bool try_tcache); -void iqalloc(void *ptr); -void *irallocx(void *ptr, size_t size, size_t extra, size_t alignment, - bool zero, bool no_move, bool try_tcache_alloc, bool try_tcache_dalloc, - arena_t *arena); -void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment, - bool zero, bool no_move); -malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t) -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -JEMALLOC_ALWAYS_INLINE void * -imallocx(size_t size, bool try_tcache, arena_t *arena) -{ - - assert(size != 0); - - if (size <= arena_maxclass) - return 
(arena_malloc(arena, size, false, try_tcache)); - else - return (huge_malloc(size, false)); -} - -JEMALLOC_ALWAYS_INLINE void * -imalloc(size_t size) -{ - - return (imallocx(size, true, NULL)); -} - -JEMALLOC_ALWAYS_INLINE void * -icallocx(size_t size, bool try_tcache, arena_t *arena) -{ - - if (size <= arena_maxclass) - return (arena_malloc(arena, size, true, try_tcache)); - else - return (huge_malloc(size, true)); -} - -JEMALLOC_ALWAYS_INLINE void * -icalloc(size_t size) -{ - - return (icallocx(size, true, NULL)); -} - -JEMALLOC_ALWAYS_INLINE void * -ipallocx(size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t *arena) -{ - void *ret; - - assert(usize != 0); - assert(usize == sa2u(usize, alignment)); - - if (usize <= arena_maxclass && alignment <= PAGE) - ret = arena_malloc(arena, usize, zero, try_tcache); - else { - if (usize <= arena_maxclass) { - ret = arena_palloc(choose_arena(arena), usize, - alignment, zero); - } else if (alignment <= chunksize) - ret = huge_malloc(usize, zero); - else - ret = huge_palloc(usize, alignment, zero); - } - - assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void * -ipalloc(size_t usize, size_t alignment, bool zero) -{ - - return (ipallocx(usize, alignment, zero, true, NULL)); -} - -/* - * Typical usage: - * void *ptr = [...] - * size_t sz = isalloc(ptr, config_prof); - */ -JEMALLOC_ALWAYS_INLINE size_t -isalloc(const void *ptr, bool demote) -{ - size_t ret; - arena_chunk_t *chunk; - - assert(ptr != NULL); - /* Demotion only makes sense if config_prof is true. */ - assert(config_prof || demote == false); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) - ret = arena_salloc(ptr, demote); - else - ret = huge_salloc(ptr); - - return (ret); -} - -JEMALLOC_ALWAYS_INLINE size_t -ivsalloc(const void *ptr, bool demote) -{ - - /* Return 0 if ptr is not within a chunk managed by jemalloc. 
*/ - if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == NULL) - return (0); - - return (isalloc(ptr, demote)); -} - -JEMALLOC_INLINE size_t -u2rz(size_t usize) -{ - size_t ret; - - if (usize <= SMALL_MAXCLASS) { - size_t binind = SMALL_SIZE2BIN(usize); - ret = arena_bin_info[binind].redzone_size; - } else - ret = 0; - - return (ret); -} - -JEMALLOC_INLINE size_t -p2rz(const void *ptr) -{ - size_t usize = isalloc(ptr, false); - - return (u2rz(usize)); -} - -JEMALLOC_ALWAYS_INLINE void -idallocx(void *ptr, bool try_tcache) -{ - arena_chunk_t *chunk; - - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) - arena_dalloc(chunk->arena, chunk, ptr, try_tcache); - else - huge_dalloc(ptr, true); -} - -JEMALLOC_ALWAYS_INLINE void -idalloc(void *ptr) -{ - - idallocx(ptr, true); -} - -JEMALLOC_ALWAYS_INLINE void -iqallocx(void *ptr, bool try_tcache) -{ - - if (config_fill && opt_quarantine) - quarantine(ptr); - else - idallocx(ptr, try_tcache); -} - -JEMALLOC_ALWAYS_INLINE void -iqalloc(void *ptr) -{ - - iqallocx(ptr, true); -} - -JEMALLOC_ALWAYS_INLINE void * -irallocx(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, - bool no_move, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena) -{ - void *ret; - size_t oldsize; - - assert(ptr != NULL); - assert(size != 0); - - oldsize = isalloc(ptr, config_prof); - - if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) - != 0) { - size_t usize, copysize; - - /* - * Existing object alignment is inadequate; allocate new space - * and copy. - */ - if (no_move) - return (NULL); - usize = sa2u(size + extra, alignment); - if (usize == 0) - return (NULL); - ret = ipallocx(usize, alignment, zero, try_tcache_alloc, arena); - if (ret == NULL) { - if (extra == 0) - return (NULL); - /* Try again, without extra this time. 
*/ - usize = sa2u(size, alignment); - if (usize == 0) - return (NULL); - ret = ipallocx(usize, alignment, zero, try_tcache_alloc, - arena); - if (ret == NULL) - return (NULL); - } - /* - * Copy at most size bytes (not size+extra), since the caller - * has no expectation that the extra bytes will be reliably - * preserved. - */ - copysize = (size < oldsize) ? size : oldsize; - memcpy(ret, ptr, copysize); - iqallocx(ptr, try_tcache_dalloc); - return (ret); - } - - if (no_move) { - if (size <= arena_maxclass) { - return (arena_ralloc_no_move(ptr, oldsize, size, - extra, zero)); - } else { - return (huge_ralloc_no_move(ptr, oldsize, size, - extra)); - } - } else { - if (size + extra <= arena_maxclass) { - return (arena_ralloc(arena, ptr, oldsize, size, extra, - alignment, zero, try_tcache_alloc, - try_tcache_dalloc)); - } else { - return (huge_ralloc(ptr, oldsize, size, extra, - alignment, zero, try_tcache_dalloc)); - } - } -} - -JEMALLOC_ALWAYS_INLINE void * -iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, - bool no_move) -{ - - return (irallocx(ptr, size, extra, alignment, zero, no_move, true, true, - NULL)); -} - -malloc_tsd_externs(thread_allocated, thread_allocated_t) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, thread_allocated, thread_allocated_t, - THREAD_ALLOCATED_INITIALIZER, malloc_tsd_no_cleanup) -#endif - -#include "jemalloc/internal/prof.h" - -#undef JEMALLOC_H_INLINES -/******************************************************************************/ -#endif /* JEMALLOC_INTERNAL_H */ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/mb.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/mb.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/mb.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/mb.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,115 +0,0 @@ -/******************************************************************************/ -#ifdef 
JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void mb_write(void); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MB_C_)) -#ifdef __i386__ -/* - * According to the Intel Architecture Software Developer's Manual, current - * processors execute instructions in order from the perspective of other - * processors in a multiprocessor system, but 1) Intel reserves the right to - * change that, and 2) the compiler's optimizer could re-order instructions if - * there weren't some form of barrier. Therefore, even if running on an - * architecture that does not need memory barriers (everything through at least - * i686), an "optimizer barrier" is necessary. - */ -JEMALLOC_INLINE void -mb_write(void) -{ - -# if 0 - /* This is a true memory barrier. */ - asm volatile ("pusha;" - "xor %%eax,%%eax;" - "cpuid;" - "popa;" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -#else - /* - * This is hopefully enough to keep the compiler from reordering - * instructions around this one. - */ - asm volatile ("nop;" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -#endif -} -#elif (defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE void -mb_write(void) -{ - - asm volatile ("sfence" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -} -#elif defined(__powerpc__) -JEMALLOC_INLINE void -mb_write(void) -{ - - asm volatile ("eieio" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. 
*/ - ); -} -#elif defined(__sparc64__) -JEMALLOC_INLINE void -mb_write(void) -{ - - asm volatile ("membar #StoreStore" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -} -#elif defined(__tile__) -JEMALLOC_INLINE void -mb_write(void) -{ - - __sync_synchronize(); -} -#else -/* - * This is much slower than a simple memory barrier, but the semantics of mutex - * unlock make this work. - */ -JEMALLOC_INLINE void -mb_write(void) -{ - malloc_mutex_t mtx; - - malloc_mutex_init(&mtx); - malloc_mutex_lock(&mtx); - malloc_mutex_unlock(&mtx); -} -#endif -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/mutex.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/mutex.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/mutex.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/mutex.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,99 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct malloc_mutex_s malloc_mutex_t; - -#ifdef _WIN32 -# define MALLOC_MUTEX_INITIALIZER -#elif (defined(JEMALLOC_OSSPIN)) -# define MALLOC_MUTEX_INITIALIZER {0} -#elif (defined(JEMALLOC_MUTEX_INIT_CB)) -# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} -#else -# if (defined(PTHREAD_MUTEX_ADAPTIVE_NP) && \ - defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) -# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP -# define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP} -# else -# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT -# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER} -# endif -#endif - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct 
malloc_mutex_s { -#ifdef _WIN32 - CRITICAL_SECTION lock; -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLock lock; -#elif (defined(JEMALLOC_MUTEX_INIT_CB)) - pthread_mutex_t lock; - malloc_mutex_t *postponed_next; -#else - pthread_mutex_t lock; -#endif -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#ifdef JEMALLOC_LAZY_LOCK -extern bool isthreaded; -#else -# undef isthreaded /* Undo private_namespace.h definition. */ -# define isthreaded true -#endif - -bool malloc_mutex_init(malloc_mutex_t *mutex); -void malloc_mutex_prefork(malloc_mutex_t *mutex); -void malloc_mutex_postfork_parent(malloc_mutex_t *mutex); -void malloc_mutex_postfork_child(malloc_mutex_t *mutex); -bool mutex_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void malloc_mutex_lock(malloc_mutex_t *mutex); -void malloc_mutex_unlock(malloc_mutex_t *mutex); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) -JEMALLOC_INLINE void -malloc_mutex_lock(malloc_mutex_t *mutex) -{ - - if (isthreaded) { -#ifdef _WIN32 - EnterCriticalSection(&mutex->lock); -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLockLock(&mutex->lock); -#else - pthread_mutex_lock(&mutex->lock); -#endif - } -} - -JEMALLOC_INLINE void -malloc_mutex_unlock(malloc_mutex_t *mutex) -{ - - if (isthreaded) { -#ifdef _WIN32 - LeaveCriticalSection(&mutex->lock); -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLockUnlock(&mutex->lock); -#else - pthread_mutex_unlock(&mutex->lock); -#endif - } -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/private_namespace.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/private_namespace.h --- 
mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/private_namespace.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/private_namespace.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,390 +0,0 @@ -#define a0calloc JEMALLOC_N(a0calloc) -#define a0free JEMALLOC_N(a0free) -#define a0malloc JEMALLOC_N(a0malloc) -#define arena_alloc_junk_small JEMALLOC_N(arena_alloc_junk_small) -#define arena_bin_index JEMALLOC_N(arena_bin_index) -#define arena_bin_info JEMALLOC_N(arena_bin_info) -#define arena_boot JEMALLOC_N(arena_boot) -#define arena_dalloc JEMALLOC_N(arena_dalloc) -#define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin) -#define arena_dalloc_bin_locked JEMALLOC_N(arena_dalloc_bin_locked) -#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) -#define arena_dalloc_large JEMALLOC_N(arena_dalloc_large) -#define arena_dalloc_large_locked JEMALLOC_N(arena_dalloc_large_locked) -#define arena_dalloc_small JEMALLOC_N(arena_dalloc_small) -#define arena_dss_prec_get JEMALLOC_N(arena_dss_prec_get) -#define arena_dss_prec_set JEMALLOC_N(arena_dss_prec_set) -#define arena_malloc JEMALLOC_N(arena_malloc) -#define arena_malloc_large JEMALLOC_N(arena_malloc_large) -#define arena_malloc_small JEMALLOC_N(arena_malloc_small) -#define arena_mapbits_allocated_get JEMALLOC_N(arena_mapbits_allocated_get) -#define arena_mapbits_binind_get JEMALLOC_N(arena_mapbits_binind_get) -#define arena_mapbits_dirty_get JEMALLOC_N(arena_mapbits_dirty_get) -#define arena_mapbits_get JEMALLOC_N(arena_mapbits_get) -#define arena_mapbits_large_binind_set JEMALLOC_N(arena_mapbits_large_binind_set) -#define arena_mapbits_large_get JEMALLOC_N(arena_mapbits_large_get) -#define arena_mapbits_large_set JEMALLOC_N(arena_mapbits_large_set) -#define arena_mapbits_large_size_get JEMALLOC_N(arena_mapbits_large_size_get) -#define arena_mapbits_small_runind_get JEMALLOC_N(arena_mapbits_small_runind_get) -#define arena_mapbits_small_set 
JEMALLOC_N(arena_mapbits_small_set) -#define arena_mapbits_unallocated_set JEMALLOC_N(arena_mapbits_unallocated_set) -#define arena_mapbits_unallocated_size_get JEMALLOC_N(arena_mapbits_unallocated_size_get) -#define arena_mapbits_unallocated_size_set JEMALLOC_N(arena_mapbits_unallocated_size_set) -#define arena_mapbits_unzeroed_get JEMALLOC_N(arena_mapbits_unzeroed_get) -#define arena_mapbits_unzeroed_set JEMALLOC_N(arena_mapbits_unzeroed_set) -#define arena_mapbitsp_get JEMALLOC_N(arena_mapbitsp_get) -#define arena_mapp_get JEMALLOC_N(arena_mapp_get) -#define arena_maxclass JEMALLOC_N(arena_maxclass) -#define arena_new JEMALLOC_N(arena_new) -#define arena_palloc JEMALLOC_N(arena_palloc) -#define arena_postfork_child JEMALLOC_N(arena_postfork_child) -#define arena_postfork_parent JEMALLOC_N(arena_postfork_parent) -#define arena_prefork JEMALLOC_N(arena_prefork) -#define arena_prof_accum JEMALLOC_N(arena_prof_accum) -#define arena_prof_accum_impl JEMALLOC_N(arena_prof_accum_impl) -#define arena_prof_accum_locked JEMALLOC_N(arena_prof_accum_locked) -#define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get) -#define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set) -#define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) -#define arena_ptr_small_binind_get JEMALLOC_N(arena_ptr_small_binind_get) -#define arena_purge_all JEMALLOC_N(arena_purge_all) -#define arena_ralloc JEMALLOC_N(arena_ralloc) -#define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) -#define arena_run_regind JEMALLOC_N(arena_run_regind) -#define arena_salloc JEMALLOC_N(arena_salloc) -#define arena_stats_merge JEMALLOC_N(arena_stats_merge) -#define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small) -#define arenas JEMALLOC_N(arenas) -#define arenas_booted JEMALLOC_N(arenas_booted) -#define arenas_cleanup JEMALLOC_N(arenas_cleanup) -#define arenas_extend JEMALLOC_N(arenas_extend) -#define arenas_initialized JEMALLOC_N(arenas_initialized) -#define arenas_lock JEMALLOC_N(arenas_lock) 
-#define arenas_tls JEMALLOC_N(arenas_tls) -#define arenas_tsd JEMALLOC_N(arenas_tsd) -#define arenas_tsd_boot JEMALLOC_N(arenas_tsd_boot) -#define arenas_tsd_cleanup_wrapper JEMALLOC_N(arenas_tsd_cleanup_wrapper) -#define arenas_tsd_get JEMALLOC_N(arenas_tsd_get) -#define arenas_tsd_get_wrapper JEMALLOC_N(arenas_tsd_get_wrapper) -#define arenas_tsd_set JEMALLOC_N(arenas_tsd_set) -#define atomic_add_u JEMALLOC_N(atomic_add_u) -#define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32) -#define atomic_add_uint64 JEMALLOC_N(atomic_add_uint64) -#define atomic_add_z JEMALLOC_N(atomic_add_z) -#define atomic_sub_u JEMALLOC_N(atomic_sub_u) -#define atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32) -#define atomic_sub_uint64 JEMALLOC_N(atomic_sub_uint64) -#define atomic_sub_z JEMALLOC_N(atomic_sub_z) -#define base_alloc JEMALLOC_N(base_alloc) -#define base_boot JEMALLOC_N(base_boot) -#define base_calloc JEMALLOC_N(base_calloc) -#define base_node_alloc JEMALLOC_N(base_node_alloc) -#define base_node_dealloc JEMALLOC_N(base_node_dealloc) -#define base_postfork_child JEMALLOC_N(base_postfork_child) -#define base_postfork_parent JEMALLOC_N(base_postfork_parent) -#define base_prefork JEMALLOC_N(base_prefork) -#define bitmap_full JEMALLOC_N(bitmap_full) -#define bitmap_get JEMALLOC_N(bitmap_get) -#define bitmap_info_init JEMALLOC_N(bitmap_info_init) -#define bitmap_info_ngroups JEMALLOC_N(bitmap_info_ngroups) -#define bitmap_init JEMALLOC_N(bitmap_init) -#define bitmap_set JEMALLOC_N(bitmap_set) -#define bitmap_sfu JEMALLOC_N(bitmap_sfu) -#define bitmap_size JEMALLOC_N(bitmap_size) -#define bitmap_unset JEMALLOC_N(bitmap_unset) -#define bt_init JEMALLOC_N(bt_init) -#define buferror JEMALLOC_N(buferror) -#define choose_arena JEMALLOC_N(choose_arena) -#define choose_arena_hard JEMALLOC_N(choose_arena_hard) -#define chunk_alloc JEMALLOC_N(chunk_alloc) -#define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss) -#define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) -#define chunk_boot 
JEMALLOC_N(chunk_boot) -#define chunk_dealloc JEMALLOC_N(chunk_dealloc) -#define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap) -#define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) -#define chunk_dss_postfork_child JEMALLOC_N(chunk_dss_postfork_child) -#define chunk_dss_postfork_parent JEMALLOC_N(chunk_dss_postfork_parent) -#define chunk_dss_prec_get JEMALLOC_N(chunk_dss_prec_get) -#define chunk_dss_prec_set JEMALLOC_N(chunk_dss_prec_set) -#define chunk_dss_prefork JEMALLOC_N(chunk_dss_prefork) -#define chunk_in_dss JEMALLOC_N(chunk_in_dss) -#define chunk_npages JEMALLOC_N(chunk_npages) -#define chunk_postfork_child JEMALLOC_N(chunk_postfork_child) -#define chunk_postfork_parent JEMALLOC_N(chunk_postfork_parent) -#define chunk_prefork JEMALLOC_N(chunk_prefork) -#define chunk_unmap JEMALLOC_N(chunk_unmap) -#define chunks_mtx JEMALLOC_N(chunks_mtx) -#define chunks_rtree JEMALLOC_N(chunks_rtree) -#define chunksize JEMALLOC_N(chunksize) -#define chunksize_mask JEMALLOC_N(chunksize_mask) -#define ckh_bucket_search JEMALLOC_N(ckh_bucket_search) -#define ckh_count JEMALLOC_N(ckh_count) -#define ckh_delete JEMALLOC_N(ckh_delete) -#define ckh_evict_reloc_insert JEMALLOC_N(ckh_evict_reloc_insert) -#define ckh_insert JEMALLOC_N(ckh_insert) -#define ckh_isearch JEMALLOC_N(ckh_isearch) -#define ckh_iter JEMALLOC_N(ckh_iter) -#define ckh_new JEMALLOC_N(ckh_new) -#define ckh_pointer_hash JEMALLOC_N(ckh_pointer_hash) -#define ckh_pointer_keycomp JEMALLOC_N(ckh_pointer_keycomp) -#define ckh_rebuild JEMALLOC_N(ckh_rebuild) -#define ckh_remove JEMALLOC_N(ckh_remove) -#define ckh_search JEMALLOC_N(ckh_search) -#define ckh_string_hash JEMALLOC_N(ckh_string_hash) -#define ckh_string_keycomp JEMALLOC_N(ckh_string_keycomp) -#define ckh_try_bucket_insert JEMALLOC_N(ckh_try_bucket_insert) -#define ckh_try_insert JEMALLOC_N(ckh_try_insert) -#define ctl_boot JEMALLOC_N(ctl_boot) -#define ctl_bymib JEMALLOC_N(ctl_bymib) -#define ctl_byname JEMALLOC_N(ctl_byname) -#define ctl_nametomib 
JEMALLOC_N(ctl_nametomib) -#define ctl_postfork_child JEMALLOC_N(ctl_postfork_child) -#define ctl_postfork_parent JEMALLOC_N(ctl_postfork_parent) -#define ctl_prefork JEMALLOC_N(ctl_prefork) -#define dss_prec_names JEMALLOC_N(dss_prec_names) -#define extent_tree_ad_first JEMALLOC_N(extent_tree_ad_first) -#define extent_tree_ad_insert JEMALLOC_N(extent_tree_ad_insert) -#define extent_tree_ad_iter JEMALLOC_N(extent_tree_ad_iter) -#define extent_tree_ad_iter_recurse JEMALLOC_N(extent_tree_ad_iter_recurse) -#define extent_tree_ad_iter_start JEMALLOC_N(extent_tree_ad_iter_start) -#define extent_tree_ad_last JEMALLOC_N(extent_tree_ad_last) -#define extent_tree_ad_new JEMALLOC_N(extent_tree_ad_new) -#define extent_tree_ad_next JEMALLOC_N(extent_tree_ad_next) -#define extent_tree_ad_nsearch JEMALLOC_N(extent_tree_ad_nsearch) -#define extent_tree_ad_prev JEMALLOC_N(extent_tree_ad_prev) -#define extent_tree_ad_psearch JEMALLOC_N(extent_tree_ad_psearch) -#define extent_tree_ad_remove JEMALLOC_N(extent_tree_ad_remove) -#define extent_tree_ad_reverse_iter JEMALLOC_N(extent_tree_ad_reverse_iter) -#define extent_tree_ad_reverse_iter_recurse JEMALLOC_N(extent_tree_ad_reverse_iter_recurse) -#define extent_tree_ad_reverse_iter_start JEMALLOC_N(extent_tree_ad_reverse_iter_start) -#define extent_tree_ad_search JEMALLOC_N(extent_tree_ad_search) -#define extent_tree_szad_first JEMALLOC_N(extent_tree_szad_first) -#define extent_tree_szad_insert JEMALLOC_N(extent_tree_szad_insert) -#define extent_tree_szad_iter JEMALLOC_N(extent_tree_szad_iter) -#define extent_tree_szad_iter_recurse JEMALLOC_N(extent_tree_szad_iter_recurse) -#define extent_tree_szad_iter_start JEMALLOC_N(extent_tree_szad_iter_start) -#define extent_tree_szad_last JEMALLOC_N(extent_tree_szad_last) -#define extent_tree_szad_new JEMALLOC_N(extent_tree_szad_new) -#define extent_tree_szad_next JEMALLOC_N(extent_tree_szad_next) -#define extent_tree_szad_nsearch JEMALLOC_N(extent_tree_szad_nsearch) -#define extent_tree_szad_prev 
JEMALLOC_N(extent_tree_szad_prev) -#define extent_tree_szad_psearch JEMALLOC_N(extent_tree_szad_psearch) -#define extent_tree_szad_remove JEMALLOC_N(extent_tree_szad_remove) -#define extent_tree_szad_reverse_iter JEMALLOC_N(extent_tree_szad_reverse_iter) -#define extent_tree_szad_reverse_iter_recurse JEMALLOC_N(extent_tree_szad_reverse_iter_recurse) -#define extent_tree_szad_reverse_iter_start JEMALLOC_N(extent_tree_szad_reverse_iter_start) -#define extent_tree_szad_search JEMALLOC_N(extent_tree_szad_search) -#define get_errno JEMALLOC_N(get_errno) -#define hash JEMALLOC_N(hash) -#define hash_fmix_32 JEMALLOC_N(hash_fmix_32) -#define hash_fmix_64 JEMALLOC_N(hash_fmix_64) -#define hash_get_block_32 JEMALLOC_N(hash_get_block_32) -#define hash_get_block_64 JEMALLOC_N(hash_get_block_64) -#define hash_rotl_32 JEMALLOC_N(hash_rotl_32) -#define hash_rotl_64 JEMALLOC_N(hash_rotl_64) -#define hash_x64_128 JEMALLOC_N(hash_x64_128) -#define hash_x86_128 JEMALLOC_N(hash_x86_128) -#define hash_x86_32 JEMALLOC_N(hash_x86_32) -#define huge_allocated JEMALLOC_N(huge_allocated) -#define huge_boot JEMALLOC_N(huge_boot) -#define huge_dalloc JEMALLOC_N(huge_dalloc) -#define huge_malloc JEMALLOC_N(huge_malloc) -#define huge_mtx JEMALLOC_N(huge_mtx) -#define huge_ndalloc JEMALLOC_N(huge_ndalloc) -#define huge_nmalloc JEMALLOC_N(huge_nmalloc) -#define huge_palloc JEMALLOC_N(huge_palloc) -#define huge_postfork_child JEMALLOC_N(huge_postfork_child) -#define huge_postfork_parent JEMALLOC_N(huge_postfork_parent) -#define huge_prefork JEMALLOC_N(huge_prefork) -#define huge_prof_ctx_get JEMALLOC_N(huge_prof_ctx_get) -#define huge_prof_ctx_set JEMALLOC_N(huge_prof_ctx_set) -#define huge_ralloc JEMALLOC_N(huge_ralloc) -#define huge_ralloc_no_move JEMALLOC_N(huge_ralloc_no_move) -#define huge_salloc JEMALLOC_N(huge_salloc) -#define iallocm JEMALLOC_N(iallocm) -#define icalloc JEMALLOC_N(icalloc) -#define icallocx JEMALLOC_N(icallocx) -#define idalloc JEMALLOC_N(idalloc) -#define idallocx 
JEMALLOC_N(idallocx) -#define imalloc JEMALLOC_N(imalloc) -#define imallocx JEMALLOC_N(imallocx) -#define ipalloc JEMALLOC_N(ipalloc) -#define ipallocx JEMALLOC_N(ipallocx) -#define iqalloc JEMALLOC_N(iqalloc) -#define iqallocx JEMALLOC_N(iqallocx) -#define iralloc JEMALLOC_N(iralloc) -#define irallocx JEMALLOC_N(irallocx) -#define isalloc JEMALLOC_N(isalloc) -#define isthreaded JEMALLOC_N(isthreaded) -#define ivsalloc JEMALLOC_N(ivsalloc) -#define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child) -#define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent) -#define jemalloc_prefork JEMALLOC_N(jemalloc_prefork) -#define malloc_cprintf JEMALLOC_N(malloc_cprintf) -#define malloc_mutex_init JEMALLOC_N(malloc_mutex_init) -#define malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock) -#define malloc_mutex_postfork_child JEMALLOC_N(malloc_mutex_postfork_child) -#define malloc_mutex_postfork_parent JEMALLOC_N(malloc_mutex_postfork_parent) -#define malloc_mutex_prefork JEMALLOC_N(malloc_mutex_prefork) -#define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock) -#define malloc_printf JEMALLOC_N(malloc_printf) -#define malloc_snprintf JEMALLOC_N(malloc_snprintf) -#define malloc_strtoumax JEMALLOC_N(malloc_strtoumax) -#define malloc_tsd_boot JEMALLOC_N(malloc_tsd_boot) -#define malloc_tsd_cleanup_register JEMALLOC_N(malloc_tsd_cleanup_register) -#define malloc_tsd_dalloc JEMALLOC_N(malloc_tsd_dalloc) -#define malloc_tsd_malloc JEMALLOC_N(malloc_tsd_malloc) -#define malloc_tsd_no_cleanup JEMALLOC_N(malloc_tsd_no_cleanup) -#define malloc_vcprintf JEMALLOC_N(malloc_vcprintf) -#define malloc_vsnprintf JEMALLOC_N(malloc_vsnprintf) -#define malloc_write JEMALLOC_N(malloc_write) -#define map_bias JEMALLOC_N(map_bias) -#define mb_write JEMALLOC_N(mb_write) -#define mutex_boot JEMALLOC_N(mutex_boot) -#define narenas_auto JEMALLOC_N(narenas_auto) -#define narenas_total JEMALLOC_N(narenas_total) -#define narenas_total_get JEMALLOC_N(narenas_total_get) -#define ncpus 
JEMALLOC_N(ncpus) -#define nhbins JEMALLOC_N(nhbins) -#define opt_abort JEMALLOC_N(opt_abort) -#define opt_junk JEMALLOC_N(opt_junk) -#define opt_lg_chunk JEMALLOC_N(opt_lg_chunk) -#define opt_lg_dirty_mult JEMALLOC_N(opt_lg_dirty_mult) -#define opt_lg_prof_interval JEMALLOC_N(opt_lg_prof_interval) -#define opt_lg_prof_sample JEMALLOC_N(opt_lg_prof_sample) -#define opt_lg_tcache_max JEMALLOC_N(opt_lg_tcache_max) -#define opt_narenas JEMALLOC_N(opt_narenas) -#define opt_prof JEMALLOC_N(opt_prof) -#define opt_prof_accum JEMALLOC_N(opt_prof_accum) -#define opt_prof_active JEMALLOC_N(opt_prof_active) -#define opt_prof_final JEMALLOC_N(opt_prof_final) -#define opt_prof_gdump JEMALLOC_N(opt_prof_gdump) -#define opt_prof_leak JEMALLOC_N(opt_prof_leak) -#define opt_prof_prefix JEMALLOC_N(opt_prof_prefix) -#define opt_quarantine JEMALLOC_N(opt_quarantine) -#define opt_redzone JEMALLOC_N(opt_redzone) -#define opt_stats_print JEMALLOC_N(opt_stats_print) -#define opt_tcache JEMALLOC_N(opt_tcache) -#define opt_utrace JEMALLOC_N(opt_utrace) -#define opt_valgrind JEMALLOC_N(opt_valgrind) -#define opt_xmalloc JEMALLOC_N(opt_xmalloc) -#define opt_zero JEMALLOC_N(opt_zero) -#define p2rz JEMALLOC_N(p2rz) -#define pages_purge JEMALLOC_N(pages_purge) -#define pow2_ceil JEMALLOC_N(pow2_ceil) -#define prof_backtrace JEMALLOC_N(prof_backtrace) -#define prof_boot0 JEMALLOC_N(prof_boot0) -#define prof_boot1 JEMALLOC_N(prof_boot1) -#define prof_boot2 JEMALLOC_N(prof_boot2) -#define prof_ctx_get JEMALLOC_N(prof_ctx_get) -#define prof_ctx_set JEMALLOC_N(prof_ctx_set) -#define prof_free JEMALLOC_N(prof_free) -#define prof_gdump JEMALLOC_N(prof_gdump) -#define prof_idump JEMALLOC_N(prof_idump) -#define prof_interval JEMALLOC_N(prof_interval) -#define prof_lookup JEMALLOC_N(prof_lookup) -#define prof_malloc JEMALLOC_N(prof_malloc) -#define prof_mdump JEMALLOC_N(prof_mdump) -#define prof_postfork_child JEMALLOC_N(prof_postfork_child) -#define prof_postfork_parent JEMALLOC_N(prof_postfork_parent) 
-#define prof_prefork JEMALLOC_N(prof_prefork) -#define prof_promote JEMALLOC_N(prof_promote) -#define prof_realloc JEMALLOC_N(prof_realloc) -#define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) -#define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) -#define prof_tdata_booted JEMALLOC_N(prof_tdata_booted) -#define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup) -#define prof_tdata_get JEMALLOC_N(prof_tdata_get) -#define prof_tdata_init JEMALLOC_N(prof_tdata_init) -#define prof_tdata_initialized JEMALLOC_N(prof_tdata_initialized) -#define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) -#define prof_tdata_tsd JEMALLOC_N(prof_tdata_tsd) -#define prof_tdata_tsd_boot JEMALLOC_N(prof_tdata_tsd_boot) -#define prof_tdata_tsd_cleanup_wrapper JEMALLOC_N(prof_tdata_tsd_cleanup_wrapper) -#define prof_tdata_tsd_get JEMALLOC_N(prof_tdata_tsd_get) -#define prof_tdata_tsd_get_wrapper JEMALLOC_N(prof_tdata_tsd_get_wrapper) -#define prof_tdata_tsd_set JEMALLOC_N(prof_tdata_tsd_set) -#define quarantine JEMALLOC_N(quarantine) -#define quarantine_alloc_hook JEMALLOC_N(quarantine_alloc_hook) -#define quarantine_boot JEMALLOC_N(quarantine_boot) -#define quarantine_booted JEMALLOC_N(quarantine_booted) -#define quarantine_cleanup JEMALLOC_N(quarantine_cleanup) -#define quarantine_init JEMALLOC_N(quarantine_init) -#define quarantine_tls JEMALLOC_N(quarantine_tls) -#define quarantine_tsd JEMALLOC_N(quarantine_tsd) -#define quarantine_tsd_boot JEMALLOC_N(quarantine_tsd_boot) -#define quarantine_tsd_cleanup_wrapper JEMALLOC_N(quarantine_tsd_cleanup_wrapper) -#define quarantine_tsd_get JEMALLOC_N(quarantine_tsd_get) -#define quarantine_tsd_get_wrapper JEMALLOC_N(quarantine_tsd_get_wrapper) -#define quarantine_tsd_set JEMALLOC_N(quarantine_tsd_set) -#define register_zone JEMALLOC_N(register_zone) -#define rtree_get JEMALLOC_N(rtree_get) -#define rtree_get_locked JEMALLOC_N(rtree_get_locked) -#define rtree_new JEMALLOC_N(rtree_new) -#define 
rtree_postfork_child JEMALLOC_N(rtree_postfork_child) -#define rtree_postfork_parent JEMALLOC_N(rtree_postfork_parent) -#define rtree_prefork JEMALLOC_N(rtree_prefork) -#define rtree_set JEMALLOC_N(rtree_set) -#define s2u JEMALLOC_N(s2u) -#define sa2u JEMALLOC_N(sa2u) -#define set_errno JEMALLOC_N(set_errno) -#define stats_cactive JEMALLOC_N(stats_cactive) -#define stats_cactive_add JEMALLOC_N(stats_cactive_add) -#define stats_cactive_get JEMALLOC_N(stats_cactive_get) -#define stats_cactive_sub JEMALLOC_N(stats_cactive_sub) -#define stats_chunks JEMALLOC_N(stats_chunks) -#define stats_print JEMALLOC_N(stats_print) -#define tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy) -#define tcache_alloc_large JEMALLOC_N(tcache_alloc_large) -#define tcache_alloc_small JEMALLOC_N(tcache_alloc_small) -#define tcache_alloc_small_hard JEMALLOC_N(tcache_alloc_small_hard) -#define tcache_arena_associate JEMALLOC_N(tcache_arena_associate) -#define tcache_arena_dissociate JEMALLOC_N(tcache_arena_dissociate) -#define tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large) -#define tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small) -#define tcache_bin_info JEMALLOC_N(tcache_bin_info) -#define tcache_boot0 JEMALLOC_N(tcache_boot0) -#define tcache_boot1 JEMALLOC_N(tcache_boot1) -#define tcache_booted JEMALLOC_N(tcache_booted) -#define tcache_create JEMALLOC_N(tcache_create) -#define tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large) -#define tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small) -#define tcache_destroy JEMALLOC_N(tcache_destroy) -#define tcache_enabled_booted JEMALLOC_N(tcache_enabled_booted) -#define tcache_enabled_get JEMALLOC_N(tcache_enabled_get) -#define tcache_enabled_initialized JEMALLOC_N(tcache_enabled_initialized) -#define tcache_enabled_set JEMALLOC_N(tcache_enabled_set) -#define tcache_enabled_tls JEMALLOC_N(tcache_enabled_tls) -#define tcache_enabled_tsd JEMALLOC_N(tcache_enabled_tsd) -#define tcache_enabled_tsd_boot JEMALLOC_N(tcache_enabled_tsd_boot) 
-#define tcache_enabled_tsd_cleanup_wrapper JEMALLOC_N(tcache_enabled_tsd_cleanup_wrapper) -#define tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get) -#define tcache_enabled_tsd_get_wrapper JEMALLOC_N(tcache_enabled_tsd_get_wrapper) -#define tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set) -#define tcache_event JEMALLOC_N(tcache_event) -#define tcache_event_hard JEMALLOC_N(tcache_event_hard) -#define tcache_flush JEMALLOC_N(tcache_flush) -#define tcache_get JEMALLOC_N(tcache_get) -#define tcache_initialized JEMALLOC_N(tcache_initialized) -#define tcache_maxclass JEMALLOC_N(tcache_maxclass) -#define tcache_salloc JEMALLOC_N(tcache_salloc) -#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) -#define tcache_thread_cleanup JEMALLOC_N(tcache_thread_cleanup) -#define tcache_tls JEMALLOC_N(tcache_tls) -#define tcache_tsd JEMALLOC_N(tcache_tsd) -#define tcache_tsd_boot JEMALLOC_N(tcache_tsd_boot) -#define tcache_tsd_cleanup_wrapper JEMALLOC_N(tcache_tsd_cleanup_wrapper) -#define tcache_tsd_get JEMALLOC_N(tcache_tsd_get) -#define tcache_tsd_get_wrapper JEMALLOC_N(tcache_tsd_get_wrapper) -#define tcache_tsd_set JEMALLOC_N(tcache_tsd_set) -#define thread_allocated_booted JEMALLOC_N(thread_allocated_booted) -#define thread_allocated_initialized JEMALLOC_N(thread_allocated_initialized) -#define thread_allocated_tls JEMALLOC_N(thread_allocated_tls) -#define thread_allocated_tsd JEMALLOC_N(thread_allocated_tsd) -#define thread_allocated_tsd_boot JEMALLOC_N(thread_allocated_tsd_boot) -#define thread_allocated_tsd_cleanup_wrapper JEMALLOC_N(thread_allocated_tsd_cleanup_wrapper) -#define thread_allocated_tsd_get JEMALLOC_N(thread_allocated_tsd_get) -#define thread_allocated_tsd_get_wrapper JEMALLOC_N(thread_allocated_tsd_get_wrapper) -#define thread_allocated_tsd_set JEMALLOC_N(thread_allocated_tsd_set) -#define u2rz JEMALLOC_N(u2rz) diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/prng.h 
mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/prng.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/prng.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/prng.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,60 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* - * Simple linear congruential pseudo-random number generator: - * - * prng(y) = (a*x + c) % m - * - * where the following constants ensure maximal period: - * - * a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4. - * c == Odd number (relatively prime to 2^n). - * m == 2^32 - * - * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints. - * - * This choice of m has the disadvantage that the quality of the bits is - * proportional to bit position. For example. the lowest bit has a cycle of 2, - * the next has a cycle of 4, etc. For this reason, we prefer to use the upper - * bits. - * - * Macro parameters: - * uint32_t r : Result. - * unsigned lg_range : (0..32], number of least significant bits to return. - * uint32_t state : Seed value. - * const uint32_t a, c : See above discussion. - */ -#define prng32(r, lg_range, state, a, c) do { \ - assert(lg_range > 0); \ - assert(lg_range <= 32); \ - \ - r = (state * (a)) + (c); \ - state = r; \ - r >>= (32 - lg_range); \ -} while (false) - -/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. 
*/ -#define prng64(r, lg_range, state, a, c) do { \ - assert(lg_range > 0); \ - assert(lg_range <= 64); \ - \ - r = (state * (a)) + (c); \ - state = r; \ - r >>= (64 - lg_range); \ -} while (false) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/prof.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/prof.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/prof.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/prof.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,579 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct prof_bt_s prof_bt_t; -typedef struct prof_cnt_s prof_cnt_t; -typedef struct prof_thr_cnt_s prof_thr_cnt_t; -typedef struct prof_ctx_s prof_ctx_t; -typedef struct prof_tdata_s prof_tdata_t; - -/* Option defaults. */ -#define PROF_PREFIX_DEFAULT "jeprof" -#define LG_PROF_SAMPLE_DEFAULT 19 -#define LG_PROF_INTERVAL_DEFAULT -1 - -/* - * Hard limit on stack backtrace depth. The version of prof_backtrace() that - * is based on __builtin_return_address() necessarily has a hard-coded number - * of backtrace frame handlers, and should be kept in sync with this setting. - */ -#define PROF_BT_MAX 128 - -/* Maximum number of backtraces to store in each per thread LRU cache. */ -#define PROF_TCMAX 1024 - -/* Initial hash table size. 
*/ -#define PROF_CKH_MINITEMS 64 - -/* Size of memory buffer to use when writing dump files. */ -#define PROF_DUMP_BUFSIZE 65536 - -/* Size of stack-allocated buffer used by prof_printf(). */ -#define PROF_PRINTF_BUFSIZE 128 - -/* - * Number of mutexes shared among all ctx's. No space is allocated for these - * unless profiling is enabled, so it's okay to over-provision. - */ -#define PROF_NCTX_LOCKS 1024 - -/* - * prof_tdata pointers close to NULL are used to encode state information that - * is used for cleaning up during thread shutdown. - */ -#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) -#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) -#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct prof_bt_s { - /* Backtrace, stored as len program counters. */ - void **vec; - unsigned len; -}; - -#ifdef JEMALLOC_PROF_LIBGCC -/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ -typedef struct { - prof_bt_t *bt; - unsigned nignore; - unsigned max; -} prof_unwind_data_t; -#endif - -struct prof_cnt_s { - /* - * Profiling counters. An allocation/deallocation pair can operate on - * different prof_thr_cnt_t objects that are linked into the same - * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go - * negative. In principle it is possible for the *bytes counters to - * overflow/underflow, but a general solution would require something - * like 128-bit counters; this implementation doesn't bother to solve - * that problem. - */ - int64_t curobjs; - int64_t curbytes; - uint64_t accumobjs; - uint64_t accumbytes; -}; - -struct prof_thr_cnt_s { - /* Linkage into prof_ctx_t's cnts_ql. */ - ql_elm(prof_thr_cnt_t) cnts_link; - - /* Linkage into thread's LRU. */ - ql_elm(prof_thr_cnt_t) lru_link; - - /* - * Associated context. 
If a thread frees an object that it did not - * allocate, it is possible that the context is not cached in the - * thread's hash table, in which case it must be able to look up the - * context, insert a new prof_thr_cnt_t into the thread's hash table, - * and link it into the prof_ctx_t's cnts_ql. - */ - prof_ctx_t *ctx; - - /* - * Threads use memory barriers to update the counters. Since there is - * only ever one writer, the only challenge is for the reader to get a - * consistent read of the counters. - * - * The writer uses this series of operations: - * - * 1) Increment epoch to an odd number. - * 2) Update counters. - * 3) Increment epoch to an even number. - * - * The reader must assure 1) that the epoch is even while it reads the - * counters, and 2) that the epoch doesn't change between the time it - * starts and finishes reading the counters. - */ - unsigned epoch; - - /* Profiling counters. */ - prof_cnt_t cnts; -}; - -struct prof_ctx_s { - /* Associated backtrace. */ - prof_bt_t *bt; - - /* Protects nlimbo, cnt_merged, and cnts_ql. */ - malloc_mutex_t *lock; - - /* - * Number of threads that currently cause this ctx to be in a state of - * limbo due to one of: - * - Initializing per thread counters associated with this ctx. - * - Preparing to destroy this ctx. - * nlimbo must be 1 (single destroyer) in order to safely destroy the - * ctx. - */ - unsigned nlimbo; - - /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; - - /* When threads exit, they merge their stats into cnt_merged. */ - prof_cnt_t cnt_merged; - - /* - * List of profile counters, one for each thread that has allocated in - * this context. - */ - ql_head(prof_thr_cnt_t) cnts_ql; -}; - -struct prof_tdata_s { - /* - * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a - * cache of backtraces, with associated thread-specific prof_thr_cnt_t - * objects. Other threads may read the prof_thr_cnt_t contents, but no - * others will ever write them. 
- * - * Upon thread exit, the thread must merge all the prof_thr_cnt_t - * counter data into the associated prof_ctx_t objects, and unlink/free - * the prof_thr_cnt_t objects. - */ - ckh_t bt2cnt; - - /* LRU for contents of bt2cnt. */ - ql_head(prof_thr_cnt_t) lru_ql; - - /* Backtrace vector, used for calls to prof_backtrace(). */ - void **vec; - - /* Sampling state. */ - uint64_t prng_state; - uint64_t threshold; - uint64_t accum; - - /* State used to avoid dumping while operating on prof internals. */ - bool enq; - bool enq_idump; - bool enq_gdump; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern bool opt_prof; -/* - * Even if opt_prof is true, sampling can be temporarily disabled by setting - * opt_prof_active to false. No locking is used when updating opt_prof_active, - * so there are no guarantees regarding how long it will take for all threads - * to notice state changes. - */ -extern bool opt_prof_active; -extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ -extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ -extern bool opt_prof_gdump; /* High-water memory dumping. */ -extern bool opt_prof_final; /* Final profile dumping. */ -extern bool opt_prof_leak; /* Dump leak summary at exit. */ -extern bool opt_prof_accum; /* Report cumulative bytes. */ -extern char opt_prof_prefix[PATH_MAX + 1]; - -/* - * Profile dump interval, measured in bytes allocated. Each arena triggers a - * profile dump when it reaches this threshold. The effect is that the - * interval between profile dumps averages prof_interval, though the actual - * interval between dumps will tend to be sporadic, and the interval will be a - * maximum of approximately (prof_interval * narenas). - */ -extern uint64_t prof_interval; - -/* - * If true, promote small sampled objects to large objects, since small run - * headers do not have embedded profile context pointers. 
- */ -extern bool prof_promote; - -void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(prof_bt_t *bt, unsigned nignore); -prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); -void prof_idump(void); -bool prof_mdump(const char *filename); -void prof_gdump(void); -prof_tdata_t *prof_tdata_init(void); -void prof_tdata_cleanup(void *arg); -void prof_boot0(void); -void prof_boot1(void); -bool prof_boot2(void); -void prof_prefork(void); -void prof_postfork_parent(void); -void prof_postfork_child(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#define PROF_ALLOC_PREP(nignore, size, ret) do { \ - prof_tdata_t *prof_tdata; \ - prof_bt_t bt; \ - \ - assert(size == s2u(size)); \ - \ - prof_tdata = prof_tdata_get(true); \ - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { \ - if (prof_tdata != NULL) \ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ - else \ - ret = NULL; \ - break; \ - } \ - \ - if (opt_prof_active == false) { \ - /* Sampling is currently inactive, so avoid sampling. */\ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ - } else if (opt_lg_prof_sample == 0) { \ - /* Don't bother with sampling logic, since sampling */\ - /* interval is 1. */\ - bt_init(&bt, prof_tdata->vec); \ - prof_backtrace(&bt, nignore); \ - ret = prof_lookup(&bt); \ - } else { \ - if (prof_tdata->threshold == 0) { \ - /* Initialize. Seed the prng differently for */\ - /* each thread. */\ - prof_tdata->prng_state = \ - (uint64_t)(uintptr_t)&size; \ - prof_sample_threshold_update(prof_tdata); \ - } \ - \ - /* Determine whether to capture a backtrace based on */\ - /* whether size is enough for prof_accum to reach */\ - /* prof_tdata->threshold. However, delay updating */\ - /* these variables until prof_{m,re}alloc(), because */\ - /* we don't know for sure that the allocation will */\ - /* succeed. 
*/\ - /* */\ - /* Use subtraction rather than addition to avoid */\ - /* potential integer overflow. */\ - if (size >= prof_tdata->threshold - \ - prof_tdata->accum) { \ - bt_init(&bt, prof_tdata->vec); \ - prof_backtrace(&bt, nignore); \ - ret = prof_lookup(&bt); \ - } else \ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ - } \ -} while (0) - -#ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) - -prof_tdata_t *prof_tdata_get(bool create); -void prof_sample_threshold_update(prof_tdata_t *prof_tdata); -prof_ctx_t *prof_ctx_get(const void *ptr); -void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); -bool prof_sample_accum_update(size_t size); -void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt); -void prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, - size_t old_size, prof_ctx_t *old_ctx); -void prof_free(const void *ptr, size_t size); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) -/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */ -malloc_tsd_externs(prof_tdata, prof_tdata_t *) -malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL, - prof_tdata_cleanup) - -JEMALLOC_INLINE prof_tdata_t * -prof_tdata_get(bool create) -{ - prof_tdata_t *prof_tdata; - - cassert(config_prof); - - prof_tdata = *prof_tdata_tsd_get(); - if (create && prof_tdata == NULL) - prof_tdata = prof_tdata_init(); - - return (prof_tdata); -} - -JEMALLOC_INLINE void -prof_sample_threshold_update(prof_tdata_t *prof_tdata) -{ - uint64_t r; - double u; - - cassert(config_prof); - - /* - * Compute sample threshold as a geometrically distributed random - * variable with mean (2^opt_lg_prof_sample). 
- * - * __ __ - * | log(u) | 1 - * prof_tdata->threshold = | -------- |, where p = ------------------- - * | log(1-p) | opt_lg_prof_sample - * 2 - * - * For more information on the math, see: - * - * Non-Uniform Random Variate Generation - * Luc Devroye - * Springer-Verlag, New York, 1986 - * pp 500 - * (http://cg.scs.carleton.ca/~luc/rnbookindex.html) - */ - prng64(r, 53, prof_tdata->prng_state, - UINT64_C(6364136223846793005), UINT64_C(1442695040888963407)); - u = (double)r * (1.0/9007199254740992.0L); - prof_tdata->threshold = (uint64_t)(log(u) / - log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) - + (uint64_t)1U; -} - -JEMALLOC_INLINE prof_ctx_t * -prof_ctx_get(const void *ptr) -{ - prof_ctx_t *ret; - arena_chunk_t *chunk; - - cassert(config_prof); - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { - /* Region. */ - ret = arena_prof_ctx_get(ptr); - } else - ret = huge_prof_ctx_get(ptr); - - return (ret); -} - -JEMALLOC_INLINE void -prof_ctx_set(const void *ptr, prof_ctx_t *ctx) -{ - arena_chunk_t *chunk; - - cassert(config_prof); - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { - /* Region. */ - arena_prof_ctx_set(ptr, ctx); - } else - huge_prof_ctx_set(ptr, ctx); -} - -JEMALLOC_INLINE bool -prof_sample_accum_update(size_t size) -{ - prof_tdata_t *prof_tdata; - - cassert(config_prof); - /* Sampling logic is unnecessary if the interval is 1. */ - assert(opt_lg_prof_sample != 0); - - prof_tdata = prof_tdata_get(false); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) - return (true); - - /* Take care to avoid integer overflow. */ - if (size >= prof_tdata->threshold - prof_tdata->accum) { - prof_tdata->accum -= (prof_tdata->threshold - size); - /* Compute new sample threshold. 
*/ - prof_sample_threshold_update(prof_tdata); - while (prof_tdata->accum >= prof_tdata->threshold) { - prof_tdata->accum -= prof_tdata->threshold; - prof_sample_threshold_update(prof_tdata); - } - return (false); - } else { - prof_tdata->accum += size; - return (true); - } -} - -JEMALLOC_INLINE void -prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) -{ - - cassert(config_prof); - assert(ptr != NULL); - assert(size == isalloc(ptr, true)); - - if (opt_lg_prof_sample != 0) { - if (prof_sample_accum_update(size)) { - /* - * Don't sample. For malloc()-like allocation, it is - * always possible to tell in advance how large an - * object's usable size will be, so there should never - * be a difference between the size passed to - * PROF_ALLOC_PREP() and prof_malloc(). - */ - assert((uintptr_t)cnt == (uintptr_t)1U); - } - } - - if ((uintptr_t)cnt > (uintptr_t)1U) { - prof_ctx_set(ptr, cnt->ctx); - - cnt->epoch++; - /*********/ - mb_write(); - /*********/ - cnt->cnts.curobjs++; - cnt->cnts.curbytes += size; - if (opt_prof_accum) { - cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += size; - } - /*********/ - mb_write(); - /*********/ - cnt->epoch++; - /*********/ - mb_write(); - /*********/ - } else - prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); -} - -JEMALLOC_INLINE void -prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, - size_t old_size, prof_ctx_t *old_ctx) -{ - prof_thr_cnt_t *told_cnt; - - cassert(config_prof); - assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); - - if (ptr != NULL) { - assert(size == isalloc(ptr, true)); - if (opt_lg_prof_sample != 0) { - if (prof_sample_accum_update(size)) { - /* - * Don't sample. The size passed to - * PROF_ALLOC_PREP() was larger than what - * actually got allocated, so a backtrace was - * captured for this allocation, even though - * its actual size was insufficient to cross - * the sample threshold. 
- */ - cnt = (prof_thr_cnt_t *)(uintptr_t)1U; - } - } - } - - if ((uintptr_t)old_ctx > (uintptr_t)1U) { - told_cnt = prof_lookup(old_ctx->bt); - if (told_cnt == NULL) { - /* - * It's too late to propagate OOM for this realloc(), - * so operate directly on old_cnt->ctx->cnt_merged. - */ - malloc_mutex_lock(old_ctx->lock); - old_ctx->cnt_merged.curobjs--; - old_ctx->cnt_merged.curbytes -= old_size; - malloc_mutex_unlock(old_ctx->lock); - told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; - } - } else - told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; - - if ((uintptr_t)told_cnt > (uintptr_t)1U) - told_cnt->epoch++; - if ((uintptr_t)cnt > (uintptr_t)1U) { - prof_ctx_set(ptr, cnt->ctx); - cnt->epoch++; - } else if (ptr != NULL) - prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); - /*********/ - mb_write(); - /*********/ - if ((uintptr_t)told_cnt > (uintptr_t)1U) { - told_cnt->cnts.curobjs--; - told_cnt->cnts.curbytes -= old_size; - } - if ((uintptr_t)cnt > (uintptr_t)1U) { - cnt->cnts.curobjs++; - cnt->cnts.curbytes += size; - if (opt_prof_accum) { - cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += size; - } - } - /*********/ - mb_write(); - /*********/ - if ((uintptr_t)told_cnt > (uintptr_t)1U) - told_cnt->epoch++; - if ((uintptr_t)cnt > (uintptr_t)1U) - cnt->epoch++; - /*********/ - mb_write(); /* Not strictly necessary. */ -} - -JEMALLOC_INLINE void -prof_free(const void *ptr, size_t size) -{ - prof_ctx_t *ctx = prof_ctx_get(ptr); - - cassert(config_prof); - - if ((uintptr_t)ctx > (uintptr_t)1) { - prof_thr_cnt_t *tcnt; - assert(size == isalloc(ptr, true)); - tcnt = prof_lookup(ctx->bt); - - if (tcnt != NULL) { - tcnt->epoch++; - /*********/ - mb_write(); - /*********/ - tcnt->cnts.curobjs--; - tcnt->cnts.curbytes -= size; - /*********/ - mb_write(); - /*********/ - tcnt->epoch++; - /*********/ - mb_write(); - /*********/ - } else { - /* - * OOM during free() cannot be propagated, so operate - * directly on cnt->ctx->cnt_merged. 
- */ - malloc_mutex_lock(ctx->lock); - ctx->cnt_merged.curobjs--; - ctx->cnt_merged.curbytes -= size; - malloc_mutex_unlock(ctx->lock); - } - } -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/ql.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/ql.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/ql.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/ql.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,83 +0,0 @@ -/* - * List definitions. - */ -#define ql_head(a_type) \ -struct { \ - a_type *qlh_first; \ -} - -#define ql_head_initializer(a_head) {NULL} - -#define ql_elm(a_type) qr(a_type) - -/* List functions. */ -#define ql_new(a_head) do { \ - (a_head)->qlh_first = NULL; \ -} while (0) - -#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) - -#define ql_first(a_head) ((a_head)->qlh_first) - -#define ql_last(a_head, a_field) \ - ((ql_first(a_head) != NULL) \ - ? qr_prev(ql_first(a_head), a_field) : NULL) - -#define ql_next(a_head, a_elm, a_field) \ - ((ql_last(a_head, a_field) != (a_elm)) \ - ? qr_next((a_elm), a_field) : NULL) - -#define ql_prev(a_head, a_elm, a_field) \ - ((ql_first(a_head) != (a_elm)) ? 
qr_prev((a_elm), a_field) \ - : NULL) - -#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \ - qr_before_insert((a_qlelm), (a_elm), a_field); \ - if (ql_first(a_head) == (a_qlelm)) { \ - ql_first(a_head) = (a_elm); \ - } \ -} while (0) - -#define ql_after_insert(a_qlelm, a_elm, a_field) \ - qr_after_insert((a_qlelm), (a_elm), a_field) - -#define ql_head_insert(a_head, a_elm, a_field) do { \ - if (ql_first(a_head) != NULL) { \ - qr_before_insert(ql_first(a_head), (a_elm), a_field); \ - } \ - ql_first(a_head) = (a_elm); \ -} while (0) - -#define ql_tail_insert(a_head, a_elm, a_field) do { \ - if (ql_first(a_head) != NULL) { \ - qr_before_insert(ql_first(a_head), (a_elm), a_field); \ - } \ - ql_first(a_head) = qr_next((a_elm), a_field); \ -} while (0) - -#define ql_remove(a_head, a_elm, a_field) do { \ - if (ql_first(a_head) == (a_elm)) { \ - ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ - } \ - if (ql_first(a_head) != (a_elm)) { \ - qr_remove((a_elm), a_field); \ - } else { \ - ql_first(a_head) = NULL; \ - } \ -} while (0) - -#define ql_head_remove(a_head, a_type, a_field) do { \ - a_type *t = ql_first(a_head); \ - ql_remove((a_head), t, a_field); \ -} while (0) - -#define ql_tail_remove(a_head, a_type, a_field) do { \ - a_type *t = ql_last(a_head, a_field); \ - ql_remove((a_head), t, a_field); \ -} while (0) - -#define ql_foreach(a_var, a_head, a_field) \ - qr_foreach((a_var), ql_first(a_head), a_field) - -#define ql_reverse_foreach(a_var, a_head, a_field) \ - qr_reverse_foreach((a_var), ql_first(a_head), a_field) diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/qr.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/qr.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/qr.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/qr.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,67 +0,0 @@ -/* Ring definitions. 
*/ -#define qr(a_type) \ -struct { \ - a_type *qre_next; \ - a_type *qre_prev; \ -} - -/* Ring functions. */ -#define qr_new(a_qr, a_field) do { \ - (a_qr)->a_field.qre_next = (a_qr); \ - (a_qr)->a_field.qre_prev = (a_qr); \ -} while (0) - -#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next) - -#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) - -#define qr_before_insert(a_qrelm, a_qr, a_field) do { \ - (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \ - (a_qr)->a_field.qre_next = (a_qrelm); \ - (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \ - (a_qrelm)->a_field.qre_prev = (a_qr); \ -} while (0) - -#define qr_after_insert(a_qrelm, a_qr, a_field) \ - do \ - { \ - (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \ - (a_qr)->a_field.qre_prev = (a_qrelm); \ - (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \ - (a_qrelm)->a_field.qre_next = (a_qr); \ - } while (0) - -#define qr_meld(a_qr_a, a_qr_b, a_field) do { \ - void *t; \ - (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ - (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ - t = (a_qr_a)->a_field.qre_prev; \ - (a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev; \ - (a_qr_b)->a_field.qre_prev = t; \ -} while (0) - -/* qr_meld() and qr_split() are functionally equivalent, so there's no need to - * have two copies of the code. */ -#define qr_split(a_qr_a, a_qr_b, a_field) \ - qr_meld((a_qr_a), (a_qr_b), a_field) - -#define qr_remove(a_qr, a_field) do { \ - (a_qr)->a_field.qre_prev->a_field.qre_next \ - = (a_qr)->a_field.qre_next; \ - (a_qr)->a_field.qre_next->a_field.qre_prev \ - = (a_qr)->a_field.qre_prev; \ - (a_qr)->a_field.qre_next = (a_qr); \ - (a_qr)->a_field.qre_prev = (a_qr); \ -} while (0) - -#define qr_foreach(var, a_qr, a_field) \ - for ((var) = (a_qr); \ - (var) != NULL; \ - (var) = (((var)->a_field.qre_next != (a_qr)) \ - ? (var)->a_field.qre_next : NULL)) - -#define qr_reverse_foreach(var, a_qr, a_field) \ - for ((var) = ((a_qr) != NULL) ? 
qr_prev(a_qr, a_field) : NULL; \ - (var) != NULL; \ - (var) = (((var) != (a_qr)) \ - ? (var)->a_field.qre_prev : NULL)) diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/quarantine.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/quarantine.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/quarantine.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/quarantine.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,67 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct quarantine_obj_s quarantine_obj_t; -typedef struct quarantine_s quarantine_t; - -/* Default per thread quarantine size if valgrind is enabled. */ -#define JEMALLOC_VALGRIND_QUARANTINE_DEFAULT (ZU(1) << 24) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct quarantine_obj_s { - void *ptr; - size_t usize; -}; - -struct quarantine_s { - size_t curbytes; - size_t curobjs; - size_t first; -#define LG_MAXOBJS_INIT 10 - size_t lg_maxobjs; - quarantine_obj_t objs[1]; /* Dynamically sized ring buffer. 
*/ -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -quarantine_t *quarantine_init(size_t lg_maxobjs); -void quarantine(void *ptr); -void quarantine_cleanup(void *arg); -bool quarantine_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), quarantine, quarantine_t *) - -void quarantine_alloc_hook(void); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_QUARANTINE_C_)) -malloc_tsd_externs(quarantine, quarantine_t *) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, quarantine, quarantine_t *, NULL, - quarantine_cleanup) - -JEMALLOC_ALWAYS_INLINE void -quarantine_alloc_hook(void) -{ - quarantine_t *quarantine; - - assert(config_fill && opt_quarantine); - - quarantine = *quarantine_tsd_get(); - if (quarantine == NULL) - quarantine_init(LG_MAXOBJS_INIT); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/rb.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/rb.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/rb.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/rb.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,973 +0,0 @@ -/*- - ******************************************************************************* - * - * cpp macro implementation of left-leaning 2-3 red-black trees. Parent - * pointers are not used, and color bits are stored in the least significant - * bit of right-child pointers (if RB_COMPACT is defined), thus making node - * linkage as compact as is possible for red-black trees. 
- * - * Usage: - * - * #include - * #include - * #define NDEBUG // (Optional, see assert(3).) - * #include - * #define RB_COMPACT // (Optional, embed color bits in right-child pointers.) - * #include - * ... - * - ******************************************************************************* - */ - -#ifndef RB_H_ -#define RB_H_ - -#if 0 -__FBSDID("$FreeBSD: head/lib/libc/stdlib/rb.h 204493 2010-02-28 22:57:13Z jasone $"); -#endif - -#ifdef RB_COMPACT -/* Node structure. */ -#define rb_node(a_type) \ -struct { \ - a_type *rbn_left; \ - a_type *rbn_right_red; \ -} -#else -#define rb_node(a_type) \ -struct { \ - a_type *rbn_left; \ - a_type *rbn_right; \ - bool rbn_red; \ -} -#endif - -/* Root structure. */ -#define rb_tree(a_type) \ -struct { \ - a_type *rbt_root; \ - a_type rbt_nil; \ -} - -/* Left accessors. */ -#define rbtn_left_get(a_type, a_field, a_node) \ - ((a_node)->a_field.rbn_left) -#define rbtn_left_set(a_type, a_field, a_node, a_left) do { \ - (a_node)->a_field.rbn_left = a_left; \ -} while (0) - -#ifdef RB_COMPACT -/* Right accessors. */ -#define rbtn_right_get(a_type, a_field, a_node) \ - ((a_type *) (((intptr_t) (a_node)->a_field.rbn_right_red) \ - & ((ssize_t)-2))) -#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ - (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) a_right) \ - | (((uintptr_t) (a_node)->a_field.rbn_right_red) & ((size_t)1))); \ -} while (0) - -/* Color accessors. 
*/ -#define rbtn_red_get(a_type, a_field, a_node) \ - ((bool) (((uintptr_t) (a_node)->a_field.rbn_right_red) \ - & ((size_t)1))) -#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ - (a_node)->a_field.rbn_right_red = (a_type *) ((((intptr_t) \ - (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)) \ - | ((ssize_t)a_red)); \ -} while (0) -#define rbtn_red_set(a_type, a_field, a_node) do { \ - (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) \ - (a_node)->a_field.rbn_right_red) | ((size_t)1)); \ -} while (0) -#define rbtn_black_set(a_type, a_field, a_node) do { \ - (a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \ - (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \ -} while (0) -#else -/* Right accessors. */ -#define rbtn_right_get(a_type, a_field, a_node) \ - ((a_node)->a_field.rbn_right) -#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ - (a_node)->a_field.rbn_right = a_right; \ -} while (0) - -/* Color accessors. */ -#define rbtn_red_get(a_type, a_field, a_node) \ - ((a_node)->a_field.rbn_red) -#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ - (a_node)->a_field.rbn_red = (a_red); \ -} while (0) -#define rbtn_red_set(a_type, a_field, a_node) do { \ - (a_node)->a_field.rbn_red = true; \ -} while (0) -#define rbtn_black_set(a_type, a_field, a_node) do { \ - (a_node)->a_field.rbn_red = false; \ -} while (0) -#endif - -/* Node initializer. */ -#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ - rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ - rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ - rbtn_red_set(a_type, a_field, (a_node)); \ -} while (0) - -/* Tree initializer. */ -#define rb_new(a_type, a_field, a_rbt) do { \ - (a_rbt)->rbt_root = &(a_rbt)->rbt_nil; \ - rbt_node_new(a_type, a_field, a_rbt, &(a_rbt)->rbt_nil); \ - rbtn_black_set(a_type, a_field, &(a_rbt)->rbt_nil); \ -} while (0) - -/* Internal utility macros. 
*/ -#define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \ - (r_node) = (a_root); \ - if ((r_node) != &(a_rbt)->rbt_nil) { \ - for (; \ - rbtn_left_get(a_type, a_field, (r_node)) != &(a_rbt)->rbt_nil;\ - (r_node) = rbtn_left_get(a_type, a_field, (r_node))) { \ - } \ - } \ -} while (0) - -#define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \ - (r_node) = (a_root); \ - if ((r_node) != &(a_rbt)->rbt_nil) { \ - for (; rbtn_right_get(a_type, a_field, (r_node)) != \ - &(a_rbt)->rbt_nil; (r_node) = rbtn_right_get(a_type, a_field, \ - (r_node))) { \ - } \ - } \ -} while (0) - -#define rbtn_rotate_left(a_type, a_field, a_node, r_node) do { \ - (r_node) = rbtn_right_get(a_type, a_field, (a_node)); \ - rbtn_right_set(a_type, a_field, (a_node), \ - rbtn_left_get(a_type, a_field, (r_node))); \ - rbtn_left_set(a_type, a_field, (r_node), (a_node)); \ -} while (0) - -#define rbtn_rotate_right(a_type, a_field, a_node, r_node) do { \ - (r_node) = rbtn_left_get(a_type, a_field, (a_node)); \ - rbtn_left_set(a_type, a_field, (a_node), \ - rbtn_right_get(a_type, a_field, (r_node))); \ - rbtn_right_set(a_type, a_field, (r_node), (a_node)); \ -} while (0) - -/* - * The rb_proto() macro generates function prototypes that correspond to the - * functions generated by an equivalently parameterized call to rb_gen(). 
- */ - -#define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \ -a_attr void \ -a_prefix##new(a_rbt_type *rbtree); \ -a_attr a_type * \ -a_prefix##first(a_rbt_type *rbtree); \ -a_attr a_type * \ -a_prefix##last(a_rbt_type *rbtree); \ -a_attr a_type * \ -a_prefix##next(a_rbt_type *rbtree, a_type *node); \ -a_attr a_type * \ -a_prefix##prev(a_rbt_type *rbtree, a_type *node); \ -a_attr a_type * \ -a_prefix##search(a_rbt_type *rbtree, a_type *key); \ -a_attr a_type * \ -a_prefix##nsearch(a_rbt_type *rbtree, a_type *key); \ -a_attr a_type * \ -a_prefix##psearch(a_rbt_type *rbtree, a_type *key); \ -a_attr void \ -a_prefix##insert(a_rbt_type *rbtree, a_type *node); \ -a_attr void \ -a_prefix##remove(a_rbt_type *rbtree, a_type *node); \ -a_attr a_type * \ -a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ - a_rbt_type *, a_type *, void *), void *arg); \ -a_attr a_type * \ -a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ - a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); - -/* - * The rb_gen() macro generates a type-specific red-black tree implementation, - * based on the above cpp macros. - * - * Arguments: - * - * a_attr : Function attribute for generated functions (ex: static). - * a_prefix : Prefix for generated functions (ex: ex_). - * a_rb_type : Type for red-black tree data structure (ex: ex_t). - * a_type : Type for red-black tree node data structure (ex: ex_node_t). - * a_field : Name of red-black tree node linkage (ex: ex_link). - * a_cmp : Node comparison function name, with the following prototype: - * int (a_cmp *)(a_type *a_node, a_type *a_other); - * ^^^^^^ - * or a_key - * Interpretation of comparision function return values: - * -1 : a_node < a_other - * 0 : a_node == a_other - * 1 : a_node > a_other - * In all cases, the a_node or a_key macro argument is the first - * argument to the comparison function, which makes it possible - * to write comparison functions that treat the first argument - * specially. 
- * - * Assuming the following setup: - * - * typedef struct ex_node_s ex_node_t; - * struct ex_node_s { - * rb_node(ex_node_t) ex_link; - * }; - * typedef rb_tree(ex_node_t) ex_t; - * rb_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_cmp) - * - * The following API is generated: - * - * static void - * ex_new(ex_t *tree); - * Description: Initialize a red-black tree structure. - * Args: - * tree: Pointer to an uninitialized red-black tree object. - * - * static ex_node_t * - * ex_first(ex_t *tree); - * static ex_node_t * - * ex_last(ex_t *tree); - * Description: Get the first/last node in tree. - * Args: - * tree: Pointer to an initialized red-black tree object. - * Ret: First/last node in tree, or NULL if tree is empty. - * - * static ex_node_t * - * ex_next(ex_t *tree, ex_node_t *node); - * static ex_node_t * - * ex_prev(ex_t *tree, ex_node_t *node); - * Description: Get node's successor/predecessor. - * Args: - * tree: Pointer to an initialized red-black tree object. - * node: A node in tree. - * Ret: node's successor/predecessor in tree, or NULL if node is - * last/first. - * - * static ex_node_t * - * ex_search(ex_t *tree, ex_node_t *key); - * Description: Search for node that matches key. - * Args: - * tree: Pointer to an initialized red-black tree object. - * key : Search key. - * Ret: Node in tree that matches key, or NULL if no match. - * - * static ex_node_t * - * ex_nsearch(ex_t *tree, ex_node_t *key); - * static ex_node_t * - * ex_psearch(ex_t *tree, ex_node_t *key); - * Description: Search for node that matches key. If no match is found, - * return what would be key's successor/predecessor, were - * key in tree. - * Args: - * tree: Pointer to an initialized red-black tree object. - * key : Search key. - * Ret: Node in tree that matches key, or if no match, hypothetical node's - * successor/predecessor (NULL if no successor/predecessor). - * - * static void - * ex_insert(ex_t *tree, ex_node_t *node); - * Description: Insert node into tree. 
- * Args: - * tree: Pointer to an initialized red-black tree object. - * node: Node to be inserted into tree. - * - * static void - * ex_remove(ex_t *tree, ex_node_t *node); - * Description: Remove node from tree. - * Args: - * tree: Pointer to an initialized red-black tree object. - * node: Node in tree to be removed. - * - * static ex_node_t * - * ex_iter(ex_t *tree, ex_node_t *start, ex_node_t *(*cb)(ex_t *, - * ex_node_t *, void *), void *arg); - * static ex_node_t * - * ex_reverse_iter(ex_t *tree, ex_node_t *start, ex_node *(*cb)(ex_t *, - * ex_node_t *, void *), void *arg); - * Description: Iterate forward/backward over tree, starting at node. If - * tree is modified, iteration must be immediately - * terminated by the callback function that causes the - * modification. - * Args: - * tree : Pointer to an initialized red-black tree object. - * start: Node at which to start iteration, or NULL to start at - * first/last node. - * cb : Callback function, which is called for each node during - * iteration. Under normal circumstances the callback function - * should return NULL, which causes iteration to continue. If a - * callback function returns non-NULL, iteration is immediately - * terminated and the non-NULL return value is returned by the - * iterator. This is useful for re-starting iteration after - * modifying tree. - * arg : Opaque pointer passed to cb(). - * Ret: NULL if iteration completed, or the non-NULL callback return value - * that caused termination of the iteration. 
- */ -#define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ -a_attr void \ -a_prefix##new(a_rbt_type *rbtree) { \ - rb_new(a_type, a_field, rbtree); \ -} \ -a_attr a_type * \ -a_prefix##first(a_rbt_type *rbtree) { \ - a_type *ret; \ - rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ - return (ret); \ -} \ -a_attr a_type * \ -a_prefix##last(a_rbt_type *rbtree) { \ - a_type *ret; \ - rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ - return (ret); \ -} \ -a_attr a_type * \ -a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ - a_type *ret; \ - if (rbtn_right_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ - rbtn_first(a_type, a_field, rbtree, rbtn_right_get(a_type, \ - a_field, node), ret); \ - } else { \ - a_type *tnode = rbtree->rbt_root; \ - assert(tnode != &rbtree->rbt_nil); \ - ret = &rbtree->rbt_nil; \ - while (true) { \ - int cmp = (a_cmp)(node, tnode); \ - if (cmp < 0) { \ - ret = tnode; \ - tnode = rbtn_left_get(a_type, a_field, tnode); \ - } else if (cmp > 0) { \ - tnode = rbtn_right_get(a_type, a_field, tnode); \ - } else { \ - break; \ - } \ - assert(tnode != &rbtree->rbt_nil); \ - } \ - } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ - return (ret); \ -} \ -a_attr a_type * \ -a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ - a_type *ret; \ - if (rbtn_left_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ - rbtn_last(a_type, a_field, rbtree, rbtn_left_get(a_type, \ - a_field, node), ret); \ - } else { \ - a_type *tnode = rbtree->rbt_root; \ - assert(tnode != &rbtree->rbt_nil); \ - ret = &rbtree->rbt_nil; \ - while (true) { \ - int cmp = (a_cmp)(node, tnode); \ - if (cmp < 0) { \ - tnode = rbtn_left_get(a_type, a_field, tnode); \ - } else if (cmp > 0) { \ - ret = tnode; \ - tnode = rbtn_right_get(a_type, a_field, tnode); \ - } else { \ - break; \ - } \ - assert(tnode != 
&rbtree->rbt_nil); \ - } \ - } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ - return (ret); \ -} \ -a_attr a_type * \ -a_prefix##search(a_rbt_type *rbtree, a_type *key) { \ - a_type *ret; \ - int cmp; \ - ret = rbtree->rbt_root; \ - while (ret != &rbtree->rbt_nil \ - && (cmp = (a_cmp)(key, ret)) != 0) { \ - if (cmp < 0) { \ - ret = rbtn_left_get(a_type, a_field, ret); \ - } else { \ - ret = rbtn_right_get(a_type, a_field, ret); \ - } \ - } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ - return (ret); \ -} \ -a_attr a_type * \ -a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \ - a_type *ret; \ - a_type *tnode = rbtree->rbt_root; \ - ret = &rbtree->rbt_nil; \ - while (tnode != &rbtree->rbt_nil) { \ - int cmp = (a_cmp)(key, tnode); \ - if (cmp < 0) { \ - ret = tnode; \ - tnode = rbtn_left_get(a_type, a_field, tnode); \ - } else if (cmp > 0) { \ - tnode = rbtn_right_get(a_type, a_field, tnode); \ - } else { \ - ret = tnode; \ - break; \ - } \ - } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ - return (ret); \ -} \ -a_attr a_type * \ -a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \ - a_type *ret; \ - a_type *tnode = rbtree->rbt_root; \ - ret = &rbtree->rbt_nil; \ - while (tnode != &rbtree->rbt_nil) { \ - int cmp = (a_cmp)(key, tnode); \ - if (cmp < 0) { \ - tnode = rbtn_left_get(a_type, a_field, tnode); \ - } else if (cmp > 0) { \ - ret = tnode; \ - tnode = rbtn_right_get(a_type, a_field, tnode); \ - } else { \ - ret = tnode; \ - break; \ - } \ - } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ - return (ret); \ -} \ -a_attr void \ -a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ - struct { \ - a_type *node; \ - int cmp; \ - } path[sizeof(void *) << 4], *pathp; \ - rbt_node_new(a_type, a_field, rbtree, node); \ - /* Wind. 
*/ \ - path->node = rbtree->rbt_root; \ - for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ - int cmp = pathp->cmp = a_cmp(node, pathp->node); \ - assert(cmp != 0); \ - if (cmp < 0) { \ - pathp[1].node = rbtn_left_get(a_type, a_field, \ - pathp->node); \ - } else { \ - pathp[1].node = rbtn_right_get(a_type, a_field, \ - pathp->node); \ - } \ - } \ - pathp->node = node; \ - /* Unwind. */ \ - for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ - a_type *cnode = pathp->node; \ - if (pathp->cmp < 0) { \ - a_type *left = pathp[1].node; \ - rbtn_left_set(a_type, a_field, cnode, left); \ - if (rbtn_red_get(a_type, a_field, left)) { \ - a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ - /* Fix up 4-node. */ \ - a_type *tnode; \ - rbtn_black_set(a_type, a_field, leftleft); \ - rbtn_rotate_right(a_type, a_field, cnode, tnode); \ - cnode = tnode; \ - } \ - } else { \ - return; \ - } \ - } else { \ - a_type *right = pathp[1].node; \ - rbtn_right_set(a_type, a_field, cnode, right); \ - if (rbtn_red_get(a_type, a_field, right)) { \ - a_type *left = rbtn_left_get(a_type, a_field, cnode); \ - if (rbtn_red_get(a_type, a_field, left)) { \ - /* Split 4-node. */ \ - rbtn_black_set(a_type, a_field, left); \ - rbtn_black_set(a_type, a_field, right); \ - rbtn_red_set(a_type, a_field, cnode); \ - } else { \ - /* Lean left. */ \ - a_type *tnode; \ - bool tred = rbtn_red_get(a_type, a_field, cnode); \ - rbtn_rotate_left(a_type, a_field, cnode, tnode); \ - rbtn_color_set(a_type, a_field, tnode, tred); \ - rbtn_red_set(a_type, a_field, cnode); \ - cnode = tnode; \ - } \ - } else { \ - return; \ - } \ - } \ - pathp->node = cnode; \ - } \ - /* Set root, and make it black. 
*/ \ - rbtree->rbt_root = path->node; \ - rbtn_black_set(a_type, a_field, rbtree->rbt_root); \ -} \ -a_attr void \ -a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ - struct { \ - a_type *node; \ - int cmp; \ - } *pathp, *nodep, path[sizeof(void *) << 4]; \ - /* Wind. */ \ - nodep = NULL; /* Silence compiler warning. */ \ - path->node = rbtree->rbt_root; \ - for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ - int cmp = pathp->cmp = a_cmp(node, pathp->node); \ - if (cmp < 0) { \ - pathp[1].node = rbtn_left_get(a_type, a_field, \ - pathp->node); \ - } else { \ - pathp[1].node = rbtn_right_get(a_type, a_field, \ - pathp->node); \ - if (cmp == 0) { \ - /* Find node's successor, in preparation for swap. */ \ - pathp->cmp = 1; \ - nodep = pathp; \ - for (pathp++; pathp->node != &rbtree->rbt_nil; \ - pathp++) { \ - pathp->cmp = -1; \ - pathp[1].node = rbtn_left_get(a_type, a_field, \ - pathp->node); \ - } \ - break; \ - } \ - } \ - } \ - assert(nodep->node == node); \ - pathp--; \ - if (pathp->node != node) { \ - /* Swap node with its successor. */ \ - bool tred = rbtn_red_get(a_type, a_field, pathp->node); \ - rbtn_color_set(a_type, a_field, pathp->node, \ - rbtn_red_get(a_type, a_field, node)); \ - rbtn_left_set(a_type, a_field, pathp->node, \ - rbtn_left_get(a_type, a_field, node)); \ - /* If node's successor is its right child, the following code */\ - /* will do the wrong thing for the right child pointer. */\ - /* However, it doesn't matter, because the pointer will be */\ - /* properly set when the successor is pruned. */\ - rbtn_right_set(a_type, a_field, pathp->node, \ - rbtn_right_get(a_type, a_field, node)); \ - rbtn_color_set(a_type, a_field, node, tred); \ - /* The pruned leaf node's child pointers are never accessed */\ - /* again, so don't bother setting them to nil. 
*/\ - nodep->node = pathp->node; \ - pathp->node = node; \ - if (nodep == path) { \ - rbtree->rbt_root = nodep->node; \ - } else { \ - if (nodep[-1].cmp < 0) { \ - rbtn_left_set(a_type, a_field, nodep[-1].node, \ - nodep->node); \ - } else { \ - rbtn_right_set(a_type, a_field, nodep[-1].node, \ - nodep->node); \ - } \ - } \ - } else { \ - a_type *left = rbtn_left_get(a_type, a_field, node); \ - if (left != &rbtree->rbt_nil) { \ - /* node has no successor, but it has a left child. */\ - /* Splice node out, without losing the left child. */\ - assert(rbtn_red_get(a_type, a_field, node) == false); \ - assert(rbtn_red_get(a_type, a_field, left)); \ - rbtn_black_set(a_type, a_field, left); \ - if (pathp == path) { \ - rbtree->rbt_root = left; \ - } else { \ - if (pathp[-1].cmp < 0) { \ - rbtn_left_set(a_type, a_field, pathp[-1].node, \ - left); \ - } else { \ - rbtn_right_set(a_type, a_field, pathp[-1].node, \ - left); \ - } \ - } \ - return; \ - } else if (pathp == path) { \ - /* The tree only contained one node. */ \ - rbtree->rbt_root = &rbtree->rbt_nil; \ - return; \ - } \ - } \ - if (rbtn_red_get(a_type, a_field, pathp->node)) { \ - /* Prune red node, which requires no fixup. */ \ - assert(pathp[-1].cmp < 0); \ - rbtn_left_set(a_type, a_field, pathp[-1].node, \ - &rbtree->rbt_nil); \ - return; \ - } \ - /* The node to be pruned is black, so unwind until balance is */\ - /* restored. 
*/\ - pathp->node = &rbtree->rbt_nil; \ - for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ - assert(pathp->cmp != 0); \ - if (pathp->cmp < 0) { \ - rbtn_left_set(a_type, a_field, pathp->node, \ - pathp[1].node); \ - assert(rbtn_red_get(a_type, a_field, pathp[1].node) \ - == false); \ - if (rbtn_red_get(a_type, a_field, pathp->node)) { \ - a_type *right = rbtn_right_get(a_type, a_field, \ - pathp->node); \ - a_type *rightleft = rbtn_left_get(a_type, a_field, \ - right); \ - a_type *tnode; \ - if (rbtn_red_get(a_type, a_field, rightleft)) { \ - /* In the following diagrams, ||, //, and \\ */\ - /* indicate the path to the removed node. */\ - /* */\ - /* || */\ - /* pathp(r) */\ - /* // \ */\ - /* (b) (b) */\ - /* / */\ - /* (r) */\ - /* */\ - rbtn_black_set(a_type, a_field, pathp->node); \ - rbtn_rotate_right(a_type, a_field, right, tnode); \ - rbtn_right_set(a_type, a_field, pathp->node, tnode);\ - rbtn_rotate_left(a_type, a_field, pathp->node, \ - tnode); \ - } else { \ - /* || */\ - /* pathp(r) */\ - /* // \ */\ - /* (b) (b) */\ - /* / */\ - /* (b) */\ - /* */\ - rbtn_rotate_left(a_type, a_field, pathp->node, \ - tnode); \ - } \ - /* Balance restored, but rotation modified subtree */\ - /* root. 
*/\ - assert((uintptr_t)pathp > (uintptr_t)path); \ - if (pathp[-1].cmp < 0) { \ - rbtn_left_set(a_type, a_field, pathp[-1].node, \ - tnode); \ - } else { \ - rbtn_right_set(a_type, a_field, pathp[-1].node, \ - tnode); \ - } \ - return; \ - } else { \ - a_type *right = rbtn_right_get(a_type, a_field, \ - pathp->node); \ - a_type *rightleft = rbtn_left_get(a_type, a_field, \ - right); \ - if (rbtn_red_get(a_type, a_field, rightleft)) { \ - /* || */\ - /* pathp(b) */\ - /* // \ */\ - /* (b) (b) */\ - /* / */\ - /* (r) */\ - a_type *tnode; \ - rbtn_black_set(a_type, a_field, rightleft); \ - rbtn_rotate_right(a_type, a_field, right, tnode); \ - rbtn_right_set(a_type, a_field, pathp->node, tnode);\ - rbtn_rotate_left(a_type, a_field, pathp->node, \ - tnode); \ - /* Balance restored, but rotation modified */\ - /* subree root, which may actually be the tree */\ - /* root. */\ - if (pathp == path) { \ - /* Set root. */ \ - rbtree->rbt_root = tnode; \ - } else { \ - if (pathp[-1].cmp < 0) { \ - rbtn_left_set(a_type, a_field, \ - pathp[-1].node, tnode); \ - } else { \ - rbtn_right_set(a_type, a_field, \ - pathp[-1].node, tnode); \ - } \ - } \ - return; \ - } else { \ - /* || */\ - /* pathp(b) */\ - /* // \ */\ - /* (b) (b) */\ - /* / */\ - /* (b) */\ - a_type *tnode; \ - rbtn_red_set(a_type, a_field, pathp->node); \ - rbtn_rotate_left(a_type, a_field, pathp->node, \ - tnode); \ - pathp->node = tnode; \ - } \ - } \ - } else { \ - a_type *left; \ - rbtn_right_set(a_type, a_field, pathp->node, \ - pathp[1].node); \ - left = rbtn_left_get(a_type, a_field, pathp->node); \ - if (rbtn_red_get(a_type, a_field, left)) { \ - a_type *tnode; \ - a_type *leftright = rbtn_right_get(a_type, a_field, \ - left); \ - a_type *leftrightleft = rbtn_left_get(a_type, a_field, \ - leftright); \ - if (rbtn_red_get(a_type, a_field, leftrightleft)) { \ - /* || */\ - /* pathp(b) */\ - /* / \\ */\ - /* (r) (b) */\ - /* \ */\ - /* (b) */\ - /* / */\ - /* (r) */\ - a_type *unode; \ - 
rbtn_black_set(a_type, a_field, leftrightleft); \ - rbtn_rotate_right(a_type, a_field, pathp->node, \ - unode); \ - rbtn_rotate_right(a_type, a_field, pathp->node, \ - tnode); \ - rbtn_right_set(a_type, a_field, unode, tnode); \ - rbtn_rotate_left(a_type, a_field, unode, tnode); \ - } else { \ - /* || */\ - /* pathp(b) */\ - /* / \\ */\ - /* (r) (b) */\ - /* \ */\ - /* (b) */\ - /* / */\ - /* (b) */\ - assert(leftright != &rbtree->rbt_nil); \ - rbtn_red_set(a_type, a_field, leftright); \ - rbtn_rotate_right(a_type, a_field, pathp->node, \ - tnode); \ - rbtn_black_set(a_type, a_field, tnode); \ - } \ - /* Balance restored, but rotation modified subtree */\ - /* root, which may actually be the tree root. */\ - if (pathp == path) { \ - /* Set root. */ \ - rbtree->rbt_root = tnode; \ - } else { \ - if (pathp[-1].cmp < 0) { \ - rbtn_left_set(a_type, a_field, pathp[-1].node, \ - tnode); \ - } else { \ - rbtn_right_set(a_type, a_field, pathp[-1].node, \ - tnode); \ - } \ - } \ - return; \ - } else if (rbtn_red_get(a_type, a_field, pathp->node)) { \ - a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ - /* || */\ - /* pathp(r) */\ - /* / \\ */\ - /* (b) (b) */\ - /* / */\ - /* (r) */\ - a_type *tnode; \ - rbtn_black_set(a_type, a_field, pathp->node); \ - rbtn_red_set(a_type, a_field, left); \ - rbtn_black_set(a_type, a_field, leftleft); \ - rbtn_rotate_right(a_type, a_field, pathp->node, \ - tnode); \ - /* Balance restored, but rotation modified */\ - /* subtree root. */\ - assert((uintptr_t)pathp > (uintptr_t)path); \ - if (pathp[-1].cmp < 0) { \ - rbtn_left_set(a_type, a_field, pathp[-1].node, \ - tnode); \ - } else { \ - rbtn_right_set(a_type, a_field, pathp[-1].node, \ - tnode); \ - } \ - return; \ - } else { \ - /* || */\ - /* pathp(r) */\ - /* / \\ */\ - /* (b) (b) */\ - /* / */\ - /* (b) */\ - rbtn_red_set(a_type, a_field, left); \ - rbtn_black_set(a_type, a_field, pathp->node); \ - /* Balance restored. 
*/ \ - return; \ - } \ - } else { \ - a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ - /* || */\ - /* pathp(b) */\ - /* / \\ */\ - /* (b) (b) */\ - /* / */\ - /* (r) */\ - a_type *tnode; \ - rbtn_black_set(a_type, a_field, leftleft); \ - rbtn_rotate_right(a_type, a_field, pathp->node, \ - tnode); \ - /* Balance restored, but rotation modified */\ - /* subtree root, which may actually be the tree */\ - /* root. */\ - if (pathp == path) { \ - /* Set root. */ \ - rbtree->rbt_root = tnode; \ - } else { \ - if (pathp[-1].cmp < 0) { \ - rbtn_left_set(a_type, a_field, \ - pathp[-1].node, tnode); \ - } else { \ - rbtn_right_set(a_type, a_field, \ - pathp[-1].node, tnode); \ - } \ - } \ - return; \ - } else { \ - /* || */\ - /* pathp(b) */\ - /* / \\ */\ - /* (b) (b) */\ - /* / */\ - /* (b) */\ - rbtn_red_set(a_type, a_field, left); \ - } \ - } \ - } \ - } \ - /* Set root. */ \ - rbtree->rbt_root = path->node; \ - assert(rbtn_red_get(a_type, a_field, rbtree->rbt_root) == false); \ -} \ -a_attr a_type * \ -a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \ - a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ - if (node == &rbtree->rbt_nil) { \ - return (&rbtree->rbt_nil); \ - } else { \ - a_type *ret; \ - if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type, \ - a_field, node), cb, arg)) != &rbtree->rbt_nil \ - || (ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ - } \ - return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ - a_field, node), cb, arg)); \ - } \ -} \ -a_attr a_type * \ -a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node, \ - a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ - int cmp = a_cmp(start, node); \ - if (cmp < 0) { \ - a_type *ret; \ - if ((ret = a_prefix##iter_start(rbtree, start, \ - rbtn_left_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ - return 
(ret); \ - } \ - return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ - a_field, node), cb, arg)); \ - } else if (cmp > 0) { \ - return (a_prefix##iter_start(rbtree, start, \ - rbtn_right_get(a_type, a_field, node), cb, arg)); \ - } else { \ - a_type *ret; \ - if ((ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ - } \ - return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ - a_field, node), cb, arg)); \ - } \ -} \ -a_attr a_type * \ -a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ - a_rbt_type *, a_type *, void *), void *arg) { \ - a_type *ret; \ - if (start != NULL) { \ - ret = a_prefix##iter_start(rbtree, start, rbtree->rbt_root, \ - cb, arg); \ - } else { \ - ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\ - } \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ - return (ret); \ -} \ -a_attr a_type * \ -a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node, \ - a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ - if (node == &rbtree->rbt_nil) { \ - return (&rbtree->rbt_nil); \ - } else { \ - a_type *ret; \ - if ((ret = a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_right_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ - } \ - return (a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_left_get(a_type, a_field, node), cb, arg)); \ - } \ -} \ -a_attr a_type * \ -a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start, \ - a_type *node, a_type *(*cb)(a_rbt_type *, a_type *, void *), \ - void *arg) { \ - int cmp = a_cmp(start, node); \ - if (cmp > 0) { \ - a_type *ret; \ - if ((ret = a_prefix##reverse_iter_start(rbtree, start, \ - rbtn_right_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ - } \ - return (a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_left_get(a_type, a_field, node), cb, arg)); \ - } else if (cmp 
< 0) { \ - return (a_prefix##reverse_iter_start(rbtree, start, \ - rbtn_left_get(a_type, a_field, node), cb, arg)); \ - } else { \ - a_type *ret; \ - if ((ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ - } \ - return (a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_left_get(a_type, a_field, node), cb, arg)); \ - } \ -} \ -a_attr a_type * \ -a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ - a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ - a_type *ret; \ - if (start != NULL) { \ - ret = a_prefix##reverse_iter_start(rbtree, start, \ - rbtree->rbt_root, cb, arg); \ - } else { \ - ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root, \ - cb, arg); \ - } \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ - return (ret); \ -} - -#endif /* RB_H_ */ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/rtree.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/rtree.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/rtree.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/rtree.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,164 +0,0 @@ -/* - * This radix tree implementation is tailored to the singular purpose of - * tracking which chunks are currently owned by jemalloc. This functionality - * is mandatory for OS X, where jemalloc must be able to respond to object - * ownership queries. - * - ******************************************************************************* - */ -#ifdef JEMALLOC_H_TYPES - -typedef struct rtree_s rtree_t; - -/* - * Size of each radix tree node (must be a power of 2). This impacts tree - * depth. 
- */ -#if (LG_SIZEOF_PTR == 2) -# define RTREE_NODESIZE (1U << 14) -#else -# define RTREE_NODESIZE CACHELINE -#endif - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct rtree_s { - malloc_mutex_t mutex; - void **root; - unsigned height; - unsigned level2bits[1]; /* Dynamically sized. */ -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -rtree_t *rtree_new(unsigned bits); -void rtree_prefork(rtree_t *rtree); -void rtree_postfork_parent(rtree_t *rtree); -void rtree_postfork_child(rtree_t *rtree); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -#ifndef JEMALLOC_DEBUG -void *rtree_get_locked(rtree_t *rtree, uintptr_t key); -#endif -void *rtree_get(rtree_t *rtree, uintptr_t key); -bool rtree_set(rtree_t *rtree, uintptr_t key, void *val); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) -#define RTREE_GET_GENERATE(f) \ -/* The least significant bits of the key are ignored. */ \ -JEMALLOC_INLINE void * \ -f(rtree_t *rtree, uintptr_t key) \ -{ \ - void *ret; \ - uintptr_t subkey; \ - unsigned i, lshift, height, bits; \ - void **node, **child; \ - \ - RTREE_LOCK(&rtree->mutex); \ - for (i = lshift = 0, height = rtree->height, node = rtree->root;\ - i < height - 1; \ - i++, lshift += bits, node = child) { \ - bits = rtree->level2bits[i]; \ - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \ - 3)) - bits); \ - child = (void**)node[subkey]; \ - if (child == NULL) { \ - RTREE_UNLOCK(&rtree->mutex); \ - return (NULL); \ - } \ - } \ - \ - /* \ - * node is a leaf, so it contains values rather than node \ - * pointers. 
\ - */ \ - bits = rtree->level2bits[i]; \ - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \ - bits); \ - ret = node[subkey]; \ - RTREE_UNLOCK(&rtree->mutex); \ - \ - RTREE_GET_VALIDATE \ - return (ret); \ -} - -#ifdef JEMALLOC_DEBUG -# define RTREE_LOCK(l) malloc_mutex_lock(l) -# define RTREE_UNLOCK(l) malloc_mutex_unlock(l) -# define RTREE_GET_VALIDATE -RTREE_GET_GENERATE(rtree_get_locked) -# undef RTREE_LOCK -# undef RTREE_UNLOCK -# undef RTREE_GET_VALIDATE -#endif - -#define RTREE_LOCK(l) -#define RTREE_UNLOCK(l) -#ifdef JEMALLOC_DEBUG - /* - * Suppose that it were possible for a jemalloc-allocated chunk to be - * munmap()ped, followed by a different allocator in another thread re-using - * overlapping virtual memory, all without invalidating the cached rtree - * value. The result would be a false positive (the rtree would claim that - * jemalloc owns memory that it had actually discarded). This scenario - * seems impossible, but the following assertion is a prudent sanity check. 
- */ -# define RTREE_GET_VALIDATE \ - assert(rtree_get_locked(rtree, key) == ret); -#else -# define RTREE_GET_VALIDATE -#endif -RTREE_GET_GENERATE(rtree_get) -#undef RTREE_LOCK -#undef RTREE_UNLOCK -#undef RTREE_GET_VALIDATE - -JEMALLOC_INLINE bool -rtree_set(rtree_t *rtree, uintptr_t key, void *val) -{ - uintptr_t subkey; - unsigned i, lshift, height, bits; - void **node, **child; - - malloc_mutex_lock(&rtree->mutex); - for (i = lshift = 0, height = rtree->height, node = rtree->root; - i < height - 1; - i++, lshift += bits, node = child) { - bits = rtree->level2bits[i]; - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - - bits); - child = (void**)node[subkey]; - if (child == NULL) { - child = (void**)base_alloc(sizeof(void *) << - rtree->level2bits[i+1]); - if (child == NULL) { - malloc_mutex_unlock(&rtree->mutex); - return (true); - } - memset(child, 0, sizeof(void *) << - rtree->level2bits[i+1]); - node[subkey] = child; - } - } - - /* node is a leaf, so it contains values rather than node pointers. */ - bits = rtree->level2bits[i]; - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits); - node[subkey] = val; - malloc_mutex_unlock(&rtree->mutex); - - return (false); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/size_classes.sh mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/size_classes.sh --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/size_classes.sh 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/size_classes.sh 1970-01-01 00:00:00.000000000 +0000 @@ -1,122 +0,0 @@ -#!/bin/sh - -# The following limits are chosen such that they cover all supported platforms. - -# Range of quanta. -lg_qmin=3 -lg_qmax=4 - -# The range of tiny size classes is [2^lg_tmin..2^(lg_q-1)]. -lg_tmin=3 - -# Range of page sizes. 
-lg_pmin=12 -lg_pmax=16 - -pow2() { - e=$1 - pow2_result=1 - while [ ${e} -gt 0 ] ; do - pow2_result=$((${pow2_result} + ${pow2_result})) - e=$((${e} - 1)) - done -} - -cat < 255) -# error "Too many small size classes" -#endif - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ -EOF diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/stats.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/stats.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/stats.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/stats.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,173 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct tcache_bin_stats_s tcache_bin_stats_t; -typedef struct malloc_bin_stats_s malloc_bin_stats_t; -typedef struct malloc_large_stats_s malloc_large_stats_t; -typedef struct arena_stats_s arena_stats_t; -typedef struct chunk_stats_s chunk_stats_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct tcache_bin_stats_s { - /* - * Number of allocation requests that corresponded to the size of this - * bin. - */ - uint64_t nrequests; -}; - -struct malloc_bin_stats_s { - /* - * Current number of bytes allocated, including objects currently - * cached by tcache. 
- */ - size_t allocated; - - /* - * Total number of allocation/deallocation requests served directly by - * the bin. Note that tcache may allocate an object, then recycle it - * many times, resulting many increments to nrequests, but only one - * each to nmalloc and ndalloc. - */ - uint64_t nmalloc; - uint64_t ndalloc; - - /* - * Number of allocation requests that correspond to the size of this - * bin. This includes requests served by tcache, though tcache only - * periodically merges into this counter. - */ - uint64_t nrequests; - - /* Number of tcache fills from this bin. */ - uint64_t nfills; - - /* Number of tcache flushes to this bin. */ - uint64_t nflushes; - - /* Total number of runs created for this bin's size class. */ - uint64_t nruns; - - /* - * Total number of runs reused by extracting them from the runs tree for - * this bin's size class. - */ - uint64_t reruns; - - /* Current number of runs in this bin. */ - size_t curruns; -}; - -struct malloc_large_stats_s { - /* - * Total number of allocation/deallocation requests served directly by - * the arena. Note that tcache may allocate an object, then recycle it - * many times, resulting many increments to nrequests, but only one - * each to nmalloc and ndalloc. - */ - uint64_t nmalloc; - uint64_t ndalloc; - - /* - * Number of allocation requests that correspond to this size class. - * This includes requests served by tcache, though tcache only - * periodically merges into this counter. - */ - uint64_t nrequests; - - /* Current number of runs of this size class. */ - size_t curruns; -}; - -struct arena_stats_s { - /* Number of bytes currently mapped. */ - size_t mapped; - - /* - * Total number of purge sweeps, total number of madvise calls made, - * and total pages purged in order to keep dirty unused memory under - * control. - */ - uint64_t npurge; - uint64_t nmadvise; - uint64_t purged; - - /* Per-size-category statistics. 
*/ - size_t allocated_large; - uint64_t nmalloc_large; - uint64_t ndalloc_large; - uint64_t nrequests_large; - - /* - * One element for each possible size class, including sizes that - * overlap with bin size classes. This is necessary because ipalloc() - * sometimes has to use such large objects in order to assure proper - * alignment. - */ - malloc_large_stats_t *lstats; -}; - -struct chunk_stats_s { - /* Number of chunks that were allocated. */ - uint64_t nchunks; - - /* High-water mark for number of chunks allocated. */ - size_t highchunks; - - /* - * Current number of chunks allocated. This value isn't maintained for - * any other purpose, so keep track of it in order to be able to set - * highchunks. - */ - size_t curchunks; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern bool opt_stats_print; - -extern size_t stats_cactive; - -void stats_print(void (*write)(void *, const char *), void *cbopaque, - const char *opts); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -size_t stats_cactive_get(void); -void stats_cactive_add(size_t size); -void stats_cactive_sub(size_t size); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_STATS_C_)) -JEMALLOC_INLINE size_t -stats_cactive_get(void) -{ - - return (atomic_read_z(&stats_cactive)); -} - -JEMALLOC_INLINE void -stats_cactive_add(size_t size) -{ - - atomic_add_z(&stats_cactive, size); -} - -JEMALLOC_INLINE void -stats_cactive_sub(size_t size) -{ - - atomic_sub_z(&stats_cactive, size); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/tcache.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/tcache.h --- 
mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/tcache.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/tcache.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,442 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct tcache_bin_info_s tcache_bin_info_t; -typedef struct tcache_bin_s tcache_bin_t; -typedef struct tcache_s tcache_t; - -/* - * tcache pointers close to NULL are used to encode state information that is - * used for two purposes: preventing thread caching on a per thread basis and - * cleaning up during thread shutdown. - */ -#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1) -#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2) -#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) -#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY - -/* - * Absolute maximum number of cache slots for each small bin in the thread - * cache. This is an additional constraint beyond that imposed as: twice the - * number of regions per run for this size class. - * - * This constant must be an even number. - */ -#define TCACHE_NSLOTS_SMALL_MAX 200 - -/* Number of cache slots for large size classes. */ -#define TCACHE_NSLOTS_LARGE 20 - -/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */ -#define LG_TCACHE_MAXCLASS_DEFAULT 15 - -/* - * TCACHE_GC_SWEEP is the approximate number of allocation events between - * full GC sweeps. Integer rounding may cause the actual number to be - * slightly higher, since GC is performed incrementally. - */ -#define TCACHE_GC_SWEEP 8192 - -/* Number of tcache allocation/deallocation events between incremental GCs. */ -#define TCACHE_GC_INCR \ - ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 
0 : 1)) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -typedef enum { - tcache_enabled_false = 0, /* Enable cast to/from bool. */ - tcache_enabled_true = 1, - tcache_enabled_default = 2 -} tcache_enabled_t; - -/* - * Read-only information associated with each element of tcache_t's tbins array - * is stored separately, mainly to reduce memory usage. - */ -struct tcache_bin_info_s { - unsigned ncached_max; /* Upper limit on ncached. */ -}; - -struct tcache_bin_s { - tcache_bin_stats_t tstats; - int low_water; /* Min # cached since last GC. */ - unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ - unsigned ncached; /* # of cached objects. */ - void **avail; /* Stack of available objects. */ -}; - -struct tcache_s { - ql_elm(tcache_t) link; /* Used for aggregating stats. */ - uint64_t prof_accumbytes;/* Cleared after arena_prof_accum() */ - arena_t *arena; /* This thread's arena. */ - unsigned ev_cnt; /* Event count since incremental GC. */ - unsigned next_gc_bin; /* Next bin to GC. */ - tcache_bin_t tbins[1]; /* Dynamically sized. */ - /* - * The pointer stacks associated with tbins follow as a contiguous - * array. During tcache initialization, the avail pointer in each - * element of tbins is initialized to point to the proper offset within - * this array. - */ -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern bool opt_tcache; -extern ssize_t opt_lg_tcache_max; - -extern tcache_bin_info_t *tcache_bin_info; - -/* - * Number of tcache bins. There are NBINS small-object bins, plus 0 or more - * large-object bins. - */ -extern size_t nhbins; - -/* Maximum cached size class. 
*/ -extern size_t tcache_maxclass; - -size_t tcache_salloc(const void *ptr); -void tcache_event_hard(tcache_t *tcache); -void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, - size_t binind); -void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, - tcache_t *tcache); -void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, - tcache_t *tcache); -void tcache_arena_associate(tcache_t *tcache, arena_t *arena); -void tcache_arena_dissociate(tcache_t *tcache); -tcache_t *tcache_create(arena_t *arena); -void tcache_destroy(tcache_t *tcache); -void tcache_thread_cleanup(void *arg); -void tcache_stats_merge(tcache_t *tcache, arena_t *arena); -bool tcache_boot0(void); -bool tcache_boot1(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache, tcache_t *) -malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache_enabled, tcache_enabled_t) - -void tcache_event(tcache_t *tcache); -void tcache_flush(void); -bool tcache_enabled_get(void); -tcache_t *tcache_get(bool create); -void tcache_enabled_set(bool enabled); -void *tcache_alloc_easy(tcache_bin_t *tbin); -void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero); -void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero); -void tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind); -void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) -/* Map of thread-specific caches. */ -malloc_tsd_externs(tcache, tcache_t *) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache, tcache_t *, NULL, - tcache_thread_cleanup) -/* Per thread flag that allows thread caches to be disabled. 
*/ -malloc_tsd_externs(tcache_enabled, tcache_enabled_t) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache_enabled, tcache_enabled_t, - tcache_enabled_default, malloc_tsd_no_cleanup) - -JEMALLOC_INLINE void -tcache_flush(void) -{ - tcache_t *tcache; - - cassert(config_tcache); - - tcache = *tcache_tsd_get(); - if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) - return; - tcache_destroy(tcache); - tcache = NULL; - tcache_tsd_set(&tcache); -} - -JEMALLOC_INLINE bool -tcache_enabled_get(void) -{ - tcache_enabled_t tcache_enabled; - - cassert(config_tcache); - - tcache_enabled = *tcache_enabled_tsd_get(); - if (tcache_enabled == tcache_enabled_default) { - tcache_enabled = (tcache_enabled_t)opt_tcache; - tcache_enabled_tsd_set(&tcache_enabled); - } - - return ((bool)tcache_enabled); -} - -JEMALLOC_INLINE void -tcache_enabled_set(bool enabled) -{ - tcache_enabled_t tcache_enabled; - tcache_t *tcache; - - cassert(config_tcache); - - tcache_enabled = (tcache_enabled_t)enabled; - tcache_enabled_tsd_set(&tcache_enabled); - tcache = *tcache_tsd_get(); - if (enabled) { - if (tcache == TCACHE_STATE_DISABLED) { - tcache = NULL; - tcache_tsd_set(&tcache); - } - } else /* disabled */ { - if (tcache > TCACHE_STATE_MAX) { - tcache_destroy(tcache); - tcache = NULL; - } - if (tcache == NULL) { - tcache = TCACHE_STATE_DISABLED; - tcache_tsd_set(&tcache); - } - } -} - -JEMALLOC_ALWAYS_INLINE tcache_t * -tcache_get(bool create) -{ - tcache_t *tcache; - - if (config_tcache == false) - return (NULL); - if (config_lazy_lock && isthreaded == false) - return (NULL); - - tcache = *tcache_tsd_get(); - if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) { - if (tcache == TCACHE_STATE_DISABLED) - return (NULL); - if (tcache == NULL) { - if (create == false) { - /* - * Creating a tcache here would cause - * allocation as a side effect of free(). - * Ordinarily that would be okay since - * tcache_create() failure is a soft failure - * that doesn't propagate. 
However, if TLS - * data are freed via free() as in glibc, - * subtle corruption could result from setting - * a TLS variable after its backing memory is - * freed. - */ - return (NULL); - } - if (tcache_enabled_get() == false) { - tcache_enabled_set(false); /* Memoize. */ - return (NULL); - } - return (tcache_create(choose_arena(NULL))); - } - if (tcache == TCACHE_STATE_PURGATORY) { - /* - * Make a note that an allocator function was called - * after tcache_thread_cleanup() was called. - */ - tcache = TCACHE_STATE_REINCARNATED; - tcache_tsd_set(&tcache); - return (NULL); - } - if (tcache == TCACHE_STATE_REINCARNATED) - return (NULL); - not_reached(); - } - - return (tcache); -} - -JEMALLOC_ALWAYS_INLINE void -tcache_event(tcache_t *tcache) -{ - - if (TCACHE_GC_INCR == 0) - return; - - tcache->ev_cnt++; - assert(tcache->ev_cnt <= TCACHE_GC_INCR); - if (tcache->ev_cnt == TCACHE_GC_INCR) - tcache_event_hard(tcache); -} - -JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_easy(tcache_bin_t *tbin) -{ - void *ret; - - if (tbin->ncached == 0) { - tbin->low_water = -1; - return (NULL); - } - tbin->ncached--; - if ((int)tbin->ncached < tbin->low_water) - tbin->low_water = tbin->ncached; - ret = tbin->avail[tbin->ncached]; - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) -{ - void *ret; - size_t binind; - tcache_bin_t *tbin; - - binind = SMALL_SIZE2BIN(size); - assert(binind < NBINS); - tbin = &tcache->tbins[binind]; - ret = tcache_alloc_easy(tbin); - if (ret == NULL) { - ret = tcache_alloc_small_hard(tcache, tbin, binind); - if (ret == NULL) - return (NULL); - } - assert(tcache_salloc(ret) == arena_bin_info[binind].reg_size); - - if (zero == false) { - if (config_fill) { - if (opt_junk) { - arena_alloc_junk_small(ret, - &arena_bin_info[binind], false); - } else if (opt_zero) - memset(ret, 0, size); - } - } else { - if (config_fill && opt_junk) { - arena_alloc_junk_small(ret, &arena_bin_info[binind], - true); - } - 
VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - memset(ret, 0, size); - } - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - - if (config_stats) - tbin->tstats.nrequests++; - if (config_prof) - tcache->prof_accumbytes += arena_bin_info[binind].reg_size; - tcache_event(tcache); - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) -{ - void *ret; - size_t binind; - tcache_bin_t *tbin; - - size = PAGE_CEILING(size); - assert(size <= tcache_maxclass); - binind = NBINS + (size >> LG_PAGE) - 1; - assert(binind < nhbins); - tbin = &tcache->tbins[binind]; - ret = tcache_alloc_easy(tbin); - if (ret == NULL) { - /* - * Only allocate one large object at a time, because it's quite - * expensive to create one and not use it. - */ - ret = arena_malloc_large(tcache->arena, size, zero); - if (ret == NULL) - return (NULL); - } else { - if (config_prof && prof_promote && size == PAGE) { - arena_chunk_t *chunk = - (arena_chunk_t *)CHUNK_ADDR2BASE(ret); - size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> - LG_PAGE); - arena_mapbits_large_binind_set(chunk, pageind, - BININD_INVALID); - } - if (zero == false) { - if (config_fill) { - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); - } - } else { - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - memset(ret, 0, size); - } - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - - if (config_stats) - tbin->tstats.nrequests++; - if (config_prof) - tcache->prof_accumbytes += size; - } - - tcache_event(tcache); - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) -{ - tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; - - assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); - - if (config_fill && opt_junk) - arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); - - tbin = &tcache->tbins[binind]; - tbin_info = &tcache_bin_info[binind]; - if (tbin->ncached == tbin_info->ncached_max) { - tcache_bin_flush_small(tbin, 
binind, (tbin_info->ncached_max >> - 1), tcache); - } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->avail[tbin->ncached] = ptr; - tbin->ncached++; - - tcache_event(tcache); -} - -JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) -{ - size_t binind; - tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; - - assert((size & PAGE_MASK) == 0); - assert(tcache_salloc(ptr) > SMALL_MAXCLASS); - assert(tcache_salloc(ptr) <= tcache_maxclass); - - binind = NBINS + (size >> LG_PAGE) - 1; - - if (config_fill && opt_junk) - memset(ptr, 0x5a, size); - - tbin = &tcache->tbins[binind]; - tbin_info = &tcache_bin_info[binind]; - if (tbin->ncached == tbin_info->ncached_max) { - tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >> - 1), tcache); - } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->avail[tbin->ncached] = ptr; - tbin->ncached++; - - tcache_event(tcache); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/tsd.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/tsd.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/tsd.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/tsd.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,397 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* Maximum number of malloc_tsd users with cleanup functions. */ -#define MALLOC_TSD_CLEANUPS_MAX 8 - -typedef bool (*malloc_tsd_cleanup_t)(void); - -/* - * TLS/TSD-agnostic macro-based implementation of thread-specific data. There - * are four macros that support (at least) three use cases: file-private, - * library-private, and library-private inlined. 
Following is an example - * library-private tsd variable: - * - * In example.h: - * typedef struct { - * int x; - * int y; - * } example_t; - * #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0}) - * malloc_tsd_protos(, example, example_t *) - * malloc_tsd_externs(example, example_t *) - * In example.c: - * malloc_tsd_data(, example, example_t *, EX_INITIALIZER) - * malloc_tsd_funcs(, example, example_t *, EX_INITIALIZER, - * example_tsd_cleanup) - * - * The result is a set of generated functions, e.g.: - * - * bool example_tsd_boot(void) {...} - * example_t **example_tsd_get() {...} - * void example_tsd_set(example_t **val) {...} - * - * Note that all of the functions deal in terms of (a_type *) rather than - * (a_type) so that it is possible to support non-pointer types (unlike - * pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is - * cast to (void *). This means that the cleanup function needs to cast *and* - * dereference the function argument, e.g.: - * - * void - * example_tsd_cleanup(void *arg) - * { - * example_t *example = *(example_t **)arg; - * - * [...] - * if ([want the cleanup function to be called again]) { - * example_tsd_set(&example); - * } - * } - * - * If example_tsd_set() is called within example_tsd_cleanup(), it will be - * called again. This is similar to how pthreads TSD destruction works, except - * that pthreads only calls the cleanup function again if the value was set to - * non-NULL. - */ - -/* malloc_tsd_protos(). */ -#define malloc_tsd_protos(a_attr, a_name, a_type) \ -a_attr bool \ -a_name##_tsd_boot(void); \ -a_attr a_type * \ -a_name##_tsd_get(void); \ -a_attr void \ -a_name##_tsd_set(a_type *val); - -/* malloc_tsd_externs(). 
*/ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_externs(a_name, a_type) \ -extern __thread a_type a_name##_tls; \ -extern __thread bool a_name##_initialized; \ -extern bool a_name##_booted; -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_externs(a_name, a_type) \ -extern __thread a_type a_name##_tls; \ -extern pthread_key_t a_name##_tsd; \ -extern bool a_name##_booted; -#elif (defined(_WIN32)) -#define malloc_tsd_externs(a_name, a_type) \ -extern DWORD a_name##_tsd; \ -extern bool a_name##_booted; -#else -#define malloc_tsd_externs(a_name, a_type) \ -extern pthread_key_t a_name##_tsd; \ -extern bool a_name##_booted; -#endif - -/* malloc_tsd_data(). */ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr __thread a_type JEMALLOC_TLS_MODEL \ - a_name##_tls = a_initializer; \ -a_attr __thread bool JEMALLOC_TLS_MODEL \ - a_name##_initialized = false; \ -a_attr bool a_name##_booted = false; -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr __thread a_type JEMALLOC_TLS_MODEL \ - a_name##_tls = a_initializer; \ -a_attr pthread_key_t a_name##_tsd; \ -a_attr bool a_name##_booted = false; -#elif (defined(_WIN32)) -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr DWORD a_name##_tsd; \ -a_attr bool a_name##_booted = false; -#else -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr pthread_key_t a_name##_tsd; \ -a_attr bool a_name##_booted = false; -#endif - -/* malloc_tsd_funcs(). */ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. 
*/ \ -a_attr bool \ -a_name##_tsd_cleanup_wrapper(void) \ -{ \ - \ - if (a_name##_initialized) { \ - a_name##_initialized = false; \ - a_cleanup(&a_name##_tls); \ - } \ - return (a_name##_initialized); \ -} \ -a_attr bool \ -a_name##_tsd_boot(void) \ -{ \ - \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - malloc_tsd_cleanup_register( \ - &a_name##_tsd_cleanup_wrapper); \ - } \ - a_name##_booted = true; \ - return (false); \ -} \ -/* Get/set. */ \ -a_attr a_type * \ -a_name##_tsd_get(void) \ -{ \ - \ - assert(a_name##_booted); \ - return (&a_name##_tls); \ -} \ -a_attr void \ -a_name##_tsd_set(a_type *val) \ -{ \ - \ - assert(a_name##_booted); \ - a_name##_tls = (*val); \ - if (a_cleanup != malloc_tsd_no_cleanup) \ - a_name##_initialized = true; \ -} -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. */ \ -a_attr bool \ -a_name##_tsd_boot(void) \ -{ \ - \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - if (pthread_key_create(&a_name##_tsd, a_cleanup) != 0) \ - return (true); \ - } \ - a_name##_booted = true; \ - return (false); \ -} \ -/* Get/set. */ \ -a_attr a_type * \ -a_name##_tsd_get(void) \ -{ \ - \ - assert(a_name##_booted); \ - return (&a_name##_tls); \ -} \ -a_attr void \ -a_name##_tsd_set(a_type *val) \ -{ \ - \ - assert(a_name##_booted); \ - a_name##_tls = (*val); \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - if (pthread_setspecific(a_name##_tsd, \ - (void *)(&a_name##_tls))) { \ - malloc_write(": Error" \ - " setting TSD for "#a_name"\n"); \ - if (opt_abort) \ - abort(); \ - } \ - } \ -} -#elif (defined(_WIN32)) -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Data structure. */ \ -typedef struct { \ - bool initialized; \ - a_type val; \ -} a_name##_tsd_wrapper_t; \ -/* Initialization/cleanup. 
*/ \ -a_attr bool \ -a_name##_tsd_cleanup_wrapper(void) \ -{ \ - a_name##_tsd_wrapper_t *wrapper; \ - \ - wrapper = (a_name##_tsd_wrapper_t *) TlsGetValue(a_name##_tsd); \ - if (wrapper == NULL) \ - return (false); \ - if (a_cleanup != malloc_tsd_no_cleanup && \ - wrapper->initialized) { \ - a_type val = wrapper->val; \ - a_type tsd_static_data = a_initializer; \ - wrapper->initialized = false; \ - wrapper->val = tsd_static_data; \ - a_cleanup(&val); \ - if (wrapper->initialized) { \ - /* Trigger another cleanup round. */ \ - return (true); \ - } \ - } \ - malloc_tsd_dalloc(wrapper); \ - return (false); \ -} \ -a_attr bool \ -a_name##_tsd_boot(void) \ -{ \ - \ - a_name##_tsd = TlsAlloc(); \ - if (a_name##_tsd == TLS_OUT_OF_INDEXES) \ - return (true); \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - malloc_tsd_cleanup_register( \ - &a_name##_tsd_cleanup_wrapper); \ - } \ - a_name##_booted = true; \ - return (false); \ -} \ -/* Get/set. */ \ -a_attr a_name##_tsd_wrapper_t * \ -a_name##_tsd_get_wrapper(void) \ -{ \ - a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \ - TlsGetValue(a_name##_tsd); \ - \ - if (wrapper == NULL) { \ - wrapper = (a_name##_tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ - if (wrapper == NULL) { \ - malloc_write(": Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } else { \ - static a_type tsd_static_data = a_initializer; \ - wrapper->initialized = false; \ - wrapper->val = tsd_static_data; \ - } \ - if (!TlsSetValue(a_name##_tsd, (void *)wrapper)) { \ - malloc_write(": Error setting" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ - } \ - return (wrapper); \ -} \ -a_attr a_type * \ -a_name##_tsd_get(void) \ -{ \ - a_name##_tsd_wrapper_t *wrapper; \ - \ - assert(a_name##_booted); \ - wrapper = a_name##_tsd_get_wrapper(); \ - return (&wrapper->val); \ -} \ -a_attr void \ -a_name##_tsd_set(a_type *val) \ -{ \ - a_name##_tsd_wrapper_t *wrapper; \ - \ - assert(a_name##_booted); \ - wrapper = 
a_name##_tsd_get_wrapper(); \ - wrapper->val = *(val); \ - if (a_cleanup != malloc_tsd_no_cleanup) \ - wrapper->initialized = true; \ -} -#else -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Data structure. */ \ -typedef struct { \ - bool initialized; \ - a_type val; \ -} a_name##_tsd_wrapper_t; \ -/* Initialization/cleanup. */ \ -a_attr void \ -a_name##_tsd_cleanup_wrapper(void *arg) \ -{ \ - a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *)arg;\ - \ - if (a_cleanup != malloc_tsd_no_cleanup && \ - wrapper->initialized) { \ - wrapper->initialized = false; \ - a_cleanup(&wrapper->val); \ - if (wrapper->initialized) { \ - /* Trigger another cleanup round. */ \ - if (pthread_setspecific(a_name##_tsd, \ - (void *)wrapper)) { \ - malloc_write(": Error" \ - " setting TSD for "#a_name"\n"); \ - if (opt_abort) \ - abort(); \ - } \ - return; \ - } \ - } \ - malloc_tsd_dalloc(wrapper); \ -} \ -a_attr bool \ -a_name##_tsd_boot(void) \ -{ \ - \ - if (pthread_key_create(&a_name##_tsd, \ - a_name##_tsd_cleanup_wrapper) != 0) \ - return (true); \ - a_name##_booted = true; \ - return (false); \ -} \ -/* Get/set. 
*/ \ -a_attr a_name##_tsd_wrapper_t * \ -a_name##_tsd_get_wrapper(void) \ -{ \ - a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \ - pthread_getspecific(a_name##_tsd); \ - \ - if (wrapper == NULL) { \ - wrapper = (a_name##_tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ - if (wrapper == NULL) { \ - malloc_write(": Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } else { \ - static a_type tsd_static_data = a_initializer; \ - wrapper->initialized = false; \ - wrapper->val = tsd_static_data; \ - } \ - if (pthread_setspecific(a_name##_tsd, \ - (void *)wrapper)) { \ - malloc_write(": Error setting" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ - } \ - return (wrapper); \ -} \ -a_attr a_type * \ -a_name##_tsd_get(void) \ -{ \ - a_name##_tsd_wrapper_t *wrapper; \ - \ - assert(a_name##_booted); \ - wrapper = a_name##_tsd_get_wrapper(); \ - return (&wrapper->val); \ -} \ -a_attr void \ -a_name##_tsd_set(a_type *val) \ -{ \ - a_name##_tsd_wrapper_t *wrapper; \ - \ - assert(a_name##_booted); \ - wrapper = a_name##_tsd_get_wrapper(); \ - wrapper->val = *(val); \ - if (a_cleanup != malloc_tsd_no_cleanup) \ - wrapper->initialized = true; \ -} -#endif - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void *malloc_tsd_malloc(size_t size); -void malloc_tsd_dalloc(void *wrapper); -void malloc_tsd_no_cleanup(void *); -void malloc_tsd_cleanup_register(bool (*f)(void)); -void malloc_tsd_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff -Nru 
mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/util.h mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/util.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/internal/util.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/internal/util.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,160 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* Size of stack-allocated buffer passed to buferror(). */ -#define BUFERROR_BUF 64 - -/* - * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be - * large enough for all possible uses within jemalloc. - */ -#define MALLOC_PRINTF_BUFSIZE 4096 - -/* - * Wrap a cpp argument that contains commas such that it isn't broken up into - * multiple arguments. - */ -#define JEMALLOC_CONCAT(...) __VA_ARGS__ - -/* - * Silence compiler warnings due to uninitialized values. This is used - * wherever the compiler fails to recognize that the variable is never used - * uninitialized. - */ -#ifdef JEMALLOC_CC_SILENCE -# define JEMALLOC_CC_SILENCE_INIT(v) = v -#else -# define JEMALLOC_CC_SILENCE_INIT(v) -#endif - -/* - * Define a custom assert() in order to reduce the chances of deadlock during - * assertion failure. - */ -#ifndef assert -#define assert(e) do { \ - if (config_debug && !(e)) { \ - malloc_printf( \ - ": %s:%d: Failed assertion: \"%s\"\n", \ - __FILE__, __LINE__, #e); \ - abort(); \ - } \ -} while (0) -#endif - -/* Use to assert a particular configuration, e.g., cassert(config_debug). 
*/ -#define cassert(c) do { \ - if ((c) == false) \ - assert(false); \ -} while (0) - -#ifndef not_reached -#define not_reached() do { \ - if (config_debug) { \ - malloc_printf( \ - ": %s:%d: Unreachable code reached\n", \ - __FILE__, __LINE__); \ - abort(); \ - } \ -} while (0) -#endif - -#ifndef not_implemented -#define not_implemented() do { \ - if (config_debug) { \ - malloc_printf(": %s:%d: Not implemented\n", \ - __FILE__, __LINE__); \ - abort(); \ - } \ -} while (0) -#endif - -#define assert_not_implemented(e) do { \ - if (config_debug && !(e)) \ - not_implemented(); \ -} while (0) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -int buferror(char *buf, size_t buflen); -uintmax_t malloc_strtoumax(const char *nptr, char **endptr, int base); -void malloc_write(const char *s); - -/* - * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating - * point math. - */ -int malloc_vsnprintf(char *str, size_t size, const char *format, - va_list ap); -int malloc_snprintf(char *str, size_t size, const char *format, ...) - JEMALLOC_ATTR(format(printf, 3, 4)); -void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, va_list ap); -void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, - const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); -void malloc_printf(const char *format, ...) 
- JEMALLOC_ATTR(format(printf, 1, 2)); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -size_t pow2_ceil(size_t x); -void malloc_write(const char *s); -void set_errno(int errnum); -int get_errno(void); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) -/* Compute the smallest power of 2 that is >= x. */ -JEMALLOC_INLINE size_t -pow2_ceil(size_t x) -{ - - x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; -#if (LG_SIZEOF_PTR == 3) - x |= x >> 32; -#endif - x++; - return (x); -} - -/* Sets error code */ -JEMALLOC_INLINE void -set_errno(int errnum) -{ - -#ifdef _WIN32 - SetLastError(errnum); -#else - errno = errnum; -#endif -} - -/* Get last error code */ -JEMALLOC_INLINE int -get_errno(void) -{ - -#ifdef _WIN32 - return (GetLastError()); -#else - return (errno); -#endif -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/jemalloc_defs.h.in mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/jemalloc_defs.h.in --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/jemalloc_defs.h.in 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/jemalloc_defs.h.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,267 +0,0 @@ -/* - * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all - * public APIs to be prefixed. This makes it possible, with some care, to use - * multiple allocators simultaneously. - */ -#undef JEMALLOC_PREFIX -#undef JEMALLOC_CPREFIX - -/* - * Name mangling for public symbols is controlled by --with-mangling and - * --with-jemalloc-prefix. With default settings the je_ prefix is stripped by - * these macro definitions. 
- */ -#undef je_malloc_conf -#undef je_malloc_message -#undef je_malloc -#undef je_calloc -#undef je_posix_memalign -#undef je_aligned_alloc -#undef je_realloc -#undef je_free -#undef je_malloc_usable_size -#undef je_malloc_stats_print -#undef je_mallctl -#undef je_mallctlnametomib -#undef je_mallctlbymib -#undef je_memalign -#undef je_valloc -#undef je_allocm -#undef je_rallocm -#undef je_sallocm -#undef je_dallocm -#undef je_nallocm - -/* - * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. - * For shared libraries, symbol visibility mechanisms prevent these symbols - * from being exported, but for static libraries, naming collisions are a real - * possibility. - */ -#undef JEMALLOC_PRIVATE_NAMESPACE -#undef JEMALLOC_N - -/* - * Hyper-threaded CPUs may need a special instruction inside spin loops in - * order to yield to another virtual CPU. - */ -#undef CPU_SPINWAIT - -/* Defined if the equivalent of FreeBSD's atomic(9) functions are available. */ -#undef JEMALLOC_ATOMIC9 - -/* - * Defined if OSAtomic*() functions are available, as provided by Darwin, and - * documented in the atomic(3) manual page. - */ -#undef JEMALLOC_OSATOMIC - -/* - * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and - * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite - * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the - * functions are defined in libgcc instead of being inlines) - */ -#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 - -/* - * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and - * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite - * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the - * functions are defined in libgcc instead of being inlines) - */ -#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 - -/* - * Defined if OSSpin*() functions are available, as provided by Darwin, and - * documented in the spinlock(3) manual page. 
- */ -#undef JEMALLOC_OSSPIN - -/* - * Defined if _malloc_thread_cleanup() exists. At least in the case of - * FreeBSD, pthread_key_create() allocates, which if used during malloc - * bootstrapping will cause recursion into the pthreads library. Therefore, if - * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in - * malloc_tsd. - */ -#undef JEMALLOC_MALLOC_THREAD_CLEANUP - -/* - * Defined if threaded initialization is known to be safe on this platform. - * Among other things, it must be possible to initialize a mutex without - * triggering allocation in order for threaded allocation to be safe. - */ -#undef JEMALLOC_THREADED_INIT - -/* - * Defined if the pthreads implementation defines - * _pthread_mutex_init_calloc_cb(), in which case the function is used in order - * to avoid recursive allocation during mutex initialization. - */ -#undef JEMALLOC_MUTEX_INIT_CB - -/* Defined if __attribute__((...)) syntax is supported. */ -#undef JEMALLOC_HAVE_ATTR -#ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_ATTR(s) __attribute__((s)) -# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) -# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) -# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) -# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) -#elif _MSC_VER -# define JEMALLOC_ATTR(s) -# ifdef DLLEXPORT -# define JEMALLOC_EXPORT __declspec(dllexport) -# else -# define JEMALLOC_EXPORT __declspec(dllimport) -# endif -# define JEMALLOC_ALIGNED(s) __declspec(align(s)) -# define JEMALLOC_SECTION(s) __declspec(allocate(s)) -# define JEMALLOC_NOINLINE __declspec(noinline) -#else -# define JEMALLOC_ATTR(s) -# define JEMALLOC_EXPORT -# define JEMALLOC_ALIGNED(s) -# define JEMALLOC_SECTION(s) -# define JEMALLOC_NOINLINE -#endif - -/* Defined if sbrk() is supported. */ -#undef JEMALLOC_HAVE_SBRK - -/* Non-empty if the tls_model attribute is supported. 
*/ -#undef JEMALLOC_TLS_MODEL - -/* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */ -#undef JEMALLOC_CC_SILENCE - -/* - * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables - * inline functions. - */ -#undef JEMALLOC_DEBUG - -/* JEMALLOC_STATS enables statistics calculation. */ -#undef JEMALLOC_STATS - -/* JEMALLOC_PROF enables allocation profiling. */ -#undef JEMALLOC_PROF - -/* Use libunwind for profile backtracing if defined. */ -#undef JEMALLOC_PROF_LIBUNWIND - -/* Use libgcc for profile backtracing if defined. */ -#undef JEMALLOC_PROF_LIBGCC - -/* Use gcc intrinsics for profile backtracing if defined. */ -#undef JEMALLOC_PROF_GCC - -/* - * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects. - * This makes it possible to allocate/deallocate objects without any locking - * when the cache is in the steady state. - */ -#undef JEMALLOC_TCACHE - -/* - * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage - * segment (DSS). - */ -#undef JEMALLOC_DSS - -/* Support memory filling (junk/zero/quarantine/redzone). */ -#undef JEMALLOC_FILL - -/* Support the experimental API. */ -#undef JEMALLOC_EXPERIMENTAL - -/* Support utrace(2)-based tracing. */ -#undef JEMALLOC_UTRACE - -/* Support Valgrind. */ -#undef JEMALLOC_VALGRIND - -/* Support optional abort() on OOM. */ -#undef JEMALLOC_XMALLOC - -/* Support lazy locking (avoid locking unless a second thread is launched). */ -#undef JEMALLOC_LAZY_LOCK - -/* One page is 2^STATIC_PAGE_SHIFT bytes. */ -#undef STATIC_PAGE_SHIFT - -/* - * If defined, use munmap() to unmap freed chunks, rather than storing them for - * later reuse. This is disabled by default on Linux because common sequences - * of mmap()/munmap() calls will cause virtual memory map holes. - */ -#undef JEMALLOC_MUNMAP - -/* - * If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). 
This is - * disabled by default because it is Linux-specific and it will cause virtual - * memory map holes, much like munmap(2) does. - */ -#undef JEMALLOC_MREMAP - -/* TLS is used to map arenas and magazine caches to threads. */ -#undef JEMALLOC_TLS - -/* - * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside - * within jemalloc-owned chunks before dereferencing them. - */ -#undef JEMALLOC_IVSALLOC - -/* - * Define overrides for non-standard allocator-related functions if they - * are present on the system. - */ -#undef JEMALLOC_OVERRIDE_MEMALIGN -#undef JEMALLOC_OVERRIDE_VALLOC - -/* - * At least Linux omits the "const" in: - * - * size_t malloc_usable_size(const void *ptr); - * - * Match the operating system's prototype. - */ -#undef JEMALLOC_USABLE_SIZE_CONST - -/* - * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. - */ -#undef JEMALLOC_ZONE -#undef JEMALLOC_ZONE_VERSION - -/* - * Methods for purging unused pages differ between operating systems. - * - * madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages, - * such that new pages will be demand-zeroed if - * the address region is later touched. - * madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being - * unused, such that they will be discarded rather - * than swapped out. - */ -#undef JEMALLOC_PURGE_MADVISE_DONTNEED -#undef JEMALLOC_PURGE_MADVISE_FREE - -/* - * Define if operating system has alloca.h header. - */ -#undef JEMALLOC_HAS_ALLOCA_H - -/* sizeof(void *) == 2^LG_SIZEOF_PTR. */ -#undef LG_SIZEOF_PTR - -/* sizeof(int) == 2^LG_SIZEOF_INT. */ -#undef LG_SIZEOF_INT - -/* sizeof(long) == 2^LG_SIZEOF_LONG. */ -#undef LG_SIZEOF_LONG - -/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. 
*/ -#undef LG_SIZEOF_INTMAX_T diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/jemalloc.h.in mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/jemalloc.h.in --- mariadb-5.5-5.5.39/extra/jemalloc/include/jemalloc/jemalloc.h.in 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/jemalloc/jemalloc.h.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,157 +0,0 @@ -#ifndef JEMALLOC_H_ -#define JEMALLOC_H_ -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -#define JEMALLOC_VERSION "@jemalloc_version@" -#define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@ -#define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@ -#define JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@ -#define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ -#define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" - -#include "jemalloc_defs@install_suffix@.h" - -#ifdef JEMALLOC_EXPERIMENTAL -#define ALLOCM_LG_ALIGN(la) (la) -#if LG_SIZEOF_PTR == 2 -#define ALLOCM_ALIGN(a) (ffs(a)-1) -#else -#define ALLOCM_ALIGN(a) ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) -#endif -#define ALLOCM_ZERO ((int)0x40) -#define ALLOCM_NO_MOVE ((int)0x80) -/* Bias arena index bits so that 0 encodes "ALLOCM_ARENA() unspecified". */ -#define ALLOCM_ARENA(a) ((int)(((a)+1) << 8)) - -#define ALLOCM_SUCCESS 0 -#define ALLOCM_ERR_OOM 1 -#define ALLOCM_ERR_NOT_MOVED 2 -#endif - -/* - * The je_ prefix on the following public symbol declarations is an artifact of - * namespace management, and should be omitted in application code unless - * JEMALLOC_NO_DEMANGLE is defined (see below). 
- */ -extern JEMALLOC_EXPORT const char *je_malloc_conf; -extern JEMALLOC_EXPORT void (*je_malloc_message)(void *cbopaque, - const char *s); - -JEMALLOC_EXPORT void *je_malloc(size_t size) JEMALLOC_ATTR(malloc); -JEMALLOC_EXPORT void *je_calloc(size_t num, size_t size) - JEMALLOC_ATTR(malloc); -JEMALLOC_EXPORT int je_posix_memalign(void **memptr, size_t alignment, - size_t size) JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT void *je_aligned_alloc(size_t alignment, size_t size) - JEMALLOC_ATTR(malloc); -JEMALLOC_EXPORT void *je_realloc(void *ptr, size_t size); -JEMALLOC_EXPORT void je_free(void *ptr); - -#ifdef JEMALLOC_OVERRIDE_MEMALIGN -JEMALLOC_EXPORT void * je_memalign(size_t alignment, size_t size) - JEMALLOC_ATTR(malloc); -#endif - -#ifdef JEMALLOC_OVERRIDE_VALLOC -JEMALLOC_EXPORT void * je_valloc(size_t size) JEMALLOC_ATTR(malloc); -#endif - -JEMALLOC_EXPORT size_t je_malloc_usable_size( - JEMALLOC_USABLE_SIZE_CONST void *ptr); -JEMALLOC_EXPORT void je_malloc_stats_print(void (*write_cb)(void *, - const char *), void *je_cbopaque, const char *opts); -JEMALLOC_EXPORT int je_mallctl(const char *name, void *oldp, - size_t *oldlenp, void *newp, size_t newlen); -JEMALLOC_EXPORT int je_mallctlnametomib(const char *name, size_t *mibp, - size_t *miblenp); -JEMALLOC_EXPORT int je_mallctlbymib(const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen); - -#ifdef JEMALLOC_EXPERIMENTAL -JEMALLOC_EXPORT int je_allocm(void **ptr, size_t *rsize, size_t size, - int flags) JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT int je_rallocm(void **ptr, size_t *rsize, size_t size, - size_t extra, int flags) JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT int je_sallocm(const void *ptr, size_t *rsize, int flags) - JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT int je_dallocm(void *ptr, int flags) - JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT int je_nallocm(size_t *rsize, size_t size, int flags); -#endif - -/* - * By default application code must explicitly refer 
to mangled symbol names, - * so that it is possible to use jemalloc in conjunction with another allocator - * in the same application. Define JEMALLOC_MANGLE in order to cause automatic - * name mangling that matches the API prefixing that happened as a result of - * --with-mangling and/or --with-jemalloc-prefix configuration settings. - */ -#ifdef JEMALLOC_MANGLE -#ifndef JEMALLOC_NO_DEMANGLE -#define JEMALLOC_NO_DEMANGLE -#endif -#define malloc_conf je_malloc_conf -#define malloc_message je_malloc_message -#define malloc je_malloc -#define calloc je_calloc -#define posix_memalign je_posix_memalign -#define aligned_alloc je_aligned_alloc -#define realloc je_realloc -#define free je_free -#define malloc_usable_size je_malloc_usable_size -#define malloc_stats_print je_malloc_stats_print -#define mallctl je_mallctl -#define mallctlnametomib je_mallctlnametomib -#define mallctlbymib je_mallctlbymib -#define memalign je_memalign -#define valloc je_valloc -#ifdef JEMALLOC_EXPERIMENTAL -#define allocm je_allocm -#define rallocm je_rallocm -#define sallocm je_sallocm -#define dallocm je_dallocm -#define nallocm je_nallocm -#endif -#endif - -/* - * The je_* macros can be used as stable alternative names for the public - * jemalloc API if JEMALLOC_NO_DEMANGLE is defined. This is primarily meant - * for use in jemalloc itself, but it can be used by application code to - * provide isolation from the name mangling specified via --with-mangling - * and/or --with-jemalloc-prefix. 
- */ -#ifndef JEMALLOC_NO_DEMANGLE -#undef je_malloc_conf -#undef je_malloc_message -#undef je_malloc -#undef je_calloc -#undef je_posix_memalign -#undef je_aligned_alloc -#undef je_realloc -#undef je_free -#undef je_malloc_usable_size -#undef je_malloc_stats_print -#undef je_mallctl -#undef je_mallctlnametomib -#undef je_mallctlbymib -#undef je_memalign -#undef je_valloc -#ifdef JEMALLOC_EXPERIMENTAL -#undef je_allocm -#undef je_rallocm -#undef je_sallocm -#undef je_dallocm -#undef je_nallocm -#endif -#endif - -#ifdef __cplusplus -}; -#endif -#endif /* JEMALLOC_H_ */ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/msvc_compat/inttypes.h mariadb-5.5-5.5.40/extra/jemalloc/include/msvc_compat/inttypes.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/msvc_compat/inttypes.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/msvc_compat/inttypes.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,313 +0,0 @@ -// ISO C9x compliant inttypes.h for Microsoft Visual Studio -// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 -// -// Copyright (c) 2006 Alexander Chemeris -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// 1. Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. The name of the author may be used to endorse or promote products -// derived from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED -// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO -// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; -// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR -// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF -// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef _MSC_VER // [ -#error "Use this header only with Microsoft Visual C++ compilers!" -#endif // _MSC_VER ] - -#ifndef _MSC_INTTYPES_H_ // [ -#define _MSC_INTTYPES_H_ - -#if _MSC_VER > 1000 -#pragma once -#endif - -#include "stdint.h" - -// 7.8 Format conversion of integer types - -typedef struct { - intmax_t quot; - intmax_t rem; -} imaxdiv_t; - -// 7.8.1 Macros for format specifiers - -#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198 - -#ifdef _WIN64 -# define __PRI64_PREFIX "l" -# define __PRIPTR_PREFIX "l" -#else -# define __PRI64_PREFIX "ll" -# define __PRIPTR_PREFIX -#endif - -// The fprintf macros for signed integers are: -#define PRId8 "d" -#define PRIi8 "i" -#define PRIdLEAST8 "d" -#define PRIiLEAST8 "i" -#define PRIdFAST8 "d" -#define PRIiFAST8 "i" - -#define PRId16 "hd" -#define PRIi16 "hi" -#define PRIdLEAST16 "hd" -#define PRIiLEAST16 "hi" -#define PRIdFAST16 "hd" -#define PRIiFAST16 "hi" - -#define PRId32 "d" -#define PRIi32 "i" -#define PRIdLEAST32 "d" -#define PRIiLEAST32 "i" -#define PRIdFAST32 "d" -#define PRIiFAST32 "i" - -#define PRId64 __PRI64_PREFIX "d" -#define PRIi64 
__PRI64_PREFIX "i" -#define PRIdLEAST64 __PRI64_PREFIX "d" -#define PRIiLEAST64 __PRI64_PREFIX "i" -#define PRIdFAST64 __PRI64_PREFIX "d" -#define PRIiFAST64 __PRI64_PREFIX "i" - -#define PRIdMAX __PRI64_PREFIX "d" -#define PRIiMAX __PRI64_PREFIX "i" - -#define PRIdPTR __PRIPTR_PREFIX "d" -#define PRIiPTR __PRIPTR_PREFIX "i" - -// The fprintf macros for unsigned integers are: -#define PRIo8 "o" -#define PRIu8 "u" -#define PRIx8 "x" -#define PRIX8 "X" -#define PRIoLEAST8 "o" -#define PRIuLEAST8 "u" -#define PRIxLEAST8 "x" -#define PRIXLEAST8 "X" -#define PRIoFAST8 "o" -#define PRIuFAST8 "u" -#define PRIxFAST8 "x" -#define PRIXFAST8 "X" - -#define PRIo16 "ho" -#define PRIu16 "hu" -#define PRIx16 "hx" -#define PRIX16 "hX" -#define PRIoLEAST16 "ho" -#define PRIuLEAST16 "hu" -#define PRIxLEAST16 "hx" -#define PRIXLEAST16 "hX" -#define PRIoFAST16 "ho" -#define PRIuFAST16 "hu" -#define PRIxFAST16 "hx" -#define PRIXFAST16 "hX" - -#define PRIo32 "o" -#define PRIu32 "u" -#define PRIx32 "x" -#define PRIX32 "X" -#define PRIoLEAST32 "o" -#define PRIuLEAST32 "u" -#define PRIxLEAST32 "x" -#define PRIXLEAST32 "X" -#define PRIoFAST32 "o" -#define PRIuFAST32 "u" -#define PRIxFAST32 "x" -#define PRIXFAST32 "X" - -#define PRIo64 __PRI64_PREFIX "o" -#define PRIu64 __PRI64_PREFIX "u" -#define PRIx64 __PRI64_PREFIX "x" -#define PRIX64 __PRI64_PREFIX "X" -#define PRIoLEAST64 __PRI64_PREFIX "o" -#define PRIuLEAST64 __PRI64_PREFIX "u" -#define PRIxLEAST64 __PRI64_PREFIX "x" -#define PRIXLEAST64 __PRI64_PREFIX "X" -#define PRIoFAST64 __PRI64_PREFIX "o" -#define PRIuFAST64 __PRI64_PREFIX "u" -#define PRIxFAST64 __PRI64_PREFIX "x" -#define PRIXFAST64 __PRI64_PREFIX "X" - -#define PRIoMAX __PRI64_PREFIX "o" -#define PRIuMAX __PRI64_PREFIX "u" -#define PRIxMAX __PRI64_PREFIX "x" -#define PRIXMAX __PRI64_PREFIX "X" - -#define PRIoPTR __PRIPTR_PREFIX "o" -#define PRIuPTR __PRIPTR_PREFIX "u" -#define PRIxPTR __PRIPTR_PREFIX "x" -#define PRIXPTR __PRIPTR_PREFIX "X" - -// The fscanf macros for signed 
integers are: -#define SCNd8 "d" -#define SCNi8 "i" -#define SCNdLEAST8 "d" -#define SCNiLEAST8 "i" -#define SCNdFAST8 "d" -#define SCNiFAST8 "i" - -#define SCNd16 "hd" -#define SCNi16 "hi" -#define SCNdLEAST16 "hd" -#define SCNiLEAST16 "hi" -#define SCNdFAST16 "hd" -#define SCNiFAST16 "hi" - -#define SCNd32 "ld" -#define SCNi32 "li" -#define SCNdLEAST32 "ld" -#define SCNiLEAST32 "li" -#define SCNdFAST32 "ld" -#define SCNiFAST32 "li" - -#define SCNd64 "I64d" -#define SCNi64 "I64i" -#define SCNdLEAST64 "I64d" -#define SCNiLEAST64 "I64i" -#define SCNdFAST64 "I64d" -#define SCNiFAST64 "I64i" - -#define SCNdMAX "I64d" -#define SCNiMAX "I64i" - -#ifdef _WIN64 // [ -# define SCNdPTR "I64d" -# define SCNiPTR "I64i" -#else // _WIN64 ][ -# define SCNdPTR "ld" -# define SCNiPTR "li" -#endif // _WIN64 ] - -// The fscanf macros for unsigned integers are: -#define SCNo8 "o" -#define SCNu8 "u" -#define SCNx8 "x" -#define SCNX8 "X" -#define SCNoLEAST8 "o" -#define SCNuLEAST8 "u" -#define SCNxLEAST8 "x" -#define SCNXLEAST8 "X" -#define SCNoFAST8 "o" -#define SCNuFAST8 "u" -#define SCNxFAST8 "x" -#define SCNXFAST8 "X" - -#define SCNo16 "ho" -#define SCNu16 "hu" -#define SCNx16 "hx" -#define SCNX16 "hX" -#define SCNoLEAST16 "ho" -#define SCNuLEAST16 "hu" -#define SCNxLEAST16 "hx" -#define SCNXLEAST16 "hX" -#define SCNoFAST16 "ho" -#define SCNuFAST16 "hu" -#define SCNxFAST16 "hx" -#define SCNXFAST16 "hX" - -#define SCNo32 "lo" -#define SCNu32 "lu" -#define SCNx32 "lx" -#define SCNX32 "lX" -#define SCNoLEAST32 "lo" -#define SCNuLEAST32 "lu" -#define SCNxLEAST32 "lx" -#define SCNXLEAST32 "lX" -#define SCNoFAST32 "lo" -#define SCNuFAST32 "lu" -#define SCNxFAST32 "lx" -#define SCNXFAST32 "lX" - -#define SCNo64 "I64o" -#define SCNu64 "I64u" -#define SCNx64 "I64x" -#define SCNX64 "I64X" -#define SCNoLEAST64 "I64o" -#define SCNuLEAST64 "I64u" -#define SCNxLEAST64 "I64x" -#define SCNXLEAST64 "I64X" -#define SCNoFAST64 "I64o" -#define SCNuFAST64 "I64u" -#define SCNxFAST64 "I64x" -#define 
SCNXFAST64 "I64X" - -#define SCNoMAX "I64o" -#define SCNuMAX "I64u" -#define SCNxMAX "I64x" -#define SCNXMAX "I64X" - -#ifdef _WIN64 // [ -# define SCNoPTR "I64o" -# define SCNuPTR "I64u" -# define SCNxPTR "I64x" -# define SCNXPTR "I64X" -#else // _WIN64 ][ -# define SCNoPTR "lo" -# define SCNuPTR "lu" -# define SCNxPTR "lx" -# define SCNXPTR "lX" -#endif // _WIN64 ] - -#endif // __STDC_FORMAT_MACROS ] - -// 7.8.2 Functions for greatest-width integer types - -// 7.8.2.1 The imaxabs function -#define imaxabs _abs64 - -// 7.8.2.2 The imaxdiv function - -// This is modified version of div() function from Microsoft's div.c found -// in %MSVC.NET%\crt\src\div.c -#ifdef STATIC_IMAXDIV // [ -static -#else // STATIC_IMAXDIV ][ -_inline -#endif // STATIC_IMAXDIV ] -imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) -{ - imaxdiv_t result; - - result.quot = numer / denom; - result.rem = numer % denom; - - if (numer < 0 && result.rem > 0) { - // did division wrong; must fix up - ++result.quot; - result.rem -= denom; - } - - return result; -} - -// 7.8.2.3 The strtoimax and strtoumax functions -#define strtoimax _strtoi64 -#define strtoumax _strtoui64 - -// 7.8.2.4 The wcstoimax and wcstoumax functions -#define wcstoimax _wcstoi64 -#define wcstoumax _wcstoui64 - - -#endif // _MSC_INTTYPES_H_ ] diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/msvc_compat/stdbool.h mariadb-5.5-5.5.40/extra/jemalloc/include/msvc_compat/stdbool.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/msvc_compat/stdbool.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/msvc_compat/stdbool.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,16 +0,0 @@ -#ifndef stdbool_h -#define stdbool_h - -#include - -/* MSVC doesn't define _Bool or bool in C, but does have BOOL */ -/* Note this doesn't pass autoconf's test because (bool) 0.5 != true */ -typedef BOOL _Bool; - -#define bool _Bool -#define true 1 -#define false 0 - -#define __bool_true_false_are_defined 1 - -#endif /* 
stdbool_h */ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/msvc_compat/stdint.h mariadb-5.5-5.5.40/extra/jemalloc/include/msvc_compat/stdint.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/msvc_compat/stdint.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/msvc_compat/stdint.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,247 +0,0 @@ -// ISO C9x compliant stdint.h for Microsoft Visual Studio -// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 -// -// Copyright (c) 2006-2008 Alexander Chemeris -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// 1. Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. The name of the author may be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED -// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO -// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; -// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR -// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF -// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef _MSC_VER // [ -#error "Use this header only with Microsoft Visual C++ compilers!" -#endif // _MSC_VER ] - -#ifndef _MSC_STDINT_H_ // [ -#define _MSC_STDINT_H_ - -#if _MSC_VER > 1000 -#pragma once -#endif - -#include - -// For Visual Studio 6 in C++ mode and for many Visual Studio versions when -// compiling for ARM we should wrap include with 'extern "C++" {}' -// or compiler give many errors like this: -// error C2733: second C linkage of overloaded function 'wmemchr' not allowed -#ifdef __cplusplus -extern "C" { -#endif -# include -#ifdef __cplusplus -} -#endif - -// Define _W64 macros to mark types changing their size, like intptr_t. -#ifndef _W64 -# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 -# define _W64 __w64 -# else -# define _W64 -# endif -#endif - - -// 7.18.1 Integer types - -// 7.18.1.1 Exact-width integer types - -// Visual Studio 6 and Embedded Visual C++ 4 doesn't -// realize that, e.g. char has the same size as __int8 -// so we give up on __intX for them. 
-#if (_MSC_VER < 1300) - typedef signed char int8_t; - typedef signed short int16_t; - typedef signed int int32_t; - typedef unsigned char uint8_t; - typedef unsigned short uint16_t; - typedef unsigned int uint32_t; -#else - typedef signed __int8 int8_t; - typedef signed __int16 int16_t; - typedef signed __int32 int32_t; - typedef unsigned __int8 uint8_t; - typedef unsigned __int16 uint16_t; - typedef unsigned __int32 uint32_t; -#endif -typedef signed __int64 int64_t; -typedef unsigned __int64 uint64_t; - - -// 7.18.1.2 Minimum-width integer types -typedef int8_t int_least8_t; -typedef int16_t int_least16_t; -typedef int32_t int_least32_t; -typedef int64_t int_least64_t; -typedef uint8_t uint_least8_t; -typedef uint16_t uint_least16_t; -typedef uint32_t uint_least32_t; -typedef uint64_t uint_least64_t; - -// 7.18.1.3 Fastest minimum-width integer types -typedef int8_t int_fast8_t; -typedef int16_t int_fast16_t; -typedef int32_t int_fast32_t; -typedef int64_t int_fast64_t; -typedef uint8_t uint_fast8_t; -typedef uint16_t uint_fast16_t; -typedef uint32_t uint_fast32_t; -typedef uint64_t uint_fast64_t; - -// 7.18.1.4 Integer types capable of holding object pointers -#ifdef _WIN64 // [ - typedef signed __int64 intptr_t; - typedef unsigned __int64 uintptr_t; -#else // _WIN64 ][ - typedef _W64 signed int intptr_t; - typedef _W64 unsigned int uintptr_t; -#endif // _WIN64 ] - -// 7.18.1.5 Greatest-width integer types -typedef int64_t intmax_t; -typedef uint64_t uintmax_t; - - -// 7.18.2 Limits of specified-width integer types - -#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 - -// 7.18.2.1 Limits of exact-width integer types -#define INT8_MIN ((int8_t)_I8_MIN) -#define INT8_MAX _I8_MAX -#define INT16_MIN ((int16_t)_I16_MIN) -#define INT16_MAX _I16_MAX -#define INT32_MIN ((int32_t)_I32_MIN) -#define INT32_MAX _I32_MAX -#define INT64_MIN ((int64_t)_I64_MIN) -#define INT64_MAX _I64_MAX -#define 
UINT8_MAX _UI8_MAX -#define UINT16_MAX _UI16_MAX -#define UINT32_MAX _UI32_MAX -#define UINT64_MAX _UI64_MAX - -// 7.18.2.2 Limits of minimum-width integer types -#define INT_LEAST8_MIN INT8_MIN -#define INT_LEAST8_MAX INT8_MAX -#define INT_LEAST16_MIN INT16_MIN -#define INT_LEAST16_MAX INT16_MAX -#define INT_LEAST32_MIN INT32_MIN -#define INT_LEAST32_MAX INT32_MAX -#define INT_LEAST64_MIN INT64_MIN -#define INT_LEAST64_MAX INT64_MAX -#define UINT_LEAST8_MAX UINT8_MAX -#define UINT_LEAST16_MAX UINT16_MAX -#define UINT_LEAST32_MAX UINT32_MAX -#define UINT_LEAST64_MAX UINT64_MAX - -// 7.18.2.3 Limits of fastest minimum-width integer types -#define INT_FAST8_MIN INT8_MIN -#define INT_FAST8_MAX INT8_MAX -#define INT_FAST16_MIN INT16_MIN -#define INT_FAST16_MAX INT16_MAX -#define INT_FAST32_MIN INT32_MIN -#define INT_FAST32_MAX INT32_MAX -#define INT_FAST64_MIN INT64_MIN -#define INT_FAST64_MAX INT64_MAX -#define UINT_FAST8_MAX UINT8_MAX -#define UINT_FAST16_MAX UINT16_MAX -#define UINT_FAST32_MAX UINT32_MAX -#define UINT_FAST64_MAX UINT64_MAX - -// 7.18.2.4 Limits of integer types capable of holding object pointers -#ifdef _WIN64 // [ -# define INTPTR_MIN INT64_MIN -# define INTPTR_MAX INT64_MAX -# define UINTPTR_MAX UINT64_MAX -#else // _WIN64 ][ -# define INTPTR_MIN INT32_MIN -# define INTPTR_MAX INT32_MAX -# define UINTPTR_MAX UINT32_MAX -#endif // _WIN64 ] - -// 7.18.2.5 Limits of greatest-width integer types -#define INTMAX_MIN INT64_MIN -#define INTMAX_MAX INT64_MAX -#define UINTMAX_MAX UINT64_MAX - -// 7.18.3 Limits of other integer types - -#ifdef _WIN64 // [ -# define PTRDIFF_MIN _I64_MIN -# define PTRDIFF_MAX _I64_MAX -#else // _WIN64 ][ -# define PTRDIFF_MIN _I32_MIN -# define PTRDIFF_MAX _I32_MAX -#endif // _WIN64 ] - -#define SIG_ATOMIC_MIN INT_MIN -#define SIG_ATOMIC_MAX INT_MAX - -#ifndef SIZE_MAX // [ -# ifdef _WIN64 // [ -# define SIZE_MAX _UI64_MAX -# else // _WIN64 ][ -# define SIZE_MAX _UI32_MAX -# endif // _WIN64 ] -#endif // SIZE_MAX ] - -// 
WCHAR_MIN and WCHAR_MAX are also defined in -#ifndef WCHAR_MIN // [ -# define WCHAR_MIN 0 -#endif // WCHAR_MIN ] -#ifndef WCHAR_MAX // [ -# define WCHAR_MAX _UI16_MAX -#endif // WCHAR_MAX ] - -#define WINT_MIN 0 -#define WINT_MAX _UI16_MAX - -#endif // __STDC_LIMIT_MACROS ] - - -// 7.18.4 Limits of other integer types - -#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 - -// 7.18.4.1 Macros for minimum-width integer constants - -#define INT8_C(val) val##i8 -#define INT16_C(val) val##i16 -#define INT32_C(val) val##i32 -#define INT64_C(val) val##i64 - -#define UINT8_C(val) val##ui8 -#define UINT16_C(val) val##ui16 -#define UINT32_C(val) val##ui32 -#define UINT64_C(val) val##ui64 - -// 7.18.4.2 Macros for greatest-width integer constants -#define INTMAX_C INT64_C -#define UINTMAX_C UINT64_C - -#endif // __STDC_CONSTANT_MACROS ] - - -#endif // _MSC_STDINT_H_ ] diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/include/msvc_compat/strings.h mariadb-5.5-5.5.40/extra/jemalloc/include/msvc_compat/strings.h --- mariadb-5.5-5.5.39/extra/jemalloc/include/msvc_compat/strings.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/include/msvc_compat/strings.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,23 +0,0 @@ -#ifndef strings_h -#define strings_h - -/* MSVC doesn't define ffs/ffsl. 
This dummy strings.h header is provided - * for both */ -#include -#pragma intrinsic(_BitScanForward) -static __forceinline int ffsl(long x) -{ - unsigned long i; - - if (_BitScanForward(&i, x)) - return (i + 1); - return (0); -} - -static __forceinline int ffs(int x) -{ - - return (ffsl(x)); -} - -#endif diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/INSTALL mariadb-5.5-5.5.40/extra/jemalloc/INSTALL --- mariadb-5.5-5.5.39/extra/jemalloc/INSTALL 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/INSTALL 1970-01-01 00:00:00.000000000 +0000 @@ -1,293 +0,0 @@ -Building and installing jemalloc can be as simple as typing the following while -in the root directory of the source tree: - - ./configure - make - make install - -=== Advanced configuration ===================================================== - -The 'configure' script supports numerous options that allow control of which -functionality is enabled, where jemalloc is installed, etc. Optionally, pass -any of the following arguments (not a definitive list) to 'configure': - ---help - Print a definitive list of options. - ---prefix= - Set the base directory in which to install. For example: - - ./configure --prefix=/usr/local - - will cause files to be installed into /usr/local/include, /usr/local/lib, - and /usr/local/man. - ---with-rpath= - Embed one or more library paths, so that libjemalloc can find the libraries - it is linked to. This works only on ELF-based systems. - ---with-mangling= - Mangle public symbols specified in which is a comma-separated list of - name:mangled pairs. - - For example, to use ld's --wrap option as an alternative method for - overriding libc's malloc implementation, specify something like: - - --with-mangling=malloc:__wrap_malloc,free:__wrap_free[...] - - Note that mangling happens prior to application of the prefix specified by - --with-jemalloc-prefix, and mangled symbols are then ignored when applying - the prefix. 
- ---with-jemalloc-prefix= - Prefix all public APIs with . For example, if is - "prefix_", API changes like the following occur: - - malloc() --> prefix_malloc() - malloc_conf --> prefix_malloc_conf - /etc/malloc.conf --> /etc/prefix_malloc.conf - MALLOC_CONF --> PREFIX_MALLOC_CONF - - This makes it possible to use jemalloc at the same time as the system - allocator, or even to use multiple copies of jemalloc simultaneously. - - By default, the prefix is "", except on OS X, where it is "je_". On OS X, - jemalloc overlays the default malloc zone, but makes no attempt to actually - replace the "malloc", "calloc", etc. symbols. - ---without-export - Don't export public APIs. This can be useful when building jemalloc as a - static library, or to avoid exporting public APIs when using the zone - allocator on OSX. - ---with-private-namespace= - Prefix all library-private APIs with . For shared libraries, - symbol visibility mechanisms prevent these symbols from being exported, but - for static libraries, naming collisions are a real possibility. By - default, the prefix is "" (empty string). - ---with-install-suffix= - Append to the base name of all installed files, such that multiple - versions of jemalloc can coexist in the same installation directory. For - example, libjemalloc.so.0 becomes libjemalloc.so.0. - ---enable-cc-silence - Enable code that silences non-useful compiler warnings. This is helpful - when trying to tell serious warnings from those due to compiler - limitations, but it potentially incurs a performance penalty. - ---enable-debug - Enable assertions and validation code. This incurs a substantial - performance hit, but is very useful during application development. - Implies --enable-ivsalloc. - ---enable-ivsalloc - Enable validation code, which verifies that pointers reside within - jemalloc-owned chunks before dereferencing them. This incurs a substantial - performance hit. - ---disable-stats - Disable statistics gathering functionality. 
See the "opt.stats_print" - option documentation for usage details. - ---enable-prof - Enable heap profiling and leak detection functionality. See the "opt.prof" - option documentation for usage details. When enabled, there are several - approaches to backtracing, and the configure script chooses the first one - in the following list that appears to function correctly: - - + libunwind (requires --enable-prof-libunwind) - + libgcc (unless --disable-prof-libgcc) - + gcc intrinsics (unless --disable-prof-gcc) - ---enable-prof-libunwind - Use the libunwind library (http://www.nongnu.org/libunwind/) for stack - backtracing. - ---disable-prof-libgcc - Disable the use of libgcc's backtracing functionality. - ---disable-prof-gcc - Disable the use of gcc intrinsics for backtracing. - ---with-static-libunwind= - Statically link against the specified libunwind.a rather than dynamically - linking with -lunwind. - ---disable-tcache - Disable thread-specific caches for small objects. Objects are cached and - released in bulk, thus reducing the total number of mutex operations. See - the "opt.tcache" option for usage details. - ---enable-mremap - Enable huge realloc() via mremap(2). mremap() is disabled by default - because the flavor used is specific to Linux, which has a quirk in its - virtual memory allocation algorithm that causes semi-permanent VM map holes - under normal jemalloc operation. - ---disable-munmap - Disable virtual memory deallocation via munmap(2); instead keep track of - the virtual memory for later use. munmap() is disabled by default (i.e. - --disable-munmap is implied) on Linux, which has a quirk in its virtual - memory allocation algorithm that causes semi-permanent VM map holes under - normal jemalloc operation. - ---enable-dss - Enable support for page allocation/deallocation via sbrk(2), in addition to - mmap(2). - ---disable-fill - Disable support for junk/zero filling of memory, quarantine, and redzones. 
- See the "opt.junk", "opt.zero", "opt.quarantine", and "opt.redzone" option - documentation for usage details. - ---disable-valgrind - Disable support for Valgrind. - ---disable-experimental - Disable support for the experimental API (*allocm()). - ---disable-zone-allocator - Disable zone allocator for Darwin. This means jemalloc won't be hooked as - the default allocator on OSX/iOS. - ---enable-utrace - Enable utrace(2)-based allocation tracing. This feature is not broadly - portable (FreeBSD has it, but Linux and OS X do not). - ---enable-xmalloc - Enable support for optional immediate termination due to out-of-memory - errors, as is commonly implemented by "xmalloc" wrapper function for malloc. - See the "opt.xmalloc" option documentation for usage details. - ---enable-lazy-lock - Enable code that wraps pthread_create() to detect when an application - switches from single-threaded to multi-threaded mode, so that it can avoid - mutex locking/unlocking operations while in single-threaded mode. In - practice, this feature usually has little impact on performance unless - thread-specific caching is disabled. - ---disable-tls - Disable thread-local storage (TLS), which allows for fast access to - thread-local variables via the __thread keyword. If TLS is available, - jemalloc uses it for several purposes. - ---with-xslroot= - Specify where to find DocBook XSL stylesheets when building the - documentation. - -The following environment variables (not a definitive list) impact configure's -behavior: - -CFLAGS="?" - Pass these flags to the compiler. You probably shouldn't define this unless - you know what you are doing. (Use EXTRA_CFLAGS instead.) - -EXTRA_CFLAGS="?" - Append these flags to CFLAGS. This makes it possible to add flags such as - -Werror, while allowing the configure script to determine what other flags - are appropriate for the specified configuration. 
- - The configure script specifically checks whether an optimization flag (-O*) - is specified in EXTRA_CFLAGS, and refrains from specifying an optimization - level if it finds that one has already been specified. - -CPPFLAGS="?" - Pass these flags to the C preprocessor. Note that CFLAGS is not passed to - 'cpp' when 'configure' is looking for include files, so you must use - CPPFLAGS instead if you need to help 'configure' find header files. - -LD_LIBRARY_PATH="?" - 'ld' uses this colon-separated list to find libraries. - -LDFLAGS="?" - Pass these flags when linking. - -PATH="?" - 'configure' uses this to find programs. - -=== Advanced compilation ======================================================= - -To build only parts of jemalloc, use the following targets: - - build_lib_shared - build_lib_static - build_lib - build_doc_html - build_doc_man - build_doc - -To install only parts of jemalloc, use the following targets: - - install_bin - install_include - install_lib_shared - install_lib_static - install_lib - install_doc_html - install_doc_man - install_doc - -To clean up build results to varying degrees, use the following make targets: - - clean - distclean - relclean - -=== Advanced installation ====================================================== - -Optionally, define make variables when invoking make, including (not -exclusively): - -INCLUDEDIR="?" - Use this as the installation prefix for header files. - -LIBDIR="?" - Use this as the installation prefix for libraries. - -MANDIR="?" - Use this as the installation prefix for man pages. - -DESTDIR="?" - Prepend DESTDIR to INCLUDEDIR, LIBDIR, DATADIR, and MANDIR. This is useful - when installing to a different path than was specified via --prefix. - -CC="?" - Use this to invoke the C compiler. - -CFLAGS="?" - Pass these flags to the compiler. - -CPPFLAGS="?" - Pass these flags to the C preprocessor. - -LDFLAGS="?" - Pass these flags when linking. - -PATH="?" 
- Use this to search for programs used during configuration and building. - -=== Development ================================================================ - -If you intend to make non-trivial changes to jemalloc, use the 'autogen.sh' -script rather than 'configure'. This re-generates 'configure', enables -configuration dependency rules, and enables re-generation of automatically -generated source files. - -The build system supports using an object directory separate from the source -tree. For example, you can create an 'obj' directory, and from within that -directory, issue configuration and build commands: - - autoconf - mkdir obj - cd obj - ../configure --enable-autogen - make - -=== Documentation ============================================================== - -The manual page is generated in both html and roff formats. Any web browser -can be used to view the html manual. The roff manual page can be formatted -prior to installation via the following command: - - nroff -man -t doc/jemalloc.3 diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/install-sh mariadb-5.5-5.5.40/extra/jemalloc/install-sh --- mariadb-5.5-5.5.39/extra/jemalloc/install-sh 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/install-sh 1970-01-01 00:00:00.000000000 +0000 @@ -1,250 +0,0 @@ -#! /bin/sh -# -# install - install a program, script, or datafile -# This comes from X11R5 (mit/util/scripts/install.sh). -# -# Copyright 1991 by the Massachusetts Institute of Technology -# -# Permission to use, copy, modify, distribute, and sell this software and its -# documentation for any purpose is hereby granted without fee, provided that -# the above copyright notice appear in all copies and that both that -# copyright notice and this permission notice appear in supporting -# documentation, and that the name of M.I.T. not be used in advertising or -# publicity pertaining to distribution of the software without specific, -# written prior permission. M.I.T. 
makes no representations about the -# suitability of this software for any purpose. It is provided "as is" -# without express or implied warranty. -# -# Calling this script install-sh is preferred over install.sh, to prevent -# `make' implicit rules from creating a file called install from it -# when there is no Makefile. -# -# This script is compatible with the BSD install script, but was written -# from scratch. It can only install one file at a time, a restriction -# shared with many OS's install programs. - - -# set DOITPROG to echo to test this script - -# Don't use :- since 4.3BSD and earlier shells don't like it. -doit="${DOITPROG-}" - - -# put in absolute paths if you don't have them in your path; or use env. vars. - -mvprog="${MVPROG-mv}" -cpprog="${CPPROG-cp}" -chmodprog="${CHMODPROG-chmod}" -chownprog="${CHOWNPROG-chown}" -chgrpprog="${CHGRPPROG-chgrp}" -stripprog="${STRIPPROG-strip}" -rmprog="${RMPROG-rm}" -mkdirprog="${MKDIRPROG-mkdir}" - -transformbasename="" -transform_arg="" -instcmd="$mvprog" -chmodcmd="$chmodprog 0755" -chowncmd="" -chgrpcmd="" -stripcmd="" -rmcmd="$rmprog -f" -mvcmd="$mvprog" -src="" -dst="" -dir_arg="" - -while [ x"$1" != x ]; do - case $1 in - -c) instcmd="$cpprog" - shift - continue;; - - -d) dir_arg=true - shift - continue;; - - -m) chmodcmd="$chmodprog $2" - shift - shift - continue;; - - -o) chowncmd="$chownprog $2" - shift - shift - continue;; - - -g) chgrpcmd="$chgrpprog $2" - shift - shift - continue;; - - -s) stripcmd="$stripprog" - shift - continue;; - - -t=*) transformarg=`echo $1 | sed 's/-t=//'` - shift - continue;; - - -b=*) transformbasename=`echo $1 | sed 's/-b=//'` - shift - continue;; - - *) if [ x"$src" = x ] - then - src=$1 - else - # this colon is to work around a 386BSD /bin/sh bug - : - dst=$1 - fi - shift - continue;; - esac -done - -if [ x"$src" = x ] -then - echo "install: no input file specified" - exit 1 -else - true -fi - -if [ x"$dir_arg" != x ]; then - dst=$src - src="" - - if [ -d $dst ]; then - 
instcmd=: - else - instcmd=mkdir - fi -else - -# Waiting for this to be detected by the "$instcmd $src $dsttmp" command -# might cause directories to be created, which would be especially bad -# if $src (and thus $dsttmp) contains '*'. - - if [ -f $src -o -d $src ] - then - true - else - echo "install: $src does not exist" - exit 1 - fi - - if [ x"$dst" = x ] - then - echo "install: no destination specified" - exit 1 - else - true - fi - -# If destination is a directory, append the input filename; if your system -# does not like double slashes in filenames, you may need to add some logic - - if [ -d $dst ] - then - dst="$dst"/`basename $src` - else - true - fi -fi - -## this sed command emulates the dirname command -dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` - -# Make sure that the destination directory exists. -# this part is taken from Noah Friedman's mkinstalldirs script - -# Skip lots of stat calls in the usual case. -if [ ! -d "$dstdir" ]; then -defaultIFS=' -' -IFS="${IFS-${defaultIFS}}" - -oIFS="${IFS}" -# Some sh's can't handle IFS=/ for some reason. -IFS='%' -set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'` -IFS="${oIFS}" - -pathcomp='' - -while [ $# -ne 0 ] ; do - pathcomp="${pathcomp}${1}" - shift - - if [ ! -d "${pathcomp}" ] ; - then - $mkdirprog "${pathcomp}" - else - true - fi - - pathcomp="${pathcomp}/" -done -fi - -if [ x"$dir_arg" != x ] -then - $doit $instcmd $dst && - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi -else - -# If we're going to rename the final executable, determine the name now. 
- - if [ x"$transformarg" = x ] - then - dstfile=`basename $dst` - else - dstfile=`basename $dst $transformbasename | - sed $transformarg`$transformbasename - fi - -# don't allow the sed command to completely eliminate the filename - - if [ x"$dstfile" = x ] - then - dstfile=`basename $dst` - else - true - fi - -# Make a temp file name in the proper directory. - - dsttmp=$dstdir/#inst.$$# - -# Move or copy the file name to the temp name - - $doit $instcmd $src $dsttmp && - - trap "rm -f ${dsttmp}" 0 && - -# and set any options; do chmod last to preserve setuid bits - -# If any of these fail, we abort the whole thing. If we want to -# ignore errors from any of these, just make sure not to ignore -# errors from the above "$doit $instcmd $src $dsttmp" command. - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi && - -# Now rename the file to the real destination. - - $doit $rmcmd -f $dstdir/$dstfile && - $doit $mvcmd $dsttmp $dstdir/$dstfile - -fi && - - -exit 0 diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/Makefile.in mariadb-5.5-5.5.40/extra/jemalloc/Makefile.in --- mariadb-5.5-5.5.39/extra/jemalloc/Makefile.in 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/Makefile.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,324 +0,0 @@ -# Clear out all vpaths, then set just one (default vpath) for the main build -# directory. -vpath -vpath % . - -# Clear the default suffixes, so that built-in rules are not used. -.SUFFIXES : - -SHELL := /bin/sh - -CC := @CC@ - -# Configuration parameters. 
-DESTDIR = -BINDIR := $(DESTDIR)@BINDIR@ -INCLUDEDIR := $(DESTDIR)@INCLUDEDIR@ -LIBDIR := $(DESTDIR)@LIBDIR@ -DATADIR := $(DESTDIR)@DATADIR@ -MANDIR := $(DESTDIR)@MANDIR@ -srcroot := @srcroot@ -objroot := @objroot@ -abs_srcroot := @abs_srcroot@ -abs_objroot := @abs_objroot@ - -# Build parameters. -CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include -CFLAGS := @CFLAGS@ -LDFLAGS := @LDFLAGS@ -EXTRA_LDFLAGS := @EXTRA_LDFLAGS@ -LIBS := @LIBS@ -RPATH_EXTRA := @RPATH_EXTRA@ -SO := @so@ -IMPORTLIB := @importlib@ -O := @o@ -A := @a@ -EXE := @exe@ -LIBPREFIX := @libprefix@ -REV := @rev@ -install_suffix := @install_suffix@ -ABI := @abi@ -XSLTPROC := @XSLTPROC@ -AUTOCONF := @AUTOCONF@ -_RPATH = @RPATH@ -RPATH = $(if $(1),$(call _RPATH,$(1))) -cfghdrs_in := @cfghdrs_in@ -cfghdrs_out := @cfghdrs_out@ -cfgoutputs_in := @cfgoutputs_in@ -cfgoutputs_out := @cfgoutputs_out@ -enable_autogen := @enable_autogen@ -enable_experimental := @enable_experimental@ -enable_zone_allocator := @enable_zone_allocator@ -DSO_LDFLAGS = @DSO_LDFLAGS@ -SOREV = @SOREV@ -PIC_CFLAGS = @PIC_CFLAGS@ -CTARGET = @CTARGET@ -LDTARGET = @LDTARGET@ -MKLIB = @MKLIB@ -CC_MM = @CC_MM@ - -ifeq (macho, $(ABI)) -TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib" -else -ifeq (pecoff, $(ABI)) -TEST_LIBRARY_PATH := PATH="$(PATH):$(objroot)lib" -else -TEST_LIBRARY_PATH := -endif -endif - -LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix) - -# Lists of files. 
-BINS := $(srcroot)bin/pprof $(objroot)bin/jemalloc.sh -CHDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h \ - $(objroot)include/jemalloc/jemalloc_defs$(install_suffix).h -CSRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c $(srcroot)src/atomic.c \ - $(srcroot)src/base.c $(srcroot)src/bitmap.c $(srcroot)src/chunk.c \ - $(srcroot)src/chunk_dss.c $(srcroot)src/chunk_mmap.c \ - $(srcroot)src/ckh.c $(srcroot)src/ctl.c $(srcroot)src/extent.c \ - $(srcroot)src/hash.c $(srcroot)src/huge.c $(srcroot)src/mb.c \ - $(srcroot)src/mutex.c $(srcroot)src/prof.c $(srcroot)src/quarantine.c \ - $(srcroot)src/rtree.c $(srcroot)src/stats.c $(srcroot)src/tcache.c \ - $(srcroot)src/util.c $(srcroot)src/tsd.c -ifeq ($(enable_zone_allocator), 1) -CSRCS += $(srcroot)src/zone.c -endif -ifeq ($(IMPORTLIB),$(SO)) -STATIC_LIBS := $(objroot)lib/$(LIBJEMALLOC).$(A) -endif -ifdef PIC_CFLAGS -STATIC_LIBS += $(objroot)lib/$(LIBJEMALLOC)_pic.$(A) -else -STATIC_LIBS += $(objroot)lib/$(LIBJEMALLOC)_s.$(A) -endif -DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV) -ifneq ($(SOREV),$(SO)) -DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO) -endif -MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 -DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml -DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html) -DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3) -DOCS := $(DOCS_HTML) $(DOCS_MAN3) -CTESTS := $(srcroot)test/aligned_alloc.c $(srcroot)test/allocated.c \ - $(srcroot)test/ALLOCM_ARENA.c $(srcroot)test/bitmap.c \ - $(srcroot)test/mremap.c $(srcroot)test/posix_memalign.c \ - $(srcroot)test/thread_arena.c $(srcroot)test/thread_tcache_enabled.c -ifeq ($(enable_experimental), 1) -CTESTS += $(srcroot)test/allocm.c $(srcroot)test/rallocm.c -endif - -COBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.$(O)) -CPICOBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.$(O)) -CTESTOBJS := $(CTESTS:$(srcroot)%.c=$(objroot)%.$(O)) - -.PHONY: all dist build_doc_html build_doc_man build_doc -.PHONY: install_bin 
install_include install_lib -.PHONY: install_doc_html install_doc_man install_doc install -.PHONY: tests check clean distclean relclean - -.SECONDARY : $(CTESTOBJS) - -# Default target. -all: build - -dist: build_doc - -$(srcroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl - $(XSLTPROC) -o $@ $(objroot)doc/html.xsl $< - -$(srcroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl - $(XSLTPROC) -o $@ $(objroot)doc/manpages.xsl $< - -build_doc_html: $(DOCS_HTML) -build_doc_man: $(DOCS_MAN3) -build_doc: $(DOCS) - -# -# Include generated dependency files. -# -ifdef CC_MM --include $(COBJS:%.$(O)=%.d) --include $(CPICOBJS:%.$(O)=%.d) --include $(CTESTOBJS:%.$(O)=%.d) -endif - -$(COBJS): $(objroot)src/%.$(O): $(srcroot)src/%.c -$(CPICOBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.c -$(CPICOBJS): CFLAGS += $(PIC_CFLAGS) -$(CTESTOBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c -$(CTESTOBJS): CPPFLAGS += -I$(objroot)test -ifneq ($(IMPORTLIB),$(SO)) -$(COBJS): CPPFLAGS += -DDLLEXPORT -endif - -ifndef CC_MM -# Dependencies -HEADER_DIRS = $(srcroot)include/jemalloc/internal \ - $(objroot)include/jemalloc $(objroot)include/jemalloc/internal -HEADERS = $(wildcard $(foreach dir,$(HEADER_DIRS),$(dir)/*.h)) -$(COBJS) $(CPICOBJS) $(CTESTOBJS): $(HEADERS) -$(CTESTOBJS): $(objroot)test/jemalloc_test.h -endif - -$(COBJS) $(CPICOBJS) $(CTESTOBJS): %.$(O): - @mkdir -p $(@D) - $(CC) $(CFLAGS) -c $(CPPFLAGS) $(CTARGET) $< -ifdef CC_MM - @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $< -endif - -ifneq ($(SOREV),$(SO)) -%.$(SO) : %.$(SOREV) - @mkdir -p $(@D) - ln -sf $( $(objroot)$${t}.out 2>&1; \ - if test -e "$(srcroot)$${t}.exp"; then \ - diff -w -u $(srcroot)$${t}.exp \ - $(objroot)$${t}.out >/dev/null 2>&1; \ - fail=$$?; \ - if test "$${fail}" -eq "1" ; then \ - failures=`expr $${failures} + 1`; \ - echo "*** FAIL ***"; \ - else \ - echo "pass"; \ - fi; \ - else \ - echo "*** FAIL *** (.exp file is 
missing)"; \ - failures=`expr $${failures} + 1`; \ - fi; \ - done; \ - echo "========================================="; \ - echo "Failures: $${failures}/$${total}"' - -clean: - rm -f $(COBJS) - rm -f $(CPICOBJS) - rm -f $(COBJS:%.$(O)=%.d) - rm -f $(CPICOBJS:%.$(O)=%.d) - rm -f $(CTESTOBJS:%.$(O)=%$(EXE)) - rm -f $(CTESTOBJS) - rm -f $(CTESTOBJS:%.$(O)=%.d) - rm -f $(CTESTOBJS:%.$(O)=%.out) - rm -f $(DSOS) $(STATIC_LIBS) - -distclean: clean - rm -rf $(objroot)autom4te.cache - rm -f $(objroot)config.log - rm -f $(objroot)config.status - rm -f $(objroot)config.stamp - rm -f $(cfghdrs_out) - rm -f $(cfgoutputs_out) - -relclean: distclean - rm -f $(objroot)configure - rm -f $(srcroot)VERSION - rm -f $(DOCS_HTML) - rm -f $(DOCS_MAN3) - -#=============================================================================== -# Re-configuration rules. - -ifeq ($(enable_autogen), 1) -$(srcroot)configure : $(srcroot)configure.ac - cd ./$(srcroot) && $(AUTOCONF) - -$(objroot)config.status : $(srcroot)configure - ./$(objroot)config.status --recheck - -$(srcroot)config.stamp.in : $(srcroot)configure.ac - echo stamp > $(srcroot)config.stamp.in - -$(objroot)config.stamp : $(cfgoutputs_in) $(cfghdrs_in) $(srcroot)configure - ./$(objroot)config.status - @touch $@ - -# There must be some action in order for make to re-read Makefile when it is -# out of date. -$(cfgoutputs_out) $(cfghdrs_out) : $(objroot)config.stamp - @true -endif diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/README mariadb-5.5-5.5.40/extra/jemalloc/README --- mariadb-5.5-5.5.39/extra/jemalloc/README 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/README 1970-01-01 00:00:00.000000000 +0000 @@ -1,16 +0,0 @@ -jemalloc is a general-purpose scalable concurrent malloc(3) implementation. -This distribution is a "portable" implementation that currently targets -FreeBSD, Linux, Apple OS X, and MinGW. 
jemalloc is included as the default -allocator in the FreeBSD and NetBSD operating systems, and it is used by the -Mozilla Firefox web browser on Microsoft Windows-related platforms. Depending -on your needs, one of the other divergent versions may suit your needs better -than this distribution. - -The COPYING file contains copyright and licensing information. - -The INSTALL file contains information on how to configure, build, and install -jemalloc. - -The ChangeLog file contains a brief summary of changes for each release. - -URL: http://www.canonware.com/jemalloc/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/arena.c mariadb-5.5-5.5.40/extra/jemalloc/src/arena.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/arena.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/arena.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,2365 +0,0 @@ -#define JEMALLOC_ARENA_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Data. 
*/ - -ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; -arena_bin_info_t arena_bin_info[NBINS]; - -JEMALLOC_ALIGNED(CACHELINE) -const uint8_t small_size2bin[] = { -#define S2B_8(i) i, -#define S2B_16(i) S2B_8(i) S2B_8(i) -#define S2B_32(i) S2B_16(i) S2B_16(i) -#define S2B_64(i) S2B_32(i) S2B_32(i) -#define S2B_128(i) S2B_64(i) S2B_64(i) -#define S2B_256(i) S2B_128(i) S2B_128(i) -#define S2B_512(i) S2B_256(i) S2B_256(i) -#define S2B_1024(i) S2B_512(i) S2B_512(i) -#define S2B_2048(i) S2B_1024(i) S2B_1024(i) -#define S2B_4096(i) S2B_2048(i) S2B_2048(i) -#define S2B_8192(i) S2B_4096(i) S2B_4096(i) -#define SIZE_CLASS(bin, delta, size) \ - S2B_##delta(bin) - SIZE_CLASSES -#undef S2B_8 -#undef S2B_16 -#undef S2B_32 -#undef S2B_64 -#undef S2B_128 -#undef S2B_256 -#undef S2B_512 -#undef S2B_1024 -#undef S2B_2048 -#undef S2B_4096 -#undef S2B_8192 -#undef SIZE_CLASS -}; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ - -static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, - size_t pageind, size_t npages, bool maybe_adjac_pred, - bool maybe_adjac_succ); -static void arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, - size_t pageind, size_t npages, bool maybe_adjac_pred, - bool maybe_adjac_succ); -static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, - bool large, size_t binind, bool zero); -static arena_chunk_t *arena_chunk_alloc(arena_t *arena); -static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk); -static arena_run_t *arena_run_alloc_helper(arena_t *arena, size_t size, - bool large, size_t binind, bool zero); -static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large, - size_t binind, bool zero); -static arena_chunk_t *chunks_dirty_iter_cb(arena_chunk_tree_t *tree, - arena_chunk_t *chunk, void *arg); -static void arena_purge(arena_t *arena, bool all); -static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, - bool cleaned); -static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, size_t oldsize, size_t newsize); -static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, size_t oldsize, size_t newsize, bool dirty); -static arena_run_t *arena_bin_runs_first(arena_bin_t *bin); -static void arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run); -static void arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run); -static arena_run_t *arena_bin_nonfull_run_tryget(arena_bin_t *bin); -static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin); -static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin); -static void arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, - arena_bin_t *bin); -static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, arena_bin_t *bin); -static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, - 
arena_run_t *run, arena_bin_t *bin); -static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t oldsize, size_t size); -static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); -static bool arena_ralloc_large(void *ptr, size_t oldsize, size_t size, - size_t extra, bool zero); -static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info, - size_t min_run_size); -static void bin_info_init(void); - -/******************************************************************************/ - -static inline int -arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) -{ - uintptr_t a_mapelm = (uintptr_t)a; - uintptr_t b_mapelm = (uintptr_t)b; - - assert(a != NULL); - assert(b != NULL); - - return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm)); -} - -/* Generate red-black tree functions. */ -rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t, - u.rb_link, arena_run_comp) - -static inline int -arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) -{ - int ret; - size_t a_size = a->bits & ~PAGE_MASK; - size_t b_size = b->bits & ~PAGE_MASK; - - ret = (a_size > b_size) - (a_size < b_size); - if (ret == 0) { - uintptr_t a_mapelm, b_mapelm; - - if ((a->bits & CHUNK_MAP_KEY) != CHUNK_MAP_KEY) - a_mapelm = (uintptr_t)a; - else { - /* - * Treat keys as though they are lower than anything - * else. - */ - a_mapelm = 0; - } - b_mapelm = (uintptr_t)b; - - ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm); - } - - return (ret); -} - -/* Generate red-black tree functions. */ -rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t, - u.rb_link, arena_avail_comp) - -static inline int -arena_chunk_dirty_comp(arena_chunk_t *a, arena_chunk_t *b) -{ - - assert(a != NULL); - assert(b != NULL); - - /* - * Short-circuit for self comparison. 
The following comparison code - * would come to the same result, but at the cost of executing the slow - * path. - */ - if (a == b) - return (0); - - /* - * Order such that chunks with higher fragmentation are "less than" - * those with lower fragmentation -- purging order is from "least" to - * "greatest". Fragmentation is measured as: - * - * mean current avail run size - * -------------------------------- - * mean defragmented avail run size - * - * navail - * ----------- - * nruns_avail nruns_avail-nruns_adjac - * = ========================= = ----------------------- - * navail nruns_avail - * ----------------------- - * nruns_avail-nruns_adjac - * - * The following code multiplies away the denominator prior to - * comparison, in order to avoid division. - * - */ - { - size_t a_val = (a->nruns_avail - a->nruns_adjac) * - b->nruns_avail; - size_t b_val = (b->nruns_avail - b->nruns_adjac) * - a->nruns_avail; - - if (a_val < b_val) - return (1); - if (a_val > b_val) - return (-1); - } - /* - * Break ties by chunk address. For fragmented chunks, report lower - * addresses as "lower", so that fragmentation reduction happens first - * at lower addresses. However, use the opposite ordering for - * unfragmented chunks, in order to increase the chances of - * re-allocating dirty runs. - */ - { - uintptr_t a_chunk = (uintptr_t)a; - uintptr_t b_chunk = (uintptr_t)b; - int ret = ((a_chunk > b_chunk) - (a_chunk < b_chunk)); - if (a->nruns_adjac == 0) { - assert(b->nruns_adjac == 0); - ret = -ret; - } - return (ret); - } -} - -/* Generate red-black tree functions. 
*/ -rb_gen(static UNUSED, arena_chunk_dirty_, arena_chunk_tree_t, arena_chunk_t, - dirty_link, arena_chunk_dirty_comp) - -static inline bool -arena_avail_adjac_pred(arena_chunk_t *chunk, size_t pageind) -{ - bool ret; - - if (pageind-1 < map_bias) - ret = false; - else { - ret = (arena_mapbits_allocated_get(chunk, pageind-1) == 0); - assert(ret == false || arena_mapbits_dirty_get(chunk, - pageind-1) != arena_mapbits_dirty_get(chunk, pageind)); - } - return (ret); -} - -static inline bool -arena_avail_adjac_succ(arena_chunk_t *chunk, size_t pageind, size_t npages) -{ - bool ret; - - if (pageind+npages == chunk_npages) - ret = false; - else { - assert(pageind+npages < chunk_npages); - ret = (arena_mapbits_allocated_get(chunk, pageind+npages) == 0); - assert(ret == false || arena_mapbits_dirty_get(chunk, pageind) - != arena_mapbits_dirty_get(chunk, pageind+npages)); - } - return (ret); -} - -static inline bool -arena_avail_adjac(arena_chunk_t *chunk, size_t pageind, size_t npages) -{ - - return (arena_avail_adjac_pred(chunk, pageind) || - arena_avail_adjac_succ(chunk, pageind, npages)); -} - -static void -arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, - size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ) -{ - - assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> - LG_PAGE)); - - /* - * chunks_dirty is keyed by nruns_{avail,adjac}, so the chunk must be - * removed and reinserted even if the run to be inserted is clean. 
- */ - if (chunk->ndirty != 0) - arena_chunk_dirty_remove(&arena->chunks_dirty, chunk); - - if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind)) - chunk->nruns_adjac++; - if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages)) - chunk->nruns_adjac++; - chunk->nruns_avail++; - assert(chunk->nruns_avail > chunk->nruns_adjac); - - if (arena_mapbits_dirty_get(chunk, pageind) != 0) { - arena->ndirty += npages; - chunk->ndirty += npages; - } - if (chunk->ndirty != 0) - arena_chunk_dirty_insert(&arena->chunks_dirty, chunk); - - arena_avail_tree_insert(&arena->runs_avail, arena_mapp_get(chunk, - pageind)); -} - -static void -arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, - size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ) -{ - - assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> - LG_PAGE)); - - /* - * chunks_dirty is keyed by nruns_{avail,adjac}, so the chunk must be - * removed and reinserted even if the run to be removed is clean. 
- */ - if (chunk->ndirty != 0) - arena_chunk_dirty_remove(&arena->chunks_dirty, chunk); - - if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind)) - chunk->nruns_adjac--; - if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages)) - chunk->nruns_adjac--; - chunk->nruns_avail--; - assert(chunk->nruns_avail > chunk->nruns_adjac || (chunk->nruns_avail - == 0 && chunk->nruns_adjac == 0)); - - if (arena_mapbits_dirty_get(chunk, pageind) != 0) { - arena->ndirty -= npages; - chunk->ndirty -= npages; - } - if (chunk->ndirty != 0) - arena_chunk_dirty_insert(&arena->chunks_dirty, chunk); - - arena_avail_tree_remove(&arena->runs_avail, arena_mapp_get(chunk, - pageind)); -} - -static inline void * -arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) -{ - void *ret; - unsigned regind; - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); - - assert(run->nfree > 0); - assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false); - - regind = bitmap_sfu(bitmap, &bin_info->bitmap_info); - ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset + - (uintptr_t)(bin_info->reg_interval * regind)); - run->nfree--; - if (regind == run->nextind) - run->nextind++; - assert(regind < run->nextind); - return (ret); -} - -static inline void -arena_run_reg_dalloc(arena_run_t *run, void *ptr) -{ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - size_t mapbits = arena_mapbits_get(chunk, pageind); - size_t binind = arena_ptr_small_binind_get(ptr, mapbits); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - unsigned regind = arena_run_regind(run, bin_info, ptr); - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); - - assert(run->nfree < bin_info->nregs); - /* Freeing an interior pointer can cause assertion failure. 
*/ - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % - (uintptr_t)bin_info->reg_interval == 0); - assert((uintptr_t)ptr >= (uintptr_t)run + - (uintptr_t)bin_info->reg0_offset); - /* Freeing an unallocated pointer can cause assertion failure. */ - assert(bitmap_get(bitmap, &bin_info->bitmap_info, regind)); - - bitmap_unset(bitmap, &bin_info->bitmap_info, regind); - run->nfree++; -} - -static inline void -arena_run_zero(arena_chunk_t *chunk, size_t run_ind, size_t npages) -{ - - VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << - LG_PAGE)), (npages << LG_PAGE)); - memset((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), 0, - (npages << LG_PAGE)); -} - -static inline void -arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) -{ - size_t i; - UNUSED size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << LG_PAGE)); - - VALGRIND_MAKE_MEM_DEFINED((void *)((uintptr_t)chunk + (run_ind << - LG_PAGE)), PAGE); - for (i = 0; i < PAGE / sizeof(size_t); i++) - assert(p[i] == 0); -} - -static void -arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, - size_t binind, bool zero) -{ - arena_chunk_t *chunk; - size_t run_ind, total_pages, need_pages, rem_pages, i; - size_t flag_dirty; - - assert((large && binind == BININD_INVALID) || (large == false && binind - != BININD_INVALID)); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); - flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); - total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >> - LG_PAGE; - assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) == - flag_dirty); - need_pages = (size >> LG_PAGE); - assert(need_pages > 0); - assert(need_pages <= total_pages); - rem_pages = total_pages - need_pages; - - arena_avail_remove(arena, chunk, run_ind, total_pages, true, true); - if (config_stats) { - /* - * Update stats_cactive if nactive is crossing a 
chunk - * multiple. - */ - size_t cactive_diff = CHUNK_CEILING((arena->nactive + - need_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive << - LG_PAGE); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); - } - arena->nactive += need_pages; - - /* Keep track of trailing unused pages for later use. */ - if (rem_pages > 0) { - if (flag_dirty != 0) { - arena_mapbits_unallocated_set(chunk, run_ind+need_pages, - (rem_pages << LG_PAGE), CHUNK_MAP_DIRTY); - arena_mapbits_unallocated_set(chunk, - run_ind+total_pages-1, (rem_pages << LG_PAGE), - CHUNK_MAP_DIRTY); - } else { - arena_mapbits_unallocated_set(chunk, run_ind+need_pages, - (rem_pages << LG_PAGE), - arena_mapbits_unzeroed_get(chunk, - run_ind+need_pages)); - arena_mapbits_unallocated_set(chunk, - run_ind+total_pages-1, (rem_pages << LG_PAGE), - arena_mapbits_unzeroed_get(chunk, - run_ind+total_pages-1)); - } - arena_avail_insert(arena, chunk, run_ind+need_pages, rem_pages, - false, true); - } - - /* - * Update the page map separately for large vs. small runs, since it is - * possible to avoid iteration for large mallocs. - */ - if (large) { - if (zero) { - if (flag_dirty == 0) { - /* - * The run is clean, so some pages may be - * zeroed (i.e. never before touched). - */ - for (i = 0; i < need_pages; i++) { - if (arena_mapbits_unzeroed_get(chunk, - run_ind+i) != 0) { - arena_run_zero(chunk, run_ind+i, - 1); - } else if (config_debug) { - arena_run_page_validate_zeroed( - chunk, run_ind+i); - } - } - } else { - /* - * The run is dirty, so all pages must be - * zeroed. - */ - arena_run_zero(chunk, run_ind, need_pages); - } - } - - /* - * Set the last element first, in case the run only contains one - * page (i.e. both statements set the same element). 
- */ - arena_mapbits_large_set(chunk, run_ind+need_pages-1, 0, - flag_dirty); - arena_mapbits_large_set(chunk, run_ind, size, flag_dirty); - } else { - assert(zero == false); - /* - * Propagate the dirty and unzeroed flags to the allocated - * small run, so that arena_dalloc_bin_run() has the ability to - * conditionally trim clean pages. - */ - arena_mapbits_small_set(chunk, run_ind, 0, binind, flag_dirty); - /* - * The first page will always be dirtied during small run - * initialization, so a validation failure here would not - * actually cause an observable failure. - */ - if (config_debug && flag_dirty == 0 && - arena_mapbits_unzeroed_get(chunk, run_ind) == 0) - arena_run_page_validate_zeroed(chunk, run_ind); - for (i = 1; i < need_pages - 1; i++) { - arena_mapbits_small_set(chunk, run_ind+i, i, binind, 0); - if (config_debug && flag_dirty == 0 && - arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0) { - arena_run_page_validate_zeroed(chunk, - run_ind+i); - } - } - arena_mapbits_small_set(chunk, run_ind+need_pages-1, - need_pages-1, binind, flag_dirty); - if (config_debug && flag_dirty == 0 && - arena_mapbits_unzeroed_get(chunk, run_ind+need_pages-1) == - 0) { - arena_run_page_validate_zeroed(chunk, - run_ind+need_pages-1); - } - } - VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << - LG_PAGE)), (need_pages << LG_PAGE)); -} - -static arena_chunk_t * -arena_chunk_alloc(arena_t *arena) -{ - arena_chunk_t *chunk; - size_t i; - - if (arena->spare != NULL) { - chunk = arena->spare; - arena->spare = NULL; - - assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); - assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); - assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == - arena_maxclass); - assert(arena_mapbits_unallocated_size_get(chunk, - chunk_npages-1) == arena_maxclass); - assert(arena_mapbits_dirty_get(chunk, map_bias) == - arena_mapbits_dirty_get(chunk, chunk_npages-1)); - } else { - bool zero; - size_t unzeroed; - - 
zero = false; - malloc_mutex_unlock(&arena->lock); - chunk = (arena_chunk_t *)chunk_alloc(chunksize, chunksize, - false, &zero, arena->dss_prec); - malloc_mutex_lock(&arena->lock); - if (chunk == NULL) - return (NULL); - if (config_stats) - arena->stats.mapped += chunksize; - - chunk->arena = arena; - - /* - * Claim that no pages are in use, since the header is merely - * overhead. - */ - chunk->ndirty = 0; - - chunk->nruns_avail = 0; - chunk->nruns_adjac = 0; - - /* - * Initialize the map to contain one maximal free untouched run. - * Mark the pages as zeroed iff chunk_alloc() returned a zeroed - * chunk. - */ - unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED; - arena_mapbits_unallocated_set(chunk, map_bias, arena_maxclass, - unzeroed); - /* - * There is no need to initialize the internal page map entries - * unless the chunk is not zeroed. - */ - if (zero == false) { - for (i = map_bias+1; i < chunk_npages-1; i++) - arena_mapbits_unzeroed_set(chunk, i, unzeroed); - } else if (config_debug) { - VALGRIND_MAKE_MEM_DEFINED( - (void *)arena_mapp_get(chunk, map_bias+1), - (void *)((uintptr_t) - arena_mapp_get(chunk, chunk_npages-1) - - (uintptr_t)arena_mapp_get(chunk, map_bias+1))); - for (i = map_bias+1; i < chunk_npages-1; i++) { - assert(arena_mapbits_unzeroed_get(chunk, i) == - unzeroed); - } - } - arena_mapbits_unallocated_set(chunk, chunk_npages-1, - arena_maxclass, unzeroed); - } - - /* Insert the run into the runs_avail tree. 
*/ - arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias, - false, false); - - return (chunk); -} - -static void -arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) -{ - assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); - assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); - assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == - arena_maxclass); - assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == - arena_maxclass); - assert(arena_mapbits_dirty_get(chunk, map_bias) == - arena_mapbits_dirty_get(chunk, chunk_npages-1)); - - /* - * Remove run from the runs_avail tree, so that the arena does not use - * it. - */ - arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias, - false, false); - - if (arena->spare != NULL) { - arena_chunk_t *spare = arena->spare; - - arena->spare = chunk; - malloc_mutex_unlock(&arena->lock); - chunk_dealloc((void *)spare, chunksize, true); - malloc_mutex_lock(&arena->lock); - if (config_stats) - arena->stats.mapped -= chunksize; - } else - arena->spare = chunk; -} - -static arena_run_t * -arena_run_alloc_helper(arena_t *arena, size_t size, bool large, size_t binind, - bool zero) -{ - arena_run_t *run; - arena_chunk_map_t *mapelm, key; - - key.bits = size | CHUNK_MAP_KEY; - mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key); - if (mapelm != NULL) { - arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = (((uintptr_t)mapelm - - (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) - + map_bias; - - run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - LG_PAGE)); - arena_run_split(arena, run, size, large, binind, zero); - return (run); - } - - return (NULL); -} - -static arena_run_t * -arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, - bool zero) -{ - arena_chunk_t *chunk; - arena_run_t *run; - - assert(size <= arena_maxclass); - assert((size & PAGE_MASK) == 0); - assert((large && binind == BININD_INVALID) || 
(large == false && binind - != BININD_INVALID)); - - /* Search the arena's chunks for the lowest best fit. */ - run = arena_run_alloc_helper(arena, size, large, binind, zero); - if (run != NULL) - return (run); - - /* - * No usable runs. Create a new chunk from which to allocate the run. - */ - chunk = arena_chunk_alloc(arena); - if (chunk != NULL) { - run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE)); - arena_run_split(arena, run, size, large, binind, zero); - return (run); - } - - /* - * arena_chunk_alloc() failed, but another thread may have made - * sufficient memory available while this one dropped arena->lock in - * arena_chunk_alloc(), so search one more time. - */ - return (arena_run_alloc_helper(arena, size, large, binind, zero)); -} - -static inline void -arena_maybe_purge(arena_t *arena) -{ - size_t npurgeable, threshold; - - /* Don't purge if the option is disabled. */ - if (opt_lg_dirty_mult < 0) - return; - /* Don't purge if all dirty pages are already being purged. */ - if (arena->ndirty <= arena->npurgatory) - return; - npurgeable = arena->ndirty - arena->npurgatory; - threshold = (arena->nactive >> opt_lg_dirty_mult); - /* - * Don't purge unless the number of purgeable pages exceeds the - * threshold. - */ - if (npurgeable <= threshold) - return; - - arena_purge(arena, false); -} - -static inline size_t -arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk, bool all) -{ - size_t npurged; - ql_head(arena_chunk_map_t) mapelms; - arena_chunk_map_t *mapelm; - size_t pageind, npages; - size_t nmadvise; - - ql_new(&mapelms); - - /* - * If chunk is the spare, temporarily re-allocate it, 1) so that its - * run is reinserted into runs_avail, and 2) so that it cannot be - * completely discarded by another thread while arena->lock is dropped - * by this thread. Note that the arena_run_dalloc() call will - * implicitly deallocate the chunk, so no explicit action is required - * in this function to deallocate the chunk. 
- * - * Note that once a chunk contains dirty pages, it cannot again contain - * a single run unless 1) it is a dirty run, or 2) this function purges - * dirty pages and causes the transition to a single clean run. Thus - * (chunk == arena->spare) is possible, but it is not possible for - * this function to be called on the spare unless it contains a dirty - * run. - */ - if (chunk == arena->spare) { - assert(arena_mapbits_dirty_get(chunk, map_bias) != 0); - assert(arena_mapbits_dirty_get(chunk, chunk_npages-1) != 0); - - arena_chunk_alloc(arena); - } - - if (config_stats) - arena->stats.purged += chunk->ndirty; - - /* - * Operate on all dirty runs if there is no clean/dirty run - * fragmentation. - */ - if (chunk->nruns_adjac == 0) - all = true; - - /* - * Temporarily allocate free dirty runs within chunk. If all is false, - * only operate on dirty runs that are fragments; otherwise operate on - * all dirty runs. - */ - for (pageind = map_bias; pageind < chunk_npages; pageind += npages) { - mapelm = arena_mapp_get(chunk, pageind); - if (arena_mapbits_allocated_get(chunk, pageind) == 0) { - size_t run_size = - arena_mapbits_unallocated_size_get(chunk, pageind); - - npages = run_size >> LG_PAGE; - assert(pageind + npages <= chunk_npages); - assert(arena_mapbits_dirty_get(chunk, pageind) == - arena_mapbits_dirty_get(chunk, pageind+npages-1)); - - if (arena_mapbits_dirty_get(chunk, pageind) != 0 && - (all || arena_avail_adjac(chunk, pageind, - npages))) { - arena_run_t *run = (arena_run_t *)((uintptr_t) - chunk + (uintptr_t)(pageind << LG_PAGE)); - - arena_run_split(arena, run, run_size, true, - BININD_INVALID, false); - /* Append to list for later processing. */ - ql_elm_new(mapelm, u.ql_link); - ql_tail_insert(&mapelms, mapelm, u.ql_link); - } - } else { - /* Skip run. 
*/ - if (arena_mapbits_large_get(chunk, pageind) != 0) { - npages = arena_mapbits_large_size_get(chunk, - pageind) >> LG_PAGE; - } else { - size_t binind; - arena_bin_info_t *bin_info; - arena_run_t *run = (arena_run_t *)((uintptr_t) - chunk + (uintptr_t)(pageind << LG_PAGE)); - - assert(arena_mapbits_small_runind_get(chunk, - pageind) == 0); - binind = arena_bin_index(arena, run->bin); - bin_info = &arena_bin_info[binind]; - npages = bin_info->run_size >> LG_PAGE; - } - } - } - assert(pageind == chunk_npages); - assert(chunk->ndirty == 0 || all == false); - assert(chunk->nruns_adjac == 0); - - malloc_mutex_unlock(&arena->lock); - if (config_stats) - nmadvise = 0; - npurged = 0; - ql_foreach(mapelm, &mapelms, u.ql_link) { - bool unzeroed; - size_t flag_unzeroed, i; - - pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)) + map_bias; - npages = arena_mapbits_large_size_get(chunk, pageind) >> - LG_PAGE; - assert(pageind + npages <= chunk_npages); - unzeroed = pages_purge((void *)((uintptr_t)chunk + (pageind << - LG_PAGE)), (npages << LG_PAGE)); - flag_unzeroed = unzeroed ? CHUNK_MAP_UNZEROED : 0; - /* - * Set the unzeroed flag for all pages, now that pages_purge() - * has returned whether the pages were zeroed as a side effect - * of purging. This chunk map modification is safe even though - * the arena mutex isn't currently owned by this thread, - * because the run is marked as allocated, thus protecting it - * from being modified by any other thread. As long as these - * writes don't perturb the first and last elements' - * CHUNK_MAP_ALLOCATED bits, behavior is well defined. - */ - for (i = 0; i < npages; i++) { - arena_mapbits_unzeroed_set(chunk, pageind+i, - flag_unzeroed); - } - npurged += npages; - if (config_stats) - nmadvise++; - } - malloc_mutex_lock(&arena->lock); - if (config_stats) - arena->stats.nmadvise += nmadvise; - - /* Deallocate runs. 
*/ - for (mapelm = ql_first(&mapelms); mapelm != NULL; - mapelm = ql_first(&mapelms)) { - arena_run_t *run; - - pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)) + map_bias; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind << - LG_PAGE)); - ql_remove(&mapelms, mapelm, u.ql_link); - arena_run_dalloc(arena, run, false, true); - } - - return (npurged); -} - -static arena_chunk_t * -chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg) -{ - size_t *ndirty = (size_t *)arg; - - assert(chunk->ndirty != 0); - *ndirty += chunk->ndirty; - return (NULL); -} - -static void -arena_purge(arena_t *arena, bool all) -{ - arena_chunk_t *chunk; - size_t npurgatory; - if (config_debug) { - size_t ndirty = 0; - - arena_chunk_dirty_iter(&arena->chunks_dirty, NULL, - chunks_dirty_iter_cb, (void *)&ndirty); - assert(ndirty == arena->ndirty); - } - assert(arena->ndirty > arena->npurgatory || all); - assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - - arena->npurgatory) || all); - - if (config_stats) - arena->stats.npurge++; - - /* - * Compute the minimum number of pages that this thread should try to - * purge, and add the result to arena->npurgatory. This will keep - * multiple threads from racing to reduce ndirty below the threshold. - */ - { - size_t npurgeable = arena->ndirty - arena->npurgatory; - - if (all == false) { - size_t threshold = (arena->nactive >> - opt_lg_dirty_mult); - - npurgatory = npurgeable - threshold; - } else - npurgatory = npurgeable; - } - arena->npurgatory += npurgatory; - - while (npurgatory > 0) { - size_t npurgeable, npurged, nunpurged; - - /* Get next chunk with dirty pages. */ - chunk = arena_chunk_dirty_first(&arena->chunks_dirty); - if (chunk == NULL) { - /* - * This thread was unable to purge as many pages as - * originally intended, due to races with other threads - * that either did some of the purging work, or re-used - * dirty pages. 
- */ - arena->npurgatory -= npurgatory; - return; - } - npurgeable = chunk->ndirty; - assert(npurgeable != 0); - - if (npurgeable > npurgatory && chunk->nruns_adjac == 0) { - /* - * This thread will purge all the dirty pages in chunk, - * so set npurgatory to reflect this thread's intent to - * purge the pages. This tends to reduce the chances - * of the following scenario: - * - * 1) This thread sets arena->npurgatory such that - * (arena->ndirty - arena->npurgatory) is at the - * threshold. - * 2) This thread drops arena->lock. - * 3) Another thread causes one or more pages to be - * dirtied, and immediately determines that it must - * purge dirty pages. - * - * If this scenario *does* play out, that's okay, - * because all of the purging work being done really - * needs to happen. - */ - arena->npurgatory += npurgeable - npurgatory; - npurgatory = npurgeable; - } - - /* - * Keep track of how many pages are purgeable, versus how many - * actually get purged, and adjust counters accordingly. 
- */ - arena->npurgatory -= npurgeable; - npurgatory -= npurgeable; - npurged = arena_chunk_purge(arena, chunk, all); - nunpurged = npurgeable - npurged; - arena->npurgatory += nunpurged; - npurgatory += nunpurged; - } -} - -void -arena_purge_all(arena_t *arena) -{ - - malloc_mutex_lock(&arena->lock); - arena_purge(arena, true); - malloc_mutex_unlock(&arena->lock); -} - -static void -arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) -{ - arena_chunk_t *chunk; - size_t size, run_ind, run_pages, flag_dirty; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); - assert(run_ind >= map_bias); - assert(run_ind < chunk_npages); - if (arena_mapbits_large_get(chunk, run_ind) != 0) { - size = arena_mapbits_large_size_get(chunk, run_ind); - assert(size == PAGE || - arena_mapbits_large_size_get(chunk, - run_ind+(size>>LG_PAGE)-1) == 0); - } else { - size_t binind = arena_bin_index(arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - size = bin_info->run_size; - } - run_pages = (size >> LG_PAGE); - if (config_stats) { - /* - * Update stats_cactive if nactive is crossing a chunk - * multiple. - */ - size_t cactive_diff = CHUNK_CEILING(arena->nactive << LG_PAGE) - - CHUNK_CEILING((arena->nactive - run_pages) << LG_PAGE); - if (cactive_diff != 0) - stats_cactive_sub(cactive_diff); - } - arena->nactive -= run_pages; - - /* - * The run is dirty if the caller claims to have dirtied it, as well as - * if it was already dirty before being allocated and the caller - * doesn't claim to have cleaned it. - */ - assert(arena_mapbits_dirty_get(chunk, run_ind) == - arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); - if (cleaned == false && arena_mapbits_dirty_get(chunk, run_ind) != 0) - dirty = true; - flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; - - /* Mark pages as unallocated in the chunk map. 
*/ - if (dirty) { - arena_mapbits_unallocated_set(chunk, run_ind, size, - CHUNK_MAP_DIRTY); - arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, - CHUNK_MAP_DIRTY); - } else { - arena_mapbits_unallocated_set(chunk, run_ind, size, - arena_mapbits_unzeroed_get(chunk, run_ind)); - arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, - arena_mapbits_unzeroed_get(chunk, run_ind+run_pages-1)); - } - - /* Try to coalesce forward. */ - if (run_ind + run_pages < chunk_npages && - arena_mapbits_allocated_get(chunk, run_ind+run_pages) == 0 && - arena_mapbits_dirty_get(chunk, run_ind+run_pages) == flag_dirty) { - size_t nrun_size = arena_mapbits_unallocated_size_get(chunk, - run_ind+run_pages); - size_t nrun_pages = nrun_size >> LG_PAGE; - - /* - * Remove successor from runs_avail; the coalesced run is - * inserted later. - */ - assert(arena_mapbits_unallocated_size_get(chunk, - run_ind+run_pages+nrun_pages-1) == nrun_size); - assert(arena_mapbits_dirty_get(chunk, - run_ind+run_pages+nrun_pages-1) == flag_dirty); - arena_avail_remove(arena, chunk, run_ind+run_pages, nrun_pages, - false, true); - - size += nrun_size; - run_pages += nrun_pages; - - arena_mapbits_unallocated_size_set(chunk, run_ind, size); - arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1, - size); - } - - /* Try to coalesce backward. */ - if (run_ind > map_bias && arena_mapbits_allocated_get(chunk, run_ind-1) - == 0 && arena_mapbits_dirty_get(chunk, run_ind-1) == flag_dirty) { - size_t prun_size = arena_mapbits_unallocated_size_get(chunk, - run_ind-1); - size_t prun_pages = prun_size >> LG_PAGE; - - run_ind -= prun_pages; - - /* - * Remove predecessor from runs_avail; the coalesced run is - * inserted later. 
- */ - assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == - prun_size); - assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty); - arena_avail_remove(arena, chunk, run_ind, prun_pages, true, - false); - - size += prun_size; - run_pages += prun_pages; - - arena_mapbits_unallocated_size_set(chunk, run_ind, size); - arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1, - size); - } - - /* Insert into runs_avail, now that coalescing is complete. */ - assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == - arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1)); - assert(arena_mapbits_dirty_get(chunk, run_ind) == - arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); - arena_avail_insert(arena, chunk, run_ind, run_pages, true, true); - - /* Deallocate chunk if it is now completely unused. */ - if (size == arena_maxclass) { - assert(run_ind == map_bias); - assert(run_pages == (arena_maxclass >> LG_PAGE)); - arena_chunk_dealloc(arena, chunk); - } - - /* - * It is okay to do dirty page processing here even if the chunk was - * deallocated above, since in that case it is the spare. Waiting - * until after possible chunk deallocation to do dirty processing - * allows for an old spare to be fully deallocated, thus decreasing the - * chances of spuriously crossing the dirty page purging threshold. - */ - if (dirty) - arena_maybe_purge(arena); -} - -static void -arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - size_t oldsize, size_t newsize) -{ - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; - size_t head_npages = (oldsize - newsize) >> LG_PAGE; - size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind); - - assert(oldsize > newsize); - - /* - * Update the chunk map so that arena_run_dalloc() can treat the - * leading run as separately allocated. Set the last element of each - * run first, in case of single-page runs. 
- */ - assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize); - arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty); - arena_mapbits_large_set(chunk, pageind, oldsize-newsize, flag_dirty); - - if (config_debug) { - UNUSED size_t tail_npages = newsize >> LG_PAGE; - assert(arena_mapbits_large_size_get(chunk, - pageind+head_npages+tail_npages-1) == 0); - assert(arena_mapbits_dirty_get(chunk, - pageind+head_npages+tail_npages-1) == flag_dirty); - } - arena_mapbits_large_set(chunk, pageind+head_npages, newsize, - flag_dirty); - - arena_run_dalloc(arena, run, false, false); -} - -static void -arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - size_t oldsize, size_t newsize, bool dirty) -{ - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; - size_t head_npages = newsize >> LG_PAGE; - size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind); - - assert(oldsize > newsize); - - /* - * Update the chunk map so that arena_run_dalloc() can treat the - * trailing run as separately allocated. Set the last element of each - * run first, in case of single-page runs. 
- */ - assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize); - arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty); - arena_mapbits_large_set(chunk, pageind, newsize, flag_dirty); - - if (config_debug) { - UNUSED size_t tail_npages = (oldsize - newsize) >> LG_PAGE; - assert(arena_mapbits_large_size_get(chunk, - pageind+head_npages+tail_npages-1) == 0); - assert(arena_mapbits_dirty_get(chunk, - pageind+head_npages+tail_npages-1) == flag_dirty); - } - arena_mapbits_large_set(chunk, pageind+head_npages, oldsize-newsize, - flag_dirty); - - arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize), - dirty, false); -} - -static arena_run_t * -arena_bin_runs_first(arena_bin_t *bin) -{ - arena_chunk_map_t *mapelm = arena_run_tree_first(&bin->runs); - if (mapelm != NULL) { - arena_chunk_t *chunk; - size_t pageind; - arena_run_t *run; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); - pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t))) + map_bias; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - arena_mapbits_small_runind_get(chunk, pageind)) << - LG_PAGE)); - return (run); - } - - return (NULL); -} - -static void -arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) -{ - arena_chunk_t *chunk = CHUNK_ADDR2BASE(run); - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); - - assert(arena_run_tree_search(&bin->runs, mapelm) == NULL); - - arena_run_tree_insert(&bin->runs, mapelm); -} - -static void -arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run) -{ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); - - assert(arena_run_tree_search(&bin->runs, mapelm) != NULL); - - arena_run_tree_remove(&bin->runs, mapelm); -} - -static arena_run_t * 
-arena_bin_nonfull_run_tryget(arena_bin_t *bin) -{ - arena_run_t *run = arena_bin_runs_first(bin); - if (run != NULL) { - arena_bin_runs_remove(bin, run); - if (config_stats) - bin->stats.reruns++; - } - return (run); -} - -static arena_run_t * -arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) -{ - arena_run_t *run; - size_t binind; - arena_bin_info_t *bin_info; - - /* Look for a usable run. */ - run = arena_bin_nonfull_run_tryget(bin); - if (run != NULL) - return (run); - /* No existing runs have any space available. */ - - binind = arena_bin_index(arena, bin); - bin_info = &arena_bin_info[binind]; - - /* Allocate a new run. */ - malloc_mutex_unlock(&bin->lock); - /******************************/ - malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, bin_info->run_size, false, binind, false); - if (run != NULL) { - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); - - /* Initialize run internals. */ - run->bin = bin; - run->nextind = 0; - run->nfree = bin_info->nregs; - bitmap_init(bitmap, &bin_info->bitmap_info); - } - malloc_mutex_unlock(&arena->lock); - /********************************/ - malloc_mutex_lock(&bin->lock); - if (run != NULL) { - if (config_stats) { - bin->stats.nruns++; - bin->stats.curruns++; - } - return (run); - } - - /* - * arena_run_alloc() failed, but another thread may have made - * sufficient memory available while this one dropped bin->lock above, - * so search one more time. - */ - run = arena_bin_nonfull_run_tryget(bin); - if (run != NULL) - return (run); - - return (NULL); -} - -/* Re-fill bin->runcur, then call arena_run_reg_alloc(). 
*/ -static void * -arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) -{ - void *ret; - size_t binind; - arena_bin_info_t *bin_info; - arena_run_t *run; - - binind = arena_bin_index(arena, bin); - bin_info = &arena_bin_info[binind]; - bin->runcur = NULL; - run = arena_bin_nonfull_run_get(arena, bin); - if (bin->runcur != NULL && bin->runcur->nfree > 0) { - /* - * Another thread updated runcur while this one ran without the - * bin lock in arena_bin_nonfull_run_get(). - */ - assert(bin->runcur->nfree > 0); - ret = arena_run_reg_alloc(bin->runcur, bin_info); - if (run != NULL) { - arena_chunk_t *chunk; - - /* - * arena_run_alloc() may have allocated run, or it may - * have pulled run from the bin's run tree. Therefore - * it is unsafe to make any assumptions about how run - * has previously been used, and arena_bin_lower_run() - * must be called, as if a region were just deallocated - * from the run. - */ - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - if (run->nfree == bin_info->nregs) - arena_dalloc_bin_run(arena, chunk, run, bin); - else - arena_bin_lower_run(arena, chunk, run, bin); - } - return (ret); - } - - if (run == NULL) - return (NULL); - - bin->runcur = run; - - assert(bin->runcur->nfree > 0); - - return (arena_run_reg_alloc(bin->runcur, bin_info)); -} - -void -arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, - uint64_t prof_accumbytes) -{ - unsigned i, nfill; - arena_bin_t *bin; - arena_run_t *run; - void *ptr; - - assert(tbin->ncached == 0); - - if (config_prof && arena_prof_accum(arena, prof_accumbytes)) - prof_idump(); - bin = &arena->bins[binind]; - malloc_mutex_lock(&bin->lock); - for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> - tbin->lg_fill_div); i < nfill; i++) { - if ((run = bin->runcur) != NULL && run->nfree > 0) - ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); - else - ptr = arena_bin_malloc_hard(arena, bin); - if (ptr == NULL) - break; - if (config_fill && opt_junk) { - 
arena_alloc_junk_small(ptr, &arena_bin_info[binind], - true); - } - /* Insert such that low regions get used first. */ - tbin->avail[nfill - 1 - i] = ptr; - } - if (config_stats) { - bin->stats.allocated += i * arena_bin_info[binind].reg_size; - bin->stats.nmalloc += i; - bin->stats.nrequests += tbin->tstats.nrequests; - bin->stats.nfills++; - tbin->tstats.nrequests = 0; - } - malloc_mutex_unlock(&bin->lock); - tbin->ncached = i; -} - -void -arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero) -{ - - if (zero) { - size_t redzone_size = bin_info->redzone_size; - memset((void *)((uintptr_t)ptr - redzone_size), 0xa5, - redzone_size); - memset((void *)((uintptr_t)ptr + bin_info->reg_size), 0xa5, - redzone_size); - } else { - memset((void *)((uintptr_t)ptr - bin_info->redzone_size), 0xa5, - bin_info->reg_interval); - } -} - -void -arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) -{ - size_t size = bin_info->reg_size; - size_t redzone_size = bin_info->redzone_size; - size_t i; - bool error = false; - - for (i = 1; i <= redzone_size; i++) { - unsigned byte; - if ((byte = *(uint8_t *)((uintptr_t)ptr - i)) != 0xa5) { - error = true; - malloc_printf(": Corrupt redzone " - "%zu byte%s before %p (size %zu), byte=%#x\n", i, - (i == 1) ? "" : "s", ptr, size, byte); - } - } - for (i = 0; i < redzone_size; i++) { - unsigned byte; - if ((byte = *(uint8_t *)((uintptr_t)ptr + size + i)) != 0xa5) { - error = true; - malloc_printf(": Corrupt redzone " - "%zu byte%s after end of %p (size %zu), byte=%#x\n", - i, (i == 1) ? 
"" : "s", ptr, size, byte); - } - } - if (opt_abort && error) - abort(); - - memset((void *)((uintptr_t)ptr - redzone_size), 0x5a, - bin_info->reg_interval); -} - -void * -arena_malloc_small(arena_t *arena, size_t size, bool zero) -{ - void *ret; - arena_bin_t *bin; - arena_run_t *run; - size_t binind; - - binind = SMALL_SIZE2BIN(size); - assert(binind < NBINS); - bin = &arena->bins[binind]; - size = arena_bin_info[binind].reg_size; - - malloc_mutex_lock(&bin->lock); - if ((run = bin->runcur) != NULL && run->nfree > 0) - ret = arena_run_reg_alloc(run, &arena_bin_info[binind]); - else - ret = arena_bin_malloc_hard(arena, bin); - - if (ret == NULL) { - malloc_mutex_unlock(&bin->lock); - return (NULL); - } - - if (config_stats) { - bin->stats.allocated += size; - bin->stats.nmalloc++; - bin->stats.nrequests++; - } - malloc_mutex_unlock(&bin->lock); - if (config_prof && isthreaded == false && arena_prof_accum(arena, size)) - prof_idump(); - - if (zero == false) { - if (config_fill) { - if (opt_junk) { - arena_alloc_junk_small(ret, - &arena_bin_info[binind], false); - } else if (opt_zero) - memset(ret, 0, size); - } - } else { - if (config_fill && opt_junk) { - arena_alloc_junk_small(ret, &arena_bin_info[binind], - true); - } - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - memset(ret, 0, size); - } - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - - return (ret); -} - -void * -arena_malloc_large(arena_t *arena, size_t size, bool zero) -{ - void *ret; - UNUSED bool idump; - - /* Large allocation. 
*/ - size = PAGE_CEILING(size); - malloc_mutex_lock(&arena->lock); - ret = (void *)arena_run_alloc(arena, size, true, BININD_INVALID, zero); - if (ret == NULL) { - malloc_mutex_unlock(&arena->lock); - return (NULL); - } - if (config_stats) { - arena->stats.nmalloc_large++; - arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; - } - if (config_prof) - idump = arena_prof_accum_locked(arena, size); - malloc_mutex_unlock(&arena->lock); - if (config_prof && idump) - prof_idump(); - - if (zero == false) { - if (config_fill) { - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); - } - } - - return (ret); -} - -/* Only handles large allocations that require more than page alignment. */ -void * -arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) -{ - void *ret; - size_t alloc_size, leadsize, trailsize; - arena_run_t *run; - arena_chunk_t *chunk; - - assert((size & PAGE_MASK) == 0); - - alignment = PAGE_CEILING(alignment); - alloc_size = size + alignment - PAGE; - - malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, alloc_size, true, BININD_INVALID, zero); - if (run == NULL) { - malloc_mutex_unlock(&arena->lock); - return (NULL); - } - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - - leadsize = ALIGNMENT_CEILING((uintptr_t)run, alignment) - - (uintptr_t)run; - assert(alloc_size >= leadsize + size); - trailsize = alloc_size - leadsize - size; - ret = (void *)((uintptr_t)run + leadsize); - if (leadsize != 0) { - arena_run_trim_head(arena, chunk, run, alloc_size, alloc_size - - leadsize); - } - if (trailsize != 0) { - arena_run_trim_tail(arena, chunk, ret, size + trailsize, size, - false); - } - - if (config_stats) { - arena->stats.nmalloc_large++; - arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - 
arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; - } - malloc_mutex_unlock(&arena->lock); - - if (config_fill && zero == false) { - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); - } - return (ret); -} - -void -arena_prof_promoted(const void *ptr, size_t size) -{ - arena_chunk_t *chunk; - size_t pageind, binind; - - cassert(config_prof); - assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - assert(isalloc(ptr, false) == PAGE); - assert(isalloc(ptr, true) == PAGE); - assert(size <= SMALL_MAXCLASS); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - binind = SMALL_SIZE2BIN(size); - assert(binind < NBINS); - arena_mapbits_large_binind_set(chunk, pageind, binind); - - assert(isalloc(ptr, false) == PAGE); - assert(isalloc(ptr, true) == size); -} - -static void -arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, - arena_bin_t *bin) -{ - - /* Dissociate run from bin. */ - if (run == bin->runcur) - bin->runcur = NULL; - else { - size_t binind = arena_bin_index(chunk->arena, bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - - if (bin_info->nregs != 1) { - /* - * This block's conditional is necessary because if the - * run only contains one region, then it never gets - * inserted into the non-full runs tree. 
- */ - arena_bin_runs_remove(bin, run); - } - } -} - -static void -arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - arena_bin_t *bin) -{ - size_t binind; - arena_bin_info_t *bin_info; - size_t npages, run_ind, past; - - assert(run != bin->runcur); - assert(arena_run_tree_search(&bin->runs, - arena_mapp_get(chunk, ((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE)) - == NULL); - - binind = arena_bin_index(chunk->arena, run->bin); - bin_info = &arena_bin_info[binind]; - - malloc_mutex_unlock(&bin->lock); - /******************************/ - npages = bin_info->run_size >> LG_PAGE; - run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); - past = (size_t)(PAGE_CEILING((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind * - bin_info->reg_interval - bin_info->redzone_size) - - (uintptr_t)chunk) >> LG_PAGE); - malloc_mutex_lock(&arena->lock); - - /* - * If the run was originally clean, and some pages were never touched, - * trim the clean pages before deallocating the dirty portion of the - * run. - */ - assert(arena_mapbits_dirty_get(chunk, run_ind) == - arena_mapbits_dirty_get(chunk, run_ind+npages-1)); - if (arena_mapbits_dirty_get(chunk, run_ind) == 0 && past - run_ind < - npages) { - /* Trim clean pages. Convert to large run beforehand. 
*/ - assert(npages > 0); - arena_mapbits_large_set(chunk, run_ind, bin_info->run_size, 0); - arena_mapbits_large_set(chunk, run_ind+npages-1, 0, 0); - arena_run_trim_tail(arena, chunk, run, (npages << LG_PAGE), - ((past - run_ind) << LG_PAGE), false); - /* npages = past - run_ind; */ - } - arena_run_dalloc(arena, run, true, false); - malloc_mutex_unlock(&arena->lock); - /****************************/ - malloc_mutex_lock(&bin->lock); - if (config_stats) - bin->stats.curruns--; -} - -static void -arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - arena_bin_t *bin) -{ - - /* - * Make sure that if bin->runcur is non-NULL, it refers to the lowest - * non-full run. It is okay to NULL runcur out rather than proactively - * keeping it pointing at the lowest non-full run. - */ - if ((uintptr_t)run < (uintptr_t)bin->runcur) { - /* Switch runcur. */ - if (bin->runcur->nfree > 0) - arena_bin_runs_insert(bin, bin->runcur); - bin->runcur = run; - if (config_stats) - bin->stats.reruns++; - } else - arena_bin_runs_insert(bin, run); -} - -void -arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, - arena_chunk_map_t *mapelm) -{ - size_t pageind; - arena_run_t *run; - arena_bin_t *bin; - arena_bin_info_t *bin_info; - size_t size, binind; - - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); - bin = run->bin; - binind = arena_ptr_small_binind_get(ptr, mapelm->bits); - bin_info = &arena_bin_info[binind]; - if (config_fill || config_stats) - size = bin_info->reg_size; - - if (config_fill && opt_junk) - arena_dalloc_junk_small(ptr, bin_info); - - arena_run_reg_dalloc(run, ptr); - if (run->nfree == bin_info->nregs) { - arena_dissociate_bin_run(chunk, run, bin); - arena_dalloc_bin_run(arena, chunk, run, bin); - } else if (run->nfree == 1 && run != bin->runcur) - arena_bin_lower_run(arena, chunk, run, bin); 
- - if (config_stats) { - bin->stats.allocated -= size; - bin->stats.ndalloc++; - } -} - -void -arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind, arena_chunk_map_t *mapelm) -{ - arena_run_t *run; - arena_bin_t *bin; - - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); - bin = run->bin; - malloc_mutex_lock(&bin->lock); - arena_dalloc_bin_locked(arena, chunk, ptr, mapelm); - malloc_mutex_unlock(&bin->lock); -} - -void -arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind) -{ - arena_chunk_map_t *mapelm; - - if (config_debug) { - /* arena_ptr_small_binind_get() does extra sanity checking. */ - assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, - pageind)) != BININD_INVALID); - } - mapelm = arena_mapp_get(chunk, pageind); - arena_dalloc_bin(arena, chunk, ptr, pageind, mapelm); -} - -void -arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) -{ - - if (config_fill || config_stats) { - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - size_t size = arena_mapbits_large_size_get(chunk, pageind); - - if (config_fill && config_stats && opt_junk) - memset(ptr, 0x5a, size); - if (config_stats) { - arena->stats.ndalloc_large++; - arena->stats.allocated_large -= size; - arena->stats.lstats[(size >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns--; - } - } - - arena_run_dalloc(arena, (arena_run_t *)ptr, true, false); -} - -void -arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) -{ - - malloc_mutex_lock(&arena->lock); - arena_dalloc_large_locked(arena, chunk, ptr); - malloc_mutex_unlock(&arena->lock); -} - -static void -arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t oldsize, size_t size) -{ - - assert(size < oldsize); - - /* - * Shrink the run, and make trailing pages available for other - * 
allocations. - */ - malloc_mutex_lock(&arena->lock); - arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size, - true); - if (config_stats) { - arena->stats.ndalloc_large++; - arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; - - arena->stats.nmalloc_large++; - arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; - } - malloc_mutex_unlock(&arena->lock); -} - -static bool -arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t oldsize, size_t size, size_t extra, bool zero) -{ - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - size_t npages = oldsize >> LG_PAGE; - size_t followsize; - - assert(oldsize == arena_mapbits_large_size_get(chunk, pageind)); - - /* Try to extend the run. */ - assert(size + extra > oldsize); - malloc_mutex_lock(&arena->lock); - if (pageind + npages < chunk_npages && - arena_mapbits_allocated_get(chunk, pageind+npages) == 0 && - (followsize = arena_mapbits_unallocated_size_get(chunk, - pageind+npages)) >= size - oldsize) { - /* - * The next run is available and sufficiently large. Split the - * following run, then merge the first part with the existing - * allocation. - */ - size_t flag_dirty; - size_t splitsize = (oldsize + followsize <= size + extra) - ? followsize : size + extra - oldsize; - arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk + - ((pageind+npages) << LG_PAGE)), splitsize, true, - BININD_INVALID, zero); - - size = oldsize + splitsize; - npages = size >> LG_PAGE; - - /* - * Mark the extended run as dirty if either portion of the run - * was dirty before allocation. 
This is rather pedantic, - * because there's not actually any sequence of events that - * could cause the resulting run to be passed to - * arena_run_dalloc() with the dirty argument set to false - * (which is when dirty flag consistency would really matter). - */ - flag_dirty = arena_mapbits_dirty_get(chunk, pageind) | - arena_mapbits_dirty_get(chunk, pageind+npages-1); - arena_mapbits_large_set(chunk, pageind, size, flag_dirty); - arena_mapbits_large_set(chunk, pageind+npages-1, 0, flag_dirty); - - if (config_stats) { - arena->stats.ndalloc_large++; - arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; - - arena->stats.nmalloc_large++; - arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; - } - malloc_mutex_unlock(&arena->lock); - return (false); - } - malloc_mutex_unlock(&arena->lock); - - return (true); -} - -/* - * Try to resize a large allocation, in order to avoid copying. This will - * always fail if growing an object, and the following run is already in use. - */ -static bool -arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, - bool zero) -{ - size_t psize; - - psize = PAGE_CEILING(size + extra); - if (psize == oldsize) { - /* Same size class. */ - if (config_fill && opt_junk && size < oldsize) { - memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - - size); - } - return (false); - } else { - arena_chunk_t *chunk; - arena_t *arena; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = chunk->arena; - - if (psize < oldsize) { - /* Fill before shrinking in order avoid a race. 
*/ - if (config_fill && opt_junk) { - memset((void *)((uintptr_t)ptr + size), 0x5a, - oldsize - size); - } - arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, - psize); - return (false); - } else { - bool ret = arena_ralloc_large_grow(arena, chunk, ptr, - oldsize, PAGE_CEILING(size), - psize - PAGE_CEILING(size), zero); - if (config_fill && ret == false && zero == false && - opt_zero) { - memset((void *)((uintptr_t)ptr + oldsize), 0, - size - oldsize); - } - return (ret); - } - } -} - -void * -arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, - bool zero) -{ - - /* - * Avoid moving the allocation if the size class can be left the same. - */ - if (oldsize <= arena_maxclass) { - if (oldsize <= SMALL_MAXCLASS) { - assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size - == oldsize); - if ((size + extra <= SMALL_MAXCLASS && - SMALL_SIZE2BIN(size + extra) == - SMALL_SIZE2BIN(oldsize)) || (size <= oldsize && - size + extra >= oldsize)) { - if (config_fill && opt_junk && size < oldsize) { - memset((void *)((uintptr_t)ptr + size), - 0x5a, oldsize - size); - } - return (ptr); - } - } else { - assert(size <= arena_maxclass); - if (size + extra > SMALL_MAXCLASS) { - if (arena_ralloc_large(ptr, oldsize, size, - extra, zero) == false) - return (ptr); - } - } - } - - /* Reallocation would require a move. */ - return (NULL); -} - -void * -arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, - bool try_tcache_dalloc) -{ - void *ret; - size_t copysize; - - /* Try to avoid moving the allocation. */ - ret = arena_ralloc_no_move(ptr, oldsize, size, extra, zero); - if (ret != NULL) - return (ret); - - /* - * size and oldsize are different enough that we need to move the - * object. In that case, fall back to allocating new space and - * copying. 
- */ - if (alignment != 0) { - size_t usize = sa2u(size + extra, alignment); - if (usize == 0) - return (NULL); - ret = ipallocx(usize, alignment, zero, try_tcache_alloc, arena); - } else - ret = arena_malloc(arena, size + extra, zero, try_tcache_alloc); - - if (ret == NULL) { - if (extra == 0) - return (NULL); - /* Try again, this time without extra. */ - if (alignment != 0) { - size_t usize = sa2u(size, alignment); - if (usize == 0) - return (NULL); - ret = ipallocx(usize, alignment, zero, try_tcache_alloc, - arena); - } else - ret = arena_malloc(arena, size, zero, try_tcache_alloc); - - if (ret == NULL) - return (NULL); - } - - /* Junk/zero-filling were already done by ipalloc()/arena_malloc(). */ - - /* - * Copy at most size bytes (not size+extra), since the caller has no - * expectation that the extra bytes will be reliably preserved. - */ - copysize = (size < oldsize) ? size : oldsize; - VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); - memcpy(ret, ptr, copysize); - iqallocx(ptr, try_tcache_dalloc); - return (ret); -} - -dss_prec_t -arena_dss_prec_get(arena_t *arena) -{ - dss_prec_t ret; - - malloc_mutex_lock(&arena->lock); - ret = arena->dss_prec; - malloc_mutex_unlock(&arena->lock); - return (ret); -} - -void -arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) -{ - - malloc_mutex_lock(&arena->lock); - arena->dss_prec = dss_prec; - malloc_mutex_unlock(&arena->lock); -} - -void -arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, - size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats) -{ - unsigned i; - - malloc_mutex_lock(&arena->lock); - *dss = dss_prec_names[arena->dss_prec]; - *nactive += arena->nactive; - *ndirty += arena->ndirty; - - astats->mapped += arena->stats.mapped; - astats->npurge += arena->stats.npurge; - astats->nmadvise += arena->stats.nmadvise; - astats->purged += arena->stats.purged; - astats->allocated_large += arena->stats.allocated_large; - astats->nmalloc_large += 
arena->stats.nmalloc_large; - astats->ndalloc_large += arena->stats.ndalloc_large; - astats->nrequests_large += arena->stats.nrequests_large; - - for (i = 0; i < nlclasses; i++) { - lstats[i].nmalloc += arena->stats.lstats[i].nmalloc; - lstats[i].ndalloc += arena->stats.lstats[i].ndalloc; - lstats[i].nrequests += arena->stats.lstats[i].nrequests; - lstats[i].curruns += arena->stats.lstats[i].curruns; - } - malloc_mutex_unlock(&arena->lock); - - for (i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; - - malloc_mutex_lock(&bin->lock); - bstats[i].allocated += bin->stats.allocated; - bstats[i].nmalloc += bin->stats.nmalloc; - bstats[i].ndalloc += bin->stats.ndalloc; - bstats[i].nrequests += bin->stats.nrequests; - if (config_tcache) { - bstats[i].nfills += bin->stats.nfills; - bstats[i].nflushes += bin->stats.nflushes; - } - bstats[i].nruns += bin->stats.nruns; - bstats[i].reruns += bin->stats.reruns; - bstats[i].curruns += bin->stats.curruns; - malloc_mutex_unlock(&bin->lock); - } -} - -bool -arena_new(arena_t *arena, unsigned ind) -{ - unsigned i; - arena_bin_t *bin; - - arena->ind = ind; - arena->nthreads = 0; - - if (malloc_mutex_init(&arena->lock)) - return (true); - - if (config_stats) { - memset(&arena->stats, 0, sizeof(arena_stats_t)); - arena->stats.lstats = - (malloc_large_stats_t *)base_alloc(nlclasses * - sizeof(malloc_large_stats_t)); - if (arena->stats.lstats == NULL) - return (true); - memset(arena->stats.lstats, 0, nlclasses * - sizeof(malloc_large_stats_t)); - if (config_tcache) - ql_new(&arena->tcache_ql); - } - - if (config_prof) - arena->prof_accumbytes = 0; - - arena->dss_prec = chunk_dss_prec_get(); - - /* Initialize chunks. */ - arena_chunk_dirty_new(&arena->chunks_dirty); - arena->spare = NULL; - - arena->nactive = 0; - arena->ndirty = 0; - arena->npurgatory = 0; - - arena_avail_tree_new(&arena->runs_avail); - - /* Initialize bins. 
*/ - for (i = 0; i < NBINS; i++) { - bin = &arena->bins[i]; - if (malloc_mutex_init(&bin->lock)) - return (true); - bin->runcur = NULL; - arena_run_tree_new(&bin->runs); - if (config_stats) - memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); - } - - return (false); -} - -/* - * Calculate bin_info->run_size such that it meets the following constraints: - * - * *) bin_info->run_size >= min_run_size - * *) bin_info->run_size <= arena_maxclass - * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed). - * *) bin_info->nregs <= RUN_MAXREGS - * - * bin_info->nregs, bin_info->bitmap_offset, and bin_info->reg0_offset are also - * calculated here, since these settings are all interdependent. - */ -static size_t -bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) -{ - size_t pad_size; - size_t try_run_size, good_run_size; - uint32_t try_nregs, good_nregs; - uint32_t try_hdr_size, good_hdr_size; - uint32_t try_bitmap_offset, good_bitmap_offset; - uint32_t try_ctx0_offset, good_ctx0_offset; - uint32_t try_redzone0_offset, good_redzone0_offset; - - assert(min_run_size >= PAGE); - assert(min_run_size <= arena_maxclass); - - /* - * Determine redzone size based on minimum alignment and minimum - * redzone size. Add padding to the end of the run if it is needed to - * align the regions. The padding allows each redzone to be half the - * minimum alignment; without the padding, each redzone would have to - * be twice as large in order to maintain alignment. 
- */ - if (config_fill && opt_redzone) { - size_t align_min = ZU(1) << (ffs(bin_info->reg_size) - 1); - if (align_min <= REDZONE_MINSIZE) { - bin_info->redzone_size = REDZONE_MINSIZE; - pad_size = 0; - } else { - bin_info->redzone_size = align_min >> 1; - pad_size = bin_info->redzone_size; - } - } else { - bin_info->redzone_size = 0; - pad_size = 0; - } - bin_info->reg_interval = bin_info->reg_size + - (bin_info->redzone_size << 1); - - /* - * Calculate known-valid settings before entering the run_size - * expansion loop, so that the first part of the loop always copies - * valid settings. - * - * The do..while loop iteratively reduces the number of regions until - * the run header and the regions no longer overlap. A closed formula - * would be quite messy, since there is an interdependency between the - * header's mask length and the number of regions. - */ - try_run_size = min_run_size; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / - bin_info->reg_interval) - + 1; /* Counter-act try_nregs-- in loop. */ - if (try_nregs > RUN_MAXREGS) { - try_nregs = RUN_MAXREGS - + 1; /* Counter-act try_nregs-- in loop. */ - } - do { - try_nregs--; - try_hdr_size = sizeof(arena_run_t); - /* Pad to a long boundary. */ - try_hdr_size = LONG_CEILING(try_hdr_size); - try_bitmap_offset = try_hdr_size; - /* Add space for bitmap. */ - try_hdr_size += bitmap_size(try_nregs); - if (config_prof && opt_prof && prof_promote == false) { - /* Pad to a quantum boundary. */ - try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_ctx0_offset = try_hdr_size; - /* Add space for one (prof_ctx_t *) per region. */ - try_hdr_size += try_nregs * sizeof(prof_ctx_t *); - } else - try_ctx0_offset = 0; - try_redzone0_offset = try_run_size - (try_nregs * - bin_info->reg_interval) - pad_size; - } while (try_hdr_size > try_redzone0_offset); - - /* run_size expansion loop. */ - do { - /* - * Copy valid settings before trying more aggressive settings. 
- */ - good_run_size = try_run_size; - good_nregs = try_nregs; - good_hdr_size = try_hdr_size; - good_bitmap_offset = try_bitmap_offset; - good_ctx0_offset = try_ctx0_offset; - good_redzone0_offset = try_redzone0_offset; - - /* Try more aggressive settings. */ - try_run_size += PAGE; - try_nregs = ((try_run_size - sizeof(arena_run_t) - pad_size) / - bin_info->reg_interval) - + 1; /* Counter-act try_nregs-- in loop. */ - if (try_nregs > RUN_MAXREGS) { - try_nregs = RUN_MAXREGS - + 1; /* Counter-act try_nregs-- in loop. */ - } - do { - try_nregs--; - try_hdr_size = sizeof(arena_run_t); - /* Pad to a long boundary. */ - try_hdr_size = LONG_CEILING(try_hdr_size); - try_bitmap_offset = try_hdr_size; - /* Add space for bitmap. */ - try_hdr_size += bitmap_size(try_nregs); - if (config_prof && opt_prof && prof_promote == false) { - /* Pad to a quantum boundary. */ - try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_ctx0_offset = try_hdr_size; - /* - * Add space for one (prof_ctx_t *) per region. - */ - try_hdr_size += try_nregs * - sizeof(prof_ctx_t *); - } - try_redzone0_offset = try_run_size - (try_nregs * - bin_info->reg_interval) - pad_size; - } while (try_hdr_size > try_redzone0_offset); - } while (try_run_size <= arena_maxclass - && try_run_size <= arena_maxclass - && RUN_MAX_OVRHD * (bin_info->reg_interval << 3) > - RUN_MAX_OVRHD_RELAX - && (try_redzone0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size - && try_nregs < RUN_MAXREGS); - - assert(good_hdr_size <= good_redzone0_offset); - - /* Copy final settings. 
*/ - bin_info->run_size = good_run_size; - bin_info->nregs = good_nregs; - bin_info->bitmap_offset = good_bitmap_offset; - bin_info->ctx0_offset = good_ctx0_offset; - bin_info->reg0_offset = good_redzone0_offset + bin_info->redzone_size; - - assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs - * bin_info->reg_interval) + pad_size == bin_info->run_size); - - return (good_run_size); -} - -static void -bin_info_init(void) -{ - arena_bin_info_t *bin_info; - size_t prev_run_size = PAGE; - -#define SIZE_CLASS(bin, delta, size) \ - bin_info = &arena_bin_info[bin]; \ - bin_info->reg_size = size; \ - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);\ - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - SIZE_CLASSES -#undef SIZE_CLASS -} - -void -arena_boot(void) -{ - size_t header_size; - unsigned i; - - /* - * Compute the header size such that it is large enough to contain the - * page map. The page map is biased to omit entries for the header - * itself, so some iteration is necessary to compute the map bias. - * - * 1) Compute safe header_size and map_bias values that include enough - * space for an unbiased page map. - * 2) Refine map_bias based on (1) to omit the header pages in the page - * map. The resulting map_bias may be one too small. - * 3) Refine map_bias based on (2). The result will be >= the result - * from (2), and will always be correct. 
- */ - map_bias = 0; - for (i = 0; i < 3; i++) { - header_size = offsetof(arena_chunk_t, map) + - (sizeof(arena_chunk_map_t) * (chunk_npages-map_bias)); - map_bias = (header_size >> LG_PAGE) + ((header_size & PAGE_MASK) - != 0); - } - assert(map_bias > 0); - - arena_maxclass = chunksize - (map_bias << LG_PAGE); - - bin_info_init(); -} - -void -arena_prefork(arena_t *arena) -{ - unsigned i; - - malloc_mutex_prefork(&arena->lock); - for (i = 0; i < NBINS; i++) - malloc_mutex_prefork(&arena->bins[i].lock); -} - -void -arena_postfork_parent(arena_t *arena) -{ - unsigned i; - - for (i = 0; i < NBINS; i++) - malloc_mutex_postfork_parent(&arena->bins[i].lock); - malloc_mutex_postfork_parent(&arena->lock); -} - -void -arena_postfork_child(arena_t *arena) -{ - unsigned i; - - for (i = 0; i < NBINS; i++) - malloc_mutex_postfork_child(&arena->bins[i].lock); - malloc_mutex_postfork_child(&arena->lock); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/atomic.c mariadb-5.5-5.5.40/extra/jemalloc/src/atomic.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/atomic.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/atomic.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -#define JEMALLOC_ATOMIC_C_ -#include "jemalloc/internal/jemalloc_internal.h" diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/base.c mariadb-5.5-5.5.40/extra/jemalloc/src/base.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/base.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/base.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,142 +0,0 @@ -#define JEMALLOC_BASE_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Data. */ - -static malloc_mutex_t base_mtx; - -/* - * Current pages that are being used for internal memory allocations. These - * pages are carved up in cacheline-size quanta, so that there is no chance of - * false cache line sharing. 
- */ -static void *base_pages; -static void *base_next_addr; -static void *base_past_addr; /* Addr immediately past base_pages. */ -static extent_node_t *base_nodes; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static bool base_pages_alloc(size_t minsize); - -/******************************************************************************/ - -static bool -base_pages_alloc(size_t minsize) -{ - size_t csize; - bool zero; - - assert(minsize != 0); - csize = CHUNK_CEILING(minsize); - zero = false; - base_pages = chunk_alloc(csize, chunksize, true, &zero, - chunk_dss_prec_get()); - if (base_pages == NULL) - return (true); - base_next_addr = base_pages; - base_past_addr = (void *)((uintptr_t)base_pages + csize); - - return (false); -} - -void * -base_alloc(size_t size) -{ - void *ret; - size_t csize; - - /* Round size up to nearest multiple of the cacheline size. */ - csize = CACHELINE_CEILING(size); - - malloc_mutex_lock(&base_mtx); - /* Make sure there's enough space for the allocation. */ - if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) { - if (base_pages_alloc(csize)) { - malloc_mutex_unlock(&base_mtx); - return (NULL); - } - } - /* Allocate. 
*/ - ret = base_next_addr; - base_next_addr = (void *)((uintptr_t)base_next_addr + csize); - malloc_mutex_unlock(&base_mtx); - VALGRIND_MAKE_MEM_UNDEFINED(ret, csize); - - return (ret); -} - -void * -base_calloc(size_t number, size_t size) -{ - void *ret = base_alloc(number * size); - - if (ret != NULL) - memset(ret, 0, number * size); - - return (ret); -} - -extent_node_t * -base_node_alloc(void) -{ - extent_node_t *ret; - - malloc_mutex_lock(&base_mtx); - if (base_nodes != NULL) { - ret = base_nodes; - base_nodes = *(extent_node_t **)ret; - malloc_mutex_unlock(&base_mtx); - VALGRIND_MAKE_MEM_UNDEFINED(ret, sizeof(extent_node_t)); - } else { - malloc_mutex_unlock(&base_mtx); - ret = (extent_node_t *)base_alloc(sizeof(extent_node_t)); - } - - return (ret); -} - -void -base_node_dealloc(extent_node_t *node) -{ - - VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); - malloc_mutex_lock(&base_mtx); - *(extent_node_t **)node = base_nodes; - base_nodes = node; - malloc_mutex_unlock(&base_mtx); -} - -bool -base_boot(void) -{ - - base_nodes = NULL; - if (malloc_mutex_init(&base_mtx)) - return (true); - - return (false); -} - -void -base_prefork(void) -{ - - malloc_mutex_prefork(&base_mtx); -} - -void -base_postfork_parent(void) -{ - - malloc_mutex_postfork_parent(&base_mtx); -} - -void -base_postfork_child(void) -{ - - malloc_mutex_postfork_child(&base_mtx); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/bitmap.c mariadb-5.5-5.5.40/extra/jemalloc/src/bitmap.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/bitmap.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/bitmap.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,90 +0,0 @@ -#define JEMALLOC_BITMAP_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ - -static size_t bits2groups(size_t nbits); - -/******************************************************************************/ - -static size_t -bits2groups(size_t nbits) -{ - - return ((nbits >> LG_BITMAP_GROUP_NBITS) + - !!(nbits & BITMAP_GROUP_NBITS_MASK)); -} - -void -bitmap_info_init(bitmap_info_t *binfo, size_t nbits) -{ - unsigned i; - size_t group_count; - - assert(nbits > 0); - assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); - - /* - * Compute the number of groups necessary to store nbits bits, and - * progressively work upward through the levels until reaching a level - * that requires only one group. - */ - binfo->levels[0].group_offset = 0; - group_count = bits2groups(nbits); - for (i = 1; group_count > 1; i++) { - assert(i < BITMAP_MAX_LEVELS); - binfo->levels[i].group_offset = binfo->levels[i-1].group_offset - + group_count; - group_count = bits2groups(group_count); - } - binfo->levels[i].group_offset = binfo->levels[i-1].group_offset - + group_count; - binfo->nlevels = i; - binfo->nbits = nbits; -} - -size_t -bitmap_info_ngroups(const bitmap_info_t *binfo) -{ - - return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP); -} - -size_t -bitmap_size(size_t nbits) -{ - bitmap_info_t binfo; - - bitmap_info_init(&binfo, nbits); - return (bitmap_info_ngroups(&binfo)); -} - -void -bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ - size_t extra; - unsigned i; - - /* - * Bits are actually inverted with regard to the external bitmap - * interface, so the bitmap starts out with all 1 bits, except for - * trailing unused bits (if any). Note that each group uses bit 0 to - * correspond to the first logical bit in the group, so extra bits - * are the most significant bits of the last group. 
- */ - memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset << - LG_SIZEOF_BITMAP); - extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) - & BITMAP_GROUP_NBITS_MASK; - if (extra != 0) - bitmap[binfo->levels[1].group_offset - 1] >>= extra; - for (i = 1; i < binfo->nlevels; i++) { - size_t group_count = binfo->levels[i].group_offset - - binfo->levels[i-1].group_offset; - extra = (BITMAP_GROUP_NBITS - (group_count & - BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; - if (extra != 0) - bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; - } -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/chunk.c mariadb-5.5-5.5.40/extra/jemalloc/src/chunk.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/chunk.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/chunk.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,385 +0,0 @@ -#define JEMALLOC_CHUNK_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Data. */ - -const char *opt_dss = DSS_DEFAULT; -size_t opt_lg_chunk = LG_CHUNK_DEFAULT; - -malloc_mutex_t chunks_mtx; -chunk_stats_t stats_chunks; - -/* - * Trees of chunks that were previously allocated (trees differ only in node - * ordering). These are used when allocating chunks, in an attempt to re-use - * address space. Depending on function, different tree orderings are needed, - * which is why there are two trees with the same contents. - */ -static extent_tree_t chunks_szad_mmap; -static extent_tree_t chunks_ad_mmap; -static extent_tree_t chunks_szad_dss; -static extent_tree_t chunks_ad_dss; - -rtree_t *chunks_rtree; - -/* Various chunk-related settings. */ -size_t chunksize; -size_t chunksize_mask; /* (chunksize - 1). */ -size_t chunk_npages; -size_t map_bias; -size_t arena_maxclass; /* Max size class for arenas. 
*/ - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static void *chunk_recycle(extent_tree_t *chunks_szad, - extent_tree_t *chunks_ad, size_t size, size_t alignment, bool base, - bool *zero); -static void chunk_record(extent_tree_t *chunks_szad, - extent_tree_t *chunks_ad, void *chunk, size_t size); - -/******************************************************************************/ - -static void * -chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, - size_t alignment, bool base, bool *zero) -{ - void *ret; - extent_node_t *node; - extent_node_t key; - size_t alloc_size, leadsize, trailsize; - bool zeroed; - - if (base) { - /* - * This function may need to call base_node_{,de}alloc(), but - * the current chunk allocation request is on behalf of the - * base allocator. Avoid deadlock (and if that weren't an - * issue, potential for infinite recursion) by returning NULL. - */ - return (NULL); - } - - alloc_size = size + alignment - chunksize; - /* Beware size_t wrap-around. */ - if (alloc_size < size) - return (NULL); - key.addr = NULL; - key.size = alloc_size; - malloc_mutex_lock(&chunks_mtx); - node = extent_tree_szad_nsearch(chunks_szad, &key); - if (node == NULL) { - malloc_mutex_unlock(&chunks_mtx); - return (NULL); - } - leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) - - (uintptr_t)node->addr; - assert(node->size >= leadsize + size); - trailsize = node->size - leadsize - size; - ret = (void *)((uintptr_t)node->addr + leadsize); - zeroed = node->zeroed; - if (zeroed) - *zero = true; - /* Remove node from the tree. */ - extent_tree_szad_remove(chunks_szad, node); - extent_tree_ad_remove(chunks_ad, node); - if (leadsize != 0) { - /* Insert the leading space as a smaller chunk. 
*/ - node->size = leadsize; - extent_tree_szad_insert(chunks_szad, node); - extent_tree_ad_insert(chunks_ad, node); - node = NULL; - } - if (trailsize != 0) { - /* Insert the trailing space as a smaller chunk. */ - if (node == NULL) { - /* - * An additional node is required, but - * base_node_alloc() can cause a new base chunk to be - * allocated. Drop chunks_mtx in order to avoid - * deadlock, and if node allocation fails, deallocate - * the result before returning an error. - */ - malloc_mutex_unlock(&chunks_mtx); - node = base_node_alloc(); - if (node == NULL) { - chunk_dealloc(ret, size, true); - return (NULL); - } - malloc_mutex_lock(&chunks_mtx); - } - node->addr = (void *)((uintptr_t)(ret) + size); - node->size = trailsize; - node->zeroed = zeroed; - extent_tree_szad_insert(chunks_szad, node); - extent_tree_ad_insert(chunks_ad, node); - node = NULL; - } - malloc_mutex_unlock(&chunks_mtx); - - if (node != NULL) - base_node_dealloc(node); - if (*zero) { - if (zeroed == false) - memset(ret, 0, size); - else if (config_debug) { - size_t i; - size_t *p = (size_t *)(uintptr_t)ret; - - VALGRIND_MAKE_MEM_DEFINED(ret, size); - for (i = 0; i < size / sizeof(size_t); i++) - assert(p[i] == 0); - } - } - return (ret); -} - -/* - * If the caller specifies (*zero == false), it is still possible to receive - * zeroed memory, in which case *zero is toggled to true. arena_chunk_alloc() - * takes advantage of this to avoid demanding zeroed chunks, but taking - * advantage of them if they are returned. - */ -void * -chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, - dss_prec_t dss_prec) -{ - void *ret; - - assert(size != 0); - assert((size & chunksize_mask) == 0); - assert(alignment != 0); - assert((alignment & chunksize_mask) == 0); - - /* "primary" dss. 
*/ - if (config_dss && dss_prec == dss_prec_primary) { - if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, - alignment, base, zero)) != NULL) - goto label_return; - if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL) - goto label_return; - } - /* mmap. */ - if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, size, - alignment, base, zero)) != NULL) - goto label_return; - if ((ret = chunk_alloc_mmap(size, alignment, zero)) != NULL) - goto label_return; - /* "secondary" dss. */ - if (config_dss && dss_prec == dss_prec_secondary) { - if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, - alignment, base, zero)) != NULL) - goto label_return; - if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL) - goto label_return; - } - - /* All strategies for allocation failed. */ - ret = NULL; -label_return: - if (ret != NULL) { - if (config_ivsalloc && base == false) { - if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { - chunk_dealloc(ret, size, true); - return (NULL); - } - } - if (config_stats || config_prof) { - bool gdump; - malloc_mutex_lock(&chunks_mtx); - if (config_stats) - stats_chunks.nchunks += (size / chunksize); - stats_chunks.curchunks += (size / chunksize); - if (stats_chunks.curchunks > stats_chunks.highchunks) { - stats_chunks.highchunks = - stats_chunks.curchunks; - if (config_prof) - gdump = true; - } else if (config_prof) - gdump = false; - malloc_mutex_unlock(&chunks_mtx); - if (config_prof && opt_prof && opt_prof_gdump && gdump) - prof_gdump(); - } - if (config_valgrind) - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - } - assert(CHUNK_ADDR2BASE(ret) == ret); - return (ret); -} - -static void -chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, - size_t size) -{ - bool unzeroed; - extent_node_t *xnode, *node, *prev, key; - - unzeroed = pages_purge(chunk, size); - VALGRIND_MAKE_MEM_NOACCESS(chunk, size); - - /* - * Allocate a node before acquiring chunks_mtx even though it might not - 
* be needed, because base_node_alloc() may cause a new base chunk to - * be allocated, which could cause deadlock if chunks_mtx were already - * held. - */ - xnode = base_node_alloc(); - - malloc_mutex_lock(&chunks_mtx); - key.addr = (void *)((uintptr_t)chunk + size); - node = extent_tree_ad_nsearch(chunks_ad, &key); - /* Try to coalesce forward. */ - if (node != NULL && node->addr == key.addr) { - /* - * Coalesce chunk with the following address range. This does - * not change the position within chunks_ad, so only - * remove/insert from/into chunks_szad. - */ - extent_tree_szad_remove(chunks_szad, node); - node->addr = chunk; - node->size += size; - node->zeroed = (node->zeroed && (unzeroed == false)); - extent_tree_szad_insert(chunks_szad, node); - if (xnode != NULL) - base_node_dealloc(xnode); - } else { - /* Coalescing forward failed, so insert a new node. */ - if (xnode == NULL) { - /* - * base_node_alloc() failed, which is an exceedingly - * unlikely failure. Leak chunk; its pages have - * already been purged, so this is only a virtual - * memory leak. - */ - malloc_mutex_unlock(&chunks_mtx); - return; - } - node = xnode; - node->addr = chunk; - node->size = size; - node->zeroed = (unzeroed == false); - extent_tree_ad_insert(chunks_ad, node); - extent_tree_szad_insert(chunks_szad, node); - } - - /* Try to coalesce backward. */ - prev = extent_tree_ad_prev(chunks_ad, node); - if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == - chunk) { - /* - * Coalesce chunk with the previous address range. This does - * not change the position within chunks_ad, so only - * remove/insert node from/into chunks_szad. 
- */ - extent_tree_szad_remove(chunks_szad, prev); - extent_tree_ad_remove(chunks_ad, prev); - - extent_tree_szad_remove(chunks_szad, node); - node->addr = prev->addr; - node->size += prev->size; - node->zeroed = (node->zeroed && prev->zeroed); - extent_tree_szad_insert(chunks_szad, node); - - base_node_dealloc(prev); - } - malloc_mutex_unlock(&chunks_mtx); -} - -void -chunk_unmap(void *chunk, size_t size) -{ - assert(chunk != NULL); - assert(CHUNK_ADDR2BASE(chunk) == chunk); - assert(size != 0); - assert((size & chunksize_mask) == 0); - - if (config_dss && chunk_in_dss(chunk)) - chunk_record(&chunks_szad_dss, &chunks_ad_dss, chunk, size); - else if (chunk_dealloc_mmap(chunk, size)) - chunk_record(&chunks_szad_mmap, &chunks_ad_mmap, chunk, size); -} - -void -chunk_dealloc(void *chunk, size_t size, bool unmap) -{ - - assert(chunk != NULL); - assert(CHUNK_ADDR2BASE(chunk) == chunk); - assert(size != 0); - assert((size & chunksize_mask) == 0); - - if (config_ivsalloc) - rtree_set(chunks_rtree, (uintptr_t)chunk, NULL); - if (config_stats || config_prof) { - malloc_mutex_lock(&chunks_mtx); - assert(stats_chunks.curchunks >= (size / chunksize)); - stats_chunks.curchunks -= (size / chunksize); - malloc_mutex_unlock(&chunks_mtx); - } - - if (unmap) - chunk_unmap(chunk, size); -} - -bool -chunk_boot(void) -{ - - /* Set variables according to the value of opt_lg_chunk. 
*/ - chunksize = (ZU(1) << opt_lg_chunk); - assert(chunksize >= PAGE); - chunksize_mask = chunksize - 1; - chunk_npages = (chunksize >> LG_PAGE); - - if (config_stats || config_prof) { - if (malloc_mutex_init(&chunks_mtx)) - return (true); - memset(&stats_chunks, 0, sizeof(chunk_stats_t)); - } - if (config_dss && chunk_dss_boot()) - return (true); - extent_tree_szad_new(&chunks_szad_mmap); - extent_tree_ad_new(&chunks_ad_mmap); - extent_tree_szad_new(&chunks_szad_dss); - extent_tree_ad_new(&chunks_ad_dss); - if (config_ivsalloc) { - chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - - opt_lg_chunk); - if (chunks_rtree == NULL) - return (true); - } - - return (false); -} - -void -chunk_prefork(void) -{ - - malloc_mutex_lock(&chunks_mtx); - if (config_ivsalloc) - rtree_prefork(chunks_rtree); - chunk_dss_prefork(); -} - -void -chunk_postfork_parent(void) -{ - - chunk_dss_postfork_parent(); - if (config_ivsalloc) - rtree_postfork_parent(chunks_rtree); - malloc_mutex_postfork_parent(&chunks_mtx); -} - -void -chunk_postfork_child(void) -{ - - chunk_dss_postfork_child(); - if (config_ivsalloc) - rtree_postfork_child(chunks_rtree); - malloc_mutex_postfork_child(&chunks_mtx); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/chunk_dss.c mariadb-5.5-5.5.40/extra/jemalloc/src/chunk_dss.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/chunk_dss.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/chunk_dss.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,197 +0,0 @@ -#define JEMALLOC_CHUNK_DSS_C_ -#include "jemalloc/internal/jemalloc_internal.h" -/******************************************************************************/ -/* Data. */ - -const char *dss_prec_names[] = { - "disabled", - "primary", - "secondary", - "N/A" -}; - -/* Current dss precedence default, used when creating new arenas. */ -static dss_prec_t dss_prec_default = DSS_PREC_DEFAULT; - -/* - * Protects sbrk() calls. 
This avoids malloc races among threads, though it - * does not protect against races with threads that call sbrk() directly. - */ -static malloc_mutex_t dss_mtx; - -/* Base address of the DSS. */ -static void *dss_base; -/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */ -static void *dss_prev; -/* Current upper limit on DSS addresses. */ -static void *dss_max; - -/******************************************************************************/ - -#ifndef JEMALLOC_HAVE_SBRK -static void * -sbrk(intptr_t increment) -{ - - not_implemented(); - - return (NULL); -} -#endif - -dss_prec_t -chunk_dss_prec_get(void) -{ - dss_prec_t ret; - - if (config_dss == false) - return (dss_prec_disabled); - malloc_mutex_lock(&dss_mtx); - ret = dss_prec_default; - malloc_mutex_unlock(&dss_mtx); - return (ret); -} - -bool -chunk_dss_prec_set(dss_prec_t dss_prec) -{ - - if (config_dss == false) - return (true); - malloc_mutex_lock(&dss_mtx); - dss_prec_default = dss_prec; - malloc_mutex_unlock(&dss_mtx); - return (false); -} - -void * -chunk_alloc_dss(size_t size, size_t alignment, bool *zero) -{ - void *ret; - - cassert(config_dss); - assert(size > 0 && (size & chunksize_mask) == 0); - assert(alignment > 0 && (alignment & chunksize_mask) == 0); - - /* - * sbrk() uses a signed increment argument, so take care not to - * interpret a huge allocation request as a negative increment. - */ - if ((intptr_t)size < 0) - return (NULL); - - malloc_mutex_lock(&dss_mtx); - if (dss_prev != (void *)-1) { - size_t gap_size, cpad_size; - void *cpad, *dss_next; - intptr_t incr; - - /* - * The loop is necessary to recover from races with other - * threads that are using the DSS for something other than - * malloc. - */ - do { - /* Get the current end of the DSS. */ - dss_max = sbrk(0); - /* - * Calculate how much padding is necessary to - * chunk-align the end of the DSS. 
- */ - gap_size = (chunksize - CHUNK_ADDR2OFFSET(dss_max)) & - chunksize_mask; - /* - * Compute how much chunk-aligned pad space (if any) is - * necessary to satisfy alignment. This space can be - * recycled for later use. - */ - cpad = (void *)((uintptr_t)dss_max + gap_size); - ret = (void *)ALIGNMENT_CEILING((uintptr_t)dss_max, - alignment); - cpad_size = (uintptr_t)ret - (uintptr_t)cpad; - dss_next = (void *)((uintptr_t)ret + size); - if ((uintptr_t)ret < (uintptr_t)dss_max || - (uintptr_t)dss_next < (uintptr_t)dss_max) { - /* Wrap-around. */ - malloc_mutex_unlock(&dss_mtx); - return (NULL); - } - incr = gap_size + cpad_size + size; - dss_prev = sbrk(incr); - if (dss_prev == dss_max) { - /* Success. */ - dss_max = dss_next; - malloc_mutex_unlock(&dss_mtx); - if (cpad_size != 0) - chunk_unmap(cpad, cpad_size); - if (*zero) { - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - memset(ret, 0, size); - } - return (ret); - } - } while (dss_prev != (void *)-1); - } - malloc_mutex_unlock(&dss_mtx); - - return (NULL); -} - -bool -chunk_in_dss(void *chunk) -{ - bool ret; - - cassert(config_dss); - - malloc_mutex_lock(&dss_mtx); - if ((uintptr_t)chunk >= (uintptr_t)dss_base - && (uintptr_t)chunk < (uintptr_t)dss_max) - ret = true; - else - ret = false; - malloc_mutex_unlock(&dss_mtx); - - return (ret); -} - -bool -chunk_dss_boot(void) -{ - - cassert(config_dss); - - if (malloc_mutex_init(&dss_mtx)) - return (true); - dss_base = sbrk(0); - dss_prev = dss_base; - dss_max = dss_base; - - return (false); -} - -void -chunk_dss_prefork(void) -{ - - if (config_dss) - malloc_mutex_prefork(&dss_mtx); -} - -void -chunk_dss_postfork_parent(void) -{ - - if (config_dss) - malloc_mutex_postfork_parent(&dss_mtx); -} - -void -chunk_dss_postfork_child(void) -{ - - if (config_dss) - malloc_mutex_postfork_child(&dss_mtx); -} - -/******************************************************************************/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/chunk_mmap.c 
mariadb-5.5-5.5.40/extra/jemalloc/src/chunk_mmap.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/chunk_mmap.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/chunk_mmap.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,210 +0,0 @@ -#define JEMALLOC_CHUNK_MMAP_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static void *pages_map(void *addr, size_t size); -static void pages_unmap(void *addr, size_t size); -static void *chunk_alloc_mmap_slow(size_t size, size_t alignment, - bool *zero); - -/******************************************************************************/ - -static void * -pages_map(void *addr, size_t size) -{ - void *ret; - - assert(size != 0); - -#ifdef _WIN32 - /* - * If VirtualAlloc can't allocate at the given address when one is - * given, it fails and returns NULL. - */ - ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE, - PAGE_READWRITE); -#else - /* - * We don't use MAP_FIXED here, because it can cause the *replacement* - * of existing mappings, and we only want to create new mappings. - */ - ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, - -1, 0); - assert(ret != NULL); - - if (ret == MAP_FAILED) - ret = NULL; - else if (addr != NULL && ret != addr) { - /* - * We succeeded in mapping memory, but not in the right place. 
- */ - if (munmap(ret, size) == -1) { - char buf[BUFERROR_BUF]; - - buferror(buf, sizeof(buf)); - malloc_printf(": Error in " -#ifdef _WIN32 - "VirtualFree" -#else - "munmap" -#endif - "(): %s\n", buf); - if (opt_abort) - abort(); - } -} - -static void * -pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size) -{ - void *ret = (void *)((uintptr_t)addr + leadsize); - - assert(alloc_size >= leadsize + size); -#ifdef _WIN32 - { - void *new_addr; - - pages_unmap(addr, alloc_size); - new_addr = pages_map(ret, size); - if (new_addr == ret) - return (ret); - if (new_addr) - pages_unmap(new_addr, size); - return (NULL); - } -#else - { - size_t trailsize = alloc_size - leadsize - size; - - if (leadsize != 0) - pages_unmap(addr, leadsize); - if (trailsize != 0) - pages_unmap((void *)((uintptr_t)ret + size), trailsize); - return (ret); - } -#endif -} - -bool -pages_purge(void *addr, size_t length) -{ - bool unzeroed; - -#ifdef _WIN32 - VirtualAlloc(addr, length, MEM_RESET, PAGE_READWRITE); - unzeroed = true; -#else -# ifdef JEMALLOC_PURGE_MADVISE_DONTNEED -# define JEMALLOC_MADV_PURGE MADV_DONTNEED -# define JEMALLOC_MADV_ZEROS true -# elif defined(JEMALLOC_PURGE_MADVISE_FREE) -# define JEMALLOC_MADV_PURGE MADV_FREE -# define JEMALLOC_MADV_ZEROS false -# else -# error "No method defined for purging unused dirty pages." -# endif - int err = madvise(addr, length, JEMALLOC_MADV_PURGE); - unzeroed = (JEMALLOC_MADV_ZEROS == false || err != 0); -# undef JEMALLOC_MADV_PURGE -# undef JEMALLOC_MADV_ZEROS -#endif - return (unzeroed); -} - -static void * -chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero) -{ - void *ret, *pages; - size_t alloc_size, leadsize; - - alloc_size = size + alignment - PAGE; - /* Beware size_t wrap-around. 
*/ - if (alloc_size < size) - return (NULL); - do { - pages = pages_map(NULL, alloc_size); - if (pages == NULL) - return (NULL); - leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) - - (uintptr_t)pages; - ret = pages_trim(pages, alloc_size, leadsize, size); - } while (ret == NULL); - - assert(ret != NULL); - *zero = true; - return (ret); -} - -void * -chunk_alloc_mmap(size_t size, size_t alignment, bool *zero) -{ - void *ret; - size_t offset; - - /* - * Ideally, there would be a way to specify alignment to mmap() (like - * NetBSD has), but in the absence of such a feature, we have to work - * hard to efficiently create aligned mappings. The reliable, but - * slow method is to create a mapping that is over-sized, then trim the - * excess. However, that always results in one or two calls to - * pages_unmap(). - * - * Optimistically try mapping precisely the right amount before falling - * back to the slow method, with the expectation that the optimistic - * approach works most of the time. 
- */ - - assert(alignment != 0); - assert((alignment & chunksize_mask) == 0); - - ret = pages_map(NULL, size); - if (ret == NULL) - return (NULL); - offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); - if (offset != 0) { - pages_unmap(ret, size); - return (chunk_alloc_mmap_slow(size, alignment, zero)); - } - - assert(ret != NULL); - *zero = true; - return (ret); -} - -bool -chunk_dealloc_mmap(void *chunk, size_t size) -{ - - if (config_munmap) - pages_unmap(chunk, size); - - return (config_munmap == false); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/ckh.c mariadb-5.5-5.5.40/extra/jemalloc/src/ckh.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/ckh.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/ckh.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,563 +0,0 @@ -/* - ******************************************************************************* - * Implementation of (2^1+,2) cuckoo hashing, where 2^1+ indicates that each - * hash bucket contains 2^n cells, for n >= 1, and 2 indicates that two hash - * functions are employed. The original cuckoo hashing algorithm was described - * in: - * - * Pagh, R., F.F. Rodler (2004) Cuckoo Hashing. Journal of Algorithms - * 51(2):122-144. - * - * Generalization of cuckoo hashing was discussed in: - * - * Erlingsson, U., M. Manasse, F. McSherry (2006) A cool and practical - * alternative to traditional hash tables. In Proceedings of the 7th - * Workshop on Distributed Data and Structures (WDAS'06), Santa Clara, CA, - * January 2006. - * - * This implementation uses precisely two hash functions because that is the - * fewest that can work, and supporting multiple hashes is an implementation - * burden. Here is a reproduction of Figure 1 from Erlingsson et al. 
(2006) - * that shows approximate expected maximum load factors for various - * configurations: - * - * | #cells/bucket | - * #hashes | 1 | 2 | 4 | 8 | - * --------+-------+-------+-------+-------+ - * 1 | 0.006 | 0.006 | 0.03 | 0.12 | - * 2 | 0.49 | 0.86 |>0.93< |>0.96< | - * 3 | 0.91 | 0.97 | 0.98 | 0.999 | - * 4 | 0.97 | 0.99 | 0.999 | | - * - * The number of cells per bucket is chosen such that a bucket fits in one cache - * line. So, on 32- and 64-bit systems, we use (8,2) and (4,2) cuckoo hashing, - * respectively. - * - ******************************************************************************/ -#define JEMALLOC_CKH_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static bool ckh_grow(ckh_t *ckh); -static void ckh_shrink(ckh_t *ckh); - -/******************************************************************************/ - -/* - * Search bucket for key and return the cell number if found; SIZE_T_MAX - * otherwise. - */ -JEMALLOC_INLINE size_t -ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) -{ - ckhc_t *cell; - unsigned i; - - for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { - cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; - if (cell->key != NULL && ckh->keycomp(key, cell->key)) - return ((bucket << LG_CKH_BUCKET_CELLS) + i); - } - - return (SIZE_T_MAX); -} - -/* - * Search table for key and return cell number if found; SIZE_T_MAX otherwise. - */ -JEMALLOC_INLINE size_t -ckh_isearch(ckh_t *ckh, const void *key) -{ - size_t hashes[2], bucket, cell; - - assert(ckh != NULL); - - ckh->hash(key, hashes); - - /* Search primary bucket. */ - bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1); - cell = ckh_bucket_search(ckh, bucket, key); - if (cell != SIZE_T_MAX) - return (cell); - - /* Search secondary bucket. 
*/ - bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1); - cell = ckh_bucket_search(ckh, bucket, key); - return (cell); -} - -JEMALLOC_INLINE bool -ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, - const void *data) -{ - ckhc_t *cell; - unsigned offset, i; - - /* - * Cycle through the cells in the bucket, starting at a random position. - * The randomness avoids worst-case search overhead as buckets fill up. - */ - prng32(offset, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C); - for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { - cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + - ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; - if (cell->key == NULL) { - cell->key = key; - cell->data = data; - ckh->count++; - return (false); - } - } - - return (true); -} - -/* - * No space is available in bucket. Randomly evict an item, then try to find an - * alternate location for that item. Iteratively repeat this - * eviction/relocation procedure until either success or detection of an - * eviction/relocation bucket cycle. - */ -JEMALLOC_INLINE bool -ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, - void const **argdata) -{ - const void *key, *data, *tkey, *tdata; - ckhc_t *cell; - size_t hashes[2], bucket, tbucket; - unsigned i; - - bucket = argbucket; - key = *argkey; - data = *argdata; - while (true) { - /* - * Choose a random item within the bucket to evict. This is - * critical to correct function, because without (eventually) - * evicting all items within a bucket during iteration, it - * would be possible to get stuck in an infinite loop if there - * were an item for which both hashes indicated the same - * bucket. - */ - prng32(i, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C); - cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; - assert(cell->key != NULL); - - /* Swap cell->{key,data} and {key,data} (evict). 
*/ - tkey = cell->key; tdata = cell->data; - cell->key = key; cell->data = data; - key = tkey; data = tdata; - -#ifdef CKH_COUNT - ckh->nrelocs++; -#endif - - /* Find the alternate bucket for the evicted item. */ - ckh->hash(key, hashes); - tbucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1); - if (tbucket == bucket) { - tbucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - - 1); - /* - * It may be that (tbucket == bucket) still, if the - * item's hashes both indicate this bucket. However, - * we are guaranteed to eventually escape this bucket - * during iteration, assuming pseudo-random item - * selection (true randomness would make infinite - * looping a remote possibility). The reason we can - * never get trapped forever is that there are two - * cases: - * - * 1) This bucket == argbucket, so we will quickly - * detect an eviction cycle and terminate. - * 2) An item was evicted to this bucket from another, - * which means that at least one item in this bucket - * has hashes that indicate distinct buckets. - */ - } - /* Check for a cycle. */ - if (tbucket == argbucket) { - *argkey = key; - *argdata = data; - return (true); - } - - bucket = tbucket; - if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) - return (false); - } -} - -JEMALLOC_INLINE bool -ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) -{ - size_t hashes[2], bucket; - const void *key = *argkey; - const void *data = *argdata; - - ckh->hash(key, hashes); - - /* Try to insert in primary bucket. */ - bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1); - if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) - return (false); - - /* Try to insert in secondary bucket. */ - bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1); - if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) - return (false); - - /* - * Try to find a place for this item via iterative eviction/relocation. 
- */ - return (ckh_evict_reloc_insert(ckh, bucket, argkey, argdata)); -} - -/* - * Try to rebuild the hash table from scratch by inserting all items from the - * old table into the new. - */ -JEMALLOC_INLINE bool -ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) -{ - size_t count, i, nins; - const void *key, *data; - - count = ckh->count; - ckh->count = 0; - for (i = nins = 0; nins < count; i++) { - if (aTab[i].key != NULL) { - key = aTab[i].key; - data = aTab[i].data; - if (ckh_try_insert(ckh, &key, &data)) { - ckh->count = count; - return (true); - } - nins++; - } - } - - return (false); -} - -static bool -ckh_grow(ckh_t *ckh) -{ - bool ret; - ckhc_t *tab, *ttab; - size_t lg_curcells; - unsigned lg_prevbuckets; - -#ifdef CKH_COUNT - ckh->ngrows++; -#endif - - /* - * It is possible (though unlikely, given well behaved hashes) that the - * table will have to be doubled more than once in order to create a - * usable table. - */ - lg_prevbuckets = ckh->lg_curbuckets; - lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS; - while (true) { - size_t usize; - - lg_curcells++; - usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (usize == 0) { - ret = true; - goto label_return; - } - tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); - if (tab == NULL) { - ret = true; - goto label_return; - } - /* Swap in new table. */ - ttab = ckh->tab; - ckh->tab = tab; - tab = ttab; - ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; - - if (ckh_rebuild(ckh, tab) == false) { - idalloc(tab); - break; - } - - /* Rebuilding failed, so back out partially rebuilt table. */ - idalloc(ckh->tab); - ckh->tab = tab; - ckh->lg_curbuckets = lg_prevbuckets; - } - - ret = false; -label_return: - return (ret); -} - -static void -ckh_shrink(ckh_t *ckh) -{ - ckhc_t *tab, *ttab; - size_t lg_curcells, usize; - unsigned lg_prevbuckets; - - /* - * It is possible (though unlikely, given well behaved hashes) that the - * table rebuild will fail. 
- */ - lg_prevbuckets = ckh->lg_curbuckets; - lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; - usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (usize == 0) - return; - tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); - if (tab == NULL) { - /* - * An OOM error isn't worth propagating, since it doesn't - * prevent this or future operations from proceeding. - */ - return; - } - /* Swap in new table. */ - ttab = ckh->tab; - ckh->tab = tab; - tab = ttab; - ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; - - if (ckh_rebuild(ckh, tab) == false) { - idalloc(tab); -#ifdef CKH_COUNT - ckh->nshrinks++; -#endif - return; - } - - /* Rebuilding failed, so back out partially rebuilt table. */ - idalloc(ckh->tab); - ckh->tab = tab; - ckh->lg_curbuckets = lg_prevbuckets; -#ifdef CKH_COUNT - ckh->nshrinkfails++; -#endif -} - -bool -ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) -{ - bool ret; - size_t mincells, usize; - unsigned lg_mincells; - - assert(minitems > 0); - assert(hash != NULL); - assert(keycomp != NULL); - -#ifdef CKH_COUNT - ckh->ngrows = 0; - ckh->nshrinks = 0; - ckh->nshrinkfails = 0; - ckh->ninserts = 0; - ckh->nrelocs = 0; -#endif - ckh->prng_state = 42; /* Value doesn't really matter. */ - ckh->count = 0; - - /* - * Find the minimum power of 2 that is large enough to fit aBaseCount - * entries. We are using (2+,2) cuckoo hashing, which has an expected - * maximum load factor of at least ~0.86, so 0.75 is a conservative load - * factor that will typically allow 2^aLgMinItems to fit without ever - * growing the table. - */ - assert(LG_CKH_BUCKET_CELLS > 0); - mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2; - for (lg_mincells = LG_CKH_BUCKET_CELLS; - (ZU(1) << lg_mincells) < mincells; - lg_mincells++) - ; /* Do nothing. 
*/ - ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS; - ckh->lg_curbuckets = lg_mincells - LG_CKH_BUCKET_CELLS; - ckh->hash = hash; - ckh->keycomp = keycomp; - - usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE); - if (usize == 0) { - ret = true; - goto label_return; - } - ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); - if (ckh->tab == NULL) { - ret = true; - goto label_return; - } - - ret = false; -label_return: - return (ret); -} - -void -ckh_delete(ckh_t *ckh) -{ - - assert(ckh != NULL); - -#ifdef CKH_VERBOSE - malloc_printf( - "%s(%p): ngrows: %"PRIu64", nshrinks: %"PRIu64"," - " nshrinkfails: %"PRIu64", ninserts: %"PRIu64"," - " nrelocs: %"PRIu64"\n", __func__, ckh, - (unsigned long long)ckh->ngrows, - (unsigned long long)ckh->nshrinks, - (unsigned long long)ckh->nshrinkfails, - (unsigned long long)ckh->ninserts, - (unsigned long long)ckh->nrelocs); -#endif - - idalloc(ckh->tab); - if (config_debug) - memset(ckh, 0x5a, sizeof(ckh_t)); -} - -size_t -ckh_count(ckh_t *ckh) -{ - - assert(ckh != NULL); - - return (ckh->count); -} - -bool -ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) -{ - size_t i, ncells; - - for (i = *tabind, ncells = (ZU(1) << (ckh->lg_curbuckets + - LG_CKH_BUCKET_CELLS)); i < ncells; i++) { - if (ckh->tab[i].key != NULL) { - if (key != NULL) - *key = (void *)ckh->tab[i].key; - if (data != NULL) - *data = (void *)ckh->tab[i].data; - *tabind = i + 1; - return (false); - } - } - - return (true); -} - -bool -ckh_insert(ckh_t *ckh, const void *key, const void *data) -{ - bool ret; - - assert(ckh != NULL); - assert(ckh_search(ckh, key, NULL, NULL)); - -#ifdef CKH_COUNT - ckh->ninserts++; -#endif - - while (ckh_try_insert(ckh, &key, &data)) { - if (ckh_grow(ckh)) { - ret = true; - goto label_return; - } - } - - ret = false; -label_return: - return (ret); -} - -bool -ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data) -{ - size_t cell; - - assert(ckh != NULL); - - cell = ckh_isearch(ckh, 
searchkey); - if (cell != SIZE_T_MAX) { - if (key != NULL) - *key = (void *)ckh->tab[cell].key; - if (data != NULL) - *data = (void *)ckh->tab[cell].data; - ckh->tab[cell].key = NULL; - ckh->tab[cell].data = NULL; /* Not necessary. */ - - ckh->count--; - /* Try to halve the table if it is less than 1/4 full. */ - if (ckh->count < (ZU(1) << (ckh->lg_curbuckets - + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets - > ckh->lg_minbuckets) { - /* Ignore error due to OOM. */ - ckh_shrink(ckh); - } - - return (false); - } - - return (true); -} - -bool -ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) -{ - size_t cell; - - assert(ckh != NULL); - - cell = ckh_isearch(ckh, searchkey); - if (cell != SIZE_T_MAX) { - if (key != NULL) - *key = (void *)ckh->tab[cell].key; - if (data != NULL) - *data = (void *)ckh->tab[cell].data; - return (false); - } - - return (true); -} - -void -ckh_string_hash(const void *key, size_t r_hash[2]) -{ - - hash(key, strlen((const char *)key), 0x94122f33U, r_hash); -} - -bool -ckh_string_keycomp(const void *k1, const void *k2) -{ - - assert(k1 != NULL); - assert(k2 != NULL); - - return (strcmp((char *)k1, (char *)k2) ? false : true); -} - -void -ckh_pointer_hash(const void *key, size_t r_hash[2]) -{ - union { - const void *v; - size_t i; - } u; - - assert(sizeof(u.v) == sizeof(u.i)); - u.v = key; - hash(&u.i, sizeof(u.i), 0xd983396eU, r_hash); -} - -bool -ckh_pointer_keycomp(const void *k1, const void *k2) -{ - - return ((k1 == k2) ? true : false); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/ctl.c mariadb-5.5-5.5.40/extra/jemalloc/src/ctl.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/ctl.c 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/ctl.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,1673 +0,0 @@ -#define JEMALLOC_CTL_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Data. 
*/ - -/* - * ctl_mtx protects the following: - * - ctl_stats.* - * - opt_prof_active - */ -static malloc_mutex_t ctl_mtx; -static bool ctl_initialized; -static uint64_t ctl_epoch; -static ctl_stats_t ctl_stats; - -/******************************************************************************/ -/* Helpers for named and indexed nodes. */ - -static inline const ctl_named_node_t * -ctl_named_node(const ctl_node_t *node) -{ - - return ((node->named) ? (const ctl_named_node_t *)node : NULL); -} - -static inline const ctl_named_node_t * -ctl_named_children(const ctl_named_node_t *node, int index) -{ - const ctl_named_node_t *children = ctl_named_node(node->children); - - return (children ? &children[index] : NULL); -} - -static inline const ctl_indexed_node_t * -ctl_indexed_node(const ctl_node_t *node) -{ - - return ((node->named == false) ? (const ctl_indexed_node_t *)node : - NULL); -} - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ - -#define CTL_PROTO(n) \ -static int n##_ctl(const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen); - -#define INDEX_PROTO(n) \ -static const ctl_named_node_t *n##_index(const size_t *mib, \ - size_t miblen, size_t i); - -static bool ctl_arena_init(ctl_arena_stats_t *astats); -static void ctl_arena_clear(ctl_arena_stats_t *astats); -static void ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, - arena_t *arena); -static void ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, - ctl_arena_stats_t *astats); -static void ctl_arena_refresh(arena_t *arena, unsigned i); -static bool ctl_grow(void); -static void ctl_refresh(void); -static bool ctl_init(void); -static int ctl_lookup(const char *name, ctl_node_t const **nodesp, - size_t *mibp, size_t *depthp); - -CTL_PROTO(version) -CTL_PROTO(epoch) -CTL_PROTO(thread_tcache_enabled) -CTL_PROTO(thread_tcache_flush) -CTL_PROTO(thread_arena) -CTL_PROTO(thread_allocated) -CTL_PROTO(thread_allocatedp) -CTL_PROTO(thread_deallocated) -CTL_PROTO(thread_deallocatedp) -CTL_PROTO(config_debug) -CTL_PROTO(config_dss) -CTL_PROTO(config_fill) -CTL_PROTO(config_lazy_lock) -CTL_PROTO(config_mremap) -CTL_PROTO(config_munmap) -CTL_PROTO(config_prof) -CTL_PROTO(config_prof_libgcc) -CTL_PROTO(config_prof_libunwind) -CTL_PROTO(config_stats) -CTL_PROTO(config_tcache) -CTL_PROTO(config_tls) -CTL_PROTO(config_utrace) -CTL_PROTO(config_valgrind) -CTL_PROTO(config_xmalloc) -CTL_PROTO(opt_abort) -CTL_PROTO(opt_dss) -CTL_PROTO(opt_lg_chunk) -CTL_PROTO(opt_narenas) -CTL_PROTO(opt_lg_dirty_mult) -CTL_PROTO(opt_stats_print) -CTL_PROTO(opt_junk) -CTL_PROTO(opt_zero) -CTL_PROTO(opt_quarantine) -CTL_PROTO(opt_redzone) -CTL_PROTO(opt_utrace) -CTL_PROTO(opt_valgrind) -CTL_PROTO(opt_xmalloc) -CTL_PROTO(opt_tcache) -CTL_PROTO(opt_lg_tcache_max) -CTL_PROTO(opt_prof) -CTL_PROTO(opt_prof_prefix) -CTL_PROTO(opt_prof_active) -CTL_PROTO(opt_lg_prof_sample) -CTL_PROTO(opt_lg_prof_interval) -CTL_PROTO(opt_prof_gdump) 
-CTL_PROTO(opt_prof_final) -CTL_PROTO(opt_prof_leak) -CTL_PROTO(opt_prof_accum) -CTL_PROTO(arena_i_purge) -static void arena_purge(unsigned arena_ind); -CTL_PROTO(arena_i_dss) -INDEX_PROTO(arena_i) -CTL_PROTO(arenas_bin_i_size) -CTL_PROTO(arenas_bin_i_nregs) -CTL_PROTO(arenas_bin_i_run_size) -INDEX_PROTO(arenas_bin_i) -CTL_PROTO(arenas_lrun_i_size) -INDEX_PROTO(arenas_lrun_i) -CTL_PROTO(arenas_narenas) -CTL_PROTO(arenas_initialized) -CTL_PROTO(arenas_quantum) -CTL_PROTO(arenas_page) -CTL_PROTO(arenas_tcache_max) -CTL_PROTO(arenas_nbins) -CTL_PROTO(arenas_nhbins) -CTL_PROTO(arenas_nlruns) -CTL_PROTO(arenas_purge) -CTL_PROTO(arenas_extend) -CTL_PROTO(prof_active) -CTL_PROTO(prof_dump) -CTL_PROTO(prof_interval) -CTL_PROTO(stats_chunks_current) -CTL_PROTO(stats_chunks_total) -CTL_PROTO(stats_chunks_high) -CTL_PROTO(stats_huge_allocated) -CTL_PROTO(stats_huge_nmalloc) -CTL_PROTO(stats_huge_ndalloc) -CTL_PROTO(stats_arenas_i_small_allocated) -CTL_PROTO(stats_arenas_i_small_nmalloc) -CTL_PROTO(stats_arenas_i_small_ndalloc) -CTL_PROTO(stats_arenas_i_small_nrequests) -CTL_PROTO(stats_arenas_i_large_allocated) -CTL_PROTO(stats_arenas_i_large_nmalloc) -CTL_PROTO(stats_arenas_i_large_ndalloc) -CTL_PROTO(stats_arenas_i_large_nrequests) -CTL_PROTO(stats_arenas_i_bins_j_allocated) -CTL_PROTO(stats_arenas_i_bins_j_nmalloc) -CTL_PROTO(stats_arenas_i_bins_j_ndalloc) -CTL_PROTO(stats_arenas_i_bins_j_nrequests) -CTL_PROTO(stats_arenas_i_bins_j_nfills) -CTL_PROTO(stats_arenas_i_bins_j_nflushes) -CTL_PROTO(stats_arenas_i_bins_j_nruns) -CTL_PROTO(stats_arenas_i_bins_j_nreruns) -CTL_PROTO(stats_arenas_i_bins_j_curruns) -INDEX_PROTO(stats_arenas_i_bins_j) -CTL_PROTO(stats_arenas_i_lruns_j_nmalloc) -CTL_PROTO(stats_arenas_i_lruns_j_ndalloc) -CTL_PROTO(stats_arenas_i_lruns_j_nrequests) -CTL_PROTO(stats_arenas_i_lruns_j_curruns) -INDEX_PROTO(stats_arenas_i_lruns_j) -CTL_PROTO(stats_arenas_i_nthreads) -CTL_PROTO(stats_arenas_i_dss) -CTL_PROTO(stats_arenas_i_pactive) 
-CTL_PROTO(stats_arenas_i_pdirty) -CTL_PROTO(stats_arenas_i_mapped) -CTL_PROTO(stats_arenas_i_npurge) -CTL_PROTO(stats_arenas_i_nmadvise) -CTL_PROTO(stats_arenas_i_purged) -INDEX_PROTO(stats_arenas_i) -CTL_PROTO(stats_cactive) -CTL_PROTO(stats_allocated) -CTL_PROTO(stats_active) -CTL_PROTO(stats_mapped) - -/******************************************************************************/ -/* mallctl tree. */ - -/* Maximum tree depth. */ -#define CTL_MAX_DEPTH 6 - -#define NAME(n) {true}, n -#define CHILD(t, c) \ - sizeof(c##_node) / sizeof(ctl_##t##_node_t), \ - (ctl_node_t *)c##_node, \ - NULL -#define CTL(c) 0, NULL, c##_ctl - -/* - * Only handles internal indexed nodes, since there are currently no external - * ones. - */ -#define INDEX(i) {false}, i##_index - -static const ctl_named_node_t tcache_node[] = { - {NAME("enabled"), CTL(thread_tcache_enabled)}, - {NAME("flush"), CTL(thread_tcache_flush)} -}; - -static const ctl_named_node_t thread_node[] = { - {NAME("arena"), CTL(thread_arena)}, - {NAME("allocated"), CTL(thread_allocated)}, - {NAME("allocatedp"), CTL(thread_allocatedp)}, - {NAME("deallocated"), CTL(thread_deallocated)}, - {NAME("deallocatedp"), CTL(thread_deallocatedp)}, - {NAME("tcache"), CHILD(named, tcache)} -}; - -static const ctl_named_node_t config_node[] = { - {NAME("debug"), CTL(config_debug)}, - {NAME("dss"), CTL(config_dss)}, - {NAME("fill"), CTL(config_fill)}, - {NAME("lazy_lock"), CTL(config_lazy_lock)}, - {NAME("mremap"), CTL(config_mremap)}, - {NAME("munmap"), CTL(config_munmap)}, - {NAME("prof"), CTL(config_prof)}, - {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, - {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, - {NAME("stats"), CTL(config_stats)}, - {NAME("tcache"), CTL(config_tcache)}, - {NAME("tls"), CTL(config_tls)}, - {NAME("utrace"), CTL(config_utrace)}, - {NAME("valgrind"), CTL(config_valgrind)}, - {NAME("xmalloc"), CTL(config_xmalloc)} -}; - -static const ctl_named_node_t opt_node[] = { - {NAME("abort"), CTL(opt_abort)}, 
- {NAME("dss"), CTL(opt_dss)}, - {NAME("lg_chunk"), CTL(opt_lg_chunk)}, - {NAME("narenas"), CTL(opt_narenas)}, - {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, - {NAME("stats_print"), CTL(opt_stats_print)}, - {NAME("junk"), CTL(opt_junk)}, - {NAME("zero"), CTL(opt_zero)}, - {NAME("quarantine"), CTL(opt_quarantine)}, - {NAME("redzone"), CTL(opt_redzone)}, - {NAME("utrace"), CTL(opt_utrace)}, - {NAME("valgrind"), CTL(opt_valgrind)}, - {NAME("xmalloc"), CTL(opt_xmalloc)}, - {NAME("tcache"), CTL(opt_tcache)}, - {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, - {NAME("prof"), CTL(opt_prof)}, - {NAME("prof_prefix"), CTL(opt_prof_prefix)}, - {NAME("prof_active"), CTL(opt_prof_active)}, - {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, - {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, - {NAME("prof_gdump"), CTL(opt_prof_gdump)}, - {NAME("prof_final"), CTL(opt_prof_final)}, - {NAME("prof_leak"), CTL(opt_prof_leak)}, - {NAME("prof_accum"), CTL(opt_prof_accum)} -}; - -static const ctl_named_node_t arena_i_node[] = { - {NAME("purge"), CTL(arena_i_purge)}, - {NAME("dss"), CTL(arena_i_dss)} -}; -static const ctl_named_node_t super_arena_i_node[] = { - {NAME(""), CHILD(named, arena_i)} -}; - -static const ctl_indexed_node_t arena_node[] = { - {INDEX(arena_i)} -}; - -static const ctl_named_node_t arenas_bin_i_node[] = { - {NAME("size"), CTL(arenas_bin_i_size)}, - {NAME("nregs"), CTL(arenas_bin_i_nregs)}, - {NAME("run_size"), CTL(arenas_bin_i_run_size)} -}; -static const ctl_named_node_t super_arenas_bin_i_node[] = { - {NAME(""), CHILD(named, arenas_bin_i)} -}; - -static const ctl_indexed_node_t arenas_bin_node[] = { - {INDEX(arenas_bin_i)} -}; - -static const ctl_named_node_t arenas_lrun_i_node[] = { - {NAME("size"), CTL(arenas_lrun_i_size)} -}; -static const ctl_named_node_t super_arenas_lrun_i_node[] = { - {NAME(""), CHILD(named, arenas_lrun_i)} -}; - -static const ctl_indexed_node_t arenas_lrun_node[] = { - {INDEX(arenas_lrun_i)} -}; - -static const 
ctl_named_node_t arenas_node[] = { - {NAME("narenas"), CTL(arenas_narenas)}, - {NAME("initialized"), CTL(arenas_initialized)}, - {NAME("quantum"), CTL(arenas_quantum)}, - {NAME("page"), CTL(arenas_page)}, - {NAME("tcache_max"), CTL(arenas_tcache_max)}, - {NAME("nbins"), CTL(arenas_nbins)}, - {NAME("nhbins"), CTL(arenas_nhbins)}, - {NAME("bin"), CHILD(indexed, arenas_bin)}, - {NAME("nlruns"), CTL(arenas_nlruns)}, - {NAME("lrun"), CHILD(indexed, arenas_lrun)}, - {NAME("purge"), CTL(arenas_purge)}, - {NAME("extend"), CTL(arenas_extend)} -}; - -static const ctl_named_node_t prof_node[] = { - {NAME("active"), CTL(prof_active)}, - {NAME("dump"), CTL(prof_dump)}, - {NAME("interval"), CTL(prof_interval)} -}; - -static const ctl_named_node_t stats_chunks_node[] = { - {NAME("current"), CTL(stats_chunks_current)}, - {NAME("total"), CTL(stats_chunks_total)}, - {NAME("high"), CTL(stats_chunks_high)} -}; - -static const ctl_named_node_t stats_huge_node[] = { - {NAME("allocated"), CTL(stats_huge_allocated)}, - {NAME("nmalloc"), CTL(stats_huge_nmalloc)}, - {NAME("ndalloc"), CTL(stats_huge_ndalloc)} -}; - -static const ctl_named_node_t stats_arenas_i_small_node[] = { - {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, - {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)} -}; - -static const ctl_named_node_t stats_arenas_i_large_node[] = { - {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, - {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} -}; - -static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { - {NAME("allocated"), CTL(stats_arenas_i_bins_j_allocated)}, - {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, - {NAME("nrequests"), 
CTL(stats_arenas_i_bins_j_nrequests)}, - {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)}, - {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)}, - {NAME("nruns"), CTL(stats_arenas_i_bins_j_nruns)}, - {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)}, - {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)} -}; -static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = { - {NAME(""), CHILD(named, stats_arenas_i_bins_j)} -}; - -static const ctl_indexed_node_t stats_arenas_i_bins_node[] = { - {INDEX(stats_arenas_i_bins_j)} -}; - -static const ctl_named_node_t stats_arenas_i_lruns_j_node[] = { - {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)}, - {NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)} -}; -static const ctl_named_node_t super_stats_arenas_i_lruns_j_node[] = { - {NAME(""), CHILD(named, stats_arenas_i_lruns_j)} -}; - -static const ctl_indexed_node_t stats_arenas_i_lruns_node[] = { - {INDEX(stats_arenas_i_lruns_j)} -}; - -static const ctl_named_node_t stats_arenas_i_node[] = { - {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, - {NAME("dss"), CTL(stats_arenas_i_dss)}, - {NAME("pactive"), CTL(stats_arenas_i_pactive)}, - {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, - {NAME("mapped"), CTL(stats_arenas_i_mapped)}, - {NAME("npurge"), CTL(stats_arenas_i_npurge)}, - {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, - {NAME("purged"), CTL(stats_arenas_i_purged)}, - {NAME("small"), CHILD(named, stats_arenas_i_small)}, - {NAME("large"), CHILD(named, stats_arenas_i_large)}, - {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, - {NAME("lruns"), CHILD(indexed, stats_arenas_i_lruns)} -}; -static const ctl_named_node_t super_stats_arenas_i_node[] = { - {NAME(""), CHILD(named, stats_arenas_i)} -}; - -static const ctl_indexed_node_t stats_arenas_node[] = { - {INDEX(stats_arenas_i)} -}; - -static const ctl_named_node_t 
stats_node[] = { - {NAME("cactive"), CTL(stats_cactive)}, - {NAME("allocated"), CTL(stats_allocated)}, - {NAME("active"), CTL(stats_active)}, - {NAME("mapped"), CTL(stats_mapped)}, - {NAME("chunks"), CHILD(named, stats_chunks)}, - {NAME("huge"), CHILD(named, stats_huge)}, - {NAME("arenas"), CHILD(indexed, stats_arenas)} -}; - -static const ctl_named_node_t root_node[] = { - {NAME("version"), CTL(version)}, - {NAME("epoch"), CTL(epoch)}, - {NAME("thread"), CHILD(named, thread)}, - {NAME("config"), CHILD(named, config)}, - {NAME("opt"), CHILD(named, opt)}, - {NAME("arena"), CHILD(indexed, arena)}, - {NAME("arenas"), CHILD(named, arenas)}, - {NAME("prof"), CHILD(named, prof)}, - {NAME("stats"), CHILD(named, stats)} -}; -static const ctl_named_node_t super_root_node[] = { - {NAME(""), CHILD(named, root)} -}; - -#undef NAME -#undef CHILD -#undef CTL -#undef INDEX - -/******************************************************************************/ - -static bool -ctl_arena_init(ctl_arena_stats_t *astats) -{ - - if (astats->lstats == NULL) { - astats->lstats = (malloc_large_stats_t *)base_alloc(nlclasses * - sizeof(malloc_large_stats_t)); - if (astats->lstats == NULL) - return (true); - } - - return (false); -} - -static void -ctl_arena_clear(ctl_arena_stats_t *astats) -{ - - astats->dss = dss_prec_names[dss_prec_limit]; - astats->pactive = 0; - astats->pdirty = 0; - if (config_stats) { - memset(&astats->astats, 0, sizeof(arena_stats_t)); - astats->allocated_small = 0; - astats->nmalloc_small = 0; - astats->ndalloc_small = 0; - astats->nrequests_small = 0; - memset(astats->bstats, 0, NBINS * sizeof(malloc_bin_stats_t)); - memset(astats->lstats, 0, nlclasses * - sizeof(malloc_large_stats_t)); - } -} - -static void -ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) -{ - unsigned i; - - arena_stats_merge(arena, &cstats->dss, &cstats->pactive, - &cstats->pdirty, &cstats->astats, cstats->bstats, cstats->lstats); - - for (i = 0; i < NBINS; i++) { - 
cstats->allocated_small += cstats->bstats[i].allocated; - cstats->nmalloc_small += cstats->bstats[i].nmalloc; - cstats->ndalloc_small += cstats->bstats[i].ndalloc; - cstats->nrequests_small += cstats->bstats[i].nrequests; - } -} - -static void -ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) -{ - unsigned i; - - sstats->pactive += astats->pactive; - sstats->pdirty += astats->pdirty; - - sstats->astats.mapped += astats->astats.mapped; - sstats->astats.npurge += astats->astats.npurge; - sstats->astats.nmadvise += astats->astats.nmadvise; - sstats->astats.purged += astats->astats.purged; - - sstats->allocated_small += astats->allocated_small; - sstats->nmalloc_small += astats->nmalloc_small; - sstats->ndalloc_small += astats->ndalloc_small; - sstats->nrequests_small += astats->nrequests_small; - - sstats->astats.allocated_large += astats->astats.allocated_large; - sstats->astats.nmalloc_large += astats->astats.nmalloc_large; - sstats->astats.ndalloc_large += astats->astats.ndalloc_large; - sstats->astats.nrequests_large += astats->astats.nrequests_large; - - for (i = 0; i < nlclasses; i++) { - sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; - sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; - sstats->lstats[i].nrequests += astats->lstats[i].nrequests; - sstats->lstats[i].curruns += astats->lstats[i].curruns; - } - - for (i = 0; i < NBINS; i++) { - sstats->bstats[i].allocated += astats->bstats[i].allocated; - sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; - sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; - sstats->bstats[i].nrequests += astats->bstats[i].nrequests; - if (config_tcache) { - sstats->bstats[i].nfills += astats->bstats[i].nfills; - sstats->bstats[i].nflushes += - astats->bstats[i].nflushes; - } - sstats->bstats[i].nruns += astats->bstats[i].nruns; - sstats->bstats[i].reruns += astats->bstats[i].reruns; - sstats->bstats[i].curruns += astats->bstats[i].curruns; - } -} - -static void 
-ctl_arena_refresh(arena_t *arena, unsigned i) -{ - ctl_arena_stats_t *astats = &ctl_stats.arenas[i]; - ctl_arena_stats_t *sstats = &ctl_stats.arenas[ctl_stats.narenas]; - - ctl_arena_clear(astats); - - sstats->nthreads += astats->nthreads; - if (config_stats) { - ctl_arena_stats_amerge(astats, arena); - /* Merge into sum stats as well. */ - ctl_arena_stats_smerge(sstats, astats); - } else { - astats->pactive += arena->nactive; - astats->pdirty += arena->ndirty; - /* Merge into sum stats as well. */ - sstats->pactive += arena->nactive; - sstats->pdirty += arena->ndirty; - } -} - -static bool -ctl_grow(void) -{ - size_t astats_size; - ctl_arena_stats_t *astats; - arena_t **tarenas; - - /* Extend arena stats and arenas arrays. */ - astats_size = (ctl_stats.narenas + 2) * sizeof(ctl_arena_stats_t); - if (ctl_stats.narenas == narenas_auto) { - /* ctl_stats.arenas and arenas came from base_alloc(). */ - astats = (ctl_arena_stats_t *)imalloc(astats_size); - if (astats == NULL) - return (true); - memcpy(astats, ctl_stats.arenas, (ctl_stats.narenas + 1) * - sizeof(ctl_arena_stats_t)); - - tarenas = (arena_t **)imalloc((ctl_stats.narenas + 1) * - sizeof(arena_t *)); - if (tarenas == NULL) { - idalloc(astats); - return (true); - } - memcpy(tarenas, arenas, ctl_stats.narenas * sizeof(arena_t *)); - } else { - astats = (ctl_arena_stats_t *)iralloc(ctl_stats.arenas, - astats_size, 0, 0, false, false); - if (astats == NULL) - return (true); - - tarenas = (arena_t **)iralloc(arenas, (ctl_stats.narenas + 1) * - sizeof(arena_t *), 0, 0, false, false); - if (tarenas == NULL) - return (true); - } - /* Initialize the new astats and arenas elements. */ - memset(&astats[ctl_stats.narenas + 1], 0, sizeof(ctl_arena_stats_t)); - if (ctl_arena_init(&astats[ctl_stats.narenas + 1])) - return (true); - tarenas[ctl_stats.narenas] = NULL; - /* Swap merged stats to their new location. 
*/ - { - ctl_arena_stats_t tstats; - memcpy(&tstats, &astats[ctl_stats.narenas], - sizeof(ctl_arena_stats_t)); - memcpy(&astats[ctl_stats.narenas], - &astats[ctl_stats.narenas + 1], sizeof(ctl_arena_stats_t)); - memcpy(&astats[ctl_stats.narenas + 1], &tstats, - sizeof(ctl_arena_stats_t)); - } - ctl_stats.arenas = astats; - ctl_stats.narenas++; - malloc_mutex_lock(&arenas_lock); - arenas = tarenas; - narenas_total++; - arenas_extend(narenas_total - 1); - malloc_mutex_unlock(&arenas_lock); - - return (false); -} - -static void -ctl_refresh(void) -{ - unsigned i; - VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); - - if (config_stats) { - malloc_mutex_lock(&chunks_mtx); - ctl_stats.chunks.current = stats_chunks.curchunks; - ctl_stats.chunks.total = stats_chunks.nchunks; - ctl_stats.chunks.high = stats_chunks.highchunks; - malloc_mutex_unlock(&chunks_mtx); - - malloc_mutex_lock(&huge_mtx); - ctl_stats.huge.allocated = huge_allocated; - ctl_stats.huge.nmalloc = huge_nmalloc; - ctl_stats.huge.ndalloc = huge_ndalloc; - malloc_mutex_unlock(&huge_mtx); - } - - /* - * Clear sum stats, since they will be merged into by - * ctl_arena_refresh(). 
- */ - ctl_stats.arenas[ctl_stats.narenas].nthreads = 0; - ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); - - malloc_mutex_lock(&arenas_lock); - memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas); - for (i = 0; i < ctl_stats.narenas; i++) { - if (arenas[i] != NULL) - ctl_stats.arenas[i].nthreads = arenas[i]->nthreads; - else - ctl_stats.arenas[i].nthreads = 0; - } - malloc_mutex_unlock(&arenas_lock); - for (i = 0; i < ctl_stats.narenas; i++) { - bool initialized = (tarenas[i] != NULL); - - ctl_stats.arenas[i].initialized = initialized; - if (initialized) - ctl_arena_refresh(tarenas[i], i); - } - - if (config_stats) { - ctl_stats.allocated = - ctl_stats.arenas[ctl_stats.narenas].allocated_small - + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large - + ctl_stats.huge.allocated; - ctl_stats.active = - (ctl_stats.arenas[ctl_stats.narenas].pactive << LG_PAGE) - + ctl_stats.huge.allocated; - ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk); - } - - ctl_epoch++; -} - -static bool -ctl_init(void) -{ - bool ret; - - malloc_mutex_lock(&ctl_mtx); - if (ctl_initialized == false) { - /* - * Allocate space for one extra arena stats element, which - * contains summed stats across all arenas. - */ - assert(narenas_auto == narenas_total_get()); - ctl_stats.narenas = narenas_auto; - ctl_stats.arenas = (ctl_arena_stats_t *)base_alloc( - (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); - if (ctl_stats.arenas == NULL) { - ret = true; - goto label_return; - } - memset(ctl_stats.arenas, 0, (ctl_stats.narenas + 1) * - sizeof(ctl_arena_stats_t)); - - /* - * Initialize all stats structures, regardless of whether they - * ever get used. Lazy initialization would allow errors to - * cause inconsistent state to be viewable by the application. 
- */ - if (config_stats) { - unsigned i; - for (i = 0; i <= ctl_stats.narenas; i++) { - if (ctl_arena_init(&ctl_stats.arenas[i])) { - ret = true; - goto label_return; - } - } - } - ctl_stats.arenas[ctl_stats.narenas].initialized = true; - - ctl_epoch = 0; - ctl_refresh(); - ctl_initialized = true; - } - - ret = false; -label_return: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} - -static int -ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, - size_t *depthp) -{ - int ret; - const char *elm, *tdot, *dot; - size_t elen, i, j; - const ctl_named_node_t *node; - - elm = name; - /* Equivalent to strchrnul(). */ - dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot : strchr(elm, '\0'); - elen = (size_t)((uintptr_t)dot - (uintptr_t)elm); - if (elen == 0) { - ret = ENOENT; - goto label_return; - } - node = super_root_node; - for (i = 0; i < *depthp; i++) { - assert(node); - assert(node->nchildren > 0); - if (ctl_named_node(node->children) != NULL) { - const ctl_named_node_t *pnode = node; - - /* Children are named. */ - for (j = 0; j < node->nchildren; j++) { - const ctl_named_node_t *child = - ctl_named_children(node, j); - if (strlen(child->name) == elen && - strncmp(elm, child->name, elen) == 0) { - node = child; - if (nodesp != NULL) - nodesp[i] = - (const ctl_node_t *)node; - mibp[i] = j; - break; - } - } - if (node == pnode) { - ret = ENOENT; - goto label_return; - } - } else { - uintmax_t index; - const ctl_indexed_node_t *inode; - - /* Children are indexed. */ - index = malloc_strtoumax(elm, NULL, 10); - if (index == UINTMAX_MAX || index > SIZE_T_MAX) { - ret = ENOENT; - goto label_return; - } - - inode = ctl_indexed_node(node->children); - node = inode->index(mibp, *depthp, (size_t)index); - if (node == NULL) { - ret = ENOENT; - goto label_return; - } - - if (nodesp != NULL) - nodesp[i] = (const ctl_node_t *)node; - mibp[i] = (size_t)index; - } - - if (node->ctl != NULL) { - /* Terminal node. 
*/ - if (*dot != '\0') { - /* - * The name contains more elements than are - * in this path through the tree. - */ - ret = ENOENT; - goto label_return; - } - /* Complete lookup successful. */ - *depthp = i + 1; - break; - } - - /* Update elm. */ - if (*dot == '\0') { - /* No more elements. */ - ret = ENOENT; - goto label_return; - } - elm = &dot[1]; - dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot : - strchr(elm, '\0'); - elen = (size_t)((uintptr_t)dot - (uintptr_t)elm); - } - - ret = 0; -label_return: - return (ret); -} - -int -ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) -{ - int ret; - size_t depth; - ctl_node_t const *nodes[CTL_MAX_DEPTH]; - size_t mib[CTL_MAX_DEPTH]; - const ctl_named_node_t *node; - - if (ctl_initialized == false && ctl_init()) { - ret = EAGAIN; - goto label_return; - } - - depth = CTL_MAX_DEPTH; - ret = ctl_lookup(name, nodes, mib, &depth); - if (ret != 0) - goto label_return; - - node = ctl_named_node(nodes[depth-1]); - if (node != NULL && node->ctl) - ret = node->ctl(mib, depth, oldp, oldlenp, newp, newlen); - else { - /* The name refers to a partial path through the ctl tree. */ - ret = ENOENT; - } - -label_return: - return(ret); -} - -int -ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp) -{ - int ret; - - if (ctl_initialized == false && ctl_init()) { - ret = EAGAIN; - goto label_return; - } - - ret = ctl_lookup(name, NULL, mibp, miblenp); -label_return: - return(ret); -} - -int -ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - const ctl_named_node_t *node; - size_t i; - - if (ctl_initialized == false && ctl_init()) { - ret = EAGAIN; - goto label_return; - } - - /* Iterate down the tree. */ - node = super_root_node; - for (i = 0; i < miblen; i++) { - assert(node); - assert(node->nchildren > 0); - if (ctl_named_node(node->children) != NULL) { - /* Children are named. 
*/ - if (node->nchildren <= mib[i]) { - ret = ENOENT; - goto label_return; - } - node = ctl_named_children(node, mib[i]); - } else { - const ctl_indexed_node_t *inode; - - /* Indexed element. */ - inode = ctl_indexed_node(node->children); - node = inode->index(mib, miblen, mib[i]); - if (node == NULL) { - ret = ENOENT; - goto label_return; - } - } - } - - /* Call the ctl function. */ - if (node && node->ctl) - ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen); - else { - /* Partial MIB. */ - ret = ENOENT; - } - -label_return: - return(ret); -} - -bool -ctl_boot(void) -{ - - if (malloc_mutex_init(&ctl_mtx)) - return (true); - - ctl_initialized = false; - - return (false); -} - -void -ctl_prefork(void) -{ - - malloc_mutex_lock(&ctl_mtx); -} - -void -ctl_postfork_parent(void) -{ - - malloc_mutex_postfork_parent(&ctl_mtx); -} - -void -ctl_postfork_child(void) -{ - - malloc_mutex_postfork_child(&ctl_mtx); -} - -/******************************************************************************/ -/* *_ctl() functions. */ - -#define READONLY() do { \ - if (newp != NULL || newlen != 0) { \ - ret = EPERM; \ - goto label_return; \ - } \ -} while (0) - -#define WRITEONLY() do { \ - if (oldp != NULL || oldlenp != NULL) { \ - ret = EPERM; \ - goto label_return; \ - } \ -} while (0) - -#define READ(v, t) do { \ - if (oldp != NULL && oldlenp != NULL) { \ - if (*oldlenp != sizeof(t)) { \ - size_t copylen = (sizeof(t) <= *oldlenp) \ - ? sizeof(t) : *oldlenp; \ - memcpy(oldp, (void *)&(v), copylen); \ - ret = EINVAL; \ - goto label_return; \ - } else \ - *(t *)oldp = (v); \ - } \ -} while (0) - -#define WRITE(v, t) do { \ - if (newp != NULL) { \ - if (newlen != sizeof(t)) { \ - ret = EINVAL; \ - goto label_return; \ - } \ - (v) = *(t *)newp; \ - } \ -} while (0) - -/* - * There's a lot of code duplication in the following macros due to limitations - * in how nested cpp macros are expanded. 
- */ -#define CTL_RO_CLGEN(c, l, n, v, t) \ -static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ -{ \ - int ret; \ - t oldval; \ - \ - if ((c) == false) \ - return (ENOENT); \ - if (l) \ - malloc_mutex_lock(&ctl_mtx); \ - READONLY(); \ - oldval = (v); \ - READ(oldval, t); \ - \ - ret = 0; \ -label_return: \ - if (l) \ - malloc_mutex_unlock(&ctl_mtx); \ - return (ret); \ -} - -#define CTL_RO_CGEN(c, n, v, t) \ -static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ -{ \ - int ret; \ - t oldval; \ - \ - if ((c) == false) \ - return (ENOENT); \ - malloc_mutex_lock(&ctl_mtx); \ - READONLY(); \ - oldval = (v); \ - READ(oldval, t); \ - \ - ret = 0; \ -label_return: \ - malloc_mutex_unlock(&ctl_mtx); \ - return (ret); \ -} - -#define CTL_RO_GEN(n, v, t) \ -static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ -{ \ - int ret; \ - t oldval; \ - \ - malloc_mutex_lock(&ctl_mtx); \ - READONLY(); \ - oldval = (v); \ - READ(oldval, t); \ - \ - ret = 0; \ -label_return: \ - malloc_mutex_unlock(&ctl_mtx); \ - return (ret); \ -} - -/* - * ctl_mtx is not acquired, under the assumption that no pertinent data will - * mutate during the call. 
- */ -#define CTL_RO_NL_CGEN(c, n, v, t) \ -static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ -{ \ - int ret; \ - t oldval; \ - \ - if ((c) == false) \ - return (ENOENT); \ - READONLY(); \ - oldval = (v); \ - READ(oldval, t); \ - \ - ret = 0; \ -label_return: \ - return (ret); \ -} - -#define CTL_RO_NL_GEN(n, v, t) \ -static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ -{ \ - int ret; \ - t oldval; \ - \ - READONLY(); \ - oldval = (v); \ - READ(oldval, t); \ - \ - ret = 0; \ -label_return: \ - return (ret); \ -} - -#define CTL_RO_BOOL_CONFIG_GEN(n) \ -static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ -{ \ - int ret; \ - bool oldval; \ - \ - READONLY(); \ - oldval = n; \ - READ(oldval, bool); \ - \ - ret = 0; \ -label_return: \ - return (ret); \ -} - -CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *) - -static int -epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - uint64_t newval __attribute__((unused)); - - malloc_mutex_lock(&ctl_mtx); - WRITE(newval, uint64_t); - if (newp != NULL) - ctl_refresh(); - READ(ctl_epoch, uint64_t); - - ret = 0; -label_return: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} - -static int -thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ - int ret; - bool oldval; - - if (config_tcache == false) - return (ENOENT); - - oldval = tcache_enabled_get(); - if (newp != NULL) { - if (newlen != sizeof(bool)) { - ret = EINVAL; - goto label_return; - } - tcache_enabled_set(*(bool *)newp); - } - READ(oldval, bool); - - ret = 0; -label_return: - return (ret); -} - -static int -thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ - int ret; - - 
if (config_tcache == false) - return (ENOENT); - - READONLY(); - WRITEONLY(); - - tcache_flush(); - - ret = 0; -label_return: - return (ret); -} - -static int -thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - unsigned newind, oldind; - - malloc_mutex_lock(&ctl_mtx); - newind = oldind = choose_arena(NULL)->ind; - WRITE(newind, unsigned); - READ(oldind, unsigned); - if (newind != oldind) { - arena_t *arena; - - if (newind >= ctl_stats.narenas) { - /* New arena index is out of range. */ - ret = EFAULT; - goto label_return; - } - - /* Initialize arena if necessary. */ - malloc_mutex_lock(&arenas_lock); - if ((arena = arenas[newind]) == NULL && (arena = - arenas_extend(newind)) == NULL) { - malloc_mutex_unlock(&arenas_lock); - ret = EAGAIN; - goto label_return; - } - assert(arena == arenas[newind]); - arenas[oldind]->nthreads--; - arenas[newind]->nthreads++; - malloc_mutex_unlock(&arenas_lock); - - /* Set new arena association. 
*/ - if (config_tcache) { - tcache_t *tcache; - if ((uintptr_t)(tcache = *tcache_tsd_get()) > - (uintptr_t)TCACHE_STATE_MAX) { - tcache_arena_dissociate(tcache); - tcache_arena_associate(tcache, arena); - } - } - arenas_tsd_set(&arena); - } - - ret = 0; -label_return: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} - -CTL_RO_NL_CGEN(config_stats, thread_allocated, - thread_allocated_tsd_get()->allocated, uint64_t) -CTL_RO_NL_CGEN(config_stats, thread_allocatedp, - &thread_allocated_tsd_get()->allocated, uint64_t *) -CTL_RO_NL_CGEN(config_stats, thread_deallocated, - thread_allocated_tsd_get()->deallocated, uint64_t) -CTL_RO_NL_CGEN(config_stats, thread_deallocatedp, - &thread_allocated_tsd_get()->deallocated, uint64_t *) - -/******************************************************************************/ - -CTL_RO_BOOL_CONFIG_GEN(config_debug) -CTL_RO_BOOL_CONFIG_GEN(config_dss) -CTL_RO_BOOL_CONFIG_GEN(config_fill) -CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) -CTL_RO_BOOL_CONFIG_GEN(config_mremap) -CTL_RO_BOOL_CONFIG_GEN(config_munmap) -CTL_RO_BOOL_CONFIG_GEN(config_prof) -CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) -CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) -CTL_RO_BOOL_CONFIG_GEN(config_stats) -CTL_RO_BOOL_CONFIG_GEN(config_tcache) -CTL_RO_BOOL_CONFIG_GEN(config_tls) -CTL_RO_BOOL_CONFIG_GEN(config_utrace) -CTL_RO_BOOL_CONFIG_GEN(config_valgrind) -CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) - -/******************************************************************************/ - -CTL_RO_NL_GEN(opt_abort, opt_abort, bool) -CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) -CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) -CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) -CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) -CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) -CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) -CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) -CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t) 
-CTL_RO_NL_CGEN(config_fill, opt_redzone, opt_redzone, bool) -CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) -CTL_RO_NL_CGEN(config_valgrind, opt_valgrind, opt_valgrind, bool) -CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) -CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) -CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) -CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) -CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) -CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. */ -CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) -CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) -CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) -CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) -CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) -CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) - -/******************************************************************************/ - -/* ctl_mutex must be held during execution of this function. 
*/ -static void -arena_purge(unsigned arena_ind) -{ - VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); - - malloc_mutex_lock(&arenas_lock); - memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas); - malloc_mutex_unlock(&arenas_lock); - - if (arena_ind == ctl_stats.narenas) { - unsigned i; - for (i = 0; i < ctl_stats.narenas; i++) { - if (tarenas[i] != NULL) - arena_purge_all(tarenas[i]); - } - } else { - assert(arena_ind < ctl_stats.narenas); - if (tarenas[arena_ind] != NULL) - arena_purge_all(tarenas[arena_ind]); - } -} - -static int -arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - - READONLY(); - WRITEONLY(); - malloc_mutex_lock(&ctl_mtx); - arena_purge(mib[1]); - malloc_mutex_unlock(&ctl_mtx); - - ret = 0; -label_return: - return (ret); -} - -static int -arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret, i; - bool match, err; - const char *dss; - unsigned arena_ind = mib[1]; - dss_prec_t dss_prec_old = dss_prec_limit; - dss_prec_t dss_prec = dss_prec_limit; - - malloc_mutex_lock(&ctl_mtx); - WRITE(dss, const char *); - match = false; - for (i = 0; i < dss_prec_limit; i++) { - if (strcmp(dss_prec_names[i], dss) == 0) { - dss_prec = i; - match = true; - break; - } - } - if (match == false) { - ret = EINVAL; - goto label_return; - } - - if (arena_ind < ctl_stats.narenas) { - arena_t *arena = arenas[arena_ind]; - if (arena != NULL) { - dss_prec_old = arena_dss_prec_get(arena); - arena_dss_prec_set(arena, dss_prec); - err = false; - } else - err = true; - } else { - dss_prec_old = chunk_dss_prec_get(); - err = chunk_dss_prec_set(dss_prec); - } - dss = dss_prec_names[dss_prec_old]; - READ(dss, const char *); - if (err) { - ret = EFAULT; - goto label_return; - } - - ret = 0; -label_return: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} - -static const ctl_named_node_t * -arena_i_index(const size_t *mib, 
size_t miblen, size_t i) -{ - const ctl_named_node_t * ret; - - malloc_mutex_lock(&ctl_mtx); - if (i > ctl_stats.narenas) { - ret = NULL; - goto label_return; - } - - ret = super_arena_i_node; -label_return: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} - - -/******************************************************************************/ - -CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) -CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) -CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) -static const ctl_named_node_t * -arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) -{ - - if (i > NBINS) - return (NULL); - return (super_arenas_bin_i_node); -} - -CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) -static const ctl_named_node_t * -arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) -{ - - if (i > nlclasses) - return (NULL); - return (super_arenas_lrun_i_node); -} - -static int -arenas_narenas_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ - int ret; - unsigned narenas; - - malloc_mutex_lock(&ctl_mtx); - READONLY(); - if (*oldlenp != sizeof(unsigned)) { - ret = EINVAL; - goto label_return; - } - narenas = ctl_stats.narenas; - READ(narenas, unsigned); - - ret = 0; -label_return: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} - -static int -arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ - int ret; - unsigned nread, i; - - malloc_mutex_lock(&ctl_mtx); - READONLY(); - if (*oldlenp != ctl_stats.narenas * sizeof(bool)) { - ret = EINVAL; - nread = (*oldlenp < ctl_stats.narenas * sizeof(bool)) - ? 
(*oldlenp / sizeof(bool)) : ctl_stats.narenas; - } else { - ret = 0; - nread = ctl_stats.narenas; - } - - for (i = 0; i < nread; i++) - ((bool *)oldp)[i] = ctl_stats.arenas[i].initialized; - -label_return: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} - -CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) -CTL_RO_NL_GEN(arenas_page, PAGE, size_t) -CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) -CTL_RO_NL_GEN(arenas_nbins, NBINS, unsigned) -CTL_RO_NL_CGEN(config_tcache, arenas_nhbins, nhbins, unsigned) -CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t) - -static int -arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - unsigned arena_ind; - - malloc_mutex_lock(&ctl_mtx); - WRITEONLY(); - arena_ind = UINT_MAX; - WRITE(arena_ind, unsigned); - if (newp != NULL && arena_ind >= ctl_stats.narenas) - ret = EFAULT; - else { - if (arena_ind == UINT_MAX) - arena_ind = ctl_stats.narenas; - arena_purge(arena_ind); - ret = 0; - } - -label_return: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} - -static int -arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - unsigned narenas; - - malloc_mutex_lock(&ctl_mtx); - READONLY(); - if (ctl_grow()) { - ret = EAGAIN; - goto label_return; - } - narenas = ctl_stats.narenas - 1; - READ(narenas, unsigned); - - ret = 0; -label_return: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} - -/******************************************************************************/ - -static int -prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - bool oldval; - - if (config_prof == false) - return (ENOENT); - - malloc_mutex_lock(&ctl_mtx); /* Protect opt_prof_active. 
*/ - oldval = opt_prof_active; - if (newp != NULL) { - /* - * The memory barriers will tend to make opt_prof_active - * propagate faster on systems with weak memory ordering. - */ - mb_write(); - WRITE(opt_prof_active, bool); - mb_write(); - } - READ(oldval, bool); - - ret = 0; -label_return: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} - -static int -prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - const char *filename = NULL; - - if (config_prof == false) - return (ENOENT); - - WRITEONLY(); - WRITE(filename, const char *); - - if (prof_mdump(filename)) { - ret = EFAULT; - goto label_return; - } - - ret = 0; -label_return: - return (ret); -} - -CTL_RO_NL_CGEN(config_prof, prof_interval, prof_interval, uint64_t) - -/******************************************************************************/ - -CTL_RO_CGEN(config_stats, stats_chunks_current, ctl_stats.chunks.current, - size_t) -CTL_RO_CGEN(config_stats, stats_chunks_total, ctl_stats.chunks.total, uint64_t) -CTL_RO_CGEN(config_stats, stats_chunks_high, ctl_stats.chunks.high, size_t) -CTL_RO_CGEN(config_stats, stats_huge_allocated, huge_allocated, size_t) -CTL_RO_CGEN(config_stats, stats_huge_nmalloc, huge_nmalloc, uint64_t) -CTL_RO_CGEN(config_stats, stats_huge_ndalloc, huge_ndalloc, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, - ctl_stats.arenas[mib[2]].allocated_small, size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_small_nmalloc, - ctl_stats.arenas[mib[2]].nmalloc_small, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_small_ndalloc, - ctl_stats.arenas[mib[2]].ndalloc_small, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_small_nrequests, - ctl_stats.arenas[mib[2]].nrequests_small, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, - ctl_stats.arenas[mib[2]].astats.allocated_large, size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, - 
ctl_stats.arenas[mib[2]].astats.nmalloc_large, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, - ctl_stats.arenas[mib[2]].astats.ndalloc_large, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, - ctl_stats.arenas[mib[2]].astats.nrequests_large, uint64_t) - -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_allocated, - ctl_stats.arenas[mib[2]].bstats[mib[4]].allocated, size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc, - ctl_stats.arenas[mib[2]].bstats[mib[4]].nmalloc, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_ndalloc, - ctl_stats.arenas[mib[2]].bstats[mib[4]].ndalloc, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nrequests, - ctl_stats.arenas[mib[2]].bstats[mib[4]].nrequests, uint64_t) -CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nfills, - ctl_stats.arenas[mib[2]].bstats[mib[4]].nfills, uint64_t) -CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nflushes, - ctl_stats.arenas[mib[2]].bstats[mib[4]].nflushes, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nruns, - ctl_stats.arenas[mib[2]].bstats[mib[4]].nruns, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreruns, - ctl_stats.arenas[mib[2]].bstats[mib[4]].reruns, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, - ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) - -static const ctl_named_node_t * -stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j) -{ - - if (j > NBINS) - return (NULL); - return (super_stats_arenas_i_bins_j_node); -} - -CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nmalloc, - ctl_stats.arenas[mib[2]].lstats[mib[4]].nmalloc, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_ndalloc, - ctl_stats.arenas[mib[2]].lstats[mib[4]].ndalloc, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nrequests, - ctl_stats.arenas[mib[2]].lstats[mib[4]].nrequests, uint64_t) -CTL_RO_CGEN(config_stats, 
stats_arenas_i_lruns_j_curruns, - ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) - -static const ctl_named_node_t * -stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) -{ - - if (j > nlclasses) - return (NULL); - return (super_stats_arenas_i_lruns_j_node); -} - -CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) -CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) -CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) -CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, - ctl_stats.arenas[mib[2]].astats.mapped, size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_npurge, - ctl_stats.arenas[mib[2]].astats.npurge, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, - ctl_stats.arenas[mib[2]].astats.nmadvise, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_purged, - ctl_stats.arenas[mib[2]].astats.purged, uint64_t) - -static const ctl_named_node_t * -stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) -{ - const ctl_named_node_t * ret; - - malloc_mutex_lock(&ctl_mtx); - if (i > ctl_stats.narenas || ctl_stats.arenas[i].initialized == false) { - ret = NULL; - goto label_return; - } - - ret = super_stats_arenas_i_node; -label_return: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} - -CTL_RO_CGEN(config_stats, stats_cactive, &stats_cactive, size_t *) -CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats.allocated, size_t) -CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t) -CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/extent.c mariadb-5.5-5.5.40/extra/jemalloc/src/extent.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/extent.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/extent.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,39 +0,0 @@ -#define 
JEMALLOC_EXTENT_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ - -static inline int -extent_szad_comp(extent_node_t *a, extent_node_t *b) -{ - int ret; - size_t a_size = a->size; - size_t b_size = b->size; - - ret = (a_size > b_size) - (a_size < b_size); - if (ret == 0) { - uintptr_t a_addr = (uintptr_t)a->addr; - uintptr_t b_addr = (uintptr_t)b->addr; - - ret = (a_addr > b_addr) - (a_addr < b_addr); - } - - return (ret); -} - -/* Generate red-black tree functions. */ -rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, link_szad, - extent_szad_comp) - -static inline int -extent_ad_comp(extent_node_t *a, extent_node_t *b) -{ - uintptr_t a_addr = (uintptr_t)a->addr; - uintptr_t b_addr = (uintptr_t)b->addr; - - return ((a_addr > b_addr) - (a_addr < b_addr)); -} - -/* Generate red-black tree functions. */ -rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad, - extent_ad_comp) diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/hash.c mariadb-5.5-5.5.40/extra/jemalloc/src/hash.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/hash.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/hash.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -#define JEMALLOC_HASH_C_ -#include "jemalloc/internal/jemalloc_internal.h" diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/huge.c mariadb-5.5-5.5.40/extra/jemalloc/src/huge.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/huge.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/huge.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,313 +0,0 @@ -#define JEMALLOC_HUGE_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Data. 
*/ - -uint64_t huge_nmalloc; -uint64_t huge_ndalloc; -size_t huge_allocated; - -malloc_mutex_t huge_mtx; - -/******************************************************************************/ - -/* Tree of chunks that are stand-alone huge allocations. */ -static extent_tree_t huge; - -void * -huge_malloc(size_t size, bool zero) -{ - - return (huge_palloc(size, chunksize, zero)); -} - -void * -huge_palloc(size_t size, size_t alignment, bool zero) -{ - void *ret; - size_t csize; - extent_node_t *node; - bool is_zeroed; - - /* Allocate one or more contiguous chunks for this request. */ - - csize = CHUNK_CEILING(size); - if (csize == 0) { - /* size is large enough to cause size_t wrap-around. */ - return (NULL); - } - - /* Allocate an extent node with which to track the chunk. */ - node = base_node_alloc(); - if (node == NULL) - return (NULL); - - /* - * Copy zero into is_zeroed and pass the copy to chunk_alloc(), so that - * it is possible to make correct junk/zero fill decisions below. - */ - is_zeroed = zero; - ret = chunk_alloc(csize, alignment, false, &is_zeroed, - chunk_dss_prec_get()); - if (ret == NULL) { - base_node_dealloc(node); - return (NULL); - } - - /* Insert node into huge. */ - node->addr = ret; - node->size = csize; - - malloc_mutex_lock(&huge_mtx); - extent_tree_ad_insert(&huge, node); - if (config_stats) { - stats_cactive_add(csize); - huge_nmalloc++; - huge_allocated += csize; - } - malloc_mutex_unlock(&huge_mtx); - - if (config_fill && zero == false) { - if (opt_junk) - memset(ret, 0xa5, csize); - else if (opt_zero && is_zeroed == false) - memset(ret, 0, csize); - } - - return (ret); -} - -void * -huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) -{ - - /* - * Avoid moving the allocation if the size class can be left the same. 
- */ - if (oldsize > arena_maxclass - && CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) - && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { - assert(CHUNK_CEILING(oldsize) == oldsize); - if (config_fill && opt_junk && size < oldsize) { - memset((void *)((uintptr_t)ptr + size), 0x5a, - oldsize - size); - } - return (ptr); - } - - /* Reallocation would require a move. */ - return (NULL); -} - -void * -huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero, bool try_tcache_dalloc) -{ - void *ret; - size_t copysize; - - /* Try to avoid moving the allocation. */ - ret = huge_ralloc_no_move(ptr, oldsize, size, extra); - if (ret != NULL) - return (ret); - - /* - * size and oldsize are different enough that we need to use a - * different size class. In that case, fall back to allocating new - * space and copying. - */ - if (alignment > chunksize) - ret = huge_palloc(size + extra, alignment, zero); - else - ret = huge_malloc(size + extra, zero); - - if (ret == NULL) { - if (extra == 0) - return (NULL); - /* Try again, this time without extra. */ - if (alignment > chunksize) - ret = huge_palloc(size, alignment, zero); - else - ret = huge_malloc(size, zero); - - if (ret == NULL) - return (NULL); - } - - /* - * Copy at most size bytes (not size+extra), since the caller has no - * expectation that the extra bytes will be reliably preserved. - */ - copysize = (size < oldsize) ? size : oldsize; - -#ifdef JEMALLOC_MREMAP - /* - * Use mremap(2) if this is a huge-->huge reallocation, and neither the - * source nor the destination are in dss. - */ - if (oldsize >= chunksize && (config_dss == false || (chunk_in_dss(ptr) - == false && chunk_in_dss(ret) == false))) { - size_t newsize = huge_salloc(ret); - - /* - * Remove ptr from the tree of huge allocations before - * performing the remap operation, in order to avoid the - * possibility of another thread acquiring that mapping before - * this one removes it from the tree. 
- */ - huge_dalloc(ptr, false); - if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED, - ret) == MAP_FAILED) { - /* - * Assuming no chunk management bugs in the allocator, - * the only documented way an error can occur here is - * if the application changed the map type for a - * portion of the old allocation. This is firmly in - * undefined behavior territory, so write a diagnostic - * message, and optionally abort. - */ - char buf[BUFERROR_BUF]; - - buferror(buf, sizeof(buf)); - malloc_printf(": Error in mremap(): %s\n", - buf); - if (opt_abort) - abort(); - memcpy(ret, ptr, copysize); - chunk_dealloc_mmap(ptr, oldsize); - } - } else -#endif - { - memcpy(ret, ptr, copysize); - iqallocx(ptr, try_tcache_dalloc); - } - return (ret); -} - -void -huge_dalloc(void *ptr, bool unmap) -{ - extent_node_t *node, key; - - malloc_mutex_lock(&huge_mtx); - - /* Extract from tree of huge allocations. */ - key.addr = ptr; - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); - assert(node->addr == ptr); - extent_tree_ad_remove(&huge, node); - - if (config_stats) { - stats_cactive_sub(node->size); - huge_ndalloc++; - huge_allocated -= node->size; - } - - malloc_mutex_unlock(&huge_mtx); - - if (unmap && config_fill && config_dss && opt_junk) - memset(node->addr, 0x5a, node->size); - - chunk_dealloc(node->addr, node->size, unmap); - - base_node_dealloc(node); -} - -size_t -huge_salloc(const void *ptr) -{ - size_t ret; - extent_node_t *node, key; - - malloc_mutex_lock(&huge_mtx); - - /* Extract from tree of huge allocations. */ - key.addr = __DECONST(void *, ptr); - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); - - ret = node->size; - - malloc_mutex_unlock(&huge_mtx); - - return (ret); -} - -prof_ctx_t * -huge_prof_ctx_get(const void *ptr) -{ - prof_ctx_t *ret; - extent_node_t *node, key; - - malloc_mutex_lock(&huge_mtx); - - /* Extract from tree of huge allocations. 
*/ - key.addr = __DECONST(void *, ptr); - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); - - ret = node->prof_ctx; - - malloc_mutex_unlock(&huge_mtx); - - return (ret); -} - -void -huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) -{ - extent_node_t *node, key; - - malloc_mutex_lock(&huge_mtx); - - /* Extract from tree of huge allocations. */ - key.addr = __DECONST(void *, ptr); - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); - - node->prof_ctx = ctx; - - malloc_mutex_unlock(&huge_mtx); -} - -bool -huge_boot(void) -{ - - /* Initialize chunks data. */ - if (malloc_mutex_init(&huge_mtx)) - return (true); - extent_tree_ad_new(&huge); - - if (config_stats) { - huge_nmalloc = 0; - huge_ndalloc = 0; - huge_allocated = 0; - } - - return (false); -} - -void -huge_prefork(void) -{ - - malloc_mutex_prefork(&huge_mtx); -} - -void -huge_postfork_parent(void) -{ - - malloc_mutex_postfork_parent(&huge_mtx); -} - -void -huge_postfork_child(void) -{ - - malloc_mutex_postfork_child(&huge_mtx); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/jemalloc.c mariadb-5.5-5.5.40/extra/jemalloc/src/jemalloc.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/jemalloc.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/jemalloc.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,1868 +0,0 @@ -#define JEMALLOC_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Data. */ - -malloc_tsd_data(, arenas, arena_t *, NULL) -malloc_tsd_data(, thread_allocated, thread_allocated_t, - THREAD_ALLOCATED_INITIALIZER) - -/* Runtime configuration options. 
*/ -const char *je_malloc_conf; -bool opt_abort = -#ifdef JEMALLOC_DEBUG - true -#else - false -#endif - ; -bool opt_junk = -#if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) - true -#else - false -#endif - ; -size_t opt_quarantine = ZU(0); -bool opt_redzone = false; -bool opt_utrace = false; -bool opt_valgrind = false; -bool opt_xmalloc = false; -bool opt_zero = false; -size_t opt_narenas = 0; - -unsigned ncpus; - -malloc_mutex_t arenas_lock; -arena_t **arenas; -unsigned narenas_total; -unsigned narenas_auto; - -/* Set to true once the allocator has been initialized. */ -static bool malloc_initialized = false; - -#ifdef JEMALLOC_THREADED_INIT -/* Used to let the initializing thread recursively allocate. */ -# define NO_INITIALIZER ((unsigned long)0) -# define INITIALIZER pthread_self() -# define IS_INITIALIZER (malloc_initializer == pthread_self()) -static pthread_t malloc_initializer = NO_INITIALIZER; -#else -# define NO_INITIALIZER false -# define INITIALIZER true -# define IS_INITIALIZER malloc_initializer -static bool malloc_initializer = NO_INITIALIZER; -#endif - -/* Used to avoid initialization races. */ -#ifdef _WIN32 -static malloc_mutex_t init_lock; - -JEMALLOC_ATTR(constructor) -static void WINAPI -_init_init_lock(void) -{ - - malloc_mutex_init(&init_lock); -} - -#ifdef _MSC_VER -# pragma section(".CRT$XCU", read) -JEMALLOC_SECTION(".CRT$XCU") JEMALLOC_ATTR(used) -static const void (WINAPI *init_init_lock)(void) = _init_init_lock; -#endif - -#else -static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; -#endif - -typedef struct { - void *p; /* Input pointer (as in realloc(p, s)). */ - size_t s; /* Request size. */ - void *r; /* Result pointer. 
*/ -} malloc_utrace_t; - -#ifdef JEMALLOC_UTRACE -# define UTRACE(a, b, c) do { \ - if (opt_utrace) { \ - int utrace_serrno = errno; \ - malloc_utrace_t ut; \ - ut.p = (a); \ - ut.s = (b); \ - ut.r = (c); \ - utrace(&ut, sizeof(ut)); \ - errno = utrace_serrno; \ - } \ -} while (0) -#else -# define UTRACE(a, b, c) -#endif - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static void stats_print_atexit(void); -static unsigned malloc_ncpus(void); -static bool malloc_conf_next(char const **opts_p, char const **k_p, - size_t *klen_p, char const **v_p, size_t *vlen_p); -static void malloc_conf_error(const char *msg, const char *k, size_t klen, - const char *v, size_t vlen); -static void malloc_conf_init(void); -static bool malloc_init_hard(void); -static int imemalign(void **memptr, size_t alignment, size_t size, - size_t min_alignment); - -/******************************************************************************/ -/* - * Begin miscellaneous support functions. - */ - -/* Create a new arena and insert it into the arenas array at index ind. */ -arena_t * -arenas_extend(unsigned ind) -{ - arena_t *ret; - - ret = (arena_t *)base_alloc(sizeof(arena_t)); - if (ret != NULL && arena_new(ret, ind) == false) { - arenas[ind] = ret; - return (ret); - } - /* Only reached if there is an OOM error. */ - - /* - * OOM here is quite inconvenient to propagate, since dealing with it - * would require a check for failure in the fast path. Instead, punt - * by using arenas[0]. In practice, this is an extremely unlikely - * failure. - */ - malloc_write(": Error initializing arena\n"); - if (opt_abort) - abort(); - - return (arenas[0]); -} - -/* Slow path, called only by choose_arena(). 
*/ -arena_t * -choose_arena_hard(void) -{ - arena_t *ret; - - if (narenas_auto > 1) { - unsigned i, choose, first_null; - - choose = 0; - first_null = narenas_auto; - malloc_mutex_lock(&arenas_lock); - assert(arenas[0] != NULL); - for (i = 1; i < narenas_auto; i++) { - if (arenas[i] != NULL) { - /* - * Choose the first arena that has the lowest - * number of threads assigned to it. - */ - if (arenas[i]->nthreads < - arenas[choose]->nthreads) - choose = i; - } else if (first_null == narenas_auto) { - /* - * Record the index of the first uninitialized - * arena, in case all extant arenas are in use. - * - * NB: It is possible for there to be - * discontinuities in terms of initialized - * versus uninitialized arenas, due to the - * "thread.arena" mallctl. - */ - first_null = i; - } - } - - if (arenas[choose]->nthreads == 0 - || first_null == narenas_auto) { - /* - * Use an unloaded arena, or the least loaded arena if - * all arenas are already initialized. - */ - ret = arenas[choose]; - } else { - /* Initialize a new arena. */ - ret = arenas_extend(first_null); - } - ret->nthreads++; - malloc_mutex_unlock(&arenas_lock); - } else { - ret = arenas[0]; - malloc_mutex_lock(&arenas_lock); - ret->nthreads++; - malloc_mutex_unlock(&arenas_lock); - } - - arenas_tsd_set(&ret); - - return (ret); -} - -static void -stats_print_atexit(void) -{ - - if (config_tcache && config_stats) { - unsigned narenas, i; - - /* - * Merge stats from extant threads. This is racy, since - * individual threads do not lock when recording tcache stats - * events. As a consequence, the final stats may be slightly - * out of date by the time they are reported, if other threads - * continue to allocate. 
- */ - for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { - arena_t *arena = arenas[i]; - if (arena != NULL) { - tcache_t *tcache; - - /* - * tcache_stats_merge() locks bins, so if any - * code is introduced that acquires both arena - * and bin locks in the opposite order, - * deadlocks may result. - */ - malloc_mutex_lock(&arena->lock); - ql_foreach(tcache, &arena->tcache_ql, link) { - tcache_stats_merge(tcache, arena); - } - malloc_mutex_unlock(&arena->lock); - } - } - } - je_malloc_stats_print(NULL, NULL, NULL); -} - -/* - * End miscellaneous support functions. - */ -/******************************************************************************/ -/* - * Begin initialization functions. - */ - -static unsigned -malloc_ncpus(void) -{ - unsigned ret; - long result; - -#ifdef _WIN32 - SYSTEM_INFO si; - GetSystemInfo(&si); - result = si.dwNumberOfProcessors; -#else - result = sysconf(_SC_NPROCESSORS_ONLN); -#endif - if (result == -1) { - /* Error. */ - ret = 1; - } else { - ret = (unsigned)result; - } - - return (ret); -} - -void -arenas_cleanup(void *arg) -{ - arena_t *arena = *(arena_t **)arg; - - malloc_mutex_lock(&arenas_lock); - arena->nthreads--; - malloc_mutex_unlock(&arenas_lock); -} - -static JEMALLOC_ATTR(always_inline) void -malloc_thread_init(void) -{ - - /* - * TSD initialization can't be safely done as a side effect of - * deallocation, because it is possible for a thread to do nothing but - * deallocate its TLS data via free(), in which case writing to TLS - * would cause write-after-free memory corruption. The quarantine - * facility *only* gets used as a side effect of deallocation, so make - * a best effort attempt at initializing its TSD by hooking all - * allocation events. 
- */ - if (config_fill && opt_quarantine) - quarantine_alloc_hook(); -} - -static JEMALLOC_ATTR(always_inline) bool -malloc_init(void) -{ - - if (malloc_initialized == false && malloc_init_hard()) - return (true); - malloc_thread_init(); - - return (false); -} - -static bool -malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, - char const **v_p, size_t *vlen_p) -{ - bool accept; - const char *opts = *opts_p; - - *k_p = opts; - - for (accept = false; accept == false;) { - switch (*opts) { - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case '0': case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - case '_': - opts++; - break; - case ':': - opts++; - *klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p; - *v_p = opts; - accept = true; - break; - case '\0': - if (opts != *opts_p) { - malloc_write(": Conf string ends " - "with key\n"); - } - return (true); - default: - malloc_write(": Malformed conf string\n"); - return (true); - } - } - - for (accept = false; accept == false;) { - switch (*opts) { - case ',': - opts++; - /* - * Look ahead one character here, because the next time - * this function is called, it will assume that end of - * input has been cleanly reached if no input remains, - * but we have optimistically already consumed the - * comma if one exists. 
- */ - if (*opts == '\0') { - malloc_write(": Conf string ends " - "with comma\n"); - } - *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; - accept = true; - break; - case '\0': - *vlen_p = (uintptr_t)opts - (uintptr_t)*v_p; - accept = true; - break; - default: - opts++; - break; - } - } - - *opts_p = opts; - return (false); -} - -static void -malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, - size_t vlen) -{ - - malloc_printf(": %s: %.*s:%.*s\n", msg, (int)klen, k, - (int)vlen, v); -} - -static void -malloc_conf_init(void) -{ - unsigned i; - char buf[PATH_MAX + 1]; - const char *opts, *k, *v; - size_t klen, vlen; - - /* - * Automatically configure valgrind before processing options. The - * valgrind option remains in jemalloc 3.x for compatibility reasons. - */ - if (config_valgrind) { - opt_valgrind = (RUNNING_ON_VALGRIND != 0) ? true : false; - if (config_fill && opt_valgrind) { - opt_junk = false; - assert(opt_zero == false); - opt_quarantine = JEMALLOC_VALGRIND_QUARANTINE_DEFAULT; - opt_redzone = true; - } - if (config_tcache && opt_valgrind) - opt_tcache = false; - } - - for (i = 0; i < 3; i++) { - /* Get runtime configuration. */ - switch (i) { - case 0: - if (je_malloc_conf != NULL) { - /* - * Use options that were compiled into the - * program. - */ - opts = je_malloc_conf; - } else { - /* No configuration specified. */ - buf[0] = '\0'; - opts = buf; - } - break; - case 1: { -#ifndef _WIN32 - int linklen; - const char *linkname = -# ifdef JEMALLOC_PREFIX - "/etc/"JEMALLOC_PREFIX"malloc.conf" -# else - "/etc/malloc.conf" -# endif - ; - - if ((linklen = readlink(linkname, buf, - sizeof(buf) - 1)) != -1) { - /* - * Use the contents of the "/etc/malloc.conf" - * symbolic link's name. - */ - buf[linklen] = '\0'; - opts = buf; - } else -#endif - { - /* No configuration specified. 
*/ - buf[0] = '\0'; - opts = buf; - } - break; - } case 2: { - const char *envname = -#ifdef JEMALLOC_PREFIX - JEMALLOC_CPREFIX"MALLOC_CONF" -#else - "MALLOC_CONF" -#endif - ; - - if ((opts = getenv(envname)) != NULL) { - /* - * Do nothing; opts is already initialized to - * the value of the MALLOC_CONF environment - * variable. - */ - } else { - /* No configuration specified. */ - buf[0] = '\0'; - opts = buf; - } - break; - } default: - /* NOTREACHED */ - assert(false); - buf[0] = '\0'; - opts = buf; - } - - while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v, - &vlen) == false) { -#define CONF_HANDLE_BOOL(o, n) \ - if (sizeof(n)-1 == klen && strncmp(n, k, \ - klen) == 0) { \ - if (strncmp("true", v, vlen) == 0 && \ - vlen == sizeof("true")-1) \ - o = true; \ - else if (strncmp("false", v, vlen) == \ - 0 && vlen == sizeof("false")-1) \ - o = false; \ - else { \ - malloc_conf_error( \ - "Invalid conf value", \ - k, klen, v, vlen); \ - } \ - continue; \ - } -#define CONF_HANDLE_SIZE_T(o, n, min, max, clip) \ - if (sizeof(n)-1 == klen && strncmp(n, k, \ - klen) == 0) { \ - uintmax_t um; \ - char *end; \ - \ - set_errno(0); \ - um = malloc_strtoumax(v, &end, 0); \ - if (get_errno() != 0 || (uintptr_t)end -\ - (uintptr_t)v != vlen) { \ - malloc_conf_error( \ - "Invalid conf value", \ - k, klen, v, vlen); \ - } else if (clip) { \ - if (um < min) \ - o = min; \ - else if (um > max) \ - o = max; \ - else \ - o = um; \ - } else { \ - if (um < min || um > max) { \ - malloc_conf_error( \ - "Out-of-range " \ - "conf value", \ - k, klen, v, vlen); \ - } else \ - o = um; \ - } \ - continue; \ - } -#define CONF_HANDLE_SSIZE_T(o, n, min, max) \ - if (sizeof(n)-1 == klen && strncmp(n, k, \ - klen) == 0) { \ - long l; \ - char *end; \ - \ - set_errno(0); \ - l = strtol(v, &end, 0); \ - if (get_errno() != 0 || (uintptr_t)end -\ - (uintptr_t)v != vlen) { \ - malloc_conf_error( \ - "Invalid conf value", \ - k, klen, v, vlen); \ - } else if (l < (ssize_t)min || l > \ - 
(ssize_t)max) { \ - malloc_conf_error( \ - "Out-of-range conf value", \ - k, klen, v, vlen); \ - } else \ - o = l; \ - continue; \ - } -#define CONF_HANDLE_CHAR_P(o, n, d) \ - if (sizeof(n)-1 == klen && strncmp(n, k, \ - klen) == 0) { \ - size_t cpylen = (vlen <= \ - sizeof(o)-1) ? vlen : \ - sizeof(o)-1; \ - strncpy(o, v, cpylen); \ - o[cpylen] = '\0'; \ - continue; \ - } - - CONF_HANDLE_BOOL(opt_abort, "abort") - /* - * Chunks always require at least one header page, plus - * one data page in the absence of redzones, or three - * pages in the presence of redzones. In order to - * simplify options processing, fix the limit based on - * config_fill. - */ - CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE + - (config_fill ? 2 : 1), (sizeof(size_t) << 3) - 1, - true) - if (strncmp("dss", k, klen) == 0) { - int i; - bool match = false; - for (i = 0; i < dss_prec_limit; i++) { - if (strncmp(dss_prec_names[i], v, vlen) - == 0) { - if (chunk_dss_prec_set(i)) { - malloc_conf_error( - "Error setting dss", - k, klen, v, vlen); - } else { - opt_dss = - dss_prec_names[i]; - match = true; - break; - } - } - } - if (match == false) { - malloc_conf_error("Invalid conf value", - k, klen, v, vlen); - } - continue; - } - CONF_HANDLE_SIZE_T(opt_narenas, "narenas", 1, - SIZE_T_MAX, false) - CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult", - -1, (sizeof(size_t) << 3) - 1) - CONF_HANDLE_BOOL(opt_stats_print, "stats_print") - if (config_fill) { - CONF_HANDLE_BOOL(opt_junk, "junk") - CONF_HANDLE_SIZE_T(opt_quarantine, "quarantine", - 0, SIZE_T_MAX, false) - CONF_HANDLE_BOOL(opt_redzone, "redzone") - CONF_HANDLE_BOOL(opt_zero, "zero") - } - if (config_utrace) { - CONF_HANDLE_BOOL(opt_utrace, "utrace") - } - if (config_valgrind) { - CONF_HANDLE_BOOL(opt_valgrind, "valgrind") - } - if (config_xmalloc) { - CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") - } - if (config_tcache) { - CONF_HANDLE_BOOL(opt_tcache, "tcache") - CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, - "lg_tcache_max", -1, - 
(sizeof(size_t) << 3) - 1) - } - if (config_prof) { - CONF_HANDLE_BOOL(opt_prof, "prof") - CONF_HANDLE_CHAR_P(opt_prof_prefix, - "prof_prefix", "jeprof") - CONF_HANDLE_BOOL(opt_prof_active, "prof_active") - CONF_HANDLE_SSIZE_T(opt_lg_prof_sample, - "lg_prof_sample", 0, - (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") - CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, - "lg_prof_interval", -1, - (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump") - CONF_HANDLE_BOOL(opt_prof_final, "prof_final") - CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") - } - malloc_conf_error("Invalid conf pair", k, klen, v, - vlen); -#undef CONF_HANDLE_BOOL -#undef CONF_HANDLE_SIZE_T -#undef CONF_HANDLE_SSIZE_T -#undef CONF_HANDLE_CHAR_P - } - } -} - -static bool -malloc_init_hard(void) -{ - arena_t *init_arenas[1]; - - malloc_mutex_lock(&init_lock); - if (malloc_initialized || IS_INITIALIZER) { - /* - * Another thread initialized the allocator before this one - * acquired init_lock, or this thread is the initializing - * thread, and it is recursively allocating. - */ - malloc_mutex_unlock(&init_lock); - return (false); - } -#ifdef JEMALLOC_THREADED_INIT - if (malloc_initializer != NO_INITIALIZER && IS_INITIALIZER == false) { - /* Busy-wait until the initializing thread completes. */ - do { - malloc_mutex_unlock(&init_lock); - CPU_SPINWAIT; - malloc_mutex_lock(&init_lock); - } while (malloc_initialized == false); - malloc_mutex_unlock(&init_lock); - return (false); - } -#endif - malloc_initializer = INITIALIZER; - - malloc_tsd_boot(); - if (config_prof) - prof_boot0(); - - malloc_conf_init(); - -#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \ - && !defined(_WIN32)) - /* Register fork handlers. 
*/ - if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, - jemalloc_postfork_child) != 0) { - malloc_write(": Error in pthread_atfork()\n"); - if (opt_abort) - abort(); - } -#endif - - if (opt_stats_print) { - /* Print statistics at exit. */ - if (atexit(stats_print_atexit) != 0) { - malloc_write(": Error in atexit()\n"); - if (opt_abort) - abort(); - } - } - - if (base_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - if (chunk_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - if (ctl_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - if (config_prof) - prof_boot1(); - - arena_boot(); - - if (config_tcache && tcache_boot0()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - if (huge_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - if (malloc_mutex_init(&arenas_lock)) - return (true); - - /* - * Create enough scaffolding to allow recursive allocation in - * malloc_ncpus(). - */ - narenas_total = narenas_auto = 1; - arenas = init_arenas; - memset(arenas, 0, sizeof(arena_t *) * narenas_auto); - - /* - * Initialize one arena here. The rest are lazily created in - * choose_arena_hard(). - */ - arenas_extend(0); - if (arenas[0] == NULL) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - /* Initialize allocation counters before any allocations can occur. */ - if (config_stats && thread_allocated_tsd_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - if (arenas_tsd_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - if (config_tcache && tcache_boot1()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - if (config_fill && quarantine_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - if (config_prof && prof_boot2()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - /* Get number of CPUs. 
*/ - malloc_mutex_unlock(&init_lock); - ncpus = malloc_ncpus(); - malloc_mutex_lock(&init_lock); - - if (mutex_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - if (opt_narenas == 0) { - /* - * For SMP systems, create more than one arena per CPU by - * default. - */ - if (ncpus > 1) - opt_narenas = ncpus << 2; - else - opt_narenas = 1; - } - narenas_auto = opt_narenas; - /* - * Make sure that the arenas array can be allocated. In practice, this - * limit is enough to allow the allocator to function, but the ctl - * machinery will fail to allocate memory at far lower limits. - */ - if (narenas_auto > chunksize / sizeof(arena_t *)) { - narenas_auto = chunksize / sizeof(arena_t *); - malloc_printf(": Reducing narenas to limit (%d)\n", - narenas_auto); - } - narenas_total = narenas_auto; - - /* Allocate and initialize arenas. */ - arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas_total); - if (arenas == NULL) { - malloc_mutex_unlock(&init_lock); - return (true); - } - /* - * Zero the array. In practice, this should always be pre-zeroed, - * since it was just mmap()ed, but let's be sure. - */ - memset(arenas, 0, sizeof(arena_t *) * narenas_total); - /* Copy the pointer to the one arena that was already initialized. */ - arenas[0] = init_arenas[0]; - - malloc_initialized = true; - malloc_mutex_unlock(&init_lock); - return (false); -} - -/* - * End initialization functions. - */ -/******************************************************************************/ -/* - * Begin malloc(3)-compatible functions. 
- */ - -void * -je_malloc(size_t size) -{ - void *ret; - size_t usize JEMALLOC_CC_SILENCE_INIT(0); - prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); - - if (malloc_init()) { - ret = NULL; - goto label_oom; - } - - if (size == 0) - size = 1; - - if (config_prof && opt_prof) { - usize = s2u(size); - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) { - ret = NULL; - goto label_oom; - } - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= - SMALL_MAXCLASS) { - ret = imalloc(SMALL_MAXCLASS+1); - if (ret != NULL) - arena_prof_promoted(ret, usize); - } else - ret = imalloc(size); - } else { - if (config_stats || (config_valgrind && opt_valgrind)) - usize = s2u(size); - ret = imalloc(size); - } - -label_oom: - if (ret == NULL) { - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error in malloc(): " - "out of memory\n"); - abort(); - } - set_errno(ENOMEM); - } - if (config_prof && opt_prof && ret != NULL) - prof_malloc(ret, usize, cnt); - if (config_stats && ret != NULL) { - assert(usize == isalloc(ret, config_prof)); - thread_allocated_tsd_get()->allocated += usize; - } - UTRACE(0, size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false); - return (ret); -} - -JEMALLOC_ATTR(nonnull(1)) -#ifdef JEMALLOC_PROF -/* - * Avoid any uncertainty as to how many backtrace frames to ignore in - * PROF_ALLOC_PREP(). - */ -JEMALLOC_NOINLINE -#endif -static int -imemalign(void **memptr, size_t alignment, size_t size, - size_t min_alignment) -{ - int ret; - size_t usize; - void *result; - prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); - - assert(min_alignment != 0); - - if (malloc_init()) - result = NULL; - else { - if (size == 0) - size = 1; - - /* Make sure that alignment is a large enough power of 2. 
*/ - if (((alignment - 1) & alignment) != 0 - || (alignment < min_alignment)) { - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error allocating " - "aligned memory: invalid alignment\n"); - abort(); - } - result = NULL; - ret = EINVAL; - goto label_return; - } - - usize = sa2u(size, alignment); - if (usize == 0) { - result = NULL; - ret = ENOMEM; - goto label_return; - } - - if (config_prof && opt_prof) { - PROF_ALLOC_PREP(2, usize, cnt); - if (cnt == NULL) { - result = NULL; - ret = EINVAL; - } else { - if (prof_promote && (uintptr_t)cnt != - (uintptr_t)1U && usize <= SMALL_MAXCLASS) { - assert(sa2u(SMALL_MAXCLASS+1, - alignment) != 0); - result = ipalloc(sa2u(SMALL_MAXCLASS+1, - alignment), alignment, false); - if (result != NULL) { - arena_prof_promoted(result, - usize); - } - } else { - result = ipalloc(usize, alignment, - false); - } - } - } else - result = ipalloc(usize, alignment, false); - } - - if (result == NULL) { - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error allocating aligned " - "memory: out of memory\n"); - abort(); - } - ret = ENOMEM; - goto label_return; - } - - *memptr = result; - ret = 0; - -label_return: - if (config_stats && result != NULL) { - assert(usize == isalloc(result, config_prof)); - thread_allocated_tsd_get()->allocated += usize; - } - if (config_prof && opt_prof && result != NULL) - prof_malloc(result, usize, cnt); - UTRACE(0, size, result); - return (ret); -} - -int -je_posix_memalign(void **memptr, size_t alignment, size_t size) -{ - int ret = imemalign(memptr, alignment, size, sizeof(void *)); - JEMALLOC_VALGRIND_MALLOC(ret == 0, *memptr, isalloc(*memptr, - config_prof), false); - return (ret); -} - -void * -je_aligned_alloc(size_t alignment, size_t size) -{ - void *ret; - int err; - - if ((err = imemalign(&ret, alignment, size, 1)) != 0) { - ret = NULL; - set_errno(err); - } - JEMALLOC_VALGRIND_MALLOC(err == 0, ret, isalloc(ret, config_prof), - false); - return (ret); -} - -void * -je_calloc(size_t num, 
size_t size) -{ - void *ret; - size_t num_size; - size_t usize JEMALLOC_CC_SILENCE_INIT(0); - prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); - - if (malloc_init()) { - num_size = 0; - ret = NULL; - goto label_return; - } - - num_size = num * size; - if (num_size == 0) { - if (num == 0 || size == 0) - num_size = 1; - else { - ret = NULL; - goto label_return; - } - /* - * Try to avoid division here. We know that it isn't possible to - * overflow during multiplication if neither operand uses any of the - * most significant half of the bits in a size_t. - */ - } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2))) - && (num_size / size != num)) { - /* size_t overflow. */ - ret = NULL; - goto label_return; - } - - if (config_prof && opt_prof) { - usize = s2u(num_size); - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) { - ret = NULL; - goto label_return; - } - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize - <= SMALL_MAXCLASS) { - ret = icalloc(SMALL_MAXCLASS+1); - if (ret != NULL) - arena_prof_promoted(ret, usize); - } else - ret = icalloc(num_size); - } else { - if (config_stats || (config_valgrind && opt_valgrind)) - usize = s2u(num_size); - ret = icalloc(num_size); - } - -label_return: - if (ret == NULL) { - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error in calloc(): out of " - "memory\n"); - abort(); - } - set_errno(ENOMEM); - } - - if (config_prof && opt_prof && ret != NULL) - prof_malloc(ret, usize, cnt); - if (config_stats && ret != NULL) { - assert(usize == isalloc(ret, config_prof)); - thread_allocated_tsd_get()->allocated += usize; - } - UTRACE(0, num_size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, true); - return (ret); -} - -void * -je_realloc(void *ptr, size_t size) -{ - void *ret; - size_t usize JEMALLOC_CC_SILENCE_INIT(0); - size_t old_size = 0; - size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); - prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); - prof_ctx_t *old_ctx 
JEMALLOC_CC_SILENCE_INIT(NULL); - - if (size == 0) { - if (ptr != NULL) { - /* realloc(ptr, 0) is equivalent to free(p). */ - assert(malloc_initialized || IS_INITIALIZER); - if (config_prof) { - old_size = isalloc(ptr, true); - if (config_valgrind && opt_valgrind) - old_rzsize = p2rz(ptr); - } else if (config_stats) { - old_size = isalloc(ptr, false); - if (config_valgrind && opt_valgrind) - old_rzsize = u2rz(old_size); - } else if (config_valgrind && opt_valgrind) { - old_size = isalloc(ptr, false); - old_rzsize = u2rz(old_size); - } - if (config_prof && opt_prof) { - old_ctx = prof_ctx_get(ptr); - cnt = NULL; - } - iqalloc(ptr); - ret = NULL; - goto label_return; - } else - size = 1; - } - - if (ptr != NULL) { - assert(malloc_initialized || IS_INITIALIZER); - malloc_thread_init(); - - if (config_prof) { - old_size = isalloc(ptr, true); - if (config_valgrind && opt_valgrind) - old_rzsize = p2rz(ptr); - } else if (config_stats) { - old_size = isalloc(ptr, false); - if (config_valgrind && opt_valgrind) - old_rzsize = u2rz(old_size); - } else if (config_valgrind && opt_valgrind) { - old_size = isalloc(ptr, false); - old_rzsize = u2rz(old_size); - } - if (config_prof && opt_prof) { - usize = s2u(size); - old_ctx = prof_ctx_get(ptr); - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) { - old_ctx = NULL; - ret = NULL; - goto label_oom; - } - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && - usize <= SMALL_MAXCLASS) { - ret = iralloc(ptr, SMALL_MAXCLASS+1, 0, 0, - false, false); - if (ret != NULL) - arena_prof_promoted(ret, usize); - else - old_ctx = NULL; - } else { - ret = iralloc(ptr, size, 0, 0, false, false); - if (ret == NULL) - old_ctx = NULL; - } - } else { - if (config_stats || (config_valgrind && opt_valgrind)) - usize = s2u(size); - ret = iralloc(ptr, size, 0, 0, false, false); - } - -label_oom: - if (ret == NULL) { - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error in realloc(): " - "out of memory\n"); - abort(); - } - set_errno(ENOMEM); 
- } - } else { - /* realloc(NULL, size) is equivalent to malloc(size). */ - if (config_prof && opt_prof) - old_ctx = NULL; - if (malloc_init()) { - if (config_prof && opt_prof) - cnt = NULL; - ret = NULL; - } else { - if (config_prof && opt_prof) { - usize = s2u(size); - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) - ret = NULL; - else { - if (prof_promote && (uintptr_t)cnt != - (uintptr_t)1U && usize <= - SMALL_MAXCLASS) { - ret = imalloc(SMALL_MAXCLASS+1); - if (ret != NULL) { - arena_prof_promoted(ret, - usize); - } - } else - ret = imalloc(size); - } - } else { - if (config_stats || (config_valgrind && - opt_valgrind)) - usize = s2u(size); - ret = imalloc(size); - } - } - - if (ret == NULL) { - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error in realloc(): " - "out of memory\n"); - abort(); - } - set_errno(ENOMEM); - } - } - -label_return: - if (config_prof && opt_prof) - prof_realloc(ret, usize, cnt, old_size, old_ctx); - if (config_stats && ret != NULL) { - thread_allocated_t *ta; - assert(usize == isalloc(ret, config_prof)); - ta = thread_allocated_tsd_get(); - ta->allocated += usize; - ta->deallocated += old_size; - } - UTRACE(ptr, size, ret); - JEMALLOC_VALGRIND_REALLOC(ret, usize, ptr, old_size, old_rzsize, false); - return (ret); -} - -void -je_free(void *ptr) -{ - - UTRACE(ptr, 0, 0); - if (ptr != NULL) { - size_t usize; - size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); - - assert(malloc_initialized || IS_INITIALIZER); - - if (config_prof && opt_prof) { - usize = isalloc(ptr, config_prof); - prof_free(ptr, usize); - } else if (config_stats || config_valgrind) - usize = isalloc(ptr, config_prof); - if (config_stats) - thread_allocated_tsd_get()->deallocated += usize; - if (config_valgrind && opt_valgrind) - rzsize = p2rz(ptr); - iqalloc(ptr); - JEMALLOC_VALGRIND_FREE(ptr, rzsize); - } -} - -/* - * End malloc(3)-compatible functions. 
- */ -/******************************************************************************/ -/* - * Begin non-standard override functions. - */ - -#ifdef JEMALLOC_OVERRIDE_MEMALIGN -void * -je_memalign(size_t alignment, size_t size) -{ - void *ret JEMALLOC_CC_SILENCE_INIT(NULL); - imemalign(&ret, alignment, size, 1); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); - return (ret); -} -#endif - -#ifdef JEMALLOC_OVERRIDE_VALLOC -void * -je_valloc(size_t size) -{ - void *ret JEMALLOC_CC_SILENCE_INIT(NULL); - imemalign(&ret, PAGE, size, 1); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); - return (ret); -} -#endif - -/* - * is_malloc(je_malloc) is some macro magic to detect if jemalloc_defs.h has - * #define je_malloc malloc - */ -#define malloc_is_malloc 1 -#define is_malloc_(a) malloc_is_ ## a -#define is_malloc(a) is_malloc_(a) - -#if ((is_malloc(je_malloc) == 1) && defined(__GLIBC__) && !defined(__UCLIBC__)) -/* - * glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible - * to inconsistently reference libc's malloc(3)-compatible functions - * (https://bugzilla.mozilla.org/show_bug.cgi?id=493541). - * - * These definitions interpose hooks in glibc. The functions are actually - * passed an extra argument for the caller return address, which will be - * ignored. - */ -JEMALLOC_EXPORT void (* __free_hook)(void *ptr) = je_free; -JEMALLOC_EXPORT void *(* __malloc_hook)(size_t size) = je_malloc; -JEMALLOC_EXPORT void *(* __realloc_hook)(void *ptr, size_t size) = je_realloc; -JEMALLOC_EXPORT void *(* __memalign_hook)(size_t alignment, size_t size) = - je_memalign; -#endif - -/* - * End non-standard override functions. - */ -/******************************************************************************/ -/* - * Begin non-standard functions. 
- */ - -size_t -je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) -{ - size_t ret; - - assert(malloc_initialized || IS_INITIALIZER); - malloc_thread_init(); - - if (config_ivsalloc) - ret = ivsalloc(ptr, config_prof); - else - ret = (ptr != NULL) ? isalloc(ptr, config_prof) : 0; - - return (ret); -} - -void -je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, - const char *opts) -{ - - stats_print(write_cb, cbopaque, opts); -} - -int -je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) -{ - - if (malloc_init()) - return (EAGAIN); - - return (ctl_byname(name, oldp, oldlenp, newp, newlen)); -} - -int -je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) -{ - - if (malloc_init()) - return (EAGAIN); - - return (ctl_nametomib(name, mibp, miblenp)); -} - -int -je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - - if (malloc_init()) - return (EAGAIN); - - return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen)); -} - -/* - * End non-standard functions. - */ -/******************************************************************************/ -/* - * Begin experimental functions. - */ -#ifdef JEMALLOC_EXPERIMENTAL - -static JEMALLOC_ATTR(always_inline) void * -iallocm(size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t *arena) -{ - - assert(usize == ((alignment == 0) ? 
s2u(usize) : sa2u(usize, - alignment))); - - if (alignment != 0) - return (ipallocx(usize, alignment, zero, try_tcache, arena)); - else if (zero) - return (icallocx(usize, try_tcache, arena)); - else - return (imallocx(usize, try_tcache, arena)); -} - -int -je_allocm(void **ptr, size_t *rsize, size_t size, int flags) -{ - void *p; - size_t usize; - size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) - & (SIZE_T_MAX-1)); - bool zero = flags & ALLOCM_ZERO; - unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; - arena_t *arena; - bool try_tcache; - - assert(ptr != NULL); - assert(size != 0); - - if (malloc_init()) - goto label_oom; - - if (arena_ind != UINT_MAX) { - arena = arenas[arena_ind]; - try_tcache = false; - } else { - arena = NULL; - try_tcache = true; - } - - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); - if (usize == 0) - goto label_oom; - - if (config_prof && opt_prof) { - prof_thr_cnt_t *cnt; - - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) - goto label_oom; - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= - SMALL_MAXCLASS) { - size_t usize_promoted = (alignment == 0) ? 
- s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, - alignment); - assert(usize_promoted != 0); - p = iallocm(usize_promoted, alignment, zero, - try_tcache, arena); - if (p == NULL) - goto label_oom; - arena_prof_promoted(p, usize); - } else { - p = iallocm(usize, alignment, zero, try_tcache, arena); - if (p == NULL) - goto label_oom; - } - prof_malloc(p, usize, cnt); - } else { - p = iallocm(usize, alignment, zero, try_tcache, arena); - if (p == NULL) - goto label_oom; - } - if (rsize != NULL) - *rsize = usize; - - *ptr = p; - if (config_stats) { - assert(usize == isalloc(p, config_prof)); - thread_allocated_tsd_get()->allocated += usize; - } - UTRACE(0, size, p); - JEMALLOC_VALGRIND_MALLOC(true, p, usize, zero); - return (ALLOCM_SUCCESS); -label_oom: - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error in allocm(): " - "out of memory\n"); - abort(); - } - *ptr = NULL; - UTRACE(0, size, 0); - return (ALLOCM_ERR_OOM); -} - -int -je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) -{ - void *p, *q; - size_t usize; - size_t old_size; - size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); - size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) - & (SIZE_T_MAX-1)); - bool zero = flags & ALLOCM_ZERO; - bool no_move = flags & ALLOCM_NO_MOVE; - unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; - bool try_tcache_alloc, try_tcache_dalloc; - arena_t *arena; - - assert(ptr != NULL); - assert(*ptr != NULL); - assert(size != 0); - assert(SIZE_T_MAX - size >= extra); - assert(malloc_initialized || IS_INITIALIZER); - malloc_thread_init(); - - if (arena_ind != UINT_MAX) { - arena_chunk_t *chunk; - try_tcache_alloc = true; - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(*ptr); - try_tcache_dalloc = (chunk == *ptr || chunk->arena != - arenas[arena_ind]); - arena = arenas[arena_ind]; - } else { - try_tcache_alloc = true; - try_tcache_dalloc = true; - arena = NULL; - } - - p = *ptr; - if (config_prof && opt_prof) { - prof_thr_cnt_t *cnt; - - /* - * usize 
isn't knowable before iralloc() returns when extra is - * non-zero. Therefore, compute its maximum possible value and - * use that in PROF_ALLOC_PREP() to decide whether to capture a - * backtrace. prof_realloc() will use the actual usize to - * decide whether to sample. - */ - size_t max_usize = (alignment == 0) ? s2u(size+extra) : - sa2u(size+extra, alignment); - prof_ctx_t *old_ctx = prof_ctx_get(p); - old_size = isalloc(p, true); - if (config_valgrind && opt_valgrind) - old_rzsize = p2rz(p); - PROF_ALLOC_PREP(1, max_usize, cnt); - if (cnt == NULL) - goto label_oom; - /* - * Use minimum usize to determine whether promotion may happen. - */ - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U - && ((alignment == 0) ? s2u(size) : sa2u(size, alignment)) - <= SMALL_MAXCLASS) { - q = irallocx(p, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= - size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1), - alignment, zero, no_move, try_tcache_alloc, - try_tcache_dalloc, arena); - if (q == NULL) - goto label_err; - if (max_usize < PAGE) { - usize = max_usize; - arena_prof_promoted(q, usize); - } else - usize = isalloc(q, config_prof); - } else { - q = irallocx(p, size, extra, alignment, zero, no_move, - try_tcache_alloc, try_tcache_dalloc, arena); - if (q == NULL) - goto label_err; - usize = isalloc(q, config_prof); - } - prof_realloc(q, usize, cnt, old_size, old_ctx); - if (rsize != NULL) - *rsize = usize; - } else { - if (config_stats) { - old_size = isalloc(p, false); - if (config_valgrind && opt_valgrind) - old_rzsize = u2rz(old_size); - } else if (config_valgrind && opt_valgrind) { - old_size = isalloc(p, false); - old_rzsize = u2rz(old_size); - } - q = irallocx(p, size, extra, alignment, zero, no_move, - try_tcache_alloc, try_tcache_dalloc, arena); - if (q == NULL) - goto label_err; - if (config_stats) - usize = isalloc(q, config_prof); - if (rsize != NULL) { - if (config_stats == false) - usize = isalloc(q, config_prof); - *rsize = usize; - } - } - - *ptr = q; - if 
(config_stats) { - thread_allocated_t *ta; - ta = thread_allocated_tsd_get(); - ta->allocated += usize; - ta->deallocated += old_size; - } - UTRACE(p, size, q); - JEMALLOC_VALGRIND_REALLOC(q, usize, p, old_size, old_rzsize, zero); - return (ALLOCM_SUCCESS); -label_err: - if (no_move) { - UTRACE(p, size, q); - return (ALLOCM_ERR_NOT_MOVED); - } -label_oom: - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error in rallocm(): " - "out of memory\n"); - abort(); - } - UTRACE(p, size, 0); - return (ALLOCM_ERR_OOM); -} - -int -je_sallocm(const void *ptr, size_t *rsize, int flags) -{ - size_t sz; - - assert(malloc_initialized || IS_INITIALIZER); - malloc_thread_init(); - - if (config_ivsalloc) - sz = ivsalloc(ptr, config_prof); - else { - assert(ptr != NULL); - sz = isalloc(ptr, config_prof); - } - assert(rsize != NULL); - *rsize = sz; - - return (ALLOCM_SUCCESS); -} - -int -je_dallocm(void *ptr, int flags) -{ - size_t usize; - size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); - unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; - bool try_tcache; - - assert(ptr != NULL); - assert(malloc_initialized || IS_INITIALIZER); - - if (arena_ind != UINT_MAX) { - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - try_tcache = (chunk == ptr || chunk->arena != - arenas[arena_ind]); - } else - try_tcache = true; - - UTRACE(ptr, 0, 0); - if (config_stats || config_valgrind) - usize = isalloc(ptr, config_prof); - if (config_prof && opt_prof) { - if (config_stats == false && config_valgrind == false) - usize = isalloc(ptr, config_prof); - prof_free(ptr, usize); - } - if (config_stats) - thread_allocated_tsd_get()->deallocated += usize; - if (config_valgrind && opt_valgrind) - rzsize = p2rz(ptr); - iqallocx(ptr, try_tcache); - JEMALLOC_VALGRIND_FREE(ptr, rzsize); - - return (ALLOCM_SUCCESS); -} - -int -je_nallocm(size_t *rsize, size_t size, int flags) -{ - size_t usize; - size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) - & (SIZE_T_MAX-1)); - - assert(size != 0); - 
- if (malloc_init()) - return (ALLOCM_ERR_OOM); - - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); - if (usize == 0) - return (ALLOCM_ERR_OOM); - - if (rsize != NULL) - *rsize = usize; - return (ALLOCM_SUCCESS); -} - -#endif -/* - * End experimental functions. - */ -/******************************************************************************/ -/* - * The following functions are used by threading libraries for protection of - * malloc during fork(). - */ - -/* - * If an application creates a thread before doing any allocation in the main - * thread, then calls fork(2) in the main thread followed by memory allocation - * in the child process, a race can occur that results in deadlock within the - * child: the main thread may have forked while the created thread had - * partially initialized the allocator. Ordinarily jemalloc prevents - * fork/malloc races via the following functions it registers during - * initialization using pthread_atfork(), but of course that does no good if - * the allocator isn't fully initialized at fork time. The following library - * constructor is a partial solution to this problem. It may still possible to - * trigger the deadlock described above, but doing so would involve forking via - * a library constructor that runs before jemalloc's runs. - */ -JEMALLOC_ATTR(constructor) -static void -jemalloc_constructor(void) -{ - - malloc_init(); -} - -#ifndef JEMALLOC_MUTEX_INIT_CB -void -jemalloc_prefork(void) -#else -JEMALLOC_EXPORT void -_malloc_prefork(void) -#endif -{ - unsigned i; - -#ifdef JEMALLOC_MUTEX_INIT_CB - if (malloc_initialized == false) - return; -#endif - assert(malloc_initialized); - - /* Acquire all mutexes in a safe order. 
*/ - ctl_prefork(); - prof_prefork(); - malloc_mutex_prefork(&arenas_lock); - for (i = 0; i < narenas_total; i++) { - if (arenas[i] != NULL) - arena_prefork(arenas[i]); - } - chunk_prefork(); - base_prefork(); - huge_prefork(); -} - -#ifndef JEMALLOC_MUTEX_INIT_CB -void -jemalloc_postfork_parent(void) -#else -JEMALLOC_EXPORT void -_malloc_postfork(void) -#endif -{ - unsigned i; - -#ifdef JEMALLOC_MUTEX_INIT_CB - if (malloc_initialized == false) - return; -#endif - assert(malloc_initialized); - - /* Release all mutexes, now that fork() has completed. */ - huge_postfork_parent(); - base_postfork_parent(); - chunk_postfork_parent(); - for (i = 0; i < narenas_total; i++) { - if (arenas[i] != NULL) - arena_postfork_parent(arenas[i]); - } - malloc_mutex_postfork_parent(&arenas_lock); - prof_postfork_parent(); - ctl_postfork_parent(); -} - -void -jemalloc_postfork_child(void) -{ - unsigned i; - - assert(malloc_initialized); - - /* Release all mutexes, now that fork() has completed. */ - huge_postfork_child(); - base_postfork_child(); - chunk_postfork_child(); - for (i = 0; i < narenas_total; i++) { - if (arenas[i] != NULL) - arena_postfork_child(arenas[i]); - } - malloc_mutex_postfork_child(&arenas_lock); - prof_postfork_child(); - ctl_postfork_child(); -} - -/******************************************************************************/ -/* - * The following functions are used for TLS allocation/deallocation in static - * binaries on FreeBSD. The primary difference between these and i[mcd]alloc() - * is that these avoid accessing TLS variables. 
- */ - -static void * -a0alloc(size_t size, bool zero) -{ - - if (malloc_init()) - return (NULL); - - if (size == 0) - size = 1; - - if (size <= arena_maxclass) - return (arena_malloc(arenas[0], size, zero, false)); - else - return (huge_malloc(size, zero)); -} - -void * -a0malloc(size_t size) -{ - - return (a0alloc(size, false)); -} - -void * -a0calloc(size_t num, size_t size) -{ - - return (a0alloc(num * size, true)); -} - -void -a0free(void *ptr) -{ - arena_chunk_t *chunk; - - if (ptr == NULL) - return; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) - arena_dalloc(chunk->arena, chunk, ptr, false); - else - huge_dalloc(ptr, true); -} - -/******************************************************************************/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/mb.c mariadb-5.5-5.5.40/extra/jemalloc/src/mb.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/mb.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/mb.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -#define JEMALLOC_MB_C_ -#include "jemalloc/internal/jemalloc_internal.h" diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/mutex.c mariadb-5.5-5.5.40/extra/jemalloc/src/mutex.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/mutex.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/mutex.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,149 +0,0 @@ -#define JEMALLOC_MUTEX_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) -#include -#endif - -#ifndef _CRT_SPINCOUNT -#define _CRT_SPINCOUNT 4000 -#endif - -/******************************************************************************/ -/* Data. 
*/ - -#ifdef JEMALLOC_LAZY_LOCK -bool isthreaded = false; -#endif -#ifdef JEMALLOC_MUTEX_INIT_CB -static bool postpone_init = true; -static malloc_mutex_t *postponed_mutexes = NULL; -#endif - -#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) -static void pthread_create_once(void); -#endif - -/******************************************************************************/ -/* - * We intercept pthread_create() calls in order to toggle isthreaded if the - * process goes multi-threaded. - */ - -#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) -static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, - void *(*)(void *), void *__restrict); - -static void -pthread_create_once(void) -{ - - pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); - if (pthread_create_fptr == NULL) { - malloc_write(": Error in dlsym(RTLD_NEXT, " - "\"pthread_create\")\n"); - abort(); - } - - isthreaded = true; -} - -JEMALLOC_EXPORT int -pthread_create(pthread_t *__restrict thread, - const pthread_attr_t *__restrict attr, void *(*start_routine)(void *), - void *__restrict arg) -{ - static pthread_once_t once_control = PTHREAD_ONCE_INIT; - - pthread_once(&once_control, pthread_create_once); - - return (pthread_create_fptr(thread, attr, start_routine, arg)); -} -#endif - -/******************************************************************************/ - -#ifdef JEMALLOC_MUTEX_INIT_CB -JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, - void *(calloc_cb)(size_t, size_t)); -#endif - -bool -malloc_mutex_init(malloc_mutex_t *mutex) -{ - -#ifdef _WIN32 - if (!InitializeCriticalSectionAndSpinCount(&mutex->lock, - _CRT_SPINCOUNT)) - return (true); -#elif (defined(JEMALLOC_OSSPIN)) - mutex->lock = 0; -#elif (defined(JEMALLOC_MUTEX_INIT_CB)) - if (postpone_init) { - mutex->postponed_next = postponed_mutexes; - postponed_mutexes = mutex; - } else { - if (_pthread_mutex_init_calloc_cb(&mutex->lock, base_calloc) != - 0) - return (true); - } -#else - 
pthread_mutexattr_t attr; - - if (pthread_mutexattr_init(&attr) != 0) - return (true); - pthread_mutexattr_settype(&attr, MALLOC_MUTEX_TYPE); - if (pthread_mutex_init(&mutex->lock, &attr) != 0) { - pthread_mutexattr_destroy(&attr); - return (true); - } - pthread_mutexattr_destroy(&attr); -#endif - return (false); -} - -void -malloc_mutex_prefork(malloc_mutex_t *mutex) -{ - - malloc_mutex_lock(mutex); -} - -void -malloc_mutex_postfork_parent(malloc_mutex_t *mutex) -{ - - malloc_mutex_unlock(mutex); -} - -void -malloc_mutex_postfork_child(malloc_mutex_t *mutex) -{ - -#ifdef JEMALLOC_MUTEX_INIT_CB - malloc_mutex_unlock(mutex); -#else - if (malloc_mutex_init(mutex)) { - malloc_printf(": Error re-initializing mutex in " - "child\n"); - if (opt_abort) - abort(); - } -#endif -} - -bool -mutex_boot(void) -{ - -#ifdef JEMALLOC_MUTEX_INIT_CB - postpone_init = false; - while (postponed_mutexes != NULL) { - if (_pthread_mutex_init_calloc_cb(&postponed_mutexes->lock, - base_calloc) != 0) - return (true); - postponed_mutexes = postponed_mutexes->postponed_next; - } -#endif - return (false); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/prof.c mariadb-5.5-5.5.40/extra/jemalloc/src/prof.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/prof.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/prof.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,1283 +0,0 @@ -#define JEMALLOC_PROF_C_ -#include "jemalloc/internal/jemalloc_internal.h" -/******************************************************************************/ - -#ifdef JEMALLOC_PROF_LIBUNWIND -#define UNW_LOCAL_ONLY -#include -#endif - -#ifdef JEMALLOC_PROF_LIBGCC -#include -#endif - -/******************************************************************************/ -/* Data. 
*/ - -malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL) - -bool opt_prof = false; -bool opt_prof_active = true; -size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; -ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; -bool opt_prof_gdump = false; -bool opt_prof_final = true; -bool opt_prof_leak = false; -bool opt_prof_accum = false; -char opt_prof_prefix[PATH_MAX + 1]; - -uint64_t prof_interval = 0; -bool prof_promote; - -/* - * Table of mutexes that are shared among ctx's. These are leaf locks, so - * there is no problem with using them for more than one ctx at the same time. - * The primary motivation for this sharing though is that ctx's are ephemeral, - * and destroying mutexes causes complications for systems that allocate when - * creating/destroying mutexes. - */ -static malloc_mutex_t *ctx_locks; -static unsigned cum_ctxs; /* Atomic counter. */ - -/* - * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data - * structure that knows about all backtraces currently captured. - */ -static ckh_t bt2ctx; -static malloc_mutex_t bt2ctx_mtx; - -static malloc_mutex_t prof_dump_seq_mtx; -static uint64_t prof_dump_seq; -static uint64_t prof_dump_iseq; -static uint64_t prof_dump_mseq; -static uint64_t prof_dump_useq; - -/* - * This buffer is rather large for stack allocation, so use a single buffer for - * all profile dumps. The buffer is implicitly protected by bt2ctx_mtx, since - * it must be locked anyway during dumping. - */ -static char prof_dump_buf[PROF_DUMP_BUFSIZE]; -static unsigned prof_dump_buf_end; -static int prof_dump_fd; - -/* Do not dump any profiles until bootstrapping is complete. */ -static bool prof_booted = false; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ - -static prof_bt_t *bt_dup(prof_bt_t *bt); -static void bt_destroy(prof_bt_t *bt); -#ifdef JEMALLOC_PROF_LIBGCC -static _Unwind_Reason_Code prof_unwind_init_callback( - struct _Unwind_Context *context, void *arg); -static _Unwind_Reason_Code prof_unwind_callback( - struct _Unwind_Context *context, void *arg); -#endif -static bool prof_flush(bool propagate_err); -static bool prof_write(bool propagate_err, const char *s); -static bool prof_printf(bool propagate_err, const char *format, ...) - JEMALLOC_ATTR(format(printf, 2, 3)); -static void prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, - size_t *leak_nctx); -static void prof_ctx_destroy(prof_ctx_t *ctx); -static void prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt); -static bool prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, - prof_bt_t *bt); -static bool prof_dump_maps(bool propagate_err); -static bool prof_dump(bool propagate_err, const char *filename, - bool leakcheck); -static void prof_dump_filename(char *filename, char v, int64_t vseq); -static void prof_fdump(void); -static void prof_bt_hash(const void *key, size_t r_hash[2]); -static bool prof_bt_keycomp(const void *k1, const void *k2); -static malloc_mutex_t *prof_ctx_mutex_choose(void); - -/******************************************************************************/ - -void -bt_init(prof_bt_t *bt, void **vec) -{ - - cassert(config_prof); - - bt->vec = vec; - bt->len = 0; -} - -static void -bt_destroy(prof_bt_t *bt) -{ - - cassert(config_prof); - - idalloc(bt); -} - -static prof_bt_t * -bt_dup(prof_bt_t *bt) -{ - prof_bt_t *ret; - - cassert(config_prof); - - /* - * Create a single allocation that has space for vec immediately - * following the prof_bt_t structure. The backtraces that get - * stored in the backtrace caches are copied from stack-allocated - * temporary variables, so size is known at creation time. Making this - * a contiguous object improves cache locality. 
- */ - ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) + - (bt->len * sizeof(void *))); - if (ret == NULL) - return (NULL); - ret->vec = (void **)((uintptr_t)ret + - QUANTUM_CEILING(sizeof(prof_bt_t))); - memcpy(ret->vec, bt->vec, bt->len * sizeof(void *)); - ret->len = bt->len; - - return (ret); -} - -static inline void -prof_enter(prof_tdata_t *prof_tdata) -{ - - cassert(config_prof); - - assert(prof_tdata->enq == false); - prof_tdata->enq = true; - - malloc_mutex_lock(&bt2ctx_mtx); -} - -static inline void -prof_leave(prof_tdata_t *prof_tdata) -{ - bool idump, gdump; - - cassert(config_prof); - - malloc_mutex_unlock(&bt2ctx_mtx); - - assert(prof_tdata->enq); - prof_tdata->enq = false; - idump = prof_tdata->enq_idump; - prof_tdata->enq_idump = false; - gdump = prof_tdata->enq_gdump; - prof_tdata->enq_gdump = false; - - if (idump) - prof_idump(); - if (gdump) - prof_gdump(); -} - -#ifdef JEMALLOC_PROF_LIBUNWIND -void -prof_backtrace(prof_bt_t *bt, unsigned nignore) -{ - unw_context_t uc; - unw_cursor_t cursor; - unsigned i; - int err; - - cassert(config_prof); - assert(bt->len == 0); - assert(bt->vec != NULL); - - unw_getcontext(&uc); - unw_init_local(&cursor, &uc); - - /* Throw away (nignore+1) stack frames, if that many exist. */ - for (i = 0; i < nignore + 1; i++) { - err = unw_step(&cursor); - if (err <= 0) - return; - } - - /* - * Iterate over stack frames until there are no more, or until no space - * remains in bt. 
- */ - for (i = 0; i < PROF_BT_MAX; i++) { - unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]); - bt->len++; - err = unw_step(&cursor); - if (err <= 0) - break; - } -} -#elif (defined(JEMALLOC_PROF_LIBGCC)) -static _Unwind_Reason_Code -prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) -{ - - cassert(config_prof); - - return (_URC_NO_REASON); -} - -static _Unwind_Reason_Code -prof_unwind_callback(struct _Unwind_Context *context, void *arg) -{ - prof_unwind_data_t *data = (prof_unwind_data_t *)arg; - - cassert(config_prof); - - if (data->nignore > 0) - data->nignore--; - else { - data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); - data->bt->len++; - if (data->bt->len == data->max) - return (_URC_END_OF_STACK); - } - - return (_URC_NO_REASON); -} - -void -prof_backtrace(prof_bt_t *bt, unsigned nignore) -{ - prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX}; - - cassert(config_prof); - - _Unwind_Backtrace(prof_unwind_callback, &data); -} -#elif (defined(JEMALLOC_PROF_GCC)) -void -prof_backtrace(prof_bt_t *bt, unsigned nignore) -{ -#define BT_FRAME(i) \ - if ((i) < nignore + PROF_BT_MAX) { \ - void *p; \ - if (__builtin_frame_address(i) == 0) \ - return; \ - p = __builtin_return_address(i); \ - if (p == NULL) \ - return; \ - if (i >= nignore) { \ - bt->vec[(i) - nignore] = p; \ - bt->len = (i) - nignore + 1; \ - } \ - } else \ - return; - - cassert(config_prof); - assert(nignore <= 3); - - BT_FRAME(0) - BT_FRAME(1) - BT_FRAME(2) - BT_FRAME(3) - BT_FRAME(4) - BT_FRAME(5) - BT_FRAME(6) - BT_FRAME(7) - BT_FRAME(8) - BT_FRAME(9) - - BT_FRAME(10) - BT_FRAME(11) - BT_FRAME(12) - BT_FRAME(13) - BT_FRAME(14) - BT_FRAME(15) - BT_FRAME(16) - BT_FRAME(17) - BT_FRAME(18) - BT_FRAME(19) - - BT_FRAME(20) - BT_FRAME(21) - BT_FRAME(22) - BT_FRAME(23) - BT_FRAME(24) - BT_FRAME(25) - BT_FRAME(26) - BT_FRAME(27) - BT_FRAME(28) - BT_FRAME(29) - - BT_FRAME(30) - BT_FRAME(31) - BT_FRAME(32) - BT_FRAME(33) - BT_FRAME(34) - BT_FRAME(35) - 
BT_FRAME(36) - BT_FRAME(37) - BT_FRAME(38) - BT_FRAME(39) - - BT_FRAME(40) - BT_FRAME(41) - BT_FRAME(42) - BT_FRAME(43) - BT_FRAME(44) - BT_FRAME(45) - BT_FRAME(46) - BT_FRAME(47) - BT_FRAME(48) - BT_FRAME(49) - - BT_FRAME(50) - BT_FRAME(51) - BT_FRAME(52) - BT_FRAME(53) - BT_FRAME(54) - BT_FRAME(55) - BT_FRAME(56) - BT_FRAME(57) - BT_FRAME(58) - BT_FRAME(59) - - BT_FRAME(60) - BT_FRAME(61) - BT_FRAME(62) - BT_FRAME(63) - BT_FRAME(64) - BT_FRAME(65) - BT_FRAME(66) - BT_FRAME(67) - BT_FRAME(68) - BT_FRAME(69) - - BT_FRAME(70) - BT_FRAME(71) - BT_FRAME(72) - BT_FRAME(73) - BT_FRAME(74) - BT_FRAME(75) - BT_FRAME(76) - BT_FRAME(77) - BT_FRAME(78) - BT_FRAME(79) - - BT_FRAME(80) - BT_FRAME(81) - BT_FRAME(82) - BT_FRAME(83) - BT_FRAME(84) - BT_FRAME(85) - BT_FRAME(86) - BT_FRAME(87) - BT_FRAME(88) - BT_FRAME(89) - - BT_FRAME(90) - BT_FRAME(91) - BT_FRAME(92) - BT_FRAME(93) - BT_FRAME(94) - BT_FRAME(95) - BT_FRAME(96) - BT_FRAME(97) - BT_FRAME(98) - BT_FRAME(99) - - BT_FRAME(100) - BT_FRAME(101) - BT_FRAME(102) - BT_FRAME(103) - BT_FRAME(104) - BT_FRAME(105) - BT_FRAME(106) - BT_FRAME(107) - BT_FRAME(108) - BT_FRAME(109) - - BT_FRAME(110) - BT_FRAME(111) - BT_FRAME(112) - BT_FRAME(113) - BT_FRAME(114) - BT_FRAME(115) - BT_FRAME(116) - BT_FRAME(117) - BT_FRAME(118) - BT_FRAME(119) - - BT_FRAME(120) - BT_FRAME(121) - BT_FRAME(122) - BT_FRAME(123) - BT_FRAME(124) - BT_FRAME(125) - BT_FRAME(126) - BT_FRAME(127) - - /* Extras to compensate for nignore. 
*/ - BT_FRAME(128) - BT_FRAME(129) - BT_FRAME(130) -#undef BT_FRAME -} -#else -void -prof_backtrace(prof_bt_t *bt, unsigned nignore) -{ - - cassert(config_prof); - assert(false); -} -#endif - -prof_thr_cnt_t * -prof_lookup(prof_bt_t *bt) -{ - union { - prof_thr_cnt_t *p; - void *v; - } ret; - prof_tdata_t *prof_tdata; - - cassert(config_prof); - - prof_tdata = prof_tdata_get(false); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) - return (NULL); - - if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { - union { - prof_bt_t *p; - void *v; - } btkey; - union { - prof_ctx_t *p; - void *v; - } ctx; - bool new_ctx; - - /* - * This thread's cache lacks bt. Look for it in the global - * cache. - */ - prof_enter(prof_tdata); - if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { - /* bt has never been seen before. Insert it. */ - ctx.v = imalloc(sizeof(prof_ctx_t)); - if (ctx.v == NULL) { - prof_leave(prof_tdata); - return (NULL); - } - btkey.p = bt_dup(bt); - if (btkey.v == NULL) { - prof_leave(prof_tdata); - idalloc(ctx.v); - return (NULL); - } - ctx.p->bt = btkey.p; - ctx.p->lock = prof_ctx_mutex_choose(); - /* - * Set nlimbo to 1, in order to avoid a race condition - * with prof_ctx_merge()/prof_ctx_destroy(). - */ - ctx.p->nlimbo = 1; - memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t)); - ql_new(&ctx.p->cnts_ql); - if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { - /* OOM. */ - prof_leave(prof_tdata); - idalloc(btkey.v); - idalloc(ctx.v); - return (NULL); - } - new_ctx = true; - } else { - /* - * Increment nlimbo, in order to avoid a race condition - * with prof_ctx_merge()/prof_ctx_destroy(). - */ - malloc_mutex_lock(ctx.p->lock); - ctx.p->nlimbo++; - malloc_mutex_unlock(ctx.p->lock); - new_ctx = false; - } - prof_leave(prof_tdata); - - /* Link a prof_thd_cnt_t into ctx for this thread. 
*/ - if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) { - assert(ckh_count(&prof_tdata->bt2cnt) > 0); - /* - * Flush the least recently used cnt in order to keep - * bt2cnt from becoming too large. - */ - ret.p = ql_last(&prof_tdata->lru_ql, lru_link); - assert(ret.v != NULL); - if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, - NULL, NULL)) - assert(false); - ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); - prof_ctx_merge(ret.p->ctx, ret.p); - /* ret can now be re-used. */ - } else { - assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX); - /* Allocate and partially initialize a new cnt. */ - ret.v = imalloc(sizeof(prof_thr_cnt_t)); - if (ret.p == NULL) { - if (new_ctx) - prof_ctx_destroy(ctx.p); - return (NULL); - } - ql_elm_new(ret.p, cnts_link); - ql_elm_new(ret.p, lru_link); - } - /* Finish initializing ret. */ - ret.p->ctx = ctx.p; - ret.p->epoch = 0; - memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); - if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) { - if (new_ctx) - prof_ctx_destroy(ctx.p); - idalloc(ret.v); - return (NULL); - } - ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); - malloc_mutex_lock(ctx.p->lock); - ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); - ctx.p->nlimbo--; - malloc_mutex_unlock(ctx.p->lock); - } else { - /* Move ret to the front of the LRU. 
*/ - ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); - ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); - } - - return (ret.p); -} - -static bool -prof_flush(bool propagate_err) -{ - bool ret = false; - ssize_t err; - - cassert(config_prof); - - err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); - if (err == -1) { - if (propagate_err == false) { - malloc_write(": write() failed during heap " - "profile flush\n"); - if (opt_abort) - abort(); - } - ret = true; - } - prof_dump_buf_end = 0; - - return (ret); -} - -static bool -prof_write(bool propagate_err, const char *s) -{ - unsigned i, slen, n; - - cassert(config_prof); - - i = 0; - slen = strlen(s); - while (i < slen) { - /* Flush the buffer if it is full. */ - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) - if (prof_flush(propagate_err) && propagate_err) - return (true); - - if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) { - /* Finish writing. */ - n = slen - i; - } else { - /* Write as much of s as will fit. */ - n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; - } - memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); - prof_dump_buf_end += n; - i += n; - } - - return (false); -} - -JEMALLOC_ATTR(format(printf, 2, 3)) -static bool -prof_printf(bool propagate_err, const char *format, ...) -{ - bool ret; - va_list ap; - char buf[PROF_PRINTF_BUFSIZE]; - - va_start(ap, format); - malloc_vsnprintf(buf, sizeof(buf), format, ap); - va_end(ap); - ret = prof_write(propagate_err, buf); - - return (ret); -} - -static void -prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) -{ - prof_thr_cnt_t *thr_cnt; - prof_cnt_t tcnt; - - cassert(config_prof); - - malloc_mutex_lock(ctx->lock); - - memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t)); - ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) { - volatile unsigned *epoch = &thr_cnt->epoch; - - while (true) { - unsigned epoch0 = *epoch; - - /* Make sure epoch is even. 
*/ - if (epoch0 & 1U) - continue; - - memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t)); - - /* Terminate if epoch didn't change while reading. */ - if (*epoch == epoch0) - break; - } - - ctx->cnt_summed.curobjs += tcnt.curobjs; - ctx->cnt_summed.curbytes += tcnt.curbytes; - if (opt_prof_accum) { - ctx->cnt_summed.accumobjs += tcnt.accumobjs; - ctx->cnt_summed.accumbytes += tcnt.accumbytes; - } - } - - if (ctx->cnt_summed.curobjs != 0) - (*leak_nctx)++; - - /* Add to cnt_all. */ - cnt_all->curobjs += ctx->cnt_summed.curobjs; - cnt_all->curbytes += ctx->cnt_summed.curbytes; - if (opt_prof_accum) { - cnt_all->accumobjs += ctx->cnt_summed.accumobjs; - cnt_all->accumbytes += ctx->cnt_summed.accumbytes; - } - - malloc_mutex_unlock(ctx->lock); -} - -static void -prof_ctx_destroy(prof_ctx_t *ctx) -{ - prof_tdata_t *prof_tdata; - - cassert(config_prof); - - /* - * Check that ctx is still unused by any thread cache before destroying - * it. prof_lookup() increments ctx->nlimbo in order to avoid a race - * condition with this function, as does prof_ctx_merge() in order to - * avoid a race between the main body of prof_ctx_merge() and entry - * into this function. - */ - prof_tdata = prof_tdata_get(false); - assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX); - prof_enter(prof_tdata); - malloc_mutex_lock(ctx->lock); - if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 && - ctx->nlimbo == 1) { - assert(ctx->cnt_merged.curbytes == 0); - assert(ctx->cnt_merged.accumobjs == 0); - assert(ctx->cnt_merged.accumbytes == 0); - /* Remove ctx from bt2ctx. */ - if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) - assert(false); - prof_leave(prof_tdata); - /* Destroy ctx. */ - malloc_mutex_unlock(ctx->lock); - bt_destroy(ctx->bt); - idalloc(ctx); - } else { - /* - * Compensate for increment in prof_ctx_merge() or - * prof_lookup(). 
- */ - ctx->nlimbo--; - malloc_mutex_unlock(ctx->lock); - prof_leave(prof_tdata); - } -} - -static void -prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) -{ - bool destroy; - - cassert(config_prof); - - /* Merge cnt stats and detach from ctx. */ - malloc_mutex_lock(ctx->lock); - ctx->cnt_merged.curobjs += cnt->cnts.curobjs; - ctx->cnt_merged.curbytes += cnt->cnts.curbytes; - ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs; - ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; - ql_remove(&ctx->cnts_ql, cnt, cnts_link); - if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL && - ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) { - /* - * Increment ctx->nlimbo in order to keep another thread from - * winning the race to destroy ctx while this one has ctx->lock - * dropped. Without this, it would be possible for another - * thread to: - * - * 1) Sample an allocation associated with ctx. - * 2) Deallocate the sampled object. - * 3) Successfully prof_ctx_destroy(ctx). - * - * The result would be that ctx no longer exists by the time - * this thread accesses it in prof_ctx_destroy(). - */ - ctx->nlimbo++; - destroy = true; - } else - destroy = false; - malloc_mutex_unlock(ctx->lock); - if (destroy) - prof_ctx_destroy(ctx); -} - -static bool -prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt) -{ - unsigned i; - - cassert(config_prof); - - /* - * Current statistics can sum to 0 as a result of unmerged per thread - * statistics. Additionally, interval- and growth-triggered dumps can - * occur between the time a ctx is created and when its statistics are - * filled in. Avoid dumping any ctx that is an artifact of either - * implementation detail. 
- */ - if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) || - (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) { - assert(ctx->cnt_summed.curobjs == 0); - assert(ctx->cnt_summed.curbytes == 0); - assert(ctx->cnt_summed.accumobjs == 0); - assert(ctx->cnt_summed.accumbytes == 0); - return (false); - } - - if (prof_printf(propagate_err, "%"PRId64": %"PRId64 - " [%"PRIu64": %"PRIu64"] @", - ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes, - ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) - return (true); - - for (i = 0; i < bt->len; i++) { - if (prof_printf(propagate_err, " %#"PRIxPTR, - (uintptr_t)bt->vec[i])) - return (true); - } - - if (prof_write(propagate_err, "\n")) - return (true); - - return (false); -} - -static bool -prof_dump_maps(bool propagate_err) -{ - int mfd; - char filename[PATH_MAX + 1]; - - cassert(config_prof); - - malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps", - (int)getpid()); - mfd = open(filename, O_RDONLY); - if (mfd != -1) { - ssize_t nread; - - if (prof_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && - propagate_err) - return (true); - nread = 0; - do { - prof_dump_buf_end += nread; - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - /* Make space in prof_dump_buf before read(). 
*/ - if (prof_flush(propagate_err) && propagate_err) - return (true); - } - nread = read(mfd, &prof_dump_buf[prof_dump_buf_end], - PROF_DUMP_BUFSIZE - prof_dump_buf_end); - } while (nread > 0); - close(mfd); - } else - return (true); - - return (false); -} - -static bool -prof_dump(bool propagate_err, const char *filename, bool leakcheck) -{ - prof_tdata_t *prof_tdata; - prof_cnt_t cnt_all; - size_t tabind; - union { - prof_bt_t *p; - void *v; - } bt; - union { - prof_ctx_t *p; - void *v; - } ctx; - size_t leak_nctx; - - cassert(config_prof); - - prof_tdata = prof_tdata_get(false); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) - return (true); - prof_enter(prof_tdata); - prof_dump_fd = creat(filename, 0644); - if (prof_dump_fd == -1) { - if (propagate_err == false) { - malloc_printf( - ": creat(\"%s\"), 0644) failed\n", - filename); - if (opt_abort) - abort(); - } - goto label_error; - } - - /* Merge per thread profile stats, and sum them in cnt_all. */ - memset(&cnt_all, 0, sizeof(prof_cnt_t)); - leak_nctx = 0; - for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;) - prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx); - - /* Dump profile header. */ - if (opt_lg_prof_sample == 0) { - if (prof_printf(propagate_err, - "heap profile: %"PRId64": %"PRId64 - " [%"PRIu64": %"PRIu64"] @ heapprofile\n", - cnt_all.curobjs, cnt_all.curbytes, - cnt_all.accumobjs, cnt_all.accumbytes)) - goto label_error; - } else { - if (prof_printf(propagate_err, - "heap profile: %"PRId64": %"PRId64 - " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n", - cnt_all.curobjs, cnt_all.curbytes, - cnt_all.accumobjs, cnt_all.accumbytes, - ((uint64_t)1U << opt_lg_prof_sample))) - goto label_error; - } - - /* Dump per ctx profile stats. */ - for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v) - == false;) { - if (prof_dump_ctx(propagate_err, ctx.p, bt.p)) - goto label_error; - } - - /* Dump /proc//maps if possible. 
*/ - if (prof_dump_maps(propagate_err)) - goto label_error; - - if (prof_flush(propagate_err)) - goto label_error; - close(prof_dump_fd); - prof_leave(prof_tdata); - - if (leakcheck && cnt_all.curbytes != 0) { - malloc_printf(": Leak summary: %"PRId64" byte%s, %" - PRId64" object%s, %zu context%s\n", - cnt_all.curbytes, (cnt_all.curbytes != 1) ? "s" : "", - cnt_all.curobjs, (cnt_all.curobjs != 1) ? "s" : "", - leak_nctx, (leak_nctx != 1) ? "s" : ""); - malloc_printf( - ": Run pprof on \"%s\" for leak detail\n", - filename); - } - - return (false); -label_error: - prof_leave(prof_tdata); - return (true); -} - -#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) -static void -prof_dump_filename(char *filename, char v, int64_t vseq) -{ - - cassert(config_prof); - - if (vseq != UINT64_C(0xffffffffffffffff)) { - /* "...v.heap" */ - malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"PRIu64".%c%"PRId64".heap", - opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq); - } else { - /* "....heap" */ - malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"PRIu64".%c.heap", - opt_prof_prefix, (int)getpid(), prof_dump_seq, v); - } - prof_dump_seq++; -} - -static void -prof_fdump(void) -{ - char filename[DUMP_FILENAME_BUFSIZE]; - - cassert(config_prof); - - if (prof_booted == false) - return; - - if (opt_prof_final && opt_prof_prefix[0] != '\0') { - malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename, 'f', UINT64_C(0xffffffffffffffff)); - malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(false, filename, opt_prof_leak); - } -} - -void -prof_idump(void) -{ - prof_tdata_t *prof_tdata; - char filename[PATH_MAX + 1]; - - cassert(config_prof); - - if (prof_booted == false) - return; - prof_tdata = prof_tdata_get(false); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) - return; - if (prof_tdata->enq) { - prof_tdata->enq_idump = true; - return; - } - - if (opt_prof_prefix[0] != '\0') { - malloc_mutex_lock(&prof_dump_seq_mtx); - 
prof_dump_filename(filename, 'i', prof_dump_iseq); - prof_dump_iseq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(false, filename, false); - } -} - -bool -prof_mdump(const char *filename) -{ - char filename_buf[DUMP_FILENAME_BUFSIZE]; - - cassert(config_prof); - - if (opt_prof == false || prof_booted == false) - return (true); - - if (filename == NULL) { - /* No filename specified, so automatically generate one. */ - if (opt_prof_prefix[0] == '\0') - return (true); - malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename_buf, 'm', prof_dump_mseq); - prof_dump_mseq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); - filename = filename_buf; - } - return (prof_dump(true, filename, false)); -} - -void -prof_gdump(void) -{ - prof_tdata_t *prof_tdata; - char filename[DUMP_FILENAME_BUFSIZE]; - - cassert(config_prof); - - if (prof_booted == false) - return; - prof_tdata = prof_tdata_get(false); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) - return; - if (prof_tdata->enq) { - prof_tdata->enq_gdump = true; - return; - } - - if (opt_prof_prefix[0] != '\0') { - malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename, 'u', prof_dump_useq); - prof_dump_useq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(false, filename, false); - } -} - -static void -prof_bt_hash(const void *key, size_t r_hash[2]) -{ - prof_bt_t *bt = (prof_bt_t *)key; - - cassert(config_prof); - - hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash); -} - -static bool -prof_bt_keycomp(const void *k1, const void *k2) -{ - const prof_bt_t *bt1 = (prof_bt_t *)k1; - const prof_bt_t *bt2 = (prof_bt_t *)k2; - - cassert(config_prof); - - if (bt1->len != bt2->len) - return (false); - return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); -} - -static malloc_mutex_t * -prof_ctx_mutex_choose(void) -{ - unsigned nctxs = atomic_add_u(&cum_ctxs, 1); - - return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]); -} - -prof_tdata_t * 
-prof_tdata_init(void) -{ - prof_tdata_t *prof_tdata; - - cassert(config_prof); - - /* Initialize an empty cache for this thread. */ - prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t)); - if (prof_tdata == NULL) - return (NULL); - - if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS, - prof_bt_hash, prof_bt_keycomp)) { - idalloc(prof_tdata); - return (NULL); - } - ql_new(&prof_tdata->lru_ql); - - prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX); - if (prof_tdata->vec == NULL) { - ckh_delete(&prof_tdata->bt2cnt); - idalloc(prof_tdata); - return (NULL); - } - - prof_tdata->prng_state = 0; - prof_tdata->threshold = 0; - prof_tdata->accum = 0; - - prof_tdata->enq = false; - prof_tdata->enq_idump = false; - prof_tdata->enq_gdump = false; - - prof_tdata_tsd_set(&prof_tdata); - - return (prof_tdata); -} - -void -prof_tdata_cleanup(void *arg) -{ - prof_thr_cnt_t *cnt; - prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg; - - cassert(config_prof); - - if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) { - /* - * Another destructor deallocated memory after this destructor - * was called. Reset prof_tdata to PROF_TDATA_STATE_PURGATORY - * in order to receive another callback. - */ - prof_tdata = PROF_TDATA_STATE_PURGATORY; - prof_tdata_tsd_set(&prof_tdata); - } else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) { - /* - * The previous time this destructor was called, we set the key - * to PROF_TDATA_STATE_PURGATORY so that other destructors - * wouldn't cause re-creation of the prof_tdata. This time, do - * nothing, so that the destructor will not be called again. - */ - } else if (prof_tdata != NULL) { - /* - * Delete the hash table. All of its contents can still be - * iterated over via the LRU. - */ - ckh_delete(&prof_tdata->bt2cnt); - /* - * Iteratively merge cnt's into the global stats and delete - * them. 
- */ - while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { - ql_remove(&prof_tdata->lru_ql, cnt, lru_link); - prof_ctx_merge(cnt->ctx, cnt); - idalloc(cnt); - } - idalloc(prof_tdata->vec); - idalloc(prof_tdata); - prof_tdata = PROF_TDATA_STATE_PURGATORY; - prof_tdata_tsd_set(&prof_tdata); - } -} - -void -prof_boot0(void) -{ - - cassert(config_prof); - - memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT, - sizeof(PROF_PREFIX_DEFAULT)); -} - -void -prof_boot1(void) -{ - - cassert(config_prof); - - /* - * opt_prof and prof_promote must be in their final state before any - * arenas are initialized, so this function must be executed early. - */ - - if (opt_prof_leak && opt_prof == false) { - /* - * Enable opt_prof, but in such a way that profiles are never - * automatically dumped. - */ - opt_prof = true; - opt_prof_gdump = false; - } else if (opt_prof) { - if (opt_lg_prof_interval >= 0) { - prof_interval = (((uint64_t)1U) << - opt_lg_prof_interval); - } - } - - prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE); -} - -bool -prof_boot2(void) -{ - - cassert(config_prof); - - if (opt_prof) { - unsigned i; - - if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash, - prof_bt_keycomp)) - return (true); - if (malloc_mutex_init(&bt2ctx_mtx)) - return (true); - if (prof_tdata_tsd_boot()) { - malloc_write( - ": Error in pthread_key_create()\n"); - abort(); - } - - if (malloc_mutex_init(&prof_dump_seq_mtx)) - return (true); - - if (atexit(prof_fdump) != 0) { - malloc_write(": Error in atexit()\n"); - if (opt_abort) - abort(); - } - - ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS * - sizeof(malloc_mutex_t)); - if (ctx_locks == NULL) - return (true); - for (i = 0; i < PROF_NCTX_LOCKS; i++) { - if (malloc_mutex_init(&ctx_locks[i])) - return (true); - } - } - -#ifdef JEMALLOC_PROF_LIBGCC - /* - * Cause the backtracing machinery to allocate its internal state - * before enabling profiling. 
- */ - _Unwind_Backtrace(prof_unwind_init_callback, NULL); -#endif - - prof_booted = true; - - return (false); -} - -void -prof_prefork(void) -{ - - if (opt_prof) { - unsigned i; - - malloc_mutex_lock(&bt2ctx_mtx); - malloc_mutex_lock(&prof_dump_seq_mtx); - for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_lock(&ctx_locks[i]); - } -} - -void -prof_postfork_parent(void) -{ - - if (opt_prof) { - unsigned i; - - for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_postfork_parent(&ctx_locks[i]); - malloc_mutex_postfork_parent(&prof_dump_seq_mtx); - malloc_mutex_postfork_parent(&bt2ctx_mtx); - } -} - -void -prof_postfork_child(void) -{ - - if (opt_prof) { - unsigned i; - - for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_postfork_child(&ctx_locks[i]); - malloc_mutex_postfork_child(&prof_dump_seq_mtx); - malloc_mutex_postfork_child(&bt2ctx_mtx); - } -} - -/******************************************************************************/ diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/quarantine.c mariadb-5.5-5.5.40/extra/jemalloc/src/quarantine.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/quarantine.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/quarantine.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,190 +0,0 @@ -#define JEMALLOC_QUARANTINE_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/* - * quarantine pointers close to NULL are used to encode state information that - * is used for cleaning up during thread shutdown. - */ -#define QUARANTINE_STATE_REINCARNATED ((quarantine_t *)(uintptr_t)1) -#define QUARANTINE_STATE_PURGATORY ((quarantine_t *)(uintptr_t)2) -#define QUARANTINE_STATE_MAX QUARANTINE_STATE_PURGATORY - -/******************************************************************************/ -/* Data. */ - -malloc_tsd_data(, quarantine, quarantine_t *, NULL) - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ - -static quarantine_t *quarantine_grow(quarantine_t *quarantine); -static void quarantine_drain_one(quarantine_t *quarantine); -static void quarantine_drain(quarantine_t *quarantine, size_t upper_bound); - -/******************************************************************************/ - -quarantine_t * -quarantine_init(size_t lg_maxobjs) -{ - quarantine_t *quarantine; - - quarantine = (quarantine_t *)imalloc(offsetof(quarantine_t, objs) + - ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t))); - if (quarantine == NULL) - return (NULL); - quarantine->curbytes = 0; - quarantine->curobjs = 0; - quarantine->first = 0; - quarantine->lg_maxobjs = lg_maxobjs; - - quarantine_tsd_set(&quarantine); - - return (quarantine); -} - -static quarantine_t * -quarantine_grow(quarantine_t *quarantine) -{ - quarantine_t *ret; - - ret = quarantine_init(quarantine->lg_maxobjs + 1); - if (ret == NULL) { - quarantine_drain_one(quarantine); - return (quarantine); - } - - ret->curbytes = quarantine->curbytes; - ret->curobjs = quarantine->curobjs; - if (quarantine->first + quarantine->curobjs <= (ZU(1) << - quarantine->lg_maxobjs)) { - /* objs ring buffer data are contiguous. */ - memcpy(ret->objs, &quarantine->objs[quarantine->first], - quarantine->curobjs * sizeof(quarantine_obj_t)); - } else { - /* objs ring buffer data wrap around. 
*/ - size_t ncopy_a = (ZU(1) << quarantine->lg_maxobjs) - - quarantine->first; - size_t ncopy_b = quarantine->curobjs - ncopy_a; - - memcpy(ret->objs, &quarantine->objs[quarantine->first], ncopy_a - * sizeof(quarantine_obj_t)); - memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * - sizeof(quarantine_obj_t)); - } - idalloc(quarantine); - - return (ret); -} - -static void -quarantine_drain_one(quarantine_t *quarantine) -{ - quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; - assert(obj->usize == isalloc(obj->ptr, config_prof)); - idalloc(obj->ptr); - quarantine->curbytes -= obj->usize; - quarantine->curobjs--; - quarantine->first = (quarantine->first + 1) & ((ZU(1) << - quarantine->lg_maxobjs) - 1); -} - -static void -quarantine_drain(quarantine_t *quarantine, size_t upper_bound) -{ - - while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0) - quarantine_drain_one(quarantine); -} - -void -quarantine(void *ptr) -{ - quarantine_t *quarantine; - size_t usize = isalloc(ptr, config_prof); - - cassert(config_fill); - assert(opt_quarantine); - - quarantine = *quarantine_tsd_get(); - if ((uintptr_t)quarantine <= (uintptr_t)QUARANTINE_STATE_MAX) { - if (quarantine == QUARANTINE_STATE_PURGATORY) { - /* - * Make a note that quarantine() was called after - * quarantine_cleanup() was called. - */ - quarantine = QUARANTINE_STATE_REINCARNATED; - quarantine_tsd_set(&quarantine); - } - idalloc(ptr); - return; - } - /* - * Drain one or more objects if the quarantine size limit would be - * exceeded by appending ptr. - */ - if (quarantine->curbytes + usize > opt_quarantine) { - size_t upper_bound = (opt_quarantine >= usize) ? opt_quarantine - - usize : 0; - quarantine_drain(quarantine, upper_bound); - } - /* Grow the quarantine ring buffer if it's full. */ - if (quarantine->curobjs == (ZU(1) << quarantine->lg_maxobjs)) - quarantine = quarantine_grow(quarantine); - /* quarantine_grow() must free a slot if it fails to grow. 
*/ - assert(quarantine->curobjs < (ZU(1) << quarantine->lg_maxobjs)); - /* Append ptr if its size doesn't exceed the quarantine size. */ - if (quarantine->curbytes + usize <= opt_quarantine) { - size_t offset = (quarantine->first + quarantine->curobjs) & - ((ZU(1) << quarantine->lg_maxobjs) - 1); - quarantine_obj_t *obj = &quarantine->objs[offset]; - obj->ptr = ptr; - obj->usize = usize; - quarantine->curbytes += usize; - quarantine->curobjs++; - if (opt_junk) - memset(ptr, 0x5a, usize); - } else { - assert(quarantine->curbytes == 0); - idalloc(ptr); - } -} - -void -quarantine_cleanup(void *arg) -{ - quarantine_t *quarantine = *(quarantine_t **)arg; - - if (quarantine == QUARANTINE_STATE_REINCARNATED) { - /* - * Another destructor deallocated memory after this destructor - * was called. Reset quarantine to QUARANTINE_STATE_PURGATORY - * in order to receive another callback. - */ - quarantine = QUARANTINE_STATE_PURGATORY; - quarantine_tsd_set(&quarantine); - } else if (quarantine == QUARANTINE_STATE_PURGATORY) { - /* - * The previous time this destructor was called, we set the key - * to QUARANTINE_STATE_PURGATORY so that other destructors - * wouldn't cause re-creation of the quarantine. This time, do - * nothing, so that the destructor will not be called again. 
- */ - } else if (quarantine != NULL) { - quarantine_drain(quarantine, 0); - idalloc(quarantine); - quarantine = QUARANTINE_STATE_PURGATORY; - quarantine_tsd_set(&quarantine); - } -} - -bool -quarantine_boot(void) -{ - - cassert(config_fill); - - if (quarantine_tsd_boot()) - return (true); - - return (false); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/rtree.c mariadb-5.5-5.5.40/extra/jemalloc/src/rtree.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/rtree.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/rtree.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,67 +0,0 @@ -#define JEMALLOC_RTREE_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -rtree_t * -rtree_new(unsigned bits) -{ - rtree_t *ret; - unsigned bits_per_level, height, i; - - bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1; - height = bits / bits_per_level; - if (height * bits_per_level != bits) - height++; - assert(height * bits_per_level >= bits); - - ret = (rtree_t*)base_alloc(offsetof(rtree_t, level2bits) + - (sizeof(unsigned) * height)); - if (ret == NULL) - return (NULL); - memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) * - height)); - - if (malloc_mutex_init(&ret->mutex)) { - /* Leak the rtree. */ - return (NULL); - } - ret->height = height; - if (bits_per_level * height > bits) - ret->level2bits[0] = bits % bits_per_level; - else - ret->level2bits[0] = bits_per_level; - for (i = 1; i < height; i++) - ret->level2bits[i] = bits_per_level; - - ret->root = (void**)base_alloc(sizeof(void *) << ret->level2bits[0]); - if (ret->root == NULL) { - /* - * We leak the rtree here, since there's no generic base - * deallocation. 
- */ - return (NULL); - } - memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]); - - return (ret); -} - -void -rtree_prefork(rtree_t *rtree) -{ - - malloc_mutex_prefork(&rtree->mutex); -} - -void -rtree_postfork_parent(rtree_t *rtree) -{ - - malloc_mutex_postfork_parent(&rtree->mutex); -} - -void -rtree_postfork_child(rtree_t *rtree) -{ - - malloc_mutex_postfork_child(&rtree->mutex); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/stats.c mariadb-5.5-5.5.40/extra/jemalloc/src/stats.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/stats.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/stats.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,549 +0,0 @@ -#define JEMALLOC_STATS_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -#define CTL_GET(n, v, t) do { \ - size_t sz = sizeof(t); \ - xmallctl(n, v, &sz, NULL, 0); \ -} while (0) - -#define CTL_I_GET(n, v, t) do { \ - size_t mib[6]; \ - size_t miblen = sizeof(mib) / sizeof(size_t); \ - size_t sz = sizeof(t); \ - xmallctlnametomib(n, mib, &miblen); \ - mib[2] = i; \ - xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ -} while (0) - -#define CTL_J_GET(n, v, t) do { \ - size_t mib[6]; \ - size_t miblen = sizeof(mib) / sizeof(size_t); \ - size_t sz = sizeof(t); \ - xmallctlnametomib(n, mib, &miblen); \ - mib[2] = j; \ - xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ -} while (0) - -#define CTL_IJ_GET(n, v, t) do { \ - size_t mib[6]; \ - size_t miblen = sizeof(mib) / sizeof(size_t); \ - size_t sz = sizeof(t); \ - xmallctlnametomib(n, mib, &miblen); \ - mib[2] = i; \ - mib[4] = j; \ - xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ -} while (0) - -/******************************************************************************/ -/* Data. */ - -bool opt_stats_print = false; - -size_t stats_cactive = 0; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ - -static void stats_arena_bins_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i); -static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i); -static void stats_arena_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i, bool bins, bool large); - -/******************************************************************************/ - -static void -stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i) -{ - size_t page; - bool config_tcache; - unsigned nbins, j, gap_start; - - CTL_GET("arenas.page", &page, size_t); - - CTL_GET("config.tcache", &config_tcache, bool); - if (config_tcache) { - malloc_cprintf(write_cb, cbopaque, - "bins: bin size regs pgs allocated nmalloc" - " ndalloc nrequests nfills nflushes" - " newruns reruns curruns\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "bins: bin size regs pgs allocated nmalloc" - " ndalloc newruns reruns curruns\n"); - } - CTL_GET("arenas.nbins", &nbins, unsigned); - for (j = 0, gap_start = UINT_MAX; j < nbins; j++) { - uint64_t nruns; - - CTL_IJ_GET("stats.arenas.0.bins.0.nruns", &nruns, uint64_t); - if (nruns == 0) { - if (gap_start == UINT_MAX) - gap_start = j; - } else { - size_t reg_size, run_size, allocated; - uint32_t nregs; - uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; - uint64_t reruns; - size_t curruns; - - if (gap_start != UINT_MAX) { - if (j > gap_start + 1) { - /* Gap of more than one size class. */ - malloc_cprintf(write_cb, cbopaque, - "[%u..%u]\n", gap_start, - j - 1); - } else { - /* Gap of one size class. 
*/ - malloc_cprintf(write_cb, cbopaque, - "[%u]\n", gap_start); - } - gap_start = UINT_MAX; - } - CTL_J_GET("arenas.bin.0.size", ®_size, size_t); - CTL_J_GET("arenas.bin.0.nregs", &nregs, uint32_t); - CTL_J_GET("arenas.bin.0.run_size", &run_size, size_t); - CTL_IJ_GET("stats.arenas.0.bins.0.allocated", - &allocated, size_t); - CTL_IJ_GET("stats.arenas.0.bins.0.nmalloc", - &nmalloc, uint64_t); - CTL_IJ_GET("stats.arenas.0.bins.0.ndalloc", - &ndalloc, uint64_t); - if (config_tcache) { - CTL_IJ_GET("stats.arenas.0.bins.0.nrequests", - &nrequests, uint64_t); - CTL_IJ_GET("stats.arenas.0.bins.0.nfills", - &nfills, uint64_t); - CTL_IJ_GET("stats.arenas.0.bins.0.nflushes", - &nflushes, uint64_t); - } - CTL_IJ_GET("stats.arenas.0.bins.0.nreruns", &reruns, - uint64_t); - CTL_IJ_GET("stats.arenas.0.bins.0.curruns", &curruns, - size_t); - if (config_tcache) { - malloc_cprintf(write_cb, cbopaque, - "%13u %5zu %4u %3zu %12zu %12"PRIu64 - " %12"PRIu64" %12"PRIu64" %12"PRIu64 - " %12"PRIu64" %12"PRIu64" %12"PRIu64 - " %12zu\n", - j, reg_size, nregs, run_size / page, - allocated, nmalloc, ndalloc, nrequests, - nfills, nflushes, nruns, reruns, curruns); - } else { - malloc_cprintf(write_cb, cbopaque, - "%13u %5zu %4u %3zu %12zu %12"PRIu64 - " %12"PRIu64" %12"PRIu64" %12"PRIu64 - " %12zu\n", - j, reg_size, nregs, run_size / page, - allocated, nmalloc, ndalloc, nruns, reruns, - curruns); - } - } - } - if (gap_start != UINT_MAX) { - if (j > gap_start + 1) { - /* Gap of more than one size class. */ - malloc_cprintf(write_cb, cbopaque, "[%u..%u]\n", - gap_start, j - 1); - } else { - /* Gap of one size class. 
*/ - malloc_cprintf(write_cb, cbopaque, "[%u]\n", gap_start); - } - } -} - -static void -stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i) -{ - size_t page, nlruns, j; - ssize_t gap_start; - - CTL_GET("arenas.page", &page, size_t); - - malloc_cprintf(write_cb, cbopaque, - "large: size pages nmalloc ndalloc nrequests" - " curruns\n"); - CTL_GET("arenas.nlruns", &nlruns, size_t); - for (j = 0, gap_start = -1; j < nlruns; j++) { - uint64_t nmalloc, ndalloc, nrequests; - size_t run_size, curruns; - - CTL_IJ_GET("stats.arenas.0.lruns.0.nmalloc", &nmalloc, - uint64_t); - CTL_IJ_GET("stats.arenas.0.lruns.0.ndalloc", &ndalloc, - uint64_t); - CTL_IJ_GET("stats.arenas.0.lruns.0.nrequests", &nrequests, - uint64_t); - if (nrequests == 0) { - if (gap_start == -1) - gap_start = j; - } else { - CTL_J_GET("arenas.lrun.0.size", &run_size, size_t); - CTL_IJ_GET("stats.arenas.0.lruns.0.curruns", &curruns, - size_t); - if (gap_start != -1) { - malloc_cprintf(write_cb, cbopaque, "[%zu]\n", - j - gap_start); - gap_start = -1; - } - malloc_cprintf(write_cb, cbopaque, - "%13zu %5zu %12"PRIu64" %12"PRIu64" %12"PRIu64 - " %12zu\n", - run_size, run_size / page, nmalloc, ndalloc, - nrequests, curruns); - } - } - if (gap_start != -1) - malloc_cprintf(write_cb, cbopaque, "[%zu]\n", j - gap_start); -} - -static void -stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i, bool bins, bool large) -{ - unsigned nthreads; - const char *dss; - size_t page, pactive, pdirty, mapped; - uint64_t npurge, nmadvise, purged; - size_t small_allocated; - uint64_t small_nmalloc, small_ndalloc, small_nrequests; - size_t large_allocated; - uint64_t large_nmalloc, large_ndalloc, large_nrequests; - - CTL_GET("arenas.page", &page, size_t); - - CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned); - malloc_cprintf(write_cb, cbopaque, - "assigned threads: %u\n", nthreads); - CTL_I_GET("stats.arenas.0.dss", &dss, const char *); - 
malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n", - dss); - CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t); - CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t); - CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t); - CTL_I_GET("stats.arenas.0.nmadvise", &nmadvise, uint64_t); - CTL_I_GET("stats.arenas.0.purged", &purged, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "dirty pages: %zu:%zu active:dirty, %"PRIu64" sweep%s," - " %"PRIu64" madvise%s, %"PRIu64" purged\n", - pactive, pdirty, npurge, npurge == 1 ? "" : "s", - nmadvise, nmadvise == 1 ? "" : "s", purged); - - malloc_cprintf(write_cb, cbopaque, - " allocated nmalloc ndalloc nrequests\n"); - CTL_I_GET("stats.arenas.0.small.allocated", &small_allocated, size_t); - CTL_I_GET("stats.arenas.0.small.nmalloc", &small_nmalloc, uint64_t); - CTL_I_GET("stats.arenas.0.small.ndalloc", &small_ndalloc, uint64_t); - CTL_I_GET("stats.arenas.0.small.nrequests", &small_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "small: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", - small_allocated, small_nmalloc, small_ndalloc, small_nrequests); - CTL_I_GET("stats.arenas.0.large.allocated", &large_allocated, size_t); - CTL_I_GET("stats.arenas.0.large.nmalloc", &large_nmalloc, uint64_t); - CTL_I_GET("stats.arenas.0.large.ndalloc", &large_ndalloc, uint64_t); - CTL_I_GET("stats.arenas.0.large.nrequests", &large_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "large: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", - large_allocated, large_nmalloc, large_ndalloc, large_nrequests); - malloc_cprintf(write_cb, cbopaque, - "total: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", - small_allocated + large_allocated, - small_nmalloc + large_nmalloc, - small_ndalloc + large_ndalloc, - small_nrequests + large_nrequests); - malloc_cprintf(write_cb, cbopaque, "active: %12zu\n", pactive * page); - CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t); - malloc_cprintf(write_cb, cbopaque, "mapped: 
%12zu\n", mapped); - - if (bins) - stats_arena_bins_print(write_cb, cbopaque, i); - if (large) - stats_arena_lruns_print(write_cb, cbopaque, i); -} - -void -stats_print(void (*write_cb)(void *, const char *), void *cbopaque, - const char *opts) -{ - int err; - uint64_t epoch; - size_t u64sz; - bool general = true; - bool merged = true; - bool unmerged = true; - bool bins = true; - bool large = true; - - /* - * Refresh stats, in case mallctl() was called by the application. - * - * Check for OOM here, since refreshing the ctl cache can trigger - * allocation. In practice, none of the subsequent mallctl()-related - * calls in this function will cause OOM if this one succeeds. - * */ - epoch = 1; - u64sz = sizeof(uint64_t); - err = je_mallctl("epoch", &epoch, &u64sz, &epoch, sizeof(uint64_t)); - if (err != 0) { - if (err == EAGAIN) { - malloc_write(": Memory allocation failure in " - "mallctl(\"epoch\", ...)\n"); - return; - } - malloc_write(": Failure in mallctl(\"epoch\", " - "...)\n"); - abort(); - } - - if (opts != NULL) { - unsigned i; - - for (i = 0; opts[i] != '\0'; i++) { - switch (opts[i]) { - case 'g': - general = false; - break; - case 'm': - merged = false; - break; - case 'a': - unmerged = false; - break; - case 'b': - bins = false; - break; - case 'l': - large = false; - break; - default:; - } - } - } - - malloc_cprintf(write_cb, cbopaque, - "___ Begin jemalloc statistics ___\n"); - if (general) { - int err; - const char *cpv; - bool bv; - unsigned uv; - ssize_t ssv; - size_t sv, bsz, ssz, sssz, cpsz; - - bsz = sizeof(bool); - ssz = sizeof(size_t); - sssz = sizeof(ssize_t); - cpsz = sizeof(const char *); - - CTL_GET("version", &cpv, const char *); - malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); - CTL_GET("config.debug", &bv, bool); - malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", - bv ? 
"enabled" : "disabled"); - -#define OPT_WRITE_BOOL(n) \ - if ((err = je_mallctl("opt."#n, &bv, &bsz, NULL, 0)) \ - == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s\n", bv ? "true" : "false"); \ - } -#define OPT_WRITE_SIZE_T(n) \ - if ((err = je_mallctl("opt."#n, &sv, &ssz, NULL, 0)) \ - == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zu\n", sv); \ - } -#define OPT_WRITE_SSIZE_T(n) \ - if ((err = je_mallctl("opt."#n, &ssv, &sssz, NULL, 0)) \ - == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd\n", ssv); \ - } -#define OPT_WRITE_CHAR_P(n) \ - if ((err = je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0)) \ - == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": \"%s\"\n", cpv); \ - } - - malloc_cprintf(write_cb, cbopaque, - "Run-time option settings:\n"); - OPT_WRITE_BOOL(abort) - OPT_WRITE_SIZE_T(lg_chunk) - OPT_WRITE_CHAR_P(dss) - OPT_WRITE_SIZE_T(narenas) - OPT_WRITE_SSIZE_T(lg_dirty_mult) - OPT_WRITE_BOOL(stats_print) - OPT_WRITE_BOOL(junk) - OPT_WRITE_SIZE_T(quarantine) - OPT_WRITE_BOOL(redzone) - OPT_WRITE_BOOL(zero) - OPT_WRITE_BOOL(utrace) - OPT_WRITE_BOOL(valgrind) - OPT_WRITE_BOOL(xmalloc) - OPT_WRITE_BOOL(tcache) - OPT_WRITE_SSIZE_T(lg_tcache_max) - OPT_WRITE_BOOL(prof) - OPT_WRITE_CHAR_P(prof_prefix) - OPT_WRITE_BOOL(prof_active) - OPT_WRITE_SSIZE_T(lg_prof_sample) - OPT_WRITE_BOOL(prof_accum) - OPT_WRITE_SSIZE_T(lg_prof_interval) - OPT_WRITE_BOOL(prof_gdump) - OPT_WRITE_BOOL(prof_final) - OPT_WRITE_BOOL(prof_leak) - -#undef OPT_WRITE_BOOL -#undef OPT_WRITE_SIZE_T -#undef OPT_WRITE_SSIZE_T -#undef OPT_WRITE_CHAR_P - - malloc_cprintf(write_cb, cbopaque, "CPUs: %u\n", ncpus); - - CTL_GET("arenas.narenas", &uv, unsigned); - malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); - - malloc_cprintf(write_cb, cbopaque, "Pointer size: %zu\n", - sizeof(void *)); - - CTL_GET("arenas.quantum", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv); - - CTL_GET("arenas.page", &sv, size_t); - 
malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); - - CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t); - if (ssv >= 0) { - malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: %u:1\n", - (1U << ssv)); - } else { - malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: N/A\n"); - } - if ((err = je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0)) - == 0) { - malloc_cprintf(write_cb, cbopaque, - "Maximum thread-cached size class: %zu\n", sv); - } - if ((err = je_mallctl("opt.prof", &bv, &bsz, NULL, 0)) == 0 && - bv) { - CTL_GET("opt.lg_prof_sample", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "Average profile sample interval: %"PRIu64 - " (2^%zu)\n", (((uint64_t)1U) << sv), sv); - - CTL_GET("opt.lg_prof_interval", &ssv, ssize_t); - if (ssv >= 0) { - malloc_cprintf(write_cb, cbopaque, - "Average profile dump interval: %"PRIu64 - " (2^%zd)\n", - (((uint64_t)1U) << ssv), ssv); - } else { - malloc_cprintf(write_cb, cbopaque, - "Average profile dump interval: N/A\n"); - } - } - CTL_GET("opt.lg_chunk", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, "Chunk size: %zu (2^%zu)\n", - (ZU(1) << sv), sv); - } - - if (config_stats) { - size_t *cactive; - size_t allocated, active, mapped; - size_t chunks_current, chunks_high; - uint64_t chunks_total; - size_t huge_allocated; - uint64_t huge_nmalloc, huge_ndalloc; - - CTL_GET("stats.cactive", &cactive, size_t *); - CTL_GET("stats.allocated", &allocated, size_t); - CTL_GET("stats.active", &active, size_t); - CTL_GET("stats.mapped", &mapped, size_t); - malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, mapped: %zu\n", - allocated, active, mapped); - malloc_cprintf(write_cb, cbopaque, - "Current active ceiling: %zu\n", atomic_read_z(cactive)); - - /* Print chunk stats. 
*/ - CTL_GET("stats.chunks.total", &chunks_total, uint64_t); - CTL_GET("stats.chunks.high", &chunks_high, size_t); - CTL_GET("stats.chunks.current", &chunks_current, size_t); - malloc_cprintf(write_cb, cbopaque, "chunks: nchunks " - "highchunks curchunks\n"); - malloc_cprintf(write_cb, cbopaque, - " %13"PRIu64" %12zu %12zu\n", - chunks_total, chunks_high, chunks_current); - - /* Print huge stats. */ - CTL_GET("stats.huge.nmalloc", &huge_nmalloc, uint64_t); - CTL_GET("stats.huge.ndalloc", &huge_ndalloc, uint64_t); - CTL_GET("stats.huge.allocated", &huge_allocated, size_t); - malloc_cprintf(write_cb, cbopaque, - "huge: nmalloc ndalloc allocated\n"); - malloc_cprintf(write_cb, cbopaque, - " %12"PRIu64" %12"PRIu64" %12zu\n", - huge_nmalloc, huge_ndalloc, huge_allocated); - - if (merged) { - unsigned narenas; - - CTL_GET("arenas.narenas", &narenas, unsigned); - { - VARIABLE_ARRAY(bool, initialized, narenas); - size_t isz; - unsigned i, ninitialized; - - isz = sizeof(bool) * narenas; - xmallctl("arenas.initialized", initialized, - &isz, NULL, 0); - for (i = ninitialized = 0; i < narenas; i++) { - if (initialized[i]) - ninitialized++; - } - - if (ninitialized > 1 || unmerged == false) { - /* Print merged arena stats. */ - malloc_cprintf(write_cb, cbopaque, - "\nMerged arenas stats:\n"); - stats_arena_print(write_cb, cbopaque, - narenas, bins, large); - } - } - } - - if (unmerged) { - unsigned narenas; - - /* Print stats for each arena. 
*/ - - CTL_GET("arenas.narenas", &narenas, unsigned); - { - VARIABLE_ARRAY(bool, initialized, narenas); - size_t isz; - unsigned i; - - isz = sizeof(bool) * narenas; - xmallctl("arenas.initialized", initialized, - &isz, NULL, 0); - - for (i = 0; i < narenas; i++) { - if (initialized[i]) { - malloc_cprintf(write_cb, - cbopaque, - "\narenas[%u]:\n", i); - stats_arena_print(write_cb, - cbopaque, i, bins, large); - } - } - } - } - } - malloc_cprintf(write_cb, cbopaque, "--- End jemalloc statistics ---\n"); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/tcache.c mariadb-5.5-5.5.40/extra/jemalloc/src/tcache.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/tcache.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/tcache.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,476 +0,0 @@ -#define JEMALLOC_TCACHE_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Data. */ - -malloc_tsd_data(, tcache, tcache_t *, NULL) -malloc_tsd_data(, tcache_enabled, tcache_enabled_t, tcache_enabled_default) - -bool opt_tcache = true; -ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; - -tcache_bin_info_t *tcache_bin_info; -static unsigned stack_nelms; /* Total stack elms per tcache. */ - -size_t nhbins; -size_t tcache_maxclass; - -/******************************************************************************/ - -size_t tcache_salloc(const void *ptr) -{ - - return (arena_salloc(ptr, false)); -} - -void -tcache_event_hard(tcache_t *tcache) -{ - size_t binind = tcache->next_gc_bin; - tcache_bin_t *tbin = &tcache->tbins[binind]; - tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; - - if (tbin->low_water > 0) { - /* - * Flush (ceiling) 3/4 of the objects below the low water mark. 
- */ - if (binind < NBINS) { - tcache_bin_flush_small(tbin, binind, tbin->ncached - - tbin->low_water + (tbin->low_water >> 2), tcache); - } else { - tcache_bin_flush_large(tbin, binind, tbin->ncached - - tbin->low_water + (tbin->low_water >> 2), tcache); - } - /* - * Reduce fill count by 2X. Limit lg_fill_div such that the - * fill count is always at least 1. - */ - if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) >= 1) - tbin->lg_fill_div++; - } else if (tbin->low_water < 0) { - /* - * Increase fill count by 2X. Make sure lg_fill_div stays - * greater than 0. - */ - if (tbin->lg_fill_div > 1) - tbin->lg_fill_div--; - } - tbin->low_water = tbin->ncached; - - tcache->next_gc_bin++; - if (tcache->next_gc_bin == nhbins) - tcache->next_gc_bin = 0; - tcache->ev_cnt = 0; -} - -void * -tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) -{ - void *ret; - - arena_tcache_fill_small(tcache->arena, tbin, binind, - config_prof ? tcache->prof_accumbytes : 0); - if (config_prof) - tcache->prof_accumbytes = 0; - ret = tcache_alloc_easy(tbin); - - return (ret); -} - -void -tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, - tcache_t *tcache) -{ - void *ptr; - unsigned i, nflush, ndeferred; - bool merged_stats = false; - - assert(binind < NBINS); - assert(rem <= tbin->ncached); - - for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { - /* Lock the arena bin associated with the first object. 
*/ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - tbin->avail[0]); - arena_t *arena = chunk->arena; - arena_bin_t *bin = &arena->bins[binind]; - - if (config_prof && arena == tcache->arena) { - if (arena_prof_accum(arena, tcache->prof_accumbytes)) - prof_idump(); - tcache->prof_accumbytes = 0; - } - - malloc_mutex_lock(&bin->lock); - if (config_stats && arena == tcache->arena) { - assert(merged_stats == false); - merged_stats = true; - bin->stats.nflushes++; - bin->stats.nrequests += tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; - } - ndeferred = 0; - for (i = 0; i < nflush; i++) { - ptr = tbin->avail[i]; - assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk->arena == arena) { - size_t pageind = ((uintptr_t)ptr - - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = - arena_mapp_get(chunk, pageind); - if (config_fill && opt_junk) { - arena_alloc_junk_small(ptr, - &arena_bin_info[binind], true); - } - arena_dalloc_bin_locked(arena, chunk, ptr, - mapelm); - } else { - /* - * This object was allocated via a different - * arena bin than the one that is currently - * locked. Stash the object, so that it can be - * handled in a future pass. - */ - tbin->avail[ndeferred] = ptr; - ndeferred++; - } - } - malloc_mutex_unlock(&bin->lock); - } - if (config_stats && merged_stats == false) { - /* - * The flush loop didn't happen to flush to this thread's - * arena, so the stats didn't get merged. Manually do so now. 
- */ - arena_bin_t *bin = &tcache->arena->bins[binind]; - malloc_mutex_lock(&bin->lock); - bin->stats.nflushes++; - bin->stats.nrequests += tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; - malloc_mutex_unlock(&bin->lock); - } - - memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], - rem * sizeof(void *)); - tbin->ncached = rem; - if ((int)tbin->ncached < tbin->low_water) - tbin->low_water = tbin->ncached; -} - -void -tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, - tcache_t *tcache) -{ - void *ptr; - unsigned i, nflush, ndeferred; - bool merged_stats = false; - - assert(binind < nhbins); - assert(rem <= tbin->ncached); - - for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { - /* Lock the arena associated with the first object. */ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - tbin->avail[0]); - arena_t *arena = chunk->arena; - UNUSED bool idump; - - if (config_prof) - idump = false; - malloc_mutex_lock(&arena->lock); - if ((config_prof || config_stats) && arena == tcache->arena) { - if (config_prof) { - idump = arena_prof_accum_locked(arena, - tcache->prof_accumbytes); - tcache->prof_accumbytes = 0; - } - if (config_stats) { - merged_stats = true; - arena->stats.nrequests_large += - tbin->tstats.nrequests; - arena->stats.lstats[binind - NBINS].nrequests += - tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; - } - } - ndeferred = 0; - for (i = 0; i < nflush; i++) { - ptr = tbin->avail[i]; - assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk->arena == arena) - arena_dalloc_large_locked(arena, chunk, ptr); - else { - /* - * This object was allocated via a different - * arena than the one that is currently locked. - * Stash the object, so that it can be handled - * in a future pass. 
- */ - tbin->avail[ndeferred] = ptr; - ndeferred++; - } - } - malloc_mutex_unlock(&arena->lock); - if (config_prof && idump) - prof_idump(); - } - if (config_stats && merged_stats == false) { - /* - * The flush loop didn't happen to flush to this thread's - * arena, so the stats didn't get merged. Manually do so now. - */ - arena_t *arena = tcache->arena; - malloc_mutex_lock(&arena->lock); - arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[binind - NBINS].nrequests += - tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; - malloc_mutex_unlock(&arena->lock); - } - - memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], - rem * sizeof(void *)); - tbin->ncached = rem; - if ((int)tbin->ncached < tbin->low_water) - tbin->low_water = tbin->ncached; -} - -void -tcache_arena_associate(tcache_t *tcache, arena_t *arena) -{ - - if (config_stats) { - /* Link into list of extant tcaches. */ - malloc_mutex_lock(&arena->lock); - ql_elm_new(tcache, link); - ql_tail_insert(&arena->tcache_ql, tcache, link); - malloc_mutex_unlock(&arena->lock); - } - tcache->arena = arena; -} - -void -tcache_arena_dissociate(tcache_t *tcache) -{ - - if (config_stats) { - /* Unlink from list of extant tcaches. */ - malloc_mutex_lock(&tcache->arena->lock); - ql_remove(&tcache->arena->tcache_ql, tcache, link); - malloc_mutex_unlock(&tcache->arena->lock); - tcache_stats_merge(tcache, tcache->arena); - } -} - -tcache_t * -tcache_create(arena_t *arena) -{ - tcache_t *tcache; - size_t size, stack_offset; - unsigned i; - - size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins); - /* Naturally align the pointer stacks. */ - size = PTR_CEILING(size); - stack_offset = size; - size += stack_nelms * sizeof(void *); - /* - * Round up to the nearest multiple of the cacheline size, in order to - * avoid the possibility of false cacheline sharing. 
- * - * That this works relies on the same logic as in ipalloc(), but we - * cannot directly call ipalloc() here due to tcache bootstrapping - * issues. - */ - size = (size + CACHELINE_MASK) & (-CACHELINE); - - if (size <= SMALL_MAXCLASS) - tcache = (tcache_t *)arena_malloc_small(arena, size, true); - else if (size <= tcache_maxclass) - tcache = (tcache_t *)arena_malloc_large(arena, size, true); - else - tcache = (tcache_t *)icallocx(size, false, arena); - - if (tcache == NULL) - return (NULL); - - tcache_arena_associate(tcache, arena); - - assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); - for (i = 0; i < nhbins; i++) { - tcache->tbins[i].lg_fill_div = 1; - tcache->tbins[i].avail = (void **)((uintptr_t)tcache + - (uintptr_t)stack_offset); - stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); - } - - tcache_tsd_set(&tcache); - - return (tcache); -} - -void -tcache_destroy(tcache_t *tcache) -{ - unsigned i; - size_t tcache_size; - - tcache_arena_dissociate(tcache); - - for (i = 0; i < NBINS; i++) { - tcache_bin_t *tbin = &tcache->tbins[i]; - tcache_bin_flush_small(tbin, i, 0, tcache); - - if (config_stats && tbin->tstats.nrequests != 0) { - arena_t *arena = tcache->arena; - arena_bin_t *bin = &arena->bins[i]; - malloc_mutex_lock(&bin->lock); - bin->stats.nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(&bin->lock); - } - } - - for (; i < nhbins; i++) { - tcache_bin_t *tbin = &tcache->tbins[i]; - tcache_bin_flush_large(tbin, i, 0, tcache); - - if (config_stats && tbin->tstats.nrequests != 0) { - arena_t *arena = tcache->arena; - malloc_mutex_lock(&arena->lock); - arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[i - NBINS].nrequests += - tbin->tstats.nrequests; - malloc_mutex_unlock(&arena->lock); - } - } - - if (config_prof && tcache->prof_accumbytes > 0 && - arena_prof_accum(tcache->arena, tcache->prof_accumbytes)) - prof_idump(); - - tcache_size = arena_salloc(tcache, false); - if (tcache_size <= SMALL_MAXCLASS) { - 
arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); - arena_t *arena = chunk->arena; - size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> - LG_PAGE; - arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); - - arena_dalloc_bin(arena, chunk, tcache, pageind, mapelm); - } else if (tcache_size <= tcache_maxclass) { - arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); - arena_t *arena = chunk->arena; - - arena_dalloc_large(arena, chunk, tcache); - } else - idallocx(tcache, false); -} - -void -tcache_thread_cleanup(void *arg) -{ - tcache_t *tcache = *(tcache_t **)arg; - - if (tcache == TCACHE_STATE_DISABLED) { - /* Do nothing. */ - } else if (tcache == TCACHE_STATE_REINCARNATED) { - /* - * Another destructor called an allocator function after this - * destructor was called. Reset tcache to - * TCACHE_STATE_PURGATORY in order to receive another callback. - */ - tcache = TCACHE_STATE_PURGATORY; - tcache_tsd_set(&tcache); - } else if (tcache == TCACHE_STATE_PURGATORY) { - /* - * The previous time this destructor was called, we set the key - * to TCACHE_STATE_PURGATORY so that other destructors wouldn't - * cause re-creation of the tcache. This time, do nothing, so - * that the destructor will not be called again. - */ - } else if (tcache != NULL) { - assert(tcache != TCACHE_STATE_PURGATORY); - tcache_destroy(tcache); - tcache = TCACHE_STATE_PURGATORY; - tcache_tsd_set(&tcache); - } -} - -void -tcache_stats_merge(tcache_t *tcache, arena_t *arena) -{ - unsigned i; - - /* Merge and reset tcache stats. 
*/ - for (i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; - tcache_bin_t *tbin = &tcache->tbins[i]; - malloc_mutex_lock(&bin->lock); - bin->stats.nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(&bin->lock); - tbin->tstats.nrequests = 0; - } - - for (; i < nhbins; i++) { - malloc_large_stats_t *lstats = &arena->stats.lstats[i - NBINS]; - tcache_bin_t *tbin = &tcache->tbins[i]; - arena->stats.nrequests_large += tbin->tstats.nrequests; - lstats->nrequests += tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; - } -} - -bool -tcache_boot0(void) -{ - unsigned i; - - /* - * If necessary, clamp opt_lg_tcache_max, now that arena_maxclass is - * known. - */ - if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < SMALL_MAXCLASS) - tcache_maxclass = SMALL_MAXCLASS; - else if ((1U << opt_lg_tcache_max) > arena_maxclass) - tcache_maxclass = arena_maxclass; - else - tcache_maxclass = (1U << opt_lg_tcache_max); - - nhbins = NBINS + (tcache_maxclass >> LG_PAGE); - - /* Initialize tcache_bin_info. 
*/ - tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * - sizeof(tcache_bin_info_t)); - if (tcache_bin_info == NULL) - return (true); - stack_nelms = 0; - for (i = 0; i < NBINS; i++) { - if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { - tcache_bin_info[i].ncached_max = - (arena_bin_info[i].nregs << 1); - } else { - tcache_bin_info[i].ncached_max = - TCACHE_NSLOTS_SMALL_MAX; - } - stack_nelms += tcache_bin_info[i].ncached_max; - } - for (; i < nhbins; i++) { - tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; - stack_nelms += tcache_bin_info[i].ncached_max; - } - - return (false); -} - -bool -tcache_boot1(void) -{ - - if (tcache_tsd_boot() || tcache_enabled_tsd_boot()) - return (true); - - return (false); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/tsd.c mariadb-5.5-5.5.40/extra/jemalloc/src/tsd.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/tsd.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/tsd.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,107 +0,0 @@ -#define JEMALLOC_TSD_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Data. */ - -static unsigned ncleanups; -static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX]; - -/******************************************************************************/ - -void * -malloc_tsd_malloc(size_t size) -{ - - /* Avoid choose_arena() in order to dodge bootstrapping issues. 
*/ - return (arena_malloc(arenas[0], size, false, false)); -} - -void -malloc_tsd_dalloc(void *wrapper) -{ - - idalloc(wrapper); -} - -void -malloc_tsd_no_cleanup(void *arg) -{ - - not_reached(); -} - -#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) -#ifndef _WIN32 -JEMALLOC_EXPORT -#endif -void -_malloc_thread_cleanup(void) -{ - bool pending[MALLOC_TSD_CLEANUPS_MAX], again; - unsigned i; - - for (i = 0; i < ncleanups; i++) - pending[i] = true; - - do { - again = false; - for (i = 0; i < ncleanups; i++) { - if (pending[i]) { - pending[i] = cleanups[i](); - if (pending[i]) - again = true; - } - } - } while (again); -} -#endif - -void -malloc_tsd_cleanup_register(bool (*f)(void)) -{ - - assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX); - cleanups[ncleanups] = f; - ncleanups++; -} - -void -malloc_tsd_boot(void) -{ - - ncleanups = 0; -} - -#ifdef _WIN32 -static BOOL WINAPI -_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) -{ - - switch (fdwReason) { -#ifdef JEMALLOC_LAZY_LOCK - case DLL_THREAD_ATTACH: - isthreaded = true; - break; -#endif - case DLL_THREAD_DETACH: - _malloc_thread_cleanup(); - break; - default: - break; - } - return (true); -} - -#ifdef _MSC_VER -# ifdef _M_IX86 -# pragma comment(linker, "/INCLUDE:__tls_used") -# else -# pragma comment(linker, "/INCLUDE:_tls_used") -# endif -# pragma section(".CRT$XLY",long,read) -#endif -JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used) -static const BOOL (WINAPI *tls_callback)(HINSTANCE hinstDLL, - DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; -#endif diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/util.c mariadb-5.5-5.5.40/extra/jemalloc/src/util.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/util.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/util.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,641 +0,0 @@ -#define assert(e) do { \ - if (config_debug && !(e)) { \ - malloc_write(": Failed assertion\n"); \ - abort(); \ - } \ -} while (0) - -#define 
not_reached() do { \ - if (config_debug) { \ - malloc_write(": Unreachable code reached\n"); \ - abort(); \ - } \ -} while (0) - -#define not_implemented() do { \ - if (config_debug) { \ - malloc_write(": Not implemented\n"); \ - abort(); \ - } \ -} while (0) - -#define JEMALLOC_UTIL_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static void wrtmessage(void *cbopaque, const char *s); -#define U2S_BUFSIZE ((1U << (LG_SIZEOF_INTMAX_T + 3)) + 1) -static char *u2s(uintmax_t x, unsigned base, bool uppercase, char *s, - size_t *slen_p); -#define D2S_BUFSIZE (1 + U2S_BUFSIZE) -static char *d2s(intmax_t x, char sign, char *s, size_t *slen_p); -#define O2S_BUFSIZE (1 + U2S_BUFSIZE) -static char *o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p); -#define X2S_BUFSIZE (2 + U2S_BUFSIZE) -static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, - size_t *slen_p); - -/******************************************************************************/ - -/* malloc_message() setup. */ -static void -wrtmessage(void *cbopaque, const char *s) -{ - -#ifdef SYS_write - /* - * Use syscall(2) rather than write(2) when possible in order to avoid - * the possibility of memory allocation within libc. This is necessary - * on FreeBSD; most operating systems do not have this problem though. - */ - UNUSED int result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); -#else - UNUSED int result = write(STDERR_FILENO, s, strlen(s)); -#endif -} - -JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s); - -/* - * Wrapper around malloc_message() that avoids the need for - * je_malloc_message(...) throughout the code. 
- */ -void -malloc_write(const char *s) -{ - - if (je_malloc_message != NULL) - je_malloc_message(NULL, s); - else - wrtmessage(NULL, s); -} - -/* - * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so - * provide a wrapper. - */ -int -buferror(char *buf, size_t buflen) -{ - -#ifdef _WIN32 - FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, GetLastError(), 0, - (LPSTR)buf, buflen, NULL); - return (0); -#elif defined(_GNU_SOURCE) - char *b = strerror_r(errno, buf, buflen); - if (b != buf) { - strncpy(buf, b, buflen); - buf[buflen-1] = '\0'; - } - return (0); -#else - return (strerror_r(errno, buf, buflen)); -#endif -} - -uintmax_t -malloc_strtoumax(const char *nptr, char **endptr, int base) -{ - uintmax_t ret, digit; - int b; - bool neg; - const char *p, *ns; - - if (base < 0 || base == 1 || base > 36) { - set_errno(EINVAL); - return (UINTMAX_MAX); - } - b = base; - - /* Swallow leading whitespace and get sign, if any. */ - neg = false; - p = nptr; - while (true) { - switch (*p) { - case '\t': case '\n': case '\v': case '\f': case '\r': case ' ': - p++; - break; - case '-': - neg = true; - /* Fall through. */ - case '+': - p++; - /* Fall through. */ - default: - goto label_prefix; - } - } - - /* Get prefix, if any. */ - label_prefix: - /* - * Note where the first non-whitespace/sign character is so that it is - * possible to tell whether any digits are consumed (e.g., " 0" vs. - * " -x"). 
- */ - ns = p; - if (*p == '0') { - switch (p[1]) { - case '0': case '1': case '2': case '3': case '4': case '5': - case '6': case '7': - if (b == 0) - b = 8; - if (b == 8) - p++; - break; - case 'x': - switch (p[2]) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': - if (b == 0) - b = 16; - if (b == 16) - p += 2; - break; - default: - break; - } - break; - default: - break; - } - } - if (b == 0) - b = 10; - - /* Convert. */ - ret = 0; - while ((*p >= '0' && *p <= '9' && (digit = *p - '0') < b) - || (*p >= 'A' && *p <= 'Z' && (digit = 10 + *p - 'A') < b) - || (*p >= 'a' && *p <= 'z' && (digit = 10 + *p - 'a') < b)) { - uintmax_t pret = ret; - ret *= b; - ret += digit; - if (ret < pret) { - /* Overflow. */ - set_errno(ERANGE); - return (UINTMAX_MAX); - } - p++; - } - if (neg) - ret = -ret; - - if (endptr != NULL) { - if (p == ns) { - /* No characters were converted. */ - *endptr = (char *)nptr; - } else - *endptr = (char *)p; - } - - return (ret); -} - -static char * -u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) -{ - unsigned i; - - i = U2S_BUFSIZE - 1; - s[i] = '\0'; - switch (base) { - case 10: - do { - i--; - s[i] = "0123456789"[x % (uint64_t)10]; - x /= (uint64_t)10; - } while (x > 0); - break; - case 16: { - const char *digits = (uppercase) - ? "0123456789ABCDEF" - : "0123456789abcdef"; - - do { - i--; - s[i] = digits[x & 0xf]; - x >>= 4; - } while (x > 0); - break; - } default: { - const char *digits = (uppercase) - ? 
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" - : "0123456789abcdefghijklmnopqrstuvwxyz"; - - assert(base >= 2 && base <= 36); - do { - i--; - s[i] = digits[x % (uint64_t)base]; - x /= (uint64_t)base; - } while (x > 0); - }} - - *slen_p = U2S_BUFSIZE - 1 - i; - return (&s[i]); -} - -static char * -d2s(intmax_t x, char sign, char *s, size_t *slen_p) -{ - bool neg; - - if ((neg = (x < 0))) - x = -x; - s = u2s(x, 10, false, s, slen_p); - if (neg) - sign = '-'; - switch (sign) { - case '-': - if (neg == false) - break; - /* Fall through. */ - case ' ': - case '+': - s--; - (*slen_p)++; - *s = sign; - break; - default: not_reached(); - } - return (s); -} - -static char * -o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p) -{ - - s = u2s(x, 8, false, s, slen_p); - if (alt_form && *s != '0') { - s--; - (*slen_p)++; - *s = '0'; - } - return (s); -} - -static char * -x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) -{ - - s = u2s(x, 16, uppercase, s, slen_p); - if (alt_form) { - s -= 2; - (*slen_p) += 2; - memcpy(s, uppercase ? "0X" : "0x", 2); - } - return (s); -} - -int -malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) -{ - int ret; - size_t i; - const char *f; - -#define APPEND_C(c) do { \ - if (i < size) \ - str[i] = (c); \ - i++; \ -} while (0) -#define APPEND_S(s, slen) do { \ - if (i < size) { \ - size_t cpylen = (slen <= size - i) ? slen : size - i; \ - memcpy(&str[i], s, cpylen); \ - } \ - i += slen; \ -} while (0) -#define APPEND_PADDED_S(s, slen, width, left_justify) do { \ - /* Left padding. */ \ - size_t pad_len = (width == -1) ? 0 : ((slen < (size_t)width) ? \ - (size_t)width - slen : 0); \ - if (left_justify == false && pad_len != 0) { \ - size_t j; \ - for (j = 0; j < pad_len; j++) \ - APPEND_C(' '); \ - } \ - /* Value. */ \ - APPEND_S(s, slen); \ - /* Right padding. 
*/ \ - if (left_justify && pad_len != 0) { \ - size_t j; \ - for (j = 0; j < pad_len; j++) \ - APPEND_C(' '); \ - } \ -} while (0) -#define GET_ARG_NUMERIC(val, len) do { \ - switch (len) { \ - case '?': \ - val = va_arg(ap, int); \ - break; \ - case '?' | 0x80: \ - val = va_arg(ap, unsigned int); \ - break; \ - case 'l': \ - val = va_arg(ap, long); \ - break; \ - case 'l' | 0x80: \ - val = va_arg(ap, unsigned long); \ - break; \ - case 'q': \ - val = va_arg(ap, long long); \ - break; \ - case 'q' | 0x80: \ - val = va_arg(ap, unsigned long long); \ - break; \ - case 'j': \ - val = va_arg(ap, intmax_t); \ - break; \ - case 't': \ - val = va_arg(ap, ptrdiff_t); \ - break; \ - case 'z': \ - val = va_arg(ap, ssize_t); \ - break; \ - case 'z' | 0x80: \ - val = va_arg(ap, size_t); \ - break; \ - case 'p': /* Synthetic; used for %p. */ \ - val = va_arg(ap, uintptr_t); \ - break; \ - default: not_reached(); \ - } \ -} while (0) - - i = 0; - f = format; - while (true) { - switch (*f) { - case '\0': goto label_out; - case '%': { - bool alt_form = false; - bool left_justify = false; - bool plus_space = false; - bool plus_plus = false; - int prec = -1; - int width = -1; - unsigned char len = '?'; - - f++; - if (*f == '%') { - /* %% */ - APPEND_C(*f); - break; - } - /* Flags. */ - while (true) { - switch (*f) { - case '#': - assert(alt_form == false); - alt_form = true; - break; - case '-': - assert(left_justify == false); - left_justify = true; - break; - case ' ': - assert(plus_space == false); - plus_space = true; - break; - case '+': - assert(plus_plus == false); - plus_plus = true; - break; - default: goto label_width; - } - f++; - } - /* Width. 
*/ - label_width: - switch (*f) { - case '*': - width = va_arg(ap, int); - f++; - break; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': { - uintmax_t uwidth; - set_errno(0); - uwidth = malloc_strtoumax(f, (char **)&f, 10); - assert(uwidth != UINTMAX_MAX || get_errno() != - ERANGE); - width = (int)uwidth; - if (*f == '.') { - f++; - goto label_precision; - } else - goto label_length; - break; - } case '.': - f++; - goto label_precision; - default: goto label_length; - } - /* Precision. */ - label_precision: - switch (*f) { - case '*': - prec = va_arg(ap, int); - f++; - break; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': { - uintmax_t uprec; - set_errno(0); - uprec = malloc_strtoumax(f, (char **)&f, 10); - assert(uprec != UINTMAX_MAX || get_errno() != - ERANGE); - prec = (int)uprec; - break; - } - default: break; - } - /* Length. */ - label_length: - switch (*f) { - case 'l': - f++; - if (*f == 'l') { - len = 'q'; - f++; - } else - len = 'l'; - break; - case 'j': - len = 'j'; - f++; - break; - case 't': - len = 't'; - f++; - break; - case 'z': - len = 'z'; - f++; - break; - default: break; - } - /* Conversion specifier. */ - switch (*f) { - char *s; - size_t slen; - case 'd': case 'i': { - intmax_t val JEMALLOC_CC_SILENCE_INIT(0); - char buf[D2S_BUFSIZE]; - - GET_ARG_NUMERIC(val, len); - s = d2s(val, (plus_plus ? '+' : (plus_space ? 
- ' ' : '-')), buf, &slen); - APPEND_PADDED_S(s, slen, width, left_justify); - f++; - break; - } case 'o': { - uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); - char buf[O2S_BUFSIZE]; - - GET_ARG_NUMERIC(val, len | 0x80); - s = o2s(val, alt_form, buf, &slen); - APPEND_PADDED_S(s, slen, width, left_justify); - f++; - break; - } case 'u': { - uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); - char buf[U2S_BUFSIZE]; - - GET_ARG_NUMERIC(val, len | 0x80); - s = u2s(val, 10, false, buf, &slen); - APPEND_PADDED_S(s, slen, width, left_justify); - f++; - break; - } case 'x': case 'X': { - uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); - char buf[X2S_BUFSIZE]; - - GET_ARG_NUMERIC(val, len | 0x80); - s = x2s(val, alt_form, *f == 'X', buf, &slen); - APPEND_PADDED_S(s, slen, width, left_justify); - f++; - break; - } case 'c': { - unsigned char val; - char buf[2]; - - assert(len == '?' || len == 'l'); - assert_not_implemented(len != 'l'); - val = va_arg(ap, int); - buf[0] = val; - buf[1] = '\0'; - APPEND_PADDED_S(buf, 1, width, left_justify); - f++; - break; - } case 's': - assert(len == '?' || len == 'l'); - assert_not_implemented(len != 'l'); - s = va_arg(ap, char *); - slen = (prec == -1) ? strlen(s) : prec; - APPEND_PADDED_S(s, slen, width, left_justify); - f++; - break; - case 'p': { - uintmax_t val; - char buf[X2S_BUFSIZE]; - - GET_ARG_NUMERIC(val, 'p'); - s = x2s(val, true, false, buf, &slen); - APPEND_PADDED_S(s, slen, width, left_justify); - f++; - break; - } - default: not_implemented(); - } - break; - } default: { - APPEND_C(*f); - f++; - break; - }} - } - label_out: - if (i < size) - str[i] = '\0'; - else - str[size - 1] = '\0'; - ret = i; - -#undef APPEND_C -#undef APPEND_S -#undef APPEND_PADDED_S -#undef GET_ARG_NUMERIC - return (ret); -} - -JEMALLOC_ATTR(format(printf, 3, 4)) -int -malloc_snprintf(char *str, size_t size, const char *format, ...) 
-{ - int ret; - va_list ap; - - va_start(ap, format); - ret = malloc_vsnprintf(str, size, format, ap); - va_end(ap); - - return (ret); -} - -void -malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, va_list ap) -{ - char buf[MALLOC_PRINTF_BUFSIZE]; - - if (write_cb == NULL) { - /* - * The caller did not provide an alternate write_cb callback - * function, so use the default one. malloc_write() is an - * inline function, so use malloc_message() directly here. - */ - write_cb = (je_malloc_message != NULL) ? je_malloc_message : - wrtmessage; - cbopaque = NULL; - } - - malloc_vsnprintf(buf, sizeof(buf), format, ap); - write_cb(cbopaque, buf); -} - -/* - * Print to a callback function in such a way as to (hopefully) avoid memory - * allocation. - */ -JEMALLOC_ATTR(format(printf, 3, 4)) -void -malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, ...) -{ - va_list ap; - - va_start(ap, format); - malloc_vcprintf(write_cb, cbopaque, format, ap); - va_end(ap); -} - -/* Print to stderr in such a way as to avoid memory allocation. */ -JEMALLOC_ATTR(format(printf, 1, 2)) -void -malloc_printf(const char *format, ...) -{ - va_list ap; - - va_start(ap, format); - malloc_vcprintf(NULL, NULL, format, ap); - va_end(ap); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/src/zone.c mariadb-5.5-5.5.40/extra/jemalloc/src/zone.c --- mariadb-5.5-5.5.39/extra/jemalloc/src/zone.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/src/zone.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,258 +0,0 @@ -#include "jemalloc/internal/jemalloc_internal.h" -#ifndef JEMALLOC_ZONE -# error "This source file is for zones on Darwin (OS X)." -#endif - -/* - * The malloc_default_purgeable_zone function is only available on >= 10.6. - * We need to check whether it is present at runtime, thus the weak_import. 
- */ -extern malloc_zone_t *malloc_default_purgeable_zone(void) -JEMALLOC_ATTR(weak_import); - -/******************************************************************************/ -/* Data. */ - -static malloc_zone_t zone; -static struct malloc_introspection_t zone_introspect; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static size_t zone_size(malloc_zone_t *zone, void *ptr); -static void *zone_malloc(malloc_zone_t *zone, size_t size); -static void *zone_calloc(malloc_zone_t *zone, size_t num, size_t size); -static void *zone_valloc(malloc_zone_t *zone, size_t size); -static void zone_free(malloc_zone_t *zone, void *ptr); -static void *zone_realloc(malloc_zone_t *zone, void *ptr, size_t size); -#if (JEMALLOC_ZONE_VERSION >= 5) -static void *zone_memalign(malloc_zone_t *zone, size_t alignment, -#endif -#if (JEMALLOC_ZONE_VERSION >= 6) - size_t size); -static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, - size_t size); -#endif -static void *zone_destroy(malloc_zone_t *zone); -static size_t zone_good_size(malloc_zone_t *zone, size_t size); -static void zone_force_lock(malloc_zone_t *zone); -static void zone_force_unlock(malloc_zone_t *zone); - -/******************************************************************************/ -/* - * Functions. - */ - -static size_t -zone_size(malloc_zone_t *zone, void *ptr) -{ - - /* - * There appear to be places within Darwin (such as setenv(3)) that - * cause calls to this function with pointers that *no* zone owns. If - * we knew that all pointers were owned by *some* zone, we could split - * our zone into two parts, and use one as the default allocator and - * the other as the default deallocator/reallocator. Since that will - * not work in practice, we must check all pointers to assure that they - * reside within a mapped chunk before determining size. 
- */ - return (ivsalloc(ptr, config_prof)); -} - -static void * -zone_malloc(malloc_zone_t *zone, size_t size) -{ - - return (je_malloc(size)); -} - -static void * -zone_calloc(malloc_zone_t *zone, size_t num, size_t size) -{ - - return (je_calloc(num, size)); -} - -static void * -zone_valloc(malloc_zone_t *zone, size_t size) -{ - void *ret = NULL; /* Assignment avoids useless compiler warning. */ - - je_posix_memalign(&ret, PAGE, size); - - return (ret); -} - -static void -zone_free(malloc_zone_t *zone, void *ptr) -{ - - if (ivsalloc(ptr, config_prof) != 0) { - je_free(ptr); - return; - } - - free(ptr); -} - -static void * -zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) -{ - - if (ivsalloc(ptr, config_prof) != 0) - return (je_realloc(ptr, size)); - - return (realloc(ptr, size)); -} - -#if (JEMALLOC_ZONE_VERSION >= 5) -static void * -zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) -{ - void *ret = NULL; /* Assignment avoids useless compiler warning. */ - - je_posix_memalign(&ret, alignment, size); - - return (ret); -} -#endif - -#if (JEMALLOC_ZONE_VERSION >= 6) -static void -zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) -{ - - if (ivsalloc(ptr, config_prof) != 0) { - assert(ivsalloc(ptr, config_prof) == size); - je_free(ptr); - return; - } - - free(ptr); -} -#endif - -static void * -zone_destroy(malloc_zone_t *zone) -{ - - /* This function should never be called. */ - assert(false); - return (NULL); -} - -static size_t -zone_good_size(malloc_zone_t *zone, size_t size) -{ - - if (size == 0) - size = 1; - return (s2u(size)); -} - -static void -zone_force_lock(malloc_zone_t *zone) -{ - - if (isthreaded) - jemalloc_prefork(); -} - -static void -zone_force_unlock(malloc_zone_t *zone) -{ - - if (isthreaded) - jemalloc_postfork_parent(); -} - -JEMALLOC_ATTR(constructor) -void -register_zone(void) -{ - - /* - * If something else replaced the system default zone allocator, don't - * register jemalloc's. 
- */ - malloc_zone_t *default_zone = malloc_default_zone(); - if (!default_zone->zone_name || - strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) { - return; - } - - zone.size = (void *)zone_size; - zone.malloc = (void *)zone_malloc; - zone.calloc = (void *)zone_calloc; - zone.valloc = (void *)zone_valloc; - zone.free = (void *)zone_free; - zone.realloc = (void *)zone_realloc; - zone.destroy = (void *)zone_destroy; - zone.zone_name = "jemalloc_zone"; - zone.batch_malloc = NULL; - zone.batch_free = NULL; - zone.introspect = &zone_introspect; - zone.version = JEMALLOC_ZONE_VERSION; -#if (JEMALLOC_ZONE_VERSION >= 5) - zone.memalign = zone_memalign; -#endif -#if (JEMALLOC_ZONE_VERSION >= 6) - zone.free_definite_size = zone_free_definite_size; -#endif -#if (JEMALLOC_ZONE_VERSION >= 8) - zone.pressure_relief = NULL; -#endif - - zone_introspect.enumerator = NULL; - zone_introspect.good_size = (void *)zone_good_size; - zone_introspect.check = NULL; - zone_introspect.print = NULL; - zone_introspect.log = NULL; - zone_introspect.force_lock = (void *)zone_force_lock; - zone_introspect.force_unlock = (void *)zone_force_unlock; - zone_introspect.statistics = NULL; -#if (JEMALLOC_ZONE_VERSION >= 6) - zone_introspect.zone_locked = NULL; -#endif -#if (JEMALLOC_ZONE_VERSION >= 7) - zone_introspect.enable_discharge_checking = NULL; - zone_introspect.disable_discharge_checking = NULL; - zone_introspect.discharge = NULL; -#ifdef __BLOCKS__ - zone_introspect.enumerate_discharged_pointers = NULL; -#else - zone_introspect.enumerate_unavailable_without_blocks = NULL; -#endif -#endif - - /* - * The default purgeable zone is created lazily by OSX's libc. It uses - * the default zone when it is created for "small" allocations - * (< 15 KiB), but assumes the default zone is a scalable_zone. 
This - * obviously fails when the default zone is the jemalloc zone, so - * malloc_default_purgeable_zone is called beforehand so that the - * default purgeable zone is created when the default zone is still - * a scalable_zone. As purgeable zones only exist on >= 10.6, we need - * to check for the existence of malloc_default_purgeable_zone() at - * run time. - */ - if (malloc_default_purgeable_zone != NULL) - malloc_default_purgeable_zone(); - - /* Register the custom zone. At this point it won't be the default. */ - malloc_zone_register(&zone); - - /* - * Unregister and reregister the default zone. On OSX >= 10.6, - * unregistering takes the last registered zone and places it at the - * location of the specified zone. Unregistering the default zone thus - * makes the last registered one the default. On OSX < 10.6, - * unregistering shifts all registered zones. The first registered zone - * then becomes the default. - */ - do { - default_zone = malloc_default_zone(); - malloc_zone_unregister(default_zone); - malloc_zone_register(default_zone); - } while (malloc_default_zone() != &zone); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/aligned_alloc.c mariadb-5.5-5.5.40/extra/jemalloc/test/aligned_alloc.c --- mariadb-5.5-5.5.39/extra/jemalloc/test/aligned_alloc.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/test/aligned_alloc.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,119 +0,0 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" - -#define CHUNK 0x400000 -/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ -#define MAXALIGN ((size_t)0x2000000LU) -#define NITER 4 - -int -main(void) -{ - size_t alignment, size, total; - unsigned i; - void *p, *ps[NITER]; - - malloc_printf("Test begin\n"); - - /* Test error conditions. 
*/ - alignment = 0; - set_errno(0); - p = aligned_alloc(alignment, 1); - if (p != NULL || get_errno() != EINVAL) { - malloc_printf( - "Expected error for invalid alignment %zu\n", alignment); - } - - for (alignment = sizeof(size_t); alignment < MAXALIGN; - alignment <<= 1) { - set_errno(0); - p = aligned_alloc(alignment + 1, 1); - if (p != NULL || get_errno() != EINVAL) { - malloc_printf( - "Expected error for invalid alignment %zu\n", - alignment + 1); - } - } - -#if LG_SIZEOF_PTR == 3 - alignment = UINT64_C(0x8000000000000000); - size = UINT64_C(0x8000000000000000); -#else - alignment = 0x80000000LU; - size = 0x80000000LU; -#endif - set_errno(0); - p = aligned_alloc(alignment, size); - if (p != NULL || get_errno() != ENOMEM) { - malloc_printf( - "Expected error for aligned_alloc(%zu, %zu)\n", - alignment, size); - } - -#if LG_SIZEOF_PTR == 3 - alignment = UINT64_C(0x4000000000000000); - size = UINT64_C(0x8400000000000001); -#else - alignment = 0x40000000LU; - size = 0x84000001LU; -#endif - set_errno(0); - p = aligned_alloc(alignment, size); - if (p != NULL || get_errno() != ENOMEM) { - malloc_printf( - "Expected error for aligned_alloc(%zu, %zu)\n", - alignment, size); - } - - alignment = 0x10LU; -#if LG_SIZEOF_PTR == 3 - size = UINT64_C(0xfffffffffffffff0); -#else - size = 0xfffffff0LU; -#endif - set_errno(0); - p = aligned_alloc(alignment, size); - if (p != NULL || get_errno() != ENOMEM) { - malloc_printf( - "Expected error for aligned_alloc(&p, %zu, %zu)\n", - alignment, size); - } - - for (i = 0; i < NITER; i++) - ps[i] = NULL; - - for (alignment = 8; - alignment <= MAXALIGN; - alignment <<= 1) { - total = 0; - malloc_printf("Alignment: %zu\n", alignment); - for (size = 1; - size < 3 * alignment && size < (1U << 31); - size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { - for (i = 0; i < NITER; i++) { - ps[i] = aligned_alloc(alignment, size); - if (ps[i] == NULL) { - char buf[BUFERROR_BUF]; - - buferror(buf, sizeof(buf)); - malloc_printf( - "Error for size %zu 
(%#zx): %s\n", - size, size, buf); - exit(1); - } - total += malloc_usable_size(ps[i]); - if (total >= (MAXALIGN << 1)) - break; - } - for (i = 0; i < NITER; i++) { - if (ps[i] != NULL) { - free(ps[i]); - ps[i] = NULL; - } - } - } - } - - malloc_printf("Test end\n"); - return (0); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/aligned_alloc.exp mariadb-5.5-5.5.40/extra/jemalloc/test/aligned_alloc.exp --- mariadb-5.5-5.5.39/extra/jemalloc/test/aligned_alloc.exp 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/test/aligned_alloc.exp 1970-01-01 00:00:00.000000000 +0000 @@ -1,25 +0,0 @@ -Test begin -Alignment: 8 -Alignment: 16 -Alignment: 32 -Alignment: 64 -Alignment: 128 -Alignment: 256 -Alignment: 512 -Alignment: 1024 -Alignment: 2048 -Alignment: 4096 -Alignment: 8192 -Alignment: 16384 -Alignment: 32768 -Alignment: 65536 -Alignment: 131072 -Alignment: 262144 -Alignment: 524288 -Alignment: 1048576 -Alignment: 2097152 -Alignment: 4194304 -Alignment: 8388608 -Alignment: 16777216 -Alignment: 33554432 -Test end diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/allocated.c mariadb-5.5-5.5.40/extra/jemalloc/test/allocated.c --- mariadb-5.5-5.5.39/extra/jemalloc/test/allocated.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/test/allocated.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,118 +0,0 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" - -void * -je_thread_start(void *arg) -{ - int err; - void *p; - uint64_t a0, a1, d0, d1; - uint64_t *ap0, *ap1, *dp0, *dp1; - size_t sz, usize; - - sz = sizeof(a0); - if ((err = mallctl("thread.allocated", &a0, &sz, NULL, 0))) { - if (err == ENOENT) { -#ifdef JEMALLOC_STATS - assert(false); -#endif - goto label_return; - } - malloc_printf("%s(): Error in mallctl(): %s\n", __func__, - strerror(err)); - exit(1); - } - sz = sizeof(ap0); - if ((err = mallctl("thread.allocatedp", &ap0, &sz, NULL, 0))) { - if (err == ENOENT) { -#ifdef JEMALLOC_STATS - assert(false); -#endif - goto 
label_return; - } - malloc_printf("%s(): Error in mallctl(): %s\n", __func__, - strerror(err)); - exit(1); - } - assert(*ap0 == a0); - - sz = sizeof(d0); - if ((err = mallctl("thread.deallocated", &d0, &sz, NULL, 0))) { - if (err == ENOENT) { -#ifdef JEMALLOC_STATS - assert(false); -#endif - goto label_return; - } - malloc_printf("%s(): Error in mallctl(): %s\n", __func__, - strerror(err)); - exit(1); - } - sz = sizeof(dp0); - if ((err = mallctl("thread.deallocatedp", &dp0, &sz, NULL, 0))) { - if (err == ENOENT) { -#ifdef JEMALLOC_STATS - assert(false); -#endif - goto label_return; - } - malloc_printf("%s(): Error in mallctl(): %s\n", __func__, - strerror(err)); - exit(1); - } - assert(*dp0 == d0); - - p = malloc(1); - if (p == NULL) { - malloc_printf("%s(): Error in malloc()\n", __func__); - exit(1); - } - - sz = sizeof(a1); - mallctl("thread.allocated", &a1, &sz, NULL, 0); - sz = sizeof(ap1); - mallctl("thread.allocatedp", &ap1, &sz, NULL, 0); - assert(*ap1 == a1); - assert(ap0 == ap1); - - usize = malloc_usable_size(p); - assert(a0 + usize <= a1); - - free(p); - - sz = sizeof(d1); - mallctl("thread.deallocated", &d1, &sz, NULL, 0); - sz = sizeof(dp1); - mallctl("thread.deallocatedp", &dp1, &sz, NULL, 0); - assert(*dp1 == d1); - assert(dp0 == dp1); - - assert(d0 + usize <= d1); - -label_return: - return (NULL); -} - -int -main(void) -{ - int ret = 0; - je_thread_t thread; - - malloc_printf("Test begin\n"); - - je_thread_start(NULL); - - je_thread_create(&thread, je_thread_start, NULL); - je_thread_join(thread, (void *)&ret); - - je_thread_start(NULL); - - je_thread_create(&thread, je_thread_start, NULL); - je_thread_join(thread, (void *)&ret); - - je_thread_start(NULL); - - malloc_printf("Test end\n"); - return (ret); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/allocated.exp mariadb-5.5-5.5.40/extra/jemalloc/test/allocated.exp --- mariadb-5.5-5.5.39/extra/jemalloc/test/allocated.exp 2014-08-03 12:00:40.000000000 +0000 +++ 
mariadb-5.5-5.5.40/extra/jemalloc/test/allocated.exp 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -Test begin -Test end diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/ALLOCM_ARENA.c mariadb-5.5-5.5.40/extra/jemalloc/test/ALLOCM_ARENA.c --- mariadb-5.5-5.5.39/extra/jemalloc/test/ALLOCM_ARENA.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/test/ALLOCM_ARENA.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,67 +0,0 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" - -#define NTHREADS 10 - -void * -je_thread_start(void *arg) -{ - unsigned thread_ind = (unsigned)(uintptr_t)arg; - unsigned arena_ind; - int r; - void *p; - size_t rsz, sz; - - sz = sizeof(arena_ind); - if (mallctl("arenas.extend", &arena_ind, &sz, NULL, 0) - != 0) { - malloc_printf("Error in arenas.extend\n"); - abort(); - } - - if (thread_ind % 4 != 3) { - size_t mib[3]; - size_t miblen = sizeof(mib) / sizeof(size_t); - const char *dss_precs[] = {"disabled", "primary", "secondary"}; - const char *dss = dss_precs[thread_ind % 4]; - if (mallctlnametomib("arena.0.dss", mib, &miblen) != 0) { - malloc_printf("Error in mallctlnametomib()\n"); - abort(); - } - mib[1] = arena_ind; - if (mallctlbymib(mib, miblen, NULL, NULL, (void *)&dss, - sizeof(const char *))) { - malloc_printf("Error in mallctlbymib()\n"); - abort(); - } - } - - r = allocm(&p, &rsz, 1, ALLOCM_ARENA(arena_ind)); - if (r != ALLOCM_SUCCESS) { - malloc_printf("Unexpected allocm() error\n"); - abort(); - } - dallocm(p, 0); - - return (NULL); -} - -int -main(void) -{ - je_thread_t threads[NTHREADS]; - unsigned i; - - malloc_printf("Test begin\n"); - - for (i = 0; i < NTHREADS; i++) { - je_thread_create(&threads[i], je_thread_start, - (void *)(uintptr_t)i); - } - - for (i = 0; i < NTHREADS; i++) - je_thread_join(threads[i], NULL); - - malloc_printf("Test end\n"); - return (0); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/ALLOCM_ARENA.exp mariadb-5.5-5.5.40/extra/jemalloc/test/ALLOCM_ARENA.exp --- 
mariadb-5.5-5.5.39/extra/jemalloc/test/ALLOCM_ARENA.exp 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/test/ALLOCM_ARENA.exp 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -Test begin -Test end diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/allocm.c mariadb-5.5-5.5.40/extra/jemalloc/test/allocm.c --- mariadb-5.5-5.5.39/extra/jemalloc/test/allocm.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/test/allocm.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,194 +0,0 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" - -#define CHUNK 0x400000 -/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ -#define MAXALIGN ((size_t)0x2000000LU) -#define NITER 4 - -int -main(void) -{ - int r; - void *p; - size_t nsz, rsz, sz, alignment, total; - unsigned i; - void *ps[NITER]; - - malloc_printf("Test begin\n"); - - sz = 42; - nsz = 0; - r = nallocm(&nsz, sz, 0); - if (r != ALLOCM_SUCCESS) { - malloc_printf("Unexpected nallocm() error\n"); - abort(); - } - rsz = 0; - r = allocm(&p, &rsz, sz, 0); - if (r != ALLOCM_SUCCESS) { - malloc_printf("Unexpected allocm() error\n"); - abort(); - } - if (rsz < sz) - malloc_printf("Real size smaller than expected\n"); - if (nsz != rsz) - malloc_printf("nallocm()/allocm() rsize mismatch\n"); - if (dallocm(p, 0) != ALLOCM_SUCCESS) - malloc_printf("Unexpected dallocm() error\n"); - - r = allocm(&p, NULL, sz, 0); - if (r != ALLOCM_SUCCESS) { - malloc_printf("Unexpected allocm() error\n"); - abort(); - } - if (dallocm(p, 0) != ALLOCM_SUCCESS) - malloc_printf("Unexpected dallocm() error\n"); - - nsz = 0; - r = nallocm(&nsz, sz, ALLOCM_ZERO); - if (r != ALLOCM_SUCCESS) { - malloc_printf("Unexpected nallocm() error\n"); - abort(); - } - rsz = 0; - r = allocm(&p, &rsz, sz, ALLOCM_ZERO); - if (r != ALLOCM_SUCCESS) { - malloc_printf("Unexpected allocm() error\n"); - abort(); - } - if (nsz != rsz) - malloc_printf("nallocm()/allocm() rsize mismatch\n"); - if (dallocm(p, 0) != ALLOCM_SUCCESS) - 
malloc_printf("Unexpected dallocm() error\n"); - -#if LG_SIZEOF_PTR == 3 - alignment = UINT64_C(0x8000000000000000); - sz = UINT64_C(0x8000000000000000); -#else - alignment = 0x80000000LU; - sz = 0x80000000LU; -#endif - nsz = 0; - r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); - if (r == ALLOCM_SUCCESS) { - malloc_printf( - "Expected error for nallocm(&nsz, %zu, %#x)\n", - sz, ALLOCM_ALIGN(alignment)); - } - rsz = 0; - r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); - if (r == ALLOCM_SUCCESS) { - malloc_printf( - "Expected error for allocm(&p, %zu, %#x)\n", - sz, ALLOCM_ALIGN(alignment)); - } - if (nsz != rsz) - malloc_printf("nallocm()/allocm() rsize mismatch\n"); - -#if LG_SIZEOF_PTR == 3 - alignment = UINT64_C(0x4000000000000000); - sz = UINT64_C(0x8400000000000001); -#else - alignment = 0x40000000LU; - sz = 0x84000001LU; -#endif - nsz = 0; - r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); - if (r != ALLOCM_SUCCESS) - malloc_printf("Unexpected nallocm() error\n"); - rsz = 0; - r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); - if (r == ALLOCM_SUCCESS) { - malloc_printf( - "Expected error for allocm(&p, %zu, %#x)\n", - sz, ALLOCM_ALIGN(alignment)); - } - - alignment = 0x10LU; -#if LG_SIZEOF_PTR == 3 - sz = UINT64_C(0xfffffffffffffff0); -#else - sz = 0xfffffff0LU; -#endif - nsz = 0; - r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); - if (r == ALLOCM_SUCCESS) { - malloc_printf( - "Expected error for nallocm(&nsz, %zu, %#x)\n", - sz, ALLOCM_ALIGN(alignment)); - } - rsz = 0; - r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); - if (r == ALLOCM_SUCCESS) { - malloc_printf( - "Expected error for allocm(&p, %zu, %#x)\n", - sz, ALLOCM_ALIGN(alignment)); - } - if (nsz != rsz) - malloc_printf("nallocm()/allocm() rsize mismatch\n"); - - for (i = 0; i < NITER; i++) - ps[i] = NULL; - - for (alignment = 8; - alignment <= MAXALIGN; - alignment <<= 1) { - total = 0; - malloc_printf("Alignment: %zu\n", alignment); - for (sz = 1; - sz < 3 * alignment && sz < (1U << 31); - sz 
+= (alignment >> (LG_SIZEOF_PTR-1)) - 1) { - for (i = 0; i < NITER; i++) { - nsz = 0; - r = nallocm(&nsz, sz, - ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); - if (r != ALLOCM_SUCCESS) { - malloc_printf( - "nallocm() error for size %zu" - " (%#zx): %d\n", - sz, sz, r); - exit(1); - } - rsz = 0; - r = allocm(&ps[i], &rsz, sz, - ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); - if (r != ALLOCM_SUCCESS) { - malloc_printf( - "allocm() error for size %zu" - " (%#zx): %d\n", - sz, sz, r); - exit(1); - } - if (rsz < sz) { - malloc_printf( - "Real size smaller than" - " expected\n"); - } - if (nsz != rsz) { - malloc_printf( - "nallocm()/allocm() rsize" - " mismatch\n"); - } - if ((uintptr_t)p & (alignment-1)) { - malloc_printf( - "%p inadequately aligned for" - " alignment: %zu\n", p, alignment); - } - sallocm(ps[i], &rsz, 0); - total += rsz; - if (total >= (MAXALIGN << 1)) - break; - } - for (i = 0; i < NITER; i++) { - if (ps[i] != NULL) { - dallocm(ps[i], 0); - ps[i] = NULL; - } - } - } - } - - malloc_printf("Test end\n"); - return (0); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/allocm.exp mariadb-5.5-5.5.40/extra/jemalloc/test/allocm.exp --- mariadb-5.5-5.5.39/extra/jemalloc/test/allocm.exp 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/test/allocm.exp 1970-01-01 00:00:00.000000000 +0000 @@ -1,25 +0,0 @@ -Test begin -Alignment: 8 -Alignment: 16 -Alignment: 32 -Alignment: 64 -Alignment: 128 -Alignment: 256 -Alignment: 512 -Alignment: 1024 -Alignment: 2048 -Alignment: 4096 -Alignment: 8192 -Alignment: 16384 -Alignment: 32768 -Alignment: 65536 -Alignment: 131072 -Alignment: 262144 -Alignment: 524288 -Alignment: 1048576 -Alignment: 2097152 -Alignment: 4194304 -Alignment: 8388608 -Alignment: 16777216 -Alignment: 33554432 -Test end diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/bitmap.c mariadb-5.5-5.5.40/extra/jemalloc/test/bitmap.c --- mariadb-5.5-5.5.39/extra/jemalloc/test/bitmap.c 2014-08-03 12:00:40.000000000 +0000 +++ 
mariadb-5.5-5.5.40/extra/jemalloc/test/bitmap.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,153 +0,0 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" - -#if (LG_BITMAP_MAXBITS > 12) -# define MAXBITS 4500 -#else -# define MAXBITS (1U << LG_BITMAP_MAXBITS) -#endif - -static void -test_bitmap_size(void) -{ - size_t i, prev_size; - - prev_size = 0; - for (i = 1; i <= MAXBITS; i++) { - size_t size = bitmap_size(i); - assert(size >= prev_size); - prev_size = size; - } -} - -static void -test_bitmap_init(void) -{ - size_t i; - - for (i = 1; i <= MAXBITS; i++) { - bitmap_info_t binfo; - bitmap_info_init(&binfo, i); - { - size_t j; - bitmap_t *bitmap = malloc(sizeof(bitmap_t) * - bitmap_info_ngroups(&binfo)); - bitmap_init(bitmap, &binfo); - - for (j = 0; j < i; j++) - assert(bitmap_get(bitmap, &binfo, j) == false); - free(bitmap); - - } - } -} - -static void -test_bitmap_set(void) -{ - size_t i; - - for (i = 1; i <= MAXBITS; i++) { - bitmap_info_t binfo; - bitmap_info_init(&binfo, i); - { - size_t j; - bitmap_t *bitmap = malloc(sizeof(bitmap_t) * - bitmap_info_ngroups(&binfo)); - bitmap_init(bitmap, &binfo); - - for (j = 0; j < i; j++) - bitmap_set(bitmap, &binfo, j); - assert(bitmap_full(bitmap, &binfo)); - free(bitmap); - } - } -} - -static void -test_bitmap_unset(void) -{ - size_t i; - - for (i = 1; i <= MAXBITS; i++) { - bitmap_info_t binfo; - bitmap_info_init(&binfo, i); - { - size_t j; - bitmap_t *bitmap = malloc(sizeof(bitmap_t) * - bitmap_info_ngroups(&binfo)); - bitmap_init(bitmap, &binfo); - - for (j = 0; j < i; j++) - bitmap_set(bitmap, &binfo, j); - assert(bitmap_full(bitmap, &binfo)); - for (j = 0; j < i; j++) - bitmap_unset(bitmap, &binfo, j); - for (j = 0; j < i; j++) - bitmap_set(bitmap, &binfo, j); - assert(bitmap_full(bitmap, &binfo)); - free(bitmap); - } - } -} - -static void -test_bitmap_sfu(void) -{ - size_t i; - - for (i = 1; i <= MAXBITS; i++) { - bitmap_info_t binfo; - bitmap_info_init(&binfo, i); - { - ssize_t j; - bitmap_t *bitmap = 
malloc(sizeof(bitmap_t) * - bitmap_info_ngroups(&binfo)); - bitmap_init(bitmap, &binfo); - - /* Iteratively set bits starting at the beginning. */ - for (j = 0; j < i; j++) - assert(bitmap_sfu(bitmap, &binfo) == j); - assert(bitmap_full(bitmap, &binfo)); - - /* - * Iteratively unset bits starting at the end, and - * verify that bitmap_sfu() reaches the unset bits. - */ - for (j = i - 1; j >= 0; j--) { - bitmap_unset(bitmap, &binfo, j); - assert(bitmap_sfu(bitmap, &binfo) == j); - bitmap_unset(bitmap, &binfo, j); - } - assert(bitmap_get(bitmap, &binfo, 0) == false); - - /* - * Iteratively set bits starting at the beginning, and - * verify that bitmap_sfu() looks past them. - */ - for (j = 1; j < i; j++) { - bitmap_set(bitmap, &binfo, j - 1); - assert(bitmap_sfu(bitmap, &binfo) == j); - bitmap_unset(bitmap, &binfo, j); - } - assert(bitmap_sfu(bitmap, &binfo) == i - 1); - assert(bitmap_full(bitmap, &binfo)); - free(bitmap); - } - } -} - -int -main(void) -{ - malloc_printf("Test begin\n"); - - test_bitmap_size(); - test_bitmap_init(); - test_bitmap_set(); - test_bitmap_unset(); - test_bitmap_sfu(); - - malloc_printf("Test end\n"); - return (0); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/bitmap.exp mariadb-5.5-5.5.40/extra/jemalloc/test/bitmap.exp --- mariadb-5.5-5.5.39/extra/jemalloc/test/bitmap.exp 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/test/bitmap.exp 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -Test begin -Test end diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/jemalloc_test.h.in mariadb-5.5-5.5.40/extra/jemalloc/test/jemalloc_test.h.in --- mariadb-5.5-5.5.39/extra/jemalloc/test/jemalloc_test.h.in 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/test/jemalloc_test.h.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,53 +0,0 @@ -/* - * This header should be included by tests, rather than directly including - * jemalloc/jemalloc.h, because --with-install-suffix may cause the header to - * have a 
different name. - */ -#include "jemalloc/jemalloc@install_suffix@.h" -#include "jemalloc/internal/jemalloc_internal.h" - -/* Abstraction layer for threading in tests */ -#ifdef _WIN32 -#include - -typedef HANDLE je_thread_t; - -void -je_thread_create(je_thread_t *thread, void *(*proc)(void *), void *arg) -{ - LPTHREAD_START_ROUTINE routine = (LPTHREAD_START_ROUTINE)proc; - *thread = CreateThread(NULL, 0, routine, arg, 0, NULL); - if (*thread == NULL) { - malloc_printf("Error in CreateThread()\n"); - exit(1); - } -} - -void -je_thread_join(je_thread_t thread, void **ret) -{ - WaitForSingleObject(thread, INFINITE); -} - -#else -#include - -typedef pthread_t je_thread_t; - -void -je_thread_create(je_thread_t *thread, void *(*proc)(void *), void *arg) -{ - - if (pthread_create(thread, NULL, proc, arg) != 0) { - malloc_printf("Error in pthread_create()\n"); - exit(1); - } -} - -void -je_thread_join(je_thread_t thread, void **ret) -{ - - pthread_join(thread, ret); -} -#endif diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/mremap.c mariadb-5.5-5.5.40/extra/jemalloc/test/mremap.c --- mariadb-5.5-5.5.39/extra/jemalloc/test/mremap.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/test/mremap.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,60 +0,0 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" - -int -main(void) -{ - int ret, err; - size_t sz, lg_chunk, chunksize, i; - char *p, *q; - - malloc_printf("Test begin\n"); - - sz = sizeof(lg_chunk); - if ((err = mallctl("opt.lg_chunk", &lg_chunk, &sz, NULL, 0))) { - assert(err != ENOENT); - malloc_printf("%s(): Error in mallctl(): %s\n", __func__, - strerror(err)); - ret = 1; - goto label_return; - } - chunksize = ((size_t)1U) << lg_chunk; - - p = (char *)malloc(chunksize); - if (p == NULL) { - malloc_printf("malloc(%zu) --> %p\n", chunksize, p); - ret = 1; - goto label_return; - } - memset(p, 'a', chunksize); - - q = (char *)realloc(p, chunksize * 2); - if (q == NULL) { - malloc_printf("realloc(%p, 
%zu) --> %p\n", p, chunksize * 2, - q); - ret = 1; - goto label_return; - } - for (i = 0; i < chunksize; i++) { - assert(q[i] == 'a'); - } - - p = q; - - q = (char *)realloc(p, chunksize); - if (q == NULL) { - malloc_printf("realloc(%p, %zu) --> %p\n", p, chunksize, q); - ret = 1; - goto label_return; - } - for (i = 0; i < chunksize; i++) { - assert(q[i] == 'a'); - } - - free(q); - - ret = 0; -label_return: - malloc_printf("Test end\n"); - return (ret); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/mremap.exp mariadb-5.5-5.5.40/extra/jemalloc/test/mremap.exp --- mariadb-5.5-5.5.39/extra/jemalloc/test/mremap.exp 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/test/mremap.exp 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -Test begin -Test end diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/posix_memalign.c mariadb-5.5-5.5.40/extra/jemalloc/test/posix_memalign.c --- mariadb-5.5-5.5.39/extra/jemalloc/test/posix_memalign.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/test/posix_memalign.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,115 +0,0 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" - -#define CHUNK 0x400000 -/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ -#define MAXALIGN ((size_t)0x2000000LU) -#define NITER 4 - -int -main(void) -{ - size_t alignment, size, total; - unsigned i; - int err; - void *p, *ps[NITER]; - - malloc_printf("Test begin\n"); - - /* Test error conditions. 
*/ - for (alignment = 0; alignment < sizeof(void *); alignment++) { - err = posix_memalign(&p, alignment, 1); - if (err != EINVAL) { - malloc_printf( - "Expected error for invalid alignment %zu\n", - alignment); - } - } - - for (alignment = sizeof(size_t); alignment < MAXALIGN; - alignment <<= 1) { - err = posix_memalign(&p, alignment + 1, 1); - if (err == 0) { - malloc_printf( - "Expected error for invalid alignment %zu\n", - alignment + 1); - } - } - -#if LG_SIZEOF_PTR == 3 - alignment = UINT64_C(0x8000000000000000); - size = UINT64_C(0x8000000000000000); -#else - alignment = 0x80000000LU; - size = 0x80000000LU; -#endif - err = posix_memalign(&p, alignment, size); - if (err == 0) { - malloc_printf( - "Expected error for posix_memalign(&p, %zu, %zu)\n", - alignment, size); - } - -#if LG_SIZEOF_PTR == 3 - alignment = UINT64_C(0x4000000000000000); - size = UINT64_C(0x8400000000000001); -#else - alignment = 0x40000000LU; - size = 0x84000001LU; -#endif - err = posix_memalign(&p, alignment, size); - if (err == 0) { - malloc_printf( - "Expected error for posix_memalign(&p, %zu, %zu)\n", - alignment, size); - } - - alignment = 0x10LU; -#if LG_SIZEOF_PTR == 3 - size = UINT64_C(0xfffffffffffffff0); -#else - size = 0xfffffff0LU; -#endif - err = posix_memalign(&p, alignment, size); - if (err == 0) { - malloc_printf( - "Expected error for posix_memalign(&p, %zu, %zu)\n", - alignment, size); - } - - for (i = 0; i < NITER; i++) - ps[i] = NULL; - - for (alignment = 8; - alignment <= MAXALIGN; - alignment <<= 1) { - total = 0; - malloc_printf("Alignment: %zu\n", alignment); - for (size = 1; - size < 3 * alignment && size < (1U << 31); - size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { - for (i = 0; i < NITER; i++) { - err = posix_memalign(&ps[i], - alignment, size); - if (err) { - malloc_printf( - "Error for size %zu (%#zx): %s\n", - size, size, strerror(err)); - exit(1); - } - total += malloc_usable_size(ps[i]); - if (total >= (MAXALIGN << 1)) - break; - } - for (i = 0; i < 
NITER; i++) { - if (ps[i] != NULL) { - free(ps[i]); - ps[i] = NULL; - } - } - } - } - - malloc_printf("Test end\n"); - return (0); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/posix_memalign.exp mariadb-5.5-5.5.40/extra/jemalloc/test/posix_memalign.exp --- mariadb-5.5-5.5.39/extra/jemalloc/test/posix_memalign.exp 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/test/posix_memalign.exp 1970-01-01 00:00:00.000000000 +0000 @@ -1,25 +0,0 @@ -Test begin -Alignment: 8 -Alignment: 16 -Alignment: 32 -Alignment: 64 -Alignment: 128 -Alignment: 256 -Alignment: 512 -Alignment: 1024 -Alignment: 2048 -Alignment: 4096 -Alignment: 8192 -Alignment: 16384 -Alignment: 32768 -Alignment: 65536 -Alignment: 131072 -Alignment: 262144 -Alignment: 524288 -Alignment: 1048576 -Alignment: 2097152 -Alignment: 4194304 -Alignment: 8388608 -Alignment: 16777216 -Alignment: 33554432 -Test end diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/rallocm.c mariadb-5.5-5.5.40/extra/jemalloc/test/rallocm.c --- mariadb-5.5-5.5.39/extra/jemalloc/test/rallocm.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/test/rallocm.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,127 +0,0 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" - -int -main(void) -{ - size_t pagesize; - void *p, *q; - size_t sz, tsz; - int r; - - malloc_printf("Test begin\n"); - - /* Get page size. 
*/ - { -#ifdef _WIN32 - SYSTEM_INFO si; - GetSystemInfo(&si); - pagesize = (size_t)si.dwPageSize; -#else - long result = sysconf(_SC_PAGESIZE); - assert(result != -1); - pagesize = (size_t)result; -#endif - } - - r = allocm(&p, &sz, 42, 0); - if (r != ALLOCM_SUCCESS) { - malloc_printf("Unexpected allocm() error\n"); - abort(); - } - - q = p; - r = rallocm(&q, &tsz, sz, 0, ALLOCM_NO_MOVE); - if (r != ALLOCM_SUCCESS) - malloc_printf("Unexpected rallocm() error\n"); - if (q != p) - malloc_printf("Unexpected object move\n"); - if (tsz != sz) { - malloc_printf("Unexpected size change: %zu --> %zu\n", - sz, tsz); - } - - q = p; - r = rallocm(&q, &tsz, sz, 5, ALLOCM_NO_MOVE); - if (r != ALLOCM_SUCCESS) - malloc_printf("Unexpected rallocm() error\n"); - if (q != p) - malloc_printf("Unexpected object move\n"); - if (tsz != sz) { - malloc_printf("Unexpected size change: %zu --> %zu\n", - sz, tsz); - } - - q = p; - r = rallocm(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE); - if (r != ALLOCM_ERR_NOT_MOVED) - malloc_printf("Unexpected rallocm() result\n"); - if (q != p) - malloc_printf("Unexpected object move\n"); - if (tsz != sz) { - malloc_printf("Unexpected size change: %zu --> %zu\n", - sz, tsz); - } - - q = p; - r = rallocm(&q, &tsz, sz + 5, 0, 0); - if (r != ALLOCM_SUCCESS) - malloc_printf("Unexpected rallocm() error\n"); - if (q == p) - malloc_printf("Expected object move\n"); - if (tsz == sz) { - malloc_printf("Expected size change: %zu --> %zu\n", - sz, tsz); - } - p = q; - sz = tsz; - - r = rallocm(&q, &tsz, pagesize*2, 0, 0); - if (r != ALLOCM_SUCCESS) - malloc_printf("Unexpected rallocm() error\n"); - if (q == p) - malloc_printf("Expected object move\n"); - if (tsz == sz) { - malloc_printf("Expected size change: %zu --> %zu\n", - sz, tsz); - } - p = q; - sz = tsz; - - r = rallocm(&q, &tsz, pagesize*4, 0, 0); - if (r != ALLOCM_SUCCESS) - malloc_printf("Unexpected rallocm() error\n"); - if (tsz == sz) { - malloc_printf("Expected size change: %zu --> %zu\n", - sz, tsz); - } - p 
= q; - sz = tsz; - - r = rallocm(&q, &tsz, pagesize*2, 0, ALLOCM_NO_MOVE); - if (r != ALLOCM_SUCCESS) - malloc_printf("Unexpected rallocm() error\n"); - if (q != p) - malloc_printf("Unexpected object move\n"); - if (tsz == sz) { - malloc_printf("Expected size change: %zu --> %zu\n", - sz, tsz); - } - sz = tsz; - - r = rallocm(&q, &tsz, pagesize*4, 0, ALLOCM_NO_MOVE); - if (r != ALLOCM_SUCCESS) - malloc_printf("Unexpected rallocm() error\n"); - if (q != p) - malloc_printf("Unexpected object move\n"); - if (tsz == sz) { - malloc_printf("Expected size change: %zu --> %zu\n", - sz, tsz); - } - sz = tsz; - - dallocm(p, 0); - - malloc_printf("Test end\n"); - return (0); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/rallocm.exp mariadb-5.5-5.5.40/extra/jemalloc/test/rallocm.exp --- mariadb-5.5-5.5.39/extra/jemalloc/test/rallocm.exp 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/test/rallocm.exp 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -Test begin -Test end diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/thread_arena.c mariadb-5.5-5.5.40/extra/jemalloc/test/thread_arena.c --- mariadb-5.5-5.5.39/extra/jemalloc/test/thread_arena.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/test/thread_arena.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,81 +0,0 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" - -#define NTHREADS 10 - -void * -je_thread_start(void *arg) -{ - unsigned main_arena_ind = *(unsigned *)arg; - void *p; - unsigned arena_ind; - size_t size; - int err; - - p = malloc(1); - if (p == NULL) { - malloc_printf("%s(): Error in malloc()\n", __func__); - return (void *)1; - } - free(p); - - size = sizeof(arena_ind); - if ((err = mallctl("thread.arena", &arena_ind, &size, &main_arena_ind, - sizeof(main_arena_ind)))) { - malloc_printf("%s(): Error in mallctl(): %s\n", __func__, - strerror(err)); - return (void *)1; - } - - size = sizeof(arena_ind); - if ((err = mallctl("thread.arena", &arena_ind, 
&size, NULL, - 0))) { - malloc_printf("%s(): Error in mallctl(): %s\n", __func__, - strerror(err)); - return (void *)1; - } - assert(arena_ind == main_arena_ind); - - return (NULL); -} - -int -main(void) -{ - int ret = 0; - void *p; - unsigned arena_ind; - size_t size; - int err; - je_thread_t threads[NTHREADS]; - unsigned i; - - malloc_printf("Test begin\n"); - - p = malloc(1); - if (p == NULL) { - malloc_printf("%s(): Error in malloc()\n", __func__); - ret = 1; - goto label_return; - } - - size = sizeof(arena_ind); - if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) { - malloc_printf("%s(): Error in mallctl(): %s\n", __func__, - strerror(err)); - ret = 1; - goto label_return; - } - - for (i = 0; i < NTHREADS; i++) { - je_thread_create(&threads[i], je_thread_start, - (void *)&arena_ind); - } - - for (i = 0; i < NTHREADS; i++) - je_thread_join(threads[i], (void *)&ret); - -label_return: - malloc_printf("Test end\n"); - return (ret); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/thread_arena.exp mariadb-5.5-5.5.40/extra/jemalloc/test/thread_arena.exp --- mariadb-5.5-5.5.39/extra/jemalloc/test/thread_arena.exp 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/test/thread_arena.exp 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -Test begin -Test end diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/thread_tcache_enabled.c mariadb-5.5-5.5.40/extra/jemalloc/test/thread_tcache_enabled.c --- mariadb-5.5-5.5.39/extra/jemalloc/test/thread_tcache_enabled.c 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/test/thread_tcache_enabled.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,91 +0,0 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" - -void * -je_thread_start(void *arg) -{ - int err; - size_t sz; - bool e0, e1; - - sz = sizeof(bool); - if ((err = mallctl("thread.tcache.enabled", &e0, &sz, NULL, 0))) { - if (err == ENOENT) { -#ifdef JEMALLOC_TCACHE - assert(false); -#endif - } - goto label_return; - 
} - - if (e0) { - e1 = false; - assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) - == 0); - assert(e0); - } - - e1 = true; - assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); - assert(e0 == false); - - e1 = true; - assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); - assert(e0); - - e1 = false; - assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); - assert(e0); - - e1 = false; - assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); - assert(e0 == false); - - free(malloc(1)); - e1 = true; - assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); - assert(e0 == false); - - free(malloc(1)); - e1 = true; - assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); - assert(e0); - - free(malloc(1)); - e1 = false; - assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); - assert(e0); - - free(malloc(1)); - e1 = false; - assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); - assert(e0 == false); - - free(malloc(1)); -label_return: - return (NULL); -} - -int -main(void) -{ - int ret = 0; - je_thread_t thread; - - malloc_printf("Test begin\n"); - - je_thread_start(NULL); - - je_thread_create(&thread, je_thread_start, NULL); - je_thread_join(thread, (void *)&ret); - - je_thread_start(NULL); - - je_thread_create(&thread, je_thread_start, NULL); - je_thread_join(thread, (void *)&ret); - - je_thread_start(NULL); - - malloc_printf("Test end\n"); - return (ret); -} diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/test/thread_tcache_enabled.exp mariadb-5.5-5.5.40/extra/jemalloc/test/thread_tcache_enabled.exp --- mariadb-5.5-5.5.39/extra/jemalloc/test/thread_tcache_enabled.exp 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/test/thread_tcache_enabled.exp 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -Test begin -Test end diff -Nru mariadb-5.5-5.5.39/extra/jemalloc/VERSION mariadb-5.5-5.5.40/extra/jemalloc/VERSION --- 
mariadb-5.5-5.5.39/extra/jemalloc/VERSION 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/jemalloc/VERSION 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -3.3.1-0-g9ef9d9e8c271cdf14f664b871a8f98c827714784 diff -Nru mariadb-5.5-5.5.39/extra/yassl/certs/ca-cert.pem mariadb-5.5-5.5.40/extra/yassl/certs/ca-cert.pem --- mariadb-5.5-5.5.39/extra/yassl/certs/ca-cert.pem 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/certs/ca-cert.pem 2014-10-08 13:19:52.000000000 +0000 @@ -1,45 +1,45 @@ -----BEGIN CERTIFICATE----- -MIIEnjCCA4agAwIBAgIJAOnQp195JfQ8MA0GCSqGSIb3DQEBBQUAMIGQMQswCQYD -VQQGEwJVUzEQMA4GA1UECBMHTW9udGFuYTEQMA4GA1UEBxMHQm96ZW1hbjERMA8G -A1UEChMIU2F3dG9vdGgxEzARBgNVBAsTCkNvbnN1bHRpbmcxFjAUBgNVBAMTDXd3 -dy55YXNzbC5jb20xHTAbBgkqhkiG9w0BCQEWDmluZm9AeWFzc2wuY29tMB4XDTEx -MTAyNDE4MTgxNVoXDTE0MDcyMDE4MTgxNVowgZAxCzAJBgNVBAYTAlVTMRAwDgYD -VQQIEwdNb250YW5hMRAwDgYDVQQHEwdCb3plbWFuMREwDwYDVQQKEwhTYXd0b290 -aDETMBEGA1UECxMKQ29uc3VsdGluZzEWMBQGA1UEAxMNd3d3Lnlhc3NsLmNvbTEd -MBsGCSqGSIb3DQEJARYOaW5mb0B5YXNzbC5jb20wggEiMA0GCSqGSIb3DQEBAQUA -A4IBDwAwggEKAoIBAQC/DMotFLIehEJbzTgfSvJNdRDxtjWf38p9A5jTrN4DZu4q -8diwfW4HVAsQmCFNgMsSIOfMT95FfclydzLqypC7aVIQAy+o85XF8YtiVhvvZ2+k -EEGVrQqb46XAsNJwdlAwW6joCCx87aeieo04KRysx+3yfJWwlYJ9SVw4zXcl772A -dVOUPD3KY1ufFbXTHRMvGdE823Y6zLh9yeXC19pAb9gh3HMbQi1TnP4a/H2rejY/ -mN6EfAVnzmoUOIep8Yy1aMtof3EgK/WgY/VWL6Mm0rdvsVoX1ziZCP6TWG/+wxNJ -CBYLp01nAFIxZyNOmO1RRR25BNkL7Ngos0u97TZ5AgMBAAGjgfgwgfUwHQYDVR0O -BBYEFCeOZxF0wyYdP+0zY7Ok2B0w5ejVMIHFBgNVHSMEgb0wgbqAFCeOZxF0wyYd -P+0zY7Ok2B0w5ejVoYGWpIGTMIGQMQswCQYDVQQGEwJVUzEQMA4GA1UECBMHTW9u -dGFuYTEQMA4GA1UEBxMHQm96ZW1hbjERMA8GA1UEChMIU2F3dG9vdGgxEzARBgNV -BAsTCkNvbnN1bHRpbmcxFjAUBgNVBAMTDXd3dy55YXNzbC5jb20xHTAbBgkqhkiG -9w0BCQEWDmluZm9AeWFzc2wuY29tggkA6dCnX3kl9DwwDAYDVR0TBAUwAwEB/zAN -BgkqhkiG9w0BAQUFAAOCAQEAX4YU9FGLvKVOMNperJr4bNkmS5P54xyJb57us513 -PokgdqPm6IYVIdviM7I01dCf88Gkh5Jc+dH/MC+OA7yzPAwyo5BfGpAer53zntcH 
-Aql9J2ZjL68Y16wYmIyDjzjzC6w2EHX7ynYTUFsCj3O/46Dug1IlVM4mzpy9L3mr -G2C4kvEDwPw7CNnArdVyCCWAYS3cn6eDYgdH4AdMSwcwBKmHHFV/BxLQy0Jdy89m -ARoX7vkPYLfbb2jlTkFibtNvYE9LJ97PGAfxE13LP6klRNpSXMgE4VYS9SqQTtHi -rwG1I6HsMdp7Y2nEuPPnzqE9wNtt87LZRsifw7hwWh9/yg== +MIIEqjCCA5KgAwIBAgIJAJpBR82hFGKMMA0GCSqGSIb3DQEBBQUAMIGUMQswCQYD +VQQGEwJVUzEQMA4GA1UECAwHTW9udGFuYTEQMA4GA1UEBwwHQm96ZW1hbjERMA8G +A1UECgwIU2F3dG9vdGgxEzARBgNVBAsMCkNvbnN1bHRpbmcxGDAWBgNVBAMMD3d3 +dy53b2xmc3NsLmNvbTEfMB0GCSqGSIb3DQEJARYQaW5mb0B3b2xmc3NsLmNvbTAe +Fw0xNDA3MTEwMzIwMDhaFw0xNzA0MDYwMzIwMDhaMIGUMQswCQYDVQQGEwJVUzEQ +MA4GA1UECAwHTW9udGFuYTEQMA4GA1UEBwwHQm96ZW1hbjERMA8GA1UECgwIU2F3 +dG9vdGgxEzARBgNVBAsMCkNvbnN1bHRpbmcxGDAWBgNVBAMMD3d3dy53b2xmc3Ns +LmNvbTEfMB0GCSqGSIb3DQEJARYQaW5mb0B3b2xmc3NsLmNvbTCCASIwDQYJKoZI +hvcNAQEBBQADggEPADCCAQoCggEBAL8Myi0Ush6EQlvNOB9K8k11EPG2NZ/fyn0D +mNOs3gNm7irx2LB9bgdUCxCYIU2AyxIg58xP3kV9yXJ3MurKkLtpUhADL6jzlcXx +i2JWG+9nb6QQQZWtCpvjpcCw0nB2UDBbqOgILHztp6J6jTgpHKzH7fJ8lbCVgn1J +XDjNdyXvvYB1U5Q8PcpjW58VtdMdEy8Z0TzbdjrMuH3J5cLX2kBv2CHccxtCLVOc +/hr8fat6Nj+Y3oR8BWfOahQ4h6nxjLVoy2h/cSAr9aBj9VYvoybSt2+xWhfXOJkI +/pNYb/7DE0kIFgunTWcAUjFnI06Y7VFFHbkE2Qvs2CizS73tNnkCAwEAAaOB/DCB ++TAdBgNVHQ4EFgQUJ45nEXTDJh0/7TNjs6TYHTDl6NUwgckGA1UdIwSBwTCBvoAU +J45nEXTDJh0/7TNjs6TYHTDl6NWhgZqkgZcwgZQxCzAJBgNVBAYTAlVTMRAwDgYD +VQQIDAdNb250YW5hMRAwDgYDVQQHDAdCb3plbWFuMREwDwYDVQQKDAhTYXd0b290 +aDETMBEGA1UECwwKQ29uc3VsdGluZzEYMBYGA1UEAwwPd3d3LndvbGZzc2wuY29t +MR8wHQYJKoZIhvcNAQkBFhBpbmZvQHdvbGZzc2wuY29tggkAmkFHzaEUYowwDAYD +VR0TBAUwAwEB/zANBgkqhkiG9w0BAQUFAAOCAQEAeXgMbXmIkfw6FZz5J2IW8CEf ++n0/oqgyHvfyEal0FnRe3BjK8AAq1QMGJjDxR4P9Mm787apPfQxjYDEvfAy/mWaH +7ScIhi3EM+iYIxz+o9uaSU78WkLvccM/rdxKqNKjHQmsMwR7hvNtAFmjyNvRPHP2 +DpDWXkngvzZjCHulsI81O1aMETVJBBzQ57pWxQ0KkY3Wt2IZNBJSTNJtfMU9DxiB +VMv2POWE0tZxFewaNAvwoCF0Q8ijsN/ZZ9rirZNI+KCHvXkU4GIK3/cxLjF70TIq +Cv5dFO/ZZFDkg5G8cA3XiI3ZvIQOxRqzv2QCTlGRpKKFFYOv8FubKElfsrMD2A== -----END CERTIFICATE----- Certificate: Data: Version: 3 (0x2) Serial Number: - 
e9:d0:a7:5f:79:25:f4:3c - Signature Algorithm: sha1WithRSAEncryption - Issuer: C=US, ST=Montana, L=Bozeman, O=Sawtooth, OU=Consulting, CN=www.yassl.com/emailAddress=info@yassl.com + 9a:41:47:cd:a1:14:62:8c + Signature Algorithm: sha1WithRSAEncryption + Issuer: C=US, ST=Montana, L=Bozeman, O=Sawtooth, OU=Consulting, CN=www.wolfssl.com/emailAddress=info@wolfssl.com Validity - Not Before: Oct 24 18:18:15 2011 GMT - Not After : Jul 20 18:18:15 2014 GMT - Subject: C=US, ST=Montana, L=Bozeman, O=Sawtooth, OU=Consulting, CN=www.yassl.com/emailAddress=info@yassl.com + Not Before: Jul 11 03:20:08 2014 GMT + Not After : Apr 6 03:20:08 2017 GMT + Subject: C=US, ST=Montana, L=Bozeman, O=Sawtooth, OU=Consulting, CN=www.wolfssl.com/emailAddress=info@wolfssl.com Subject Public Key Info: Public Key Algorithm: rsaEncryption - RSA Public Key: (2048 bit) - Modulus (2048 bit): + Public-Key: (2048 bit) + Modulus: 00:bf:0c:ca:2d:14:b2:1e:84:42:5b:cd:38:1f:4a: f2:4d:75:10:f1:b6:35:9f:df:ca:7d:03:98:d3:ac: de:03:66:ee:2a:f1:d8:b0:7d:6e:07:54:0b:10:98: @@ -64,24 +64,24 @@ 27:8E:67:11:74:C3:26:1D:3F:ED:33:63:B3:A4:D8:1D:30:E5:E8:D5 X509v3 Authority Key Identifier: keyid:27:8E:67:11:74:C3:26:1D:3F:ED:33:63:B3:A4:D8:1D:30:E5:E8:D5 - DirName:/C=US/ST=Montana/L=Bozeman/O=Sawtooth/OU=Consulting/CN=www.yassl.com/emailAddress=info@yassl.com - serial:E9:D0:A7:5F:79:25:F4:3C + DirName:/C=US/ST=Montana/L=Bozeman/O=Sawtooth/OU=Consulting/CN=www.wolfssl.com/emailAddress=info@wolfssl.com + serial:9A:41:47:CD:A1:14:62:8C X509v3 Basic Constraints: CA:TRUE Signature Algorithm: sha1WithRSAEncryption - 5f:86:14:f4:51:8b:bc:a5:4e:30:da:5e:ac:9a:f8:6c:d9:26: - 4b:93:f9:e3:1c:89:6f:9e:ee:b3:9d:77:3e:89:20:76:a3:e6: - e8:86:15:21:db:e2:33:b2:34:d5:d0:9f:f3:c1:a4:87:92:5c: - f9:d1:ff:30:2f:8e:03:bc:b3:3c:0c:32:a3:90:5f:1a:90:1e: - af:9d:f3:9e:d7:07:02:a9:7d:27:66:63:2f:af:18:d7:ac:18: - 98:8c:83:8f:38:f3:0b:ac:36:10:75:fb:ca:76:13:50:5b:02: - 8f:73:bf:e3:a0:ee:83:52:25:54:ce:26:ce:9c:bd:2f:79:ab: - 
1b:60:b8:92:f1:03:c0:fc:3b:08:d9:c0:ad:d5:72:08:25:80: - 61:2d:dc:9f:a7:83:62:07:47:e0:07:4c:4b:07:30:04:a9:87: - 1c:55:7f:07:12:d0:cb:42:5d:cb:cf:66:01:1a:17:ee:f9:0f: - 60:b7:db:6f:68:e5:4e:41:62:6e:d3:6f:60:4f:4b:27:de:cf: - 18:07:f1:13:5d:cb:3f:a9:25:44:da:52:5c:c8:04:e1:56:12: - f5:2a:90:4e:d1:e2:af:01:b5:23:a1:ec:31:da:7b:63:69:c4: - b8:f3:e7:ce:a1:3d:c0:db:6d:f3:b2:d9:46:c8:9f:c3:b8:70: - 5a:1f:7f:ca + 79:78:0c:6d:79:88:91:fc:3a:15:9c:f9:27:62:16:f0:21:1f: + fa:7d:3f:a2:a8:32:1e:f7:f2:11:a9:74:16:74:5e:dc:18:ca: + f0:00:2a:d5:03:06:26:30:f1:47:83:fd:32:6e:fc:ed:aa:4f: + 7d:0c:63:60:31:2f:7c:0c:bf:99:66:87:ed:27:08:86:2d:c4: + 33:e8:98:23:1c:fe:a3:db:9a:49:4e:fc:5a:42:ef:71:c3:3f: + ad:dc:4a:a8:d2:a3:1d:09:ac:33:04:7b:86:f3:6d:00:59:a3: + c8:db:d1:3c:73:f6:0e:90:d6:5e:49:e0:bf:36:63:08:7b:a5: + b0:8f:35:3b:56:8c:11:35:49:04:1c:d0:e7:ba:56:c5:0d:0a: + 91:8d:d6:b7:62:19:34:12:52:4c:d2:6d:7c:c5:3d:0f:18:81: + 54:cb:f6:3c:e5:84:d2:d6:71:15:ec:1a:34:0b:f0:a0:21:74: + 43:c8:a3:b0:df:d9:67:da:e2:ad:93:48:f8:a0:87:bd:79:14: + e0:62:0a:df:f7:31:2e:31:7b:d1:32:2a:0a:fe:5d:14:ef:d9: + 64:50:e4:83:91:bc:70:0d:d7:88:8d:d9:bc:84:0e:c5:1a:b3: + bf:64:02:4e:51:91:a4:a2:85:15:83:af:f0:5b:9b:28:49:5f: + b2:b3:03:d8 Binary files /tmp/QjjFQPSKln/mariadb-5.5-5.5.39/extra/yassl/certs/client-cert.der and /tmp/N_pNtyJsAC/mariadb-5.5-5.5.40/extra/yassl/certs/client-cert.der differ diff -Nru mariadb-5.5-5.5.39/extra/yassl/certs/client-cert.pem mariadb-5.5-5.5.40/extra/yassl/certs/client-cert.pem --- mariadb-5.5-5.5.39/extra/yassl/certs/client-cert.pem 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/certs/client-cert.pem 2014-10-08 13:19:52.000000000 +0000 @@ -2,17 +2,17 @@ Data: Version: 3 (0x2) Serial Number: - 87:4a:75:be:91:66:d8:3d - Signature Algorithm: sha1WithRSAEncryption - Issuer: C=US, ST=Oregon, L=Portland, O=yaSSL, OU=Programming, CN=www.yassl.com/emailAddress=info@yassl.com + b6:63:af:8f:5d:62:57:a0 + Signature Algorithm: sha1WithRSAEncryption + 
Issuer: C=US, ST=Montana, L=Bozeman, O=wolfSSL, OU=Programming, CN=www.wolfssl.com/emailAddress=info@wolfssl.com Validity - Not Before: Oct 24 18:21:55 2011 GMT - Not After : Jul 20 18:21:55 2014 GMT - Subject: C=US, ST=Oregon, L=Portland, O=yaSSL, OU=Programming, CN=www.yassl.com/emailAddress=info@yassl.com + Not Before: Jul 11 17:39:44 2014 GMT + Not After : Apr 6 17:39:44 2017 GMT + Subject: C=US, ST=Montana, L=Bozeman, O=wolfSSL, OU=Programming, CN=www.wolfssl.com/emailAddress=info@wolfssl.com Subject Public Key Info: Public Key Algorithm: rsaEncryption - RSA Public Key: (2048 bit) - Modulus (2048 bit): + Public-Key: (2048 bit) + Modulus: 00:c3:03:d1:2b:fe:39:a4:32:45:3b:53:c8:84:2b: 2a:7c:74:9a:bd:aa:2a:52:07:47:d6:a6:36:b2:07: 32:8e:d0:ba:69:7b:c6:c3:44:9e:d4:81:48:fd:2d: @@ -37,51 +37,51 @@ 33:D8:45:66:D7:68:87:18:7E:54:0D:70:27:91:C7:26:D7:85:65:C0 X509v3 Authority Key Identifier: keyid:33:D8:45:66:D7:68:87:18:7E:54:0D:70:27:91:C7:26:D7:85:65:C0 - DirName:/C=US/ST=Oregon/L=Portland/O=yaSSL/OU=Programming/CN=www.yassl.com/emailAddress=info@yassl.com - serial:87:4A:75:BE:91:66:D8:3D + DirName:/C=US/ST=Montana/L=Bozeman/O=wolfSSL/OU=Programming/CN=www.wolfssl.com/emailAddress=info@wolfssl.com + serial:B6:63:AF:8F:5D:62:57:A0 X509v3 Basic Constraints: CA:TRUE Signature Algorithm: sha1WithRSAEncryption - 1c:7c:42:81:29:9e:21:cf:d0:d8:c1:54:6f:cc:ae:14:09:38: - ff:68:98:9a:95:53:76:18:7b:e6:30:76:ec:28:0d:75:a7:de: - e0:cd:8e:d5:55:23:6a:47:2b:4e:8d:fc:7d:06:a3:d8:0f:ad: - 5e:d6:04:c9:00:33:fb:77:27:d3:b5:03:b3:7b:21:74:31:0b: - 4a:af:2d:1a:b3:93:8e:cc:f3:5f:3d:90:3f:cc:e3:55:19:91: - 7b:78:24:2e:4a:09:bb:18:4e:61:2d:9c:c6:0a:a0:34:91:88: - 70:6b:3b:48:47:bc:79:94:a2:a0:4d:32:47:54:c2:a3:dc:2e: - d2:51:4c:29:39:11:ff:e2:15:5e:58:97:36:f6:e9:06:06:86: - 0e:8d:9d:95:03:72:b2:8b:19:7c:e9:14:6e:a1:88:73:68:58: - 6d:71:5e:c2:d5:d3:13:d2:5f:de:ea:03:be:e2:00:40:e5:ce: - fd:e6:92:31:57:c3:eb:bb:66:ac:cb:2f:1a:fa:e0:62:a2:47: - 
f4:93:43:2a:4b:6c:5e:0a:2f:f9:e7:e6:4a:63:86:b0:ac:2a: - a1:eb:b4:5b:67:cd:32:e4:b6:11:4b:9a:72:66:0d:a2:4a:76: - 8f:fe:22:bc:83:fd:db:b7:d5:a9:ee:05:c9:b1:71:7e:1b:2b: - e1:e3:af:c0 + 85:10:90:c5:5d:de:25:8c:f2:57:7b:2d:14:1c:05:f9:71:63: + 40:b0:e3:c1:c1:2e:13:2a:7a:b7:d6:24:58:87:eb:03:fb:0d: + af:e0:f4:d0:c8:bc:51:36:10:4f:79:cc:4f:66:7d:af:99:cb: + 7b:ce:68:94:c6:36:aa:42:6e:8c:78:5b:b2:85:ca:d1:e1:a8: + 31:d1:81:d9:f9:c1:a3:9e:34:43:ef:0a:79:7d:3e:83:61:fc: + 14:5c:d1:dd:bc:0e:d7:51:b7:71:6e:41:7e:8b:2c:5a:9a:cb: + 77:4b:6a:f5:06:ff:02:af:1e:e6:63:4f:bc:44:d9:3f:56:9e: + 09:9c:43:f9:55:21:32:46:82:09:86:a9:7b:74:1c:9e:5a:2a: + bf:03:79:91:cb:f2:29:7f:c9:15:82:89:b9:53:cd:7e:07:90: + a9:5d:76:e1:19:5e:0d:58:b8:59:d5:0d:df:23:ab:6b:63:76: + 19:9e:9c:df:b0:57:49:6c:d0:86:97:c3:6c:3c:fa:e0:56:c2: + 1b:e3:a1:42:1a:58:62:85:9d:74:19:83:08:af:59:90:f8:99: + bd:67:d3:4a:ea:0e:c9:ca:61:8a:0d:8a:42:cc:90:e9:2e:c2: + 54:73:7f:5e:af:8d:e2:32:cb:45:20:d6:19:4d:5b:77:31:cc: + 0f:2d:c0:7e -----BEGIN CERTIFICATE----- -MIIEmDCCA4CgAwIBAgIJAIdKdb6RZtg9MA0GCSqGSIb3DQEBBQUAMIGOMQswCQYD -VQQGEwJVUzEPMA0GA1UECBMGT3JlZ29uMREwDwYDVQQHEwhQb3J0bGFuZDEOMAwG -A1UEChMFeWFTU0wxFDASBgNVBAsTC1Byb2dyYW1taW5nMRYwFAYDVQQDEw13d3cu -eWFzc2wuY29tMR0wGwYJKoZIhvcNAQkBFg5pbmZvQHlhc3NsLmNvbTAeFw0xMTEw -MjQxODIxNTVaFw0xNDA3MjAxODIxNTVaMIGOMQswCQYDVQQGEwJVUzEPMA0GA1UE -CBMGT3JlZ29uMREwDwYDVQQHEwhQb3J0bGFuZDEOMAwGA1UEChMFeWFTU0wxFDAS -BgNVBAsTC1Byb2dyYW1taW5nMRYwFAYDVQQDEw13d3cueWFzc2wuY29tMR0wGwYJ -KoZIhvcNAQkBFg5pbmZvQHlhc3NsLmNvbTCCASIwDQYJKoZIhvcNAQEBBQADggEP -ADCCAQoCggEBAMMD0Sv+OaQyRTtTyIQrKnx0mr2qKlIHR9amNrIHMo7Quml7xsNE -ntSBSP0taKKLZ7uhdcg2LErSG/eLus8N+e/s8YEee5sDR5q/Zcx/ZSRppugUiVvk -NPfFsBST9Wd7Onp44QFWVpGmE0KN0jxAnEzv0YbfN1EbDKE79fGjSjXk4c6W3xt+ -v06X0BDoqAgwga8gC0MUxXRntDKCb42GwohAmTaDuh5AciIX11JlJHOwzu8Zza7/ -eGx7wBID1E5yDVBtO6M7o5lencjZDIWz2YrZVCbbbfqsu/8lTMTRefRx04ZAGBOw -Y7VyTjDEl4SGLVYv1xX3f8Cu9fxb5fuhutMCAwEAAaOB9jCB8zAdBgNVHQ4EFgQU 
-M9hFZtdohxh+VA1wJ5HHJteFZcAwgcMGA1UdIwSBuzCBuIAUM9hFZtdohxh+VA1w -J5HHJteFZcChgZSkgZEwgY4xCzAJBgNVBAYTAlVTMQ8wDQYDVQQIEwZPcmVnb24x -ETAPBgNVBAcTCFBvcnRsYW5kMQ4wDAYDVQQKEwV5YVNTTDEUMBIGA1UECxMLUHJv -Z3JhbW1pbmcxFjAUBgNVBAMTDXd3dy55YXNzbC5jb20xHTAbBgkqhkiG9w0BCQEW -DmluZm9AeWFzc2wuY29tggkAh0p1vpFm2D0wDAYDVR0TBAUwAwEB/zANBgkqhkiG -9w0BAQUFAAOCAQEAHHxCgSmeIc/Q2MFUb8yuFAk4/2iYmpVTdhh75jB27CgNdafe -4M2O1VUjakcrTo38fQaj2A+tXtYEyQAz+3cn07UDs3shdDELSq8tGrOTjszzXz2Q -P8zjVRmRe3gkLkoJuxhOYS2cxgqgNJGIcGs7SEe8eZSioE0yR1TCo9wu0lFMKTkR -/+IVXliXNvbpBgaGDo2dlQNysosZfOkUbqGIc2hYbXFewtXTE9Jf3uoDvuIAQOXO -/eaSMVfD67tmrMsvGvrgYqJH9JNDKktsXgov+efmSmOGsKwqoeu0W2fNMuS2EUua -cmYNokp2j/4ivIP927fVqe4FybFxfhsr4eOvwA== +MIIEqjCCA5KgAwIBAgIJALZjr49dYlegMA0GCSqGSIb3DQEBBQUAMIGUMQswCQYD +VQQGEwJVUzEQMA4GA1UECAwHTW9udGFuYTEQMA4GA1UEBwwHQm96ZW1hbjEQMA4G +A1UECgwHd29sZlNTTDEUMBIGA1UECwwLUHJvZ3JhbW1pbmcxGDAWBgNVBAMMD3d3 +dy53b2xmc3NsLmNvbTEfMB0GCSqGSIb3DQEJARYQaW5mb0B3b2xmc3NsLmNvbTAe +Fw0xNDA3MTExNzM5NDRaFw0xNzA0MDYxNzM5NDRaMIGUMQswCQYDVQQGEwJVUzEQ +MA4GA1UECAwHTW9udGFuYTEQMA4GA1UEBwwHQm96ZW1hbjEQMA4GA1UECgwHd29s +ZlNTTDEUMBIGA1UECwwLUHJvZ3JhbW1pbmcxGDAWBgNVBAMMD3d3dy53b2xmc3Ns +LmNvbTEfMB0GCSqGSIb3DQEJARYQaW5mb0B3b2xmc3NsLmNvbTCCASIwDQYJKoZI +hvcNAQEBBQADggEPADCCAQoCggEBAMMD0Sv+OaQyRTtTyIQrKnx0mr2qKlIHR9am +NrIHMo7Quml7xsNEntSBSP0taKKLZ7uhdcg2LErSG/eLus8N+e/s8YEee5sDR5q/ +Zcx/ZSRppugUiVvkNPfFsBST9Wd7Onp44QFWVpGmE0KN0jxAnEzv0YbfN1EbDKE7 +9fGjSjXk4c6W3xt+v06X0BDoqAgwga8gC0MUxXRntDKCb42GwohAmTaDuh5AciIX +11JlJHOwzu8Zza7/eGx7wBID1E5yDVBtO6M7o5lencjZDIWz2YrZVCbbbfqsu/8l +TMTRefRx04ZAGBOwY7VyTjDEl4SGLVYv1xX3f8Cu9fxb5fuhutMCAwEAAaOB/DCB ++TAdBgNVHQ4EFgQUM9hFZtdohxh+VA1wJ5HHJteFZcAwgckGA1UdIwSBwTCBvoAU +M9hFZtdohxh+VA1wJ5HHJteFZcChgZqkgZcwgZQxCzAJBgNVBAYTAlVTMRAwDgYD +VQQIDAdNb250YW5hMRAwDgYDVQQHDAdCb3plbWFuMRAwDgYDVQQKDAd3b2xmU1NM +MRQwEgYDVQQLDAtQcm9ncmFtbWluZzEYMBYGA1UEAwwPd3d3LndvbGZzc2wuY29t +MR8wHQYJKoZIhvcNAQkBFhBpbmZvQHdvbGZzc2wuY29tggkAtmOvj11iV6AwDAYD 
+VR0TBAUwAwEB/zANBgkqhkiG9w0BAQUFAAOCAQEAhRCQxV3eJYzyV3stFBwF+XFj +QLDjwcEuEyp6t9YkWIfrA/sNr+D00Mi8UTYQT3nMT2Z9r5nLe85olMY2qkJujHhb +soXK0eGoMdGB2fnBo540Q+8KeX0+g2H8FFzR3bwO11G3cW5BfossWprLd0tq9Qb/ +Aq8e5mNPvETZP1aeCZxD+VUhMkaCCYape3QcnloqvwN5kcvyKX/JFYKJuVPNfgeQ +qV124RleDVi4WdUN3yOra2N2GZ6c37BXSWzQhpfDbDz64FbCG+OhQhpYYoWddBmD +CK9ZkPiZvWfTSuoOycphig2KQsyQ6S7CVHN/Xq+N4jLLRSDWGU1bdzHMDy3Afg== -----END CERTIFICATE----- Binary files /tmp/QjjFQPSKln/mariadb-5.5-5.5.39/extra/yassl/certs/client-key.der and /tmp/N_pNtyJsAC/mariadb-5.5-5.5.40/extra/yassl/certs/client-key.der differ diff -Nru mariadb-5.5-5.5.39/extra/yassl/certs/client-keyEnc.pem mariadb-5.5-5.5.40/extra/yassl/certs/client-keyEnc.pem --- mariadb-5.5-5.5.39/extra/yassl/certs/client-keyEnc.pem 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/certs/client-keyEnc.pem 2014-10-08 13:19:52.000000000 +0000 @@ -1,30 +1,12 @@ -----BEGIN RSA PRIVATE KEY----- Proc-Type: 4,ENCRYPTED -DEK-Info: DES-CBC,B9D8FB94E38635AB +DEK-Info: DES-EDE3-CBC,BDE979D13CCC0ABD -3OTcffWLy2Ddlu2oUwnMWkvIb3e9wLL1jrKOpC0aeb//uiawgw50+KuU4pewB5fN -lfEJwpX4NjfPL+Nk+B1VAVrv5gwk5/SY9SwIJluutzmGS4TfVOhqi2SVd0mc9kOD -cSWQ9ltAohFu67jdx36j2u+eghDTOjls1lM8EpzL5cu3Bp4G+ST0nXAdnGtSZdV6 -eToLWjIHiC/JqeRSsKAlG0M5verw14sbb5MO4ZQF4Tdu0fCFgFvDSUM2V4ZLtS1N -VysLEkHoF56YKZ5H2FYLxOVDpn5lSiLnOgRbteEzsysyJ1zLxXWFFwJPCpLVNL0e -P7OoEoCR+oAdzGkkPF+EdMoULtQP+n6U7jGx3oFVS17NORIFvyxyP0hD4pGTGLnl -qAEk30lhKGAE5GgvA2itxZIno/sxPKr5T5Sc2yWh9RdQuLWYNrOb8Kz8J1iXV5l6 -/5TLGu5XVWIlBnUtjMFUe5M54tqGQ6SuDUlL2ud5YeLVN0T+RU/bqV2fXGoBUqKb -Oe8PECm62Ls0wjv27BIOXXV32WSXwsywSzBqq2YXZ5zc9Q0+Mf1Zl6jKwcr8rXhv -rA2kcpicONryggsPZnn/us1bVuWKndsCbm7A5om6HowpamNMPuxhISigzzE59L6X -X6Sl2F0N0zhrfUVlAAlfYTrwcQVtyBDj1xp2nzJFocurJt9EylLaT0Sw3nxWtuOg -yQuF05UPCzxqow/7dMVqtQKng0ptpsn/E+Kr/Egk1YaTpUUxref6mD3R1S+qWML8 -uqTa3y1CWd4u+aJZH2oZU3gmEd8GvuWnmhsw8iTyq1bzYIga1rQZqh4W5Ok9V+jR -GioT/x3mTIhtuEZ1Cmhne5qM3gWYgM3rC3D4+RnUFeThOC5lHtOYHtIEpg66cs7g 
-QYAn75ghEkyfG7ZvdxAU9Ngn6hckux9tFu3GmeEtdqhVOHaOMaYi60uGSk6uBnTv -P1sUqi70kMrIBWU7TgldKlTqVdReM87Nkb2O5v1xqtoswLWIi65hFWTqt/H65c1H -aEBG1cBqnqBMYuFk8b4TzZbuU9o1UKj0/6N5mpm//BmW65B0htEDP7IYpGF0mt0H -LkU+4ISmuLfPfQeviYio6/yASaFkHpxfK7N8CQvmyAG9U8FHRio2QCGSb2EO+BnT -Bti6L9oMiQbAsCLWTbvBhCVxdncFw1ncq8gkPMXjEEVUsqAo5Kg+903pRHUyHLzS -R6R3C6tTJnNtucJ0zqQMF3K1FHS1m8GrOm+hskJLTHgZLdz6tFTYkXfZBSCwIl7s -plg0wq9CrNC2B8MczWn/j3/h7qSI3wBNqADHMdoiOHECffCeyGEYjW3+0iMoj1m1 -wY0DIym4DDRzk6wsEesxVi8iiCVpYwWnjJAvWYECEO+hWuwCez+eGVkhCT/5g3xW -hPSRhivNuJT05tdR5o+yqONHn1eAQH7Ar3cj+neY5WC0iS5FK9axTqbHXotofD1e -pJX17ZVWsmIIpRvAWGD+LOcfTMZsaB9DJbkrPSWlMW3lC2S5JOq8OgfMNWIDDUN1 -guwpK5Z/lWV1qMMnaWeDVgPH/G0FssECXlCU5+/Ol654h8tm2bRXYAYHPM+OoW67 +N7yz2JV13EmQ7MZPL5wamid5+G1V1gp8FKqMemAC5JDxonS/W9oViMLUcxbfPTDx +FznKdYSVTIQ7vv3ofmDG4MEyV/2C568N2kdtAw+jTfrZFN+IU9CI+W+In/nacirF +02sAcvDMofustnooKNOO7/iyb5+3vRvEt5vSSRQn5WuSQ9sUKjuzoLs/lbf7fyAt +4NeqfI3rYBZXxiUOLITOGXzGNRuFoY+o2uDCfelLAJ8uhiVG6ME3LeJEo1dT5lZ8 +CSJOLPasKg0iG4V7olM4j9FvAfZr48RRsSfUen756Jo2HpI4bad8LKhFYIdNs2Au +WwKLmjpo6QB9hBmRshR04rEXPdrgTqLBExCE08PyaGYnWU8ggWritCeBzDQFj/n4 +sI+NO0Mymuvg98e5RpO52lg3Xnqv9RIK3guLFOmI6aEHC0PS4WwOEQ== -----END RSA PRIVATE KEY----- Binary files /tmp/QjjFQPSKln/mariadb-5.5-5.5.39/extra/yassl/certs/dsa1024.der and /tmp/N_pNtyJsAC/mariadb-5.5-5.5.40/extra/yassl/certs/dsa1024.der differ diff -Nru mariadb-5.5-5.5.39/extra/yassl/certs/dsa1024.pem mariadb-5.5-5.5.40/extra/yassl/certs/dsa1024.pem --- mariadb-5.5-5.5.39/extra/yassl/certs/dsa1024.pem 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/certs/dsa1024.pem 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,12 @@ +-----BEGIN DSA PRIVATE KEY----- +MIIBvAIBAAKBgQC9Ue5KMuCKx+rG4epwxFFDzyoH4ccSwlglXsRdvqswDRK/oQvT +NNNoWiVxTn3kvQ8qDlhWy9KjGTrqr/ttgmh56FFpe6tz4yTgCNyR9D+eGclD7lNf +dPUc4E3SA6efopG6+ymI55bS+9xUFTG402UCrYSKT59zI2HBfuI6dltsxQIVAJHJ +7WDQ+jBn/nmMyCQzdi+0qJx1AoGBAJJacRK36s5yGY1b6qhxWqvpoAC+SfEKylZn 
+YWGYf2PM+Iwo6AgPKEw6BSsX+7Nmc4Gjyr4JWhComKi6onPamO/A2CbMM0DCxb47 +BeLBWfqWAgXVj0CODT4MQos5yugnviR/YpEgbzLxvrXr469lKWsAyB19/gFmGmQW +cCgAwGm6AoGBAJ3LY89yHyvQ/TsQ6zlYbovjbk/ogndsMqPdNUvL4RuPTgJP/caa +DDa0XJ7ak6A7TJ+QheLNwOXoZPYJC4EGFSDAXpYniGhbWIrVTCGe6lmZDfnx40WX +S0kk3m/DHaC03ElLAiybxVGxyqoUfbT3Zv1JwftWMuiqHH5uADhdXuXVAhQ01VXa +Rr8IPem35lKghVKnq/kGQw== +-----END DSA PRIVATE KEY----- Binary files /tmp/QjjFQPSKln/mariadb-5.5-5.5.39/extra/yassl/certs/dsa512.der and /tmp/N_pNtyJsAC/mariadb-5.5-5.5.40/extra/yassl/certs/dsa512.der differ diff -Nru mariadb-5.5-5.5.39/extra/yassl/certs/dsa512.pem mariadb-5.5-5.5.40/extra/yassl/certs/dsa512.pem --- mariadb-5.5-5.5.39/extra/yassl/certs/dsa512.pem 2014-08-03 12:00:41.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/certs/dsa512.pem 1970-01-01 00:00:00.000000000 +0000 @@ -1,8 +0,0 @@ ------BEGIN DSA PRIVATE KEY----- -MIH3AgEAAkEAmSlpgMk8mGhFqYL+Z+uViMW0DNYmRZUZLKAgW37faencww/zYQol -m/IhAWrNqow358pm21b0D3160Ri5Qv0bEQIVAK0lKasKnwkcwa0DIHZ/prfdTQMJ -AkASiJna59ALk5vm7jwhf5yztI2ljOI3gD8X0YFPvfBxtjIIVN2/AeKzdwZkdYoE -1nk5sQIDA8YGdOWQBQoQRhkxAkAEhKAmMXIM6E9dUxdisYDKwBZfwx7qxdmYOPm+ -VlNHaM4IIlccuw13kc9bNu3zJIKQis2QfNt3+Rctc3Pvu7mCAhQjg+e+aqykxwwc -E2V27tjDFY02uA== ------END DSA PRIVATE KEY----- diff -Nru mariadb-5.5-5.5.39/extra/yassl/certs/server-cert.pem mariadb-5.5-5.5.40/extra/yassl/certs/server-cert.pem --- mariadb-5.5-5.5.39/extra/yassl/certs/server-cert.pem 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/certs/server-cert.pem 2014-10-08 13:19:52.000000000 +0000 @@ -1,17 +1,17 @@ Certificate: Data: - Version: 1 (0x0) - Serial Number: 2 (0x2) - Signature Algorithm: sha1WithRSAEncryption - Issuer: C=US, ST=Montana, L=Bozeman, O=Sawtooth, OU=Consulting, CN=www.yassl.com/emailAddress=info@yassl.com + Version: 3 (0x2) + Serial Number: 1 (0x1) + Signature Algorithm: sha1WithRSAEncryption + Issuer: C=US, ST=Montana, L=Bozeman, O=Sawtooth, OU=Consulting, CN=www.wolfssl.com/emailAddress=info@wolfssl.com 
Validity - Not Before: Oct 24 18:27:13 2011 GMT - Not After : Jul 20 18:27:13 2014 GMT - Subject: C=US, ST=Montana, L=Bozeman, O=yaSSL, OU=Support, CN=www.yassl.com/emailAddress=info@yassl.com + Not Before: Jul 11 17:20:14 2014 GMT + Not After : Apr 6 17:20:14 2017 GMT + Subject: C=US, ST=Montana, L=Bozeman, O=wolfSSL, OU=Support, CN=www.wolfssl.com/emailAddress=info@wolfssl.com Subject Public Key Info: Public Key Algorithm: rsaEncryption - RSA Public Key: (2048 bit) - Modulus (2048 bit): + Public-Key: (2048 bit) + Modulus: 00:c0:95:08:e1:57:41:f2:71:6d:b7:d2:45:41:27: 01:65:c6:45:ae:f2:bc:24:30:b8:95:ce:2f:4e:d6: f6:1c:88:bc:7c:9f:fb:a8:67:7f:fe:5c:9c:51:75: @@ -31,59 +31,74 @@ a7:aa:eb:c4:e1:e6:61:83:c5:d2:96:df:d9:d0:4f: ad:d7 Exponent: 65537 (0x10001) + X509v3 extensions: + X509v3 Subject Key Identifier: + B3:11:32:C9:92:98:84:E2:C9:F8:D0:3B:6E:03:42:CA:1F:0E:8E:3C + X509v3 Authority Key Identifier: + keyid:27:8E:67:11:74:C3:26:1D:3F:ED:33:63:B3:A4:D8:1D:30:E5:E8:D5 + DirName:/C=US/ST=Montana/L=Bozeman/O=Sawtooth/OU=Consulting/CN=www.wolfssl.com/emailAddress=info@wolfssl.com + serial:9A:41:47:CD:A1:14:62:8C + + X509v3 Basic Constraints: + CA:TRUE Signature Algorithm: sha1WithRSAEncryption - 71:4e:d3:62:df:cc:4c:f7:cd:b7:6e:52:0b:6c:6e:e0:bd:c2: - 2d:07:d7:c0:b0:6e:43:1e:35:bc:30:01:50:f0:ff:99:23:6c: - 18:1a:41:b6:11:d6:d4:19:61:fd:e4:77:97:1c:39:e1:57:ab: - c5:15:63:77:11:36:5e:74:e2:24:0b:1f:41:78:ad:b7:81:e7: - b4:40:66:80:f0:4b:91:a0:6d:a8:6e:3d:53:d9:8b:ce:2a:e1: - 0b:45:65:87:a1:96:ae:ee:3e:88:d5:12:1f:78:17:ae:2c:c5: - 73:44:d8:dc:f4:af:d8:cc:ae:4c:e1:0c:be:55:a4:99:f7:6e: - 96:c0:c8:45:87:bf:dc:51:57:ff:9e:73:37:6a:18:9c:c3:f9: - 22:7a:f4:b0:52:bd:fc:21:30:f8:c5:ff:1e:87:7d:ad:a2:5a: - 35:f5:22:a8:b4:0a:76:38:e6:76:b0:98:af:1b:ec:8a:0a:43: - 74:d2:85:34:37:84:07:e1:f6:23:b2:29:de:a6:b6:b7:4c:57: - 7e:96:06:cb:a9:16:25:29:3a:03:2d:55:7d:a6:8c:a4:f7:9e: - 81:c9:95:b6:7c:c1:4a:ce:94:66:0c:ca:88:eb:d2:09:f5:5b: - 
19:58:82:df:27:fd:67:95:78:b7:02:06:d5:a7:61:bd:ef:3a: - fc:b2:61:cd + 3d:8c:70:05:5b:62:4b:bf:6c:b6:48:61:01:10:1d:5e:05:ba: + 55:94:2c:ae:59:6f:97:80:5d:6c:86:ec:9a:eb:15:45:44:e4: + 56:f8:75:ca:8a:45:32:f4:c7:e1:fa:f2:98:1c:91:d3:3f:e8: + 0e:c9:1b:fa:e1:79:99:67:0e:0d:6b:8a:ec:1a:2c:59:c4:34: + 04:8d:39:77:cd:b5:e9:60:5b:82:bf:34:ce:ed:c6:4f:3f:b4: + 5c:4d:8a:b4:f4:0a:04:12:a0:56:c1:e1:33:37:a1:54:87:48: + e9:81:c2:0f:8f:6f:d3:52:4c:4c:32:4c:6b:9f:3a:04:8f:77: + 5d:ad:dc:3d:2b:f2:c9:df:3c:60:5d:d8:fc:86:72:7c:3d:d0: + 84:4b:8c:df:26:43:fe:c0:cc:5b:e1:36:b3:3d:32:28:a3:ef: + 0c:20:d6:b1:50:39:d6:67:a9:8b:84:bc:92:34:eb:19:23:e8: + 10:8f:ea:bd:18:8c:93:27:3c:74:75:8e:58:04:fa:2a:74:44: + 7d:fc:4d:39:df:54:17:ba:78:e1:5d:6a:70:d3:7c:a2:80:81: + e6:19:51:91:c3:44:51:ec:bb:88:a9:53:e1:d7:a9:8c:28:f4: + 21:1c:42:51:09:b4:12:6d:a0:d6:25:09:85:c6:2a:0c:af:a7: + 58:e6:52:8b -----BEGIN CERTIFICATE----- -MIIDkDCCAngCAQIwDQYJKoZIhvcNAQEFBQAwgZAxCzAJBgNVBAYTAlVTMRAwDgYD -VQQIEwdNb250YW5hMRAwDgYDVQQHEwdCb3plbWFuMREwDwYDVQQKEwhTYXd0b290 -aDETMBEGA1UECxMKQ29uc3VsdGluZzEWMBQGA1UEAxMNd3d3Lnlhc3NsLmNvbTEd -MBsGCSqGSIb3DQEJARYOaW5mb0B5YXNzbC5jb20wHhcNMTExMDI0MTgyNzEzWhcN -MTQwNzIwMTgyNzEzWjCBijELMAkGA1UEBhMCVVMxEDAOBgNVBAgTB01vbnRhbmEx -EDAOBgNVBAcTB0JvemVtYW4xDjAMBgNVBAoTBXlhU1NMMRAwDgYDVQQLEwdTdXBw -b3J0MRYwFAYDVQQDEw13d3cueWFzc2wuY29tMR0wGwYJKoZIhvcNAQkBFg5pbmZv -QHlhc3NsLmNvbTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMCVCOFX -QfJxbbfSRUEnAWXGRa7yvCQwuJXOL07W9hyIvHyf+6hnf/5cnFF194rKB+c1L4/h -vXvAL3yrZKgX/Mpde7rgIeVyLm8uhtiVc9qsG1O5Xz/XGQ0lT+FjY1GLC2Q/rUO4 -pRxcNLOuAKBjxfZ/C1loeHOmjBipAm2vwxkBLrgQ48bMQLRpo0YzaYduxLsXpvPo -3a1zvHsvIbX9ZlEMvVSz4W1fHLwjc9EJA4kU0hC5ZMMq0KGWSrzh1Bpbx6DAwWN4 -D0Q3MDKWgDIjlaF3uhPSl3PiXSXJag3DOWCktLBpQkIJ6dgIvDMgs1gip6rrxOHm -YYPF0pbf2dBPrdcCAwEAATANBgkqhkiG9w0BAQUFAAOCAQEAcU7TYt/MTPfNt25S -C2xu4L3CLQfXwLBuQx41vDABUPD/mSNsGBpBthHW1Blh/eR3lxw54VerxRVjdxE2 -XnTiJAsfQXitt4HntEBmgPBLkaBtqG49U9mLzirhC0Vlh6GWru4+iNUSH3gXrizF 
-c0TY3PSv2MyuTOEMvlWkmfdulsDIRYe/3FFX/55zN2oYnMP5Inr0sFK9/CEw+MX/ -Hod9raJaNfUiqLQKdjjmdrCYrxvsigpDdNKFNDeEB+H2I7Ip3qa2t0xXfpYGy6kW -JSk6Ay1VfaaMpPeegcmVtnzBSs6UZgzKiOvSCfVbGViC3yf9Z5V4twIG1adhve86 -/LJhzQ== +MIIEnjCCA4agAwIBAgIBATANBgkqhkiG9w0BAQUFADCBlDELMAkGA1UEBhMCVVMx +EDAOBgNVBAgMB01vbnRhbmExEDAOBgNVBAcMB0JvemVtYW4xETAPBgNVBAoMCFNh +d3Rvb3RoMRMwEQYDVQQLDApDb25zdWx0aW5nMRgwFgYDVQQDDA93d3cud29sZnNz +bC5jb20xHzAdBgkqhkiG9w0BCQEWEGluZm9Ad29sZnNzbC5jb20wHhcNMTQwNzEx +MTcyMDE0WhcNMTcwNDA2MTcyMDE0WjCBkDELMAkGA1UEBhMCVVMxEDAOBgNVBAgM +B01vbnRhbmExEDAOBgNVBAcMB0JvemVtYW4xEDAOBgNVBAoMB3dvbGZTU0wxEDAO +BgNVBAsMB1N1cHBvcnQxGDAWBgNVBAMMD3d3dy53b2xmc3NsLmNvbTEfMB0GCSqG +SIb3DQEJARYQaW5mb0B3b2xmc3NsLmNvbTCCASIwDQYJKoZIhvcNAQEBBQADggEP +ADCCAQoCggEBAMCVCOFXQfJxbbfSRUEnAWXGRa7yvCQwuJXOL07W9hyIvHyf+6hn +f/5cnFF194rKB+c1L4/hvXvAL3yrZKgX/Mpde7rgIeVyLm8uhtiVc9qsG1O5Xz/X +GQ0lT+FjY1GLC2Q/rUO4pRxcNLOuAKBjxfZ/C1loeHOmjBipAm2vwxkBLrgQ48bM +QLRpo0YzaYduxLsXpvPo3a1zvHsvIbX9ZlEMvVSz4W1fHLwjc9EJA4kU0hC5ZMMq +0KGWSrzh1Bpbx6DAwWN4D0Q3MDKWgDIjlaF3uhPSl3PiXSXJag3DOWCktLBpQkIJ +6dgIvDMgs1gip6rrxOHmYYPF0pbf2dBPrdcCAwEAAaOB/DCB+TAdBgNVHQ4EFgQU +sxEyyZKYhOLJ+NA7bgNCyh8OjjwwgckGA1UdIwSBwTCBvoAUJ45nEXTDJh0/7TNj +s6TYHTDl6NWhgZqkgZcwgZQxCzAJBgNVBAYTAlVTMRAwDgYDVQQIDAdNb250YW5h +MRAwDgYDVQQHDAdCb3plbWFuMREwDwYDVQQKDAhTYXd0b290aDETMBEGA1UECwwK +Q29uc3VsdGluZzEYMBYGA1UEAwwPd3d3LndvbGZzc2wuY29tMR8wHQYJKoZIhvcN +AQkBFhBpbmZvQHdvbGZzc2wuY29tggkAmkFHzaEUYowwDAYDVR0TBAUwAwEB/zAN +BgkqhkiG9w0BAQUFAAOCAQEAPYxwBVtiS79stkhhARAdXgW6VZQsrllvl4BdbIbs +musVRUTkVvh1yopFMvTH4frymByR0z/oDskb+uF5mWcODWuK7BosWcQ0BI05d821 +6WBbgr80zu3GTz+0XE2KtPQKBBKgVsHhMzehVIdI6YHCD49v01JMTDJMa586BI93 +Xa3cPSvyyd88YF3Y/IZyfD3QhEuM3yZD/sDMW+E2sz0yKKPvDCDWsVA51mepi4S8 +kjTrGSPoEI/qvRiMkyc8dHWOWAT6KnREffxNOd9UF7p44V1qcNN8ooCB5hlRkcNE +Uey7iKlT4depjCj0IRxCUQm0Em2g1iUJhcYqDK+nWOZSiw== -----END CERTIFICATE----- Certificate: Data: Version: 3 (0x2) Serial Number: - e9:d0:a7:5f:79:25:f4:3c - Signature Algorithm: sha1WithRSAEncryption - Issuer: 
C=US, ST=Montana, L=Bozeman, O=Sawtooth, OU=Consulting, CN=www.yassl.com/emailAddress=info@yassl.com + 9a:41:47:cd:a1:14:62:8c + Signature Algorithm: sha1WithRSAEncryption + Issuer: C=US, ST=Montana, L=Bozeman, O=Sawtooth, OU=Consulting, CN=www.wolfssl.com/emailAddress=info@wolfssl.com Validity - Not Before: Oct 24 18:18:15 2011 GMT - Not After : Jul 20 18:18:15 2014 GMT - Subject: C=US, ST=Montana, L=Bozeman, O=Sawtooth, OU=Consulting, CN=www.yassl.com/emailAddress=info@yassl.com + Not Before: Jul 11 03:20:08 2014 GMT + Not After : Apr 6 03:20:08 2017 GMT + Subject: C=US, ST=Montana, L=Bozeman, O=Sawtooth, OU=Consulting, CN=www.wolfssl.com/emailAddress=info@wolfssl.com Subject Public Key Info: Public Key Algorithm: rsaEncryption - RSA Public Key: (2048 bit) - Modulus (2048 bit): + Public-Key: (2048 bit) + Modulus: 00:bf:0c:ca:2d:14:b2:1e:84:42:5b:cd:38:1f:4a: f2:4d:75:10:f1:b6:35:9f:df:ca:7d:03:98:d3:ac: de:03:66:ee:2a:f1:d8:b0:7d:6e:07:54:0b:10:98: @@ -104,54 +119,55 @@ 36:79 Exponent: 65537 (0x10001) X509v3 extensions: - X509v3 Subject Key Identifier: + X509v3 Subject Key Identifier: 27:8E:67:11:74:C3:26:1D:3F:ED:33:63:B3:A4:D8:1D:30:E5:E8:D5 - X509v3 Authority Key Identifier: + X509v3 Authority Key Identifier: keyid:27:8E:67:11:74:C3:26:1D:3F:ED:33:63:B3:A4:D8:1D:30:E5:E8:D5 - DirName:/C=US/ST=Montana/L=Bozeman/O=Sawtooth/OU=Consulting/CN=www.yassl.com/emailAddress=info@yassl.com - serial:E9:D0:A7:5F:79:25:F4:3C + DirName:/C=US/ST=Montana/L=Bozeman/O=Sawtooth/OU=Consulting/CN=www.wolfssl.com/emailAddress=info@wolfssl.com + serial:9A:41:47:CD:A1:14:62:8C - X509v3 Basic Constraints: + X509v3 Basic Constraints: CA:TRUE Signature Algorithm: sha1WithRSAEncryption - 5f:86:14:f4:51:8b:bc:a5:4e:30:da:5e:ac:9a:f8:6c:d9:26: - 4b:93:f9:e3:1c:89:6f:9e:ee:b3:9d:77:3e:89:20:76:a3:e6: - e8:86:15:21:db:e2:33:b2:34:d5:d0:9f:f3:c1:a4:87:92:5c: - f9:d1:ff:30:2f:8e:03:bc:b3:3c:0c:32:a3:90:5f:1a:90:1e: - af:9d:f3:9e:d7:07:02:a9:7d:27:66:63:2f:af:18:d7:ac:18: - 
98:8c:83:8f:38:f3:0b:ac:36:10:75:fb:ca:76:13:50:5b:02: - 8f:73:bf:e3:a0:ee:83:52:25:54:ce:26:ce:9c:bd:2f:79:ab: - 1b:60:b8:92:f1:03:c0:fc:3b:08:d9:c0:ad:d5:72:08:25:80: - 61:2d:dc:9f:a7:83:62:07:47:e0:07:4c:4b:07:30:04:a9:87: - 1c:55:7f:07:12:d0:cb:42:5d:cb:cf:66:01:1a:17:ee:f9:0f: - 60:b7:db:6f:68:e5:4e:41:62:6e:d3:6f:60:4f:4b:27:de:cf: - 18:07:f1:13:5d:cb:3f:a9:25:44:da:52:5c:c8:04:e1:56:12: - f5:2a:90:4e:d1:e2:af:01:b5:23:a1:ec:31:da:7b:63:69:c4: - b8:f3:e7:ce:a1:3d:c0:db:6d:f3:b2:d9:46:c8:9f:c3:b8:70: - 5a:1f:7f:ca + 79:78:0c:6d:79:88:91:fc:3a:15:9c:f9:27:62:16:f0:21:1f: + fa:7d:3f:a2:a8:32:1e:f7:f2:11:a9:74:16:74:5e:dc:18:ca: + f0:00:2a:d5:03:06:26:30:f1:47:83:fd:32:6e:fc:ed:aa:4f: + 7d:0c:63:60:31:2f:7c:0c:bf:99:66:87:ed:27:08:86:2d:c4: + 33:e8:98:23:1c:fe:a3:db:9a:49:4e:fc:5a:42:ef:71:c3:3f: + ad:dc:4a:a8:d2:a3:1d:09:ac:33:04:7b:86:f3:6d:00:59:a3: + c8:db:d1:3c:73:f6:0e:90:d6:5e:49:e0:bf:36:63:08:7b:a5: + b0:8f:35:3b:56:8c:11:35:49:04:1c:d0:e7:ba:56:c5:0d:0a: + 91:8d:d6:b7:62:19:34:12:52:4c:d2:6d:7c:c5:3d:0f:18:81: + 54:cb:f6:3c:e5:84:d2:d6:71:15:ec:1a:34:0b:f0:a0:21:74: + 43:c8:a3:b0:df:d9:67:da:e2:ad:93:48:f8:a0:87:bd:79:14: + e0:62:0a:df:f7:31:2e:31:7b:d1:32:2a:0a:fe:5d:14:ef:d9: + 64:50:e4:83:91:bc:70:0d:d7:88:8d:d9:bc:84:0e:c5:1a:b3: + bf:64:02:4e:51:91:a4:a2:85:15:83:af:f0:5b:9b:28:49:5f: + b2:b3:03:d8 -----BEGIN CERTIFICATE----- -MIIEnjCCA4agAwIBAgIJAOnQp195JfQ8MA0GCSqGSIb3DQEBBQUAMIGQMQswCQYD -VQQGEwJVUzEQMA4GA1UECBMHTW9udGFuYTEQMA4GA1UEBxMHQm96ZW1hbjERMA8G -A1UEChMIU2F3dG9vdGgxEzARBgNVBAsTCkNvbnN1bHRpbmcxFjAUBgNVBAMTDXd3 -dy55YXNzbC5jb20xHTAbBgkqhkiG9w0BCQEWDmluZm9AeWFzc2wuY29tMB4XDTEx -MTAyNDE4MTgxNVoXDTE0MDcyMDE4MTgxNVowgZAxCzAJBgNVBAYTAlVTMRAwDgYD -VQQIEwdNb250YW5hMRAwDgYDVQQHEwdCb3plbWFuMREwDwYDVQQKEwhTYXd0b290 -aDETMBEGA1UECxMKQ29uc3VsdGluZzEWMBQGA1UEAxMNd3d3Lnlhc3NsLmNvbTEd -MBsGCSqGSIb3DQEJARYOaW5mb0B5YXNzbC5jb20wggEiMA0GCSqGSIb3DQEBAQUA -A4IBDwAwggEKAoIBAQC/DMotFLIehEJbzTgfSvJNdRDxtjWf38p9A5jTrN4DZu4q 
-8diwfW4HVAsQmCFNgMsSIOfMT95FfclydzLqypC7aVIQAy+o85XF8YtiVhvvZ2+k -EEGVrQqb46XAsNJwdlAwW6joCCx87aeieo04KRysx+3yfJWwlYJ9SVw4zXcl772A -dVOUPD3KY1ufFbXTHRMvGdE823Y6zLh9yeXC19pAb9gh3HMbQi1TnP4a/H2rejY/ -mN6EfAVnzmoUOIep8Yy1aMtof3EgK/WgY/VWL6Mm0rdvsVoX1ziZCP6TWG/+wxNJ -CBYLp01nAFIxZyNOmO1RRR25BNkL7Ngos0u97TZ5AgMBAAGjgfgwgfUwHQYDVR0O -BBYEFCeOZxF0wyYdP+0zY7Ok2B0w5ejVMIHFBgNVHSMEgb0wgbqAFCeOZxF0wyYd -P+0zY7Ok2B0w5ejVoYGWpIGTMIGQMQswCQYDVQQGEwJVUzEQMA4GA1UECBMHTW9u -dGFuYTEQMA4GA1UEBxMHQm96ZW1hbjERMA8GA1UEChMIU2F3dG9vdGgxEzARBgNV -BAsTCkNvbnN1bHRpbmcxFjAUBgNVBAMTDXd3dy55YXNzbC5jb20xHTAbBgkqhkiG -9w0BCQEWDmluZm9AeWFzc2wuY29tggkA6dCnX3kl9DwwDAYDVR0TBAUwAwEB/zAN -BgkqhkiG9w0BAQUFAAOCAQEAX4YU9FGLvKVOMNperJr4bNkmS5P54xyJb57us513 -PokgdqPm6IYVIdviM7I01dCf88Gkh5Jc+dH/MC+OA7yzPAwyo5BfGpAer53zntcH -Aql9J2ZjL68Y16wYmIyDjzjzC6w2EHX7ynYTUFsCj3O/46Dug1IlVM4mzpy9L3mr -G2C4kvEDwPw7CNnArdVyCCWAYS3cn6eDYgdH4AdMSwcwBKmHHFV/BxLQy0Jdy89m -ARoX7vkPYLfbb2jlTkFibtNvYE9LJ97PGAfxE13LP6klRNpSXMgE4VYS9SqQTtHi -rwG1I6HsMdp7Y2nEuPPnzqE9wNtt87LZRsifw7hwWh9/yg== +MIIEqjCCA5KgAwIBAgIJAJpBR82hFGKMMA0GCSqGSIb3DQEBBQUAMIGUMQswCQYD +VQQGEwJVUzEQMA4GA1UECAwHTW9udGFuYTEQMA4GA1UEBwwHQm96ZW1hbjERMA8G +A1UECgwIU2F3dG9vdGgxEzARBgNVBAsMCkNvbnN1bHRpbmcxGDAWBgNVBAMMD3d3 +dy53b2xmc3NsLmNvbTEfMB0GCSqGSIb3DQEJARYQaW5mb0B3b2xmc3NsLmNvbTAe +Fw0xNDA3MTEwMzIwMDhaFw0xNzA0MDYwMzIwMDhaMIGUMQswCQYDVQQGEwJVUzEQ +MA4GA1UECAwHTW9udGFuYTEQMA4GA1UEBwwHQm96ZW1hbjERMA8GA1UECgwIU2F3 +dG9vdGgxEzARBgNVBAsMCkNvbnN1bHRpbmcxGDAWBgNVBAMMD3d3dy53b2xmc3Ns +LmNvbTEfMB0GCSqGSIb3DQEJARYQaW5mb0B3b2xmc3NsLmNvbTCCASIwDQYJKoZI +hvcNAQEBBQADggEPADCCAQoCggEBAL8Myi0Ush6EQlvNOB9K8k11EPG2NZ/fyn0D +mNOs3gNm7irx2LB9bgdUCxCYIU2AyxIg58xP3kV9yXJ3MurKkLtpUhADL6jzlcXx +i2JWG+9nb6QQQZWtCpvjpcCw0nB2UDBbqOgILHztp6J6jTgpHKzH7fJ8lbCVgn1J +XDjNdyXvvYB1U5Q8PcpjW58VtdMdEy8Z0TzbdjrMuH3J5cLX2kBv2CHccxtCLVOc +/hr8fat6Nj+Y3oR8BWfOahQ4h6nxjLVoy2h/cSAr9aBj9VYvoybSt2+xWhfXOJkI +/pNYb/7DE0kIFgunTWcAUjFnI06Y7VFFHbkE2Qvs2CizS73tNnkCAwEAAaOB/DCB 
++TAdBgNVHQ4EFgQUJ45nEXTDJh0/7TNjs6TYHTDl6NUwgckGA1UdIwSBwTCBvoAU +J45nEXTDJh0/7TNjs6TYHTDl6NWhgZqkgZcwgZQxCzAJBgNVBAYTAlVTMRAwDgYD +VQQIDAdNb250YW5hMRAwDgYDVQQHDAdCb3plbWFuMREwDwYDVQQKDAhTYXd0b290 +aDETMBEGA1UECwwKQ29uc3VsdGluZzEYMBYGA1UEAwwPd3d3LndvbGZzc2wuY29t +MR8wHQYJKoZIhvcNAQkBFhBpbmZvQHdvbGZzc2wuY29tggkAmkFHzaEUYowwDAYD +VR0TBAUwAwEB/zANBgkqhkiG9w0BAQUFAAOCAQEAeXgMbXmIkfw6FZz5J2IW8CEf ++n0/oqgyHvfyEal0FnRe3BjK8AAq1QMGJjDxR4P9Mm787apPfQxjYDEvfAy/mWaH +7ScIhi3EM+iYIxz+o9uaSU78WkLvccM/rdxKqNKjHQmsMwR7hvNtAFmjyNvRPHP2 +DpDWXkngvzZjCHulsI81O1aMETVJBBzQ57pWxQ0KkY3Wt2IZNBJSTNJtfMU9DxiB +VMv2POWE0tZxFewaNAvwoCF0Q8ijsN/ZZ9rirZNI+KCHvXkU4GIK3/cxLjF70TIq +Cv5dFO/ZZFDkg5G8cA3XiI3ZvIQOxRqzv2QCTlGRpKKFFYOv8FubKElfsrMD2A== +-----END CERTIFICATE----- diff -Nru mariadb-5.5-5.5.39/extra/yassl/certs/server-keyEnc.pem mariadb-5.5-5.5.40/extra/yassl/certs/server-keyEnc.pem --- mariadb-5.5-5.5.39/extra/yassl/certs/server-keyEnc.pem 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/certs/server-keyEnc.pem 2014-10-08 13:19:52.000000000 +0000 @@ -1,30 +1,30 @@ -----BEGIN RSA PRIVATE KEY----- Proc-Type: 4,ENCRYPTED -DEK-Info: DES-CBC,08132C1FFF5BC8CC +DEK-Info: DES-CBC,136C7D8A69656668 -W+krChiFlNU+koE0Bep+U45OG4V4IFZv67ex6yJHgcsPd+HQ692A/h+5dYc8rdlW -2LDgSODHHIMTt6RVJDxXxXs3qFmJQbnVXeXxV209X8EfaRarh+yiMKeUP6K8hIvj -+IYRma6iKOs+d4KlcZZudGs2f/x8nhxXbmQtrLhGd4h91mnJk2sKmiz7UkUy6Qng -gOHnT2dfF4Qk2ZYsjisRHjpWZiqh40GO1LuTgUjZoH+LGhMwMwOAE6+ss5xa+yE+ -Xd9Yljm0/QW68JILkCJQjLDRvPGxDJyvYq6TT/kSElsRlI/AuRrZH1YVD3hn/xjx -tDoEB+JEbH6iu9ne2srxnGSKLzoUbb4XPaCjLIW9BJf7oANmmFQpZZQiRTyIUVWi -IE5hJciqF7ra7IwfZAW/PeWGXpzNOVN9QAvyAMsmvUCzJdxd1ySUatjhZ+mSFYGk -rDVtyrgt4ZQgV0EdJV0Yn1ZWMOk1qEKXT0JAnI+9S6Y+QEdwXmdz3xlVuq61Jvub -iJUVepnD/1QeFfWy8JwlscWpWFrkr569f3SNG+FGb6fufnUP7K6sX3urj+pj1QET -f9NmmvLBsVsbj1Egg3wnxbVHIUPky64LY04wtNJaAwhuG6mKCvaClKYMTmTCyrzP -aRwghhMQ3yHUbo2A1ZppYsXXg8lX30eW+5O77N9Q3xfP0phODHXsnXhBH09ml1JQ -MmiCaL5n6sIVcjtFmN/kyaEuz/1VrBSaDCPeW88n61UXUidXrGOZN/2c/2xFir8B 
-2rdE82lQLl07SJxzQQ6aJVvrc5tnbV/ENDySS5dG6Yl/w89/nuu0RFHmAweKqfGC -8m0XOkmonIk6h3YT7XrkE0b/2jkf1mMaMKrGGfRmxqNt1nGxMCJHAO/Sn9v+I9rU -W7HCZ04RTnRp1BXcqDxdwlveDKJRVfiKOSSEOpEXXlexS5R1vikmxrCwK5YVUTkT -3tgahVtHJkFHnBHBzXyHUDwWahxZaU9TO43z0JFxs0zINWUWppldf0oyWjP1FSrI -a9tXBs7aoykUY9Av9K0p4UJJU005qzD/tuegZFX34wRETJO0BJnlZHTTZSqLSVX+ -KZg4nPq8Xii1VHta3tgw7up2z1tpepsBerTsRQ1+IDpxLaIxgt9am0hXVTiMLex/ -DD9UvQC/eBUmpmWraK/Mqeq/UrPl+lmeoXsG6LWIvEp9d19rJ/3OhIJf2pDh9dC8 -NzJoNP9qOrDajAwzeeF5dbQxCaG+X8am9s4wryC0p+NrQ0tzv8efey0zBodDIOgo -F1G7+ADgHy+V565q8sdL52xx0xB9Ty5p9IOfOUbxa3K65TJf/I/QAQjl4LyTbkfr -kzpYAG2uF55EB3Eq3aMrj47pzZy0ELXXN2qYJ9Oelgl+h6MzYbmd+Wm+A2Cofv3u -7ANAyjAYN7/Lo3lTFAt7sXAXGKnqw62JNSSMkIqZVrG5dn7Jxj5AJCVyYxTrm6Y+ -DDcblX47XrWxVoVJN/dLJZ8FzWs4o/8w9Yn8U54Ci7F0g+j2f+OpDy9PGFYT9pKw -xWG8chkYE6QPilEYvdi26ZnZ3u236q9PMtyRP87NmBN2sLkj/rbBTzBxWIaGS+Mt +jvNTyPaztxPIoAzbdmZnD0Zw2+60tMxNc0GMHNmeOyG25aHP/dT+TWiKFpFVkkkY +uoCIhYUyw7gmpw+CnRJwWd+ans4nrvAjwy5oWJvarvsyUpjqvnPoIlAqd+d4TDKN +eESzcI76+gHdisAtCrQD+fGqgTZhli5TgDbnpasL/QnY2qDlutvakkVw7gPXe156 +2Phy8WN+efr65J6wt3K/dj7Datl9u4JeHQK81gYyWBVX+EagEjPGDzkFQCj9Z0q7 +8K3iB5GW1JAqJS0IfZPB40AnSTF/n1TL1SN3qfU3l7hTGNrx9o7580bgDEoAR7pI +F8eZlS15KHtZmh11AnU1KTKZ6kmgnNqeMTGMN6N0ct2wMKW1dV87eTDlF0oiR2ol +XwtFgKmrIjfpmzkdWjbJmWnGMjD56KdiFZga/ZyKMsPrVoYLgfJEpn36iQspfygx +HCGNTf0PjIsjEWU0WyQiF86t+c45W3wNFsv/AxVyfMl+su02yrd6u2ecuQDir3Cs +b2k8IKtQgVe/NIpEWLKuiHG5oedIPPQyDYK5uq+gHxCGeOoKnWlsWFEHZRiza4X5 +tbgTrJB8Sw0ENWrvVGGmQZN4pSImlsMwzQ2qik5CQ00N1b3+56/obn0z75I3bUSb +tC5g8DRjl6oclAenNgh/MYMT287y5W2dD4npxHcekX4O3J2CDXNfg4vV2j5GRxtg +LVJdYE2p7bpYePCDHrYng8b9ubBprx0CrEnkIvvtUjzNPf6VDL0+MBKl+XgR2/nz +iRqTuZnlGGOyM+KYDwXpgwfs/HfvFGksxTAlO/40GkGh+WGPaIoNyCK0SgQKhyb4 +JIkR0vd2/yLg3lWMJrGwh7A0Gm07Z/781oURP3uWd+PaCOgGcd5ipcAjcEyuxNly +AthipWqmQWUcbf6Z2N9j3OA22Hv2Uzk8HSfi9VOZtL9svdEEZ0NnOekJgnc6stQp +bXiknlK/T5WdrWxSyCfgUq68Vf6DFfIRAVuFdJ3WHT2wVXHrDfft6D+Ne/XCxPoE +8zGmkyusaph33UHQ1oNyUbLbwcDCDSmOo8gYoedD3IwxtMA3wJRugomqosItwV8X 
+vkgmcy8eSE/+gZUxJEN2gnLcfKFhCkC80J6oFhmoDD6vuUnPHcFdKZgVPw2rzPk5 +Vb1kX+gpORplYmKpq1vz/ujscL4T0TmYLz02hkIS4edpW55ncTTv7JWefpRiTB1J +RB3td3me4htqR+YIDWJ+emrOmqsCG2WvpAS+MTw2mj1jYk9LL/ZYobTjSCEWmuwT +yVK6m303irR7HQDauxhslRFgoK21w63viOyj5NKIU1gQtaAANGDxcgORC1XLjjgt +oNutSQA+7P42vfHSHK4cnTBXl6V32H/GyVpdHQOZqSrqIjgLmUZodSmRPROxosZF +a46B1O7m/rJFxkiKW4vod+/WqjoE0Hhfrb8rRrkRjzGeCqqSSnQ3vrunVkvF8hlA +b6FOv4ZBJL4piC1GKH+rscqke9NEiDqXN8C3iYz86jbck/Ha21yUS8T3X7N52sg+ +B3AmOGnLK6BebYeto9vZxQjacChJZSixSxLV+l9/nVQ0+mW42azHdzk0ru59TGAj -----END RSA PRIVATE KEY----- diff -Nru mariadb-5.5-5.5.39/extra/yassl/include/buffer.hpp mariadb-5.5-5.5.40/extra/yassl/include/buffer.hpp --- mariadb-5.5-5.5.39/extra/yassl/include/buffer.hpp 2014-08-03 12:00:37.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/include/buffer.hpp 2014-10-08 13:19:52.000000000 +0000 @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -48,7 +48,11 @@ struct NoCheck { - void check(uint, uint); + int check(uint, uint); +}; + +struct Check { + int check(uint, uint); }; /* input_buffer operates like a smart c style array with a checking option, @@ -60,11 +64,13 @@ * write to the buffer bulk wise and have the correct size */ -class input_buffer : public NoCheck { +class input_buffer : public Check { uint size_; // number of elements in buffer uint current_; // current offset position in buffer byte* buffer_; // storage for buffer byte* end_; // end of storage marker + int error_; // error number + byte zero_; // for returning const reference to zero byte public: input_buffer(); @@ -93,6 +99,10 @@ uint get_remaining() const; + int get_error() const; + + void set_error(); + void set_current(uint i); // read only access through [], advance current @@ -103,7 +113,7 @@ bool eof(); // peek ahead - byte peek() const; + byte peek(); // write function, should use at/near construction void assign(const byte* t, uint s); diff -Nru mariadb-5.5-5.5.39/extra/yassl/include/openssl/ssl.h mariadb-5.5-5.5.40/extra/yassl/include/openssl/ssl.h --- mariadb-5.5-5.5.39/extra/yassl/include/openssl/ssl.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/include/openssl/ssl.h 2014-10-08 13:19:52.000000000 +0000 @@ -35,7 +35,7 @@ #include "rsa.h" -#define YASSL_VERSION "2.3.0" +#define YASSL_VERSION "2.3.4" #if defined(__cplusplus) diff -Nru mariadb-5.5-5.5.39/extra/yassl/README mariadb-5.5-5.5.40/extra/yassl/README --- mariadb-5.5-5.5.39/extra/yassl/README 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/README 2014-10-08 13:19:52.000000000 +0000 @@ -12,15 +12,31 @@ *** end Note *** -yaSSL Release notes, version 2.3.0 (12/5/2013) +yaSSL Release notes, version 2.3.4 (8/15/2014) - This release of yaSSL updates asm for newer GCC versions. 
+ This release of yaSSL adds checking to the input_buffer class itself. See normal build instructions below under 1.0.6. See libcurl build instructions below under 1.3.0 and note in 1.5.8. -*****************yaSSL Release notes, version 2.2.3b (4/23/2013) +yaSSL Release notes, version 2.3.2 (7/25/2014) + + This release of yaSSL updates test certs. + +See normal build instructions below under 1.0.6. +See libcurl build instructions below under 1.3.0 and note in 1.5.8. + + +*****************yaSSL Release notes, version 2.3.0 (12/5/2013) + + This release of yaSSL updates asm for newer GCC versions. + +See normal build instructions below under 1.0.6. +See libcurl build instructions below under 1.3.0 and note in 1.5.8. + + +*****************yaSSL Release notes, version 2.2.3 (4/23/2013) This release of yaSSL updates the test certificates as they were expired diff -Nru mariadb-5.5-5.5.39/extra/yassl/src/buffer.cpp mariadb-5.5-5.5.40/extra/yassl/src/buffer.cpp --- mariadb-5.5-5.5.39/extra/yassl/src/buffer.cpp 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/src/buffer.cpp 2014-10-08 13:19:52.000000000 +0000 @@ -1,5 +1,5 @@ /* - Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,8 +35,19 @@ -void NoCheck::check(uint, uint) +/* return 0 on check success, always true for NoCheck policy */ +int NoCheck::check(uint, uint) { + return 0; +} + +/* return 0 on check success */ +int Check::check(uint i, uint max) +{ + if (i < max) + return 0; + + return -1; } @@ -51,18 +62,20 @@ input_buffer::input_buffer() - : size_(0), current_(0), buffer_(0), end_(0) + : size_(0), current_(0), buffer_(0), end_(0), error_(0), zero_(0) {} input_buffer::input_buffer(uint s) - : size_(0), current_(0), buffer_(NEW_YS byte[s]), end_(buffer_ + s) + : size_(0), current_(0), buffer_(NEW_YS byte[s]), end_(buffer_ + s), + error_(0), zero_(0) {} // with assign input_buffer::input_buffer(uint s, const byte* t, uint len) - : size_(0), current_(0), buffer_(NEW_YS byte[s]), end_(buffer_ + s) + : size_(0), current_(0), buffer_(NEW_YS byte[s]), end_(buffer_ + s), + error_(0), zero_(0) { assign(t, len); } @@ -77,8 +90,10 @@ // users can pass defualt zero length buffer and then allocate void input_buffer::allocate(uint s) { - buffer_ = NEW_YS byte[s]; - end_ = buffer_ + s; + if (error_ == 0) { + buffer_ = NEW_YS byte[s]; + end_ = buffer_ + s; + } } @@ -93,40 +108,67 @@ // if you know the size before the write use assign() void input_buffer::add_size(uint i) { - check(size_ + i-1, get_capacity()); - size_ += i; + if (error_ == 0 && check(size_ + i-1, get_capacity()) == 0) + size_ += i; + else + error_ = -1; } uint input_buffer::get_capacity() const { - return (uint) (end_ - buffer_); + if (error_ == 0) + return end_ - buffer_; + + return 0; } uint input_buffer::get_current() const { - return current_; + if (error_ == 0) + return current_; + + return 0; } uint input_buffer::get_size() const { - return size_; + if (error_ == 0) + return size_; + + return 0; } uint input_buffer::get_remaining() const { - return size_ - current_; + if (error_ 
== 0) + return size_ - current_; + + return 0; +} + + +int input_buffer::get_error() const +{ + return error_; +} + + +void input_buffer::set_error() +{ + error_ = -1; } void input_buffer::set_current(uint i) { - if (i) - check(i - 1, size_); - current_ = i; + if (error_ == 0 && i && check(i - 1, size_) == 0) + current_ = i; + else + error_ = -1; } @@ -134,40 +176,59 @@ // user passes in AUTO index for ease of use const byte& input_buffer::operator[](uint i) { - check(current_, size_); - return buffer_[current_++]; + if (error_ == 0 && check(current_, size_) == 0) + return buffer_[current_++]; + + error_ = -1; + return zero_; } // end of input test bool input_buffer::eof() { + if (error_ != 0) + return true; + return current_ >= size_; } // peek ahead -byte input_buffer::peek() const +byte input_buffer::peek() { - return buffer_[current_]; + if (error_ == 0 && check(current_, size_) == 0) + return buffer_[current_]; + + error_ = -1; + return 0; } // write function, should use at/near construction void input_buffer::assign(const byte* t, uint s) { - check(current_, get_capacity()); - add_size(s); - memcpy(&buffer_[current_], t, s); + if (t && error_ == 0 && check(current_, get_capacity()) == 0) { + add_size(s); + if (error_ == 0) { + memcpy(&buffer_[current_], t, s); + return; // success + } + } + + error_ = -1; } // use read to query input, adjusts current void input_buffer::read(byte* dst, uint length) { - check(current_ + length - 1, size_); - memcpy(dst, &buffer_[current_], length); - current_ += length; + if (dst && error_ == 0 && check(current_ + length - 1, size_) == 0) { + memcpy(dst, &buffer_[current_], length); + current_ += length; + } else { + error_ = -1; + } } diff -Nru mariadb-5.5-5.5.39/extra/yassl/src/handshake.cpp mariadb-5.5-5.5.40/extra/yassl/src/handshake.cpp --- mariadb-5.5-5.5.39/extra/yassl/src/handshake.cpp 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/src/handshake.cpp 2014-10-08 13:19:52.000000000 +0000 @@ -522,7 
+522,7 @@ // some clients still send sslv2 client hello void ProcessOldClientHello(input_buffer& input, SSL& ssl) { - if (input.get_remaining() < 2) { + if (input.get_error() || input.get_remaining() < 2) { ssl.SetError(bad_input); return; } @@ -549,20 +549,24 @@ byte len[2]; - input.read(len, sizeof(len)); + len[0] = input[AUTO]; + len[1] = input[AUTO]; ato16(len, ch.suite_len_); - input.read(len, sizeof(len)); + len[0] = input[AUTO]; + len[1] = input[AUTO]; uint16 sessionLen; ato16(len, sessionLen); ch.id_len_ = sessionLen; - input.read(len, sizeof(len)); + len[0] = input[AUTO]; + len[1] = input[AUTO]; uint16 randomLen; ato16(len, randomLen); - if (ch.suite_len_ > MAX_SUITE_SZ || sessionLen > ID_LEN || - randomLen > RAN_LEN) { + if (input.get_error() || ch.suite_len_ > MAX_SUITE_SZ || + ch.suite_len_ > input.get_remaining() || + sessionLen > ID_LEN || randomLen > RAN_LEN) { ssl.SetError(bad_input); return; } @@ -580,13 +584,12 @@ ch.suite_len_ = j; if (ch.id_len_) - input.read(ch.session_id_, ch.id_len_); + input.read(ch.session_id_, ch.id_len_); // id_len_ from sessionLen if (randomLen < RAN_LEN) memset(ch.random_, 0, RAN_LEN - randomLen); input.read(&ch.random_[RAN_LEN - randomLen], randomLen); - ch.Process(input, ssl); } @@ -787,6 +790,9 @@ ssl.verifyState(hdr); } + if (ssl.GetError()) + return 0; + // make sure we have enough input in buffer to process this record if (needHdr || hdr.length_ > buffer.get_remaining()) { // put header in front for next time processing @@ -799,6 +805,9 @@ while (buffer.get_current() < hdr.length_ + RECORD_HEADER + offset) { // each message in record, can be more than 1 if not encrypted + if (ssl.GetError()) + return 0; + if (ssl.getSecurity().get_parms().pending_ == false) { // cipher on // sanity check for malicious/corrupted/illegal input if (buffer.get_remaining() < hdr.length_) { diff -Nru mariadb-5.5-5.5.39/extra/yassl/src/yassl_imp.cpp mariadb-5.5-5.5.40/extra/yassl/src/yassl_imp.cpp --- 
mariadb-5.5-5.5.39/extra/yassl/src/yassl_imp.cpp 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/src/yassl_imp.cpp 2014-10-08 13:19:52.000000000 +0000 @@ -220,16 +220,26 @@ // read PreMaster secret and decrypt, server side void EncryptedPreMasterSecret::read(SSL& ssl, input_buffer& input) { + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } + const CertManager& cert = ssl.getCrypto().get_certManager(); RSA rsa(cert.get_privateKey(), cert.get_privateKeyLength(), false); uint16 cipherLen = rsa.get_cipherLength(); if (ssl.isTLS()) { byte len[2]; - input.read(len, sizeof(len)); + len[0] = input[AUTO]; + len[1] = input[AUTO]; ato16(len, cipherLen); } alloc(cipherLen); input.read(secret_, length_); + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } opaque preMasterSecret[SECRET_LEN]; rsa.decrypt(preMasterSecret, secret_, length_, @@ -277,6 +287,11 @@ // read client's public key, server side void ClientDiffieHellmanPublic::read(SSL& ssl, input_buffer& input) { + if (input.get_error() || input.get_remaining() < (uint)LENGTH_SZ) { + ssl.SetError(bad_input); + return; + } + DiffieHellman& dh = ssl.useCrypto().use_dh(); uint16 keyLength; @@ -287,6 +302,10 @@ alloc(keyLength); input.read(Yc_, keyLength); + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } dh.makeAgreement(Yc_, keyLength); // because of encoding, first byte might be 0, don't use for preMaster @@ -331,6 +350,10 @@ // read server's p, g, public key and sig, client side void DH_Server::read(SSL& ssl, input_buffer& input) { + if (input.get_error() || input.get_remaining() < (uint)LENGTH_SZ) { + ssl.SetError(bad_input); + return; + } uint16 length, messageTotal = 6; // pSz + gSz + pubSz byte tmp[2]; @@ -341,6 +364,10 @@ messageTotal += length; input.read(parms_.alloc_p(length), length); + if (input.get_error() || input.get_remaining() < (uint)LENGTH_SZ) { + ssl.SetError(bad_input); + return; + } // g tmp[0] = input[AUTO]; @@ -349,6 +376,10 @@ 
messageTotal += length; input.read(parms_.alloc_g(length), length); + if (input.get_error() || input.get_remaining() < (uint)LENGTH_SZ) { + ssl.SetError(bad_input); + return; + } // pub tmp[0] = input[AUTO]; @@ -357,12 +388,20 @@ messageTotal += length; input.read(parms_.alloc_pub(length), length); + if (input.get_error() || input.get_remaining() < (uint)LENGTH_SZ) { + ssl.SetError(bad_input); + return; + } // save message for hash verify input_buffer message(messageTotal); input.set_current(input.get_current() - messageTotal); input.read(message.get_buffer(), messageTotal); message.add_size(messageTotal); + if (input.get_error() || input.get_remaining() < (uint)LENGTH_SZ) { + ssl.SetError(bad_input); + return; + } // signature tmp[0] = input[AUTO]; @@ -371,6 +410,10 @@ signature_ = NEW_YS byte[length]; input.read(signature_, length); + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } // verify signature byte hash[FINISHED_SZ]; @@ -645,6 +688,10 @@ { ssl.verifyState(*this); if (ssl.GetError()) return; + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } const HandShakeFactory& hsf = ssl.getFactory().getHandShake(); mySTL::auto_ptr hs(hsf.CreateObject(type_)); if (!hs.get()) { @@ -810,8 +857,13 @@ // CipherSpec processing handler -void ChangeCipherSpec::Process(input_buffer&, SSL& ssl) +void ChangeCipherSpec::Process(input_buffer& input, SSL& ssl) { + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } + ssl.useSecurity().use_parms().pending_ = false; if (ssl.getSecurity().get_resuming()) { if (ssl.getSecurity().get_parms().entity_ == client_end) @@ -873,6 +925,11 @@ // Alert processing handler void Alert::Process(input_buffer& input, SSL& ssl) { + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } + if (ssl.getSecurity().get_parms().pending_ == false) { // encrypted alert int aSz = get_length(); // alert size already read on input opaque verify[SHA_LEN]; @@ -890,12 +947,19 @@ if 
(ssl.getSecurity().get_parms().cipher_type_ == block) { int ivExtra = 0; + opaque fill; if (ssl.isTLSv1_1()) ivExtra = ssl.getCrypto().get_cipher().get_blockSize(); int padSz = ssl.getSecurity().get_parms().encrypt_size_ - ivExtra - aSz - digestSz; - input.set_current(input.get_current() + padSz); + for (int i = 0; i < padSz; i++) + fill = input[AUTO]; + } + + if (input.get_error()) { + ssl.SetError(bad_input); + return; } // verify @@ -1112,6 +1176,11 @@ // Process handler for Data void Data::Process(input_buffer& input, SSL& ssl) { + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } + int msgSz = ssl.getSecurity().get_parms().encrypt_size_; int pad = 0, padSz = 0; int ivExtra = 0; @@ -1154,7 +1223,7 @@ int dataSz = msgSz - ivExtra - digestSz - pad - padSz; - if (dataSz < 0) { + if (dataSz < 0 || dataSz > (MAX_RECORD_SIZE + COMPRESS_EXTRA)) { ssl.SetError(bad_input); return; } @@ -1180,6 +1249,10 @@ // advance past mac and fill input.set_current(input.get_current() + digestSz + pad + padSz); + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } } @@ -1244,6 +1317,11 @@ // certificate processing handler void Certificate::Process(input_buffer& input, SSL& ssl) { + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } + CertManager& cm = ssl.useCrypto().use_certManager(); uint32 list_sz; @@ -1412,6 +1490,10 @@ // Session hello.id_len_ = input[AUTO]; + if (hello.id_len_ > ID_LEN) { + input.set_error(); + return input; + } if (hello.id_len_) input.read(hello.session_id_, hello.id_len_); @@ -1452,8 +1534,13 @@ // Server Hello processing handler -void ServerHello::Process(input_buffer&, SSL& ssl) +void ServerHello::Process(input_buffer& input, SSL& ssl) { + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } + if (ssl.GetMultiProtocol()) { // SSLv23 support if (ssl.isTLS() && server_version_.minor_ < 1) // downgrade to SSLv3 @@ -1547,8 +1634,12 @@ // Server Hello Done processing handler -void 
ServerHelloDone::Process(input_buffer&, SSL& ssl) +void ServerHelloDone::Process(input_buffer& input, SSL& ssl) { + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } ssl.useStates().useClient() = serverHelloDoneComplete; } @@ -1667,8 +1758,13 @@ // Client Hello processing handler -void ClientHello::Process(input_buffer&, SSL& ssl) +void ClientHello::Process(input_buffer& input, SSL& ssl) { + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } + // store version for pre master secret ssl.useSecurity().use_connection().chVersion_ = client_version_; @@ -1800,9 +1896,17 @@ // Server Key Exchange processing handler void ServerKeyExchange::Process(input_buffer& input, SSL& ssl) { + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } createKey(ssl); if (ssl.GetError()) return; server_key_->read(ssl, input); + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } ssl.useStates().useClient() = serverKeyExchangeComplete; } @@ -1924,18 +2028,24 @@ { // types request.typeTotal_ = input[AUTO]; + if (request.typeTotal_ > CERT_TYPES) { + input.set_error(); + return input; + } for (int i = 0; i < request.typeTotal_; i++) request.certificate_types_[i] = ClientCertificateType(input[AUTO]); - byte tmp[REQUEST_HEADER]; - input.read(tmp, sizeof(tmp)); + byte tmp[2]; + tmp[0] = input[AUTO]; + tmp[1] = input[AUTO]; uint16 sz; ato16(tmp, sz); // authorities while (sz) { uint16 dnSz; - input.read(tmp, sizeof(tmp)); + tmp[0] = input[AUTO]; + tmp[1] = input[AUTO]; ato16(tmp, dnSz); DistinguishedName dn; @@ -1945,6 +2055,9 @@ input.read(&dn[REQUEST_HEADER], dnSz); sz -= dnSz + REQUEST_HEADER; + + if (input.get_error()) + break; } return input; @@ -1983,8 +2096,12 @@ // CertificateRequest processing handler -void CertificateRequest::Process(input_buffer&, SSL& ssl) +void CertificateRequest::Process(input_buffer& input, SSL& ssl) { + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } CertManager& cm = 
ssl.useCrypto().use_certManager(); cm.setSendVerify(); @@ -2067,7 +2184,8 @@ input_buffer& operator>>(input_buffer& input, CertificateVerify& request) { byte tmp[VERIFY_HEADER]; - input.read(tmp, sizeof(tmp)); + tmp[0] = input[AUTO]; + tmp[1] = input[AUTO]; uint16 sz = 0; ato16(tmp, sz); @@ -2091,8 +2209,13 @@ // CertificateVerify processing handler -void CertificateVerify::Process(input_buffer&, SSL& ssl) +void CertificateVerify::Process(input_buffer& input, SSL& ssl) { + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } + const Hashes& hashVerify = ssl.getHashes().get_certVerify(); const CertManager& cert = ssl.getCrypto().get_certManager(); @@ -2131,9 +2254,17 @@ // Client Key Exchange processing handler void ClientKeyExchange::Process(input_buffer& input, SSL& ssl) { + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } createKey(ssl); if (ssl.GetError()) return; client_key_->read(ssl, input); + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } if (ssl.getCrypto().get_certManager().verifyPeer()) build_certHashes(ssl, ssl.useHashes().use_certVerify()); @@ -2220,11 +2351,19 @@ // Finished processing handler void Finished::Process(input_buffer& input, SSL& ssl) { + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } // verify hashes const Finished& verify = ssl.getHashes().get_verify(); uint finishedSz = ssl.isTLS() ? 
TLS_FINISHED_SZ : FINISHED_SZ; input.read(hashes_.md5_, finishedSz); + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } if (memcmp(&hashes_, &verify.hashes_, finishedSz)) { ssl.SetError(verify_error); @@ -2246,19 +2385,23 @@ opaque mac[SHA_LEN]; // max size int digestSz = ssl.getCrypto().get_digest().get_digestSize(); input.read(mac, digestSz); + if (input.get_error()) { + ssl.SetError(bad_input); + return; + } uint ivExtra = 0; if (ssl.getSecurity().get_parms().cipher_type_ == block) if (ssl.isTLSv1_1()) ivExtra = ssl.getCrypto().get_cipher().get_blockSize(); + opaque fill; int padSz = ssl.getSecurity().get_parms().encrypt_size_ - ivExtra - HANDSHAKE_HEADER - finishedSz - digestSz; - input.set_current(input.get_current() + padSz); - - // verify mac - if (memcmp(mac, verifyMAC, digestSz)) { - ssl.SetError(verify_error); + for (int i = 0; i < padSz; i++) + fill = input[AUTO]; + if (input.get_error()) { + ssl.SetError(bad_input); return; } diff -Nru mariadb-5.5-5.5.39/extra/yassl/src/yassl_int.cpp mariadb-5.5-5.5.40/extra/yassl/src/yassl_int.cpp --- mariadb-5.5-5.5.39/extra/yassl/src/yassl_int.cpp 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/src/yassl_int.cpp 2014-10-08 13:19:52.000000000 +0000 @@ -1,5 +1,5 @@ /* - Copyright (c) 2005, 2012, Oracle and/or its affiliates + Copyright (c) 2005, 2014, Oracle and/or its affiliates This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -2534,8 +2534,9 @@ int DeCompress(input_buffer& in, int sz, input_buffer& out) { byte tmp[LENGTH_SZ]; - - in.read(tmp, sizeof(tmp)); + + tmp[0] = in[AUTO]; + tmp[1] = in[AUTO]; uint16 len; ato16(tmp, len); diff -Nru mariadb-5.5-5.5.39/extra/yassl/taocrypt/include/asn.hpp mariadb-5.5-5.5.40/extra/yassl/taocrypt/include/asn.hpp --- mariadb-5.5-5.5.39/extra/yassl/taocrypt/include/asn.hpp 2014-08-03 12:00:34.000000000 +0000 +++ 
mariadb-5.5-5.5.40/extra/yassl/taocrypt/include/asn.hpp 2014-10-08 13:19:52.000000000 +0000 @@ -1,5 +1,5 @@ /* - Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -111,7 +111,7 @@ MAX_LENGTH_SZ = 5, MAX_SEQ_SZ = 5, // enum(seq|con) + length(4) MAX_ALGO_SIZE = 9, - MAX_DIGEST_SZ = 25, // SHA + enum(Bit or Octet) + length(4) + MAX_DIGEST_SZ = 69, // SHA512 + enum(Bit or Octet) + length(4) DSA_SIG_SZ = 40, ASN_NAME_MAX = 512 // max total of all included names }; @@ -257,8 +257,11 @@ enum ContentType { HUH = 651 }; -enum SigType { SHAwDSA = 517, MD2wRSA = 646, MD5wRSA = 648, SHAwRSA =649}; -enum HashType { MD2h = 646, MD5h = 649, SHAh = 88 }; +enum SigType { SHAwDSA = 517, MD2wRSA = 646, MD5wRSA = 648, SHAwRSA = 649, + SHA256wRSA = 655, SHA384wRSA = 656, SHA512wRSA = 657, + SHA256wDSA = 416 }; +enum HashType { MD2h = 646, MD5h = 649, SHAh = 88, SHA256h = 414, SHA384h = 415, + SHA512h = 416 }; enum KeyType { DSAk = 515, RSAk = 645 }; // sums of algo OID diff -Nru mariadb-5.5-5.5.39/extra/yassl/taocrypt/include/block.hpp mariadb-5.5-5.5.40/extra/yassl/taocrypt/include/block.hpp --- mariadb-5.5-5.5.39/extra/yassl/taocrypt/include/block.hpp 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/taocrypt/include/block.hpp 2014-10-08 13:19:52.000000000 +0000 @@ -1,5 +1,5 @@ /* - Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -74,7 +74,7 @@ if (preserve) { A b = A(); typename A::pointer newPointer = b.allocate(newSize, 0); - memcpy(newPointer, p, sizeof(T) * min((word32) oldSize, (word32) newSize)); + memcpy(newPointer, p, sizeof(T) * min(oldSize, newSize)); a.deallocate(p, oldSize); STL::swap(a, b); return newPointer; @@ -187,9 +187,9 @@ ~Block() { allocator_.deallocate(buffer_, sz_); } private: + A allocator_; word32 sz_; // size in Ts T* buffer_; - A allocator_; }; diff -Nru mariadb-5.5-5.5.39/extra/yassl/taocrypt/include/integer.hpp mariadb-5.5-5.5.40/extra/yassl/taocrypt/include/integer.hpp --- mariadb-5.5-5.5.39/extra/yassl/taocrypt/include/integer.hpp 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/taocrypt/include/integer.hpp 2014-10-08 13:19:52.000000000 +0000 @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -47,7 +47,7 @@ #ifdef TAOCRYPT_X86ASM_AVAILABLE #if defined(__GNUC__) && (__GNUC__ >= 4) - // GCC 4 or greater optimizes too much inline on recursive for bigint, + // GCC 4 or greater optimizes too much inline on recursive for bigint, // -O3 just as fast without asm here anyway #undef TAOCRYPT_X86ASM_AVAILABLE #endif diff -Nru mariadb-5.5-5.5.39/extra/yassl/taocrypt/include/pwdbased.hpp mariadb-5.5-5.5.40/extra/yassl/taocrypt/include/pwdbased.hpp --- mariadb-5.5-5.5.39/extra/yassl/taocrypt/include/pwdbased.hpp 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/taocrypt/include/pwdbased.hpp 2014-10-08 13:19:52.000000000 +0000 @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. 
+ Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,6 +16,7 @@ MA 02110-1301 USA. */ + /* pwdbased.hpp defines PBKDF2 from PKCS #5 */ diff -Nru mariadb-5.5-5.5.39/extra/yassl/taocrypt/include/runtime.hpp mariadb-5.5-5.5.40/extra/yassl/taocrypt/include/runtime.hpp --- mariadb-5.5-5.5.39/extra/yassl/taocrypt/include/runtime.hpp 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/taocrypt/include/runtime.hpp 2014-10-08 13:19:52.000000000 +0000 @@ -1,5 +1,5 @@ /* - Copyright (c) 2005, 2012, Oracle and/or its affiliates + Copyright (c) 2005, 2014, Oracle and/or its affiliates This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -34,7 +34,10 @@ // Handler for pure virtual functions namespace __Crun { - void pure_error(void); + static void pure_error(void) + { + // "Pure virtual method called, Aborted", GCC 4.2 str cmp fix + } } // namespace __Crun #endif // __sun @@ -48,7 +51,15 @@ #if defined(DO_TAOCRYPT_KERNEL_MODE) #include "kernelc.hpp" #endif - int __cxa_pure_virtual () __attribute__ ((weak)); + +/* Disallow inline __cxa_pure_virtual() */ +static int __cxa_pure_virtual() __attribute__((noinline, used)); +static int __cxa_pure_virtual() +{ + // oops, pure virtual called! + return 0; +} + } // extern "C" #endif // __GNUC__ > 2 diff -Nru mariadb-5.5-5.5.39/extra/yassl/taocrypt/include/sha.hpp mariadb-5.5-5.5.40/extra/yassl/taocrypt/include/sha.hpp --- mariadb-5.5-5.5.39/extra/yassl/taocrypt/include/sha.hpp 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/taocrypt/include/sha.hpp 2014-10-08 13:19:52.000000000 +0000 @@ -1,6 +1,5 @@ /* - Copyright (C) 2000-2007 MySQL AB - Use is subject to license terms + Copyright (c) 2000, 2014, Oracle and/or its affiliates. 
All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -159,6 +158,12 @@ void Transform(); }; +enum { MAX_SHA2_DIGEST_SIZE = 64 }; // SHA512 + +#else + +enum { MAX_SHA2_DIGEST_SIZE = 32 }; // SHA256 + #endif // WORD64_AVAILABLE diff -Nru mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/aes.cpp mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/aes.cpp --- mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/aes.cpp 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/aes.cpp 2014-10-08 13:19:52.000000000 +0000 @@ -66,7 +66,7 @@ in += BLOCK_SIZE; } } - else { + else { while (blocks--) { AsmDecrypt(in, out, (void*)Td0); @@ -79,8 +79,8 @@ out += BLOCK_SIZE; in += BLOCK_SIZE; } - } - } + } + } } #endif // DO_AES_ASM @@ -466,14 +466,13 @@ "movd mm7, ebp;" \ "movd mm4, eax;" \ "mov ebp, edx;" \ - "sub esp, 4;" - + "sub esp, 4;" #define EPILOG() \ "add esp, 4;" \ "pop ebp;" \ "pop ebx;" \ - "emms;" \ - ".att_syntax;" \ + "emms;" \ + ".att_syntax;" \ : \ : "c" (this), "S" (inBlock), "d" (boxes), "a" (outBlock) \ : "%edi", "memory", "cc" \ @@ -834,9 +833,9 @@ #ifdef _MSC_VER - __declspec(naked) + __declspec(naked) #else - __attribute__ ((noinline)) + __attribute__ ((noinline)) #endif void AES::AsmDecrypt(const byte* inBlock, byte* outBlock, void* boxes) const { diff -Nru mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/algebra.cpp mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/algebra.cpp --- mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/algebra.cpp 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/algebra.cpp 2014-10-08 13:19:52.000000000 +0000 @@ -185,10 +185,10 @@ struct WindowSlider { - WindowSlider(const Integer &expIn, bool fastNegateIn, + WindowSlider(const Integer &exp, bool fastNegate, unsigned int windowSizeIn=0) - : exp(expIn), windowModulus(Integer::One()), windowSize(windowSizeIn), - windowBegin(0), 
fastNegate(fastNegateIn), firstTime(true), + : exp(exp), windowModulus(Integer::One()), windowSize(windowSizeIn), + windowBegin(0), fastNegate(fastNegate), firstTime(true), finished(false) { if (windowSize == 0) diff -Nru mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/arc4.cpp mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/arc4.cpp --- mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/arc4.cpp 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/arc4.cpp 2014-10-08 13:19:52.000000000 +0000 @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -121,12 +121,11 @@ "push ebx;" \ "push ebp;" \ "mov ebp, eax;" - #define EPILOG() \ "pop ebp;" \ "pop ebx;" \ - "emms;" \ - ".att_syntax;" \ + "emms;" \ + ".att_syntax;" \ : \ : "c" (this), "D" (out), "S" (in), "a" (length) \ : "%edx", "memory", "cc" \ @@ -180,7 +179,7 @@ #ifdef _MSC_VER AS1( loopStart: ) // loopStart #else - AS1( 0: ) // loopStart for some gas (need numeric for jump back + AS1( 0: ) // loopStart for some gas (need numeric for jump back #endif // y = (y+a) & 0xff; @@ -232,7 +231,7 @@ AS1( nothing: ) - // inline adjust + // inline adjust AS2( add esp, 4 ) // fix room on stack EPILOG() diff -Nru mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/asn.cpp mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/asn.cpp --- mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/asn.cpp 2014-08-03 12:00:37.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/asn.cpp 2014-10-08 13:19:52.000000000 +0000 @@ -1,6 +1,5 @@ /* - Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved. - Use is subject to license terms. + Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -761,7 +760,7 @@ while (source_.get_index() < length) { GetSet(); if (source_.GetError().What() == SET_E) { - source_.SetError(NO_ERROR_E); // extensions may only have sequence + source_.SetError(NO_ERROR_E); // extensions may only have sequence source_.prev(); } GetSequence(); @@ -832,10 +831,8 @@ if (source_.IsLeft(length) == false) return; if (email) { - if (!(ptr = AddTag(ptr, buf_end, "/emailAddress=", 14, length))) { - source_.SetError(CONTENT_E); - return; - } + if (!(ptr = AddTag(ptr, buf_end, "/emailAddress=", 14, length))) + return; } source_.advance(length); @@ -972,12 +969,26 @@ hasher.reset(NEW_TC SHA); ht = SHAh; } + else if (signatureOID_ == SHA256wRSA || signatureOID_ == SHA256wDSA) { + hasher.reset(NEW_TC SHA256); + ht = SHA256h; + } +#ifdef WORD64_AVAILABLE + else if (signatureOID_ == SHA384wRSA) { + hasher.reset(NEW_TC SHA384); + ht = SHA384h; + } + else if (signatureOID_ == SHA512wRSA) { + hasher.reset(NEW_TC SHA512); + ht = SHA512h; + } +#endif else { source_.SetError(UNKOWN_SIG_E); return false; } - byte digest[SHA::DIGEST_SIZE]; // largest size + byte digest[MAX_SHA2_DIGEST_SIZE]; // largest size hasher->Update(source_.get_buffer() + certBegin_, sigIndex_ - certBegin_); hasher->Final(digest); @@ -1050,6 +1061,12 @@ 0x02, 0x05, 0x05, 0x00 }; static const byte md2AlgoID[] = { 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x02, 0x02, 0x05, 0x00}; + static const byte sha256AlgoID[] = { 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, + 0x04, 0x02, 0x01, 0x05, 0x00 }; + static const byte sha384AlgoID[] = { 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, + 0x04, 0x02, 0x02, 0x05, 0x00 }; + static const byte sha512AlgoID[] = { 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, + 0x04, 0x02, 0x03, 0x05, 0x00 }; int algoSz = 0; const byte* algoName = 0; @@ -1060,6 +1077,21 @@ algoName = shaAlgoID; break; + case SHA256h: + algoSz = sizeof(sha256AlgoID); + 
algoName = sha256AlgoID; + break; + + case SHA384h: + algoSz = sizeof(sha384AlgoID); + algoName = sha384AlgoID; + break; + + case SHA512h: + algoSz = sizeof(sha512AlgoID); + algoName = sha512AlgoID; + break; + case MD2h: algoSz = sizeof(md2AlgoID); algoName = md2AlgoID; diff -Nru mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/blowfish.cpp mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/blowfish.cpp --- mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/blowfish.cpp 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/blowfish.cpp 2014-10-08 13:19:52.000000000 +0000 @@ -237,8 +237,8 @@ #define EPILOG() \ "pop ebp;" \ "pop ebx;" \ - "emms;" \ - ".att_syntax;" \ + "emms;" \ + ".att_syntax;" \ : \ : "c" (this), "S" (inBlock), "a" (outBlock) \ : "%edi", "%edx", "memory", "cc" \ @@ -291,7 +291,7 @@ #ifdef _MSC_VER - __declspec(naked) + __declspec(naked) #else __attribute__ ((noinline)) #endif diff -Nru mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/des.cpp mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/des.cpp --- mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/des.cpp 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/des.cpp 2014-10-08 13:19:52.000000000 +0000 @@ -1,6 +1,5 @@ /* - Copyright (C) 2000-2007 MySQL AB - Use is subject to license terms + Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -642,9 +641,9 @@ #ifdef _MSC_VER - __declspec(naked) + __declspec(naked) #else - __attribute__ ((noinline)) + __attribute__ ((noinline)) #endif void DES_EDE3::AsmProcess(const byte* in, byte* out, void* box) const { @@ -664,8 +663,8 @@ #define EPILOG() \ "pop ebp;" \ "pop ebx;" \ - "emms;" \ - ".att_syntax;" \ + "emms;" \ + ".att_syntax;" \ : \ : "d" (this), "S" (in), "a" (box), "c" (out) \ : "%edi", "memory", "cc" \ diff -Nru mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/integer.cpp mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/integer.cpp --- mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/integer.cpp 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/integer.cpp 2014-10-08 13:19:52.000000000 +0000 @@ -56,9 +56,8 @@ #endif #elif defined(_MSC_VER) && defined(_M_IX86) /* #pragma message("You do not seem to have the Visual C++ Processor Pack ") - #pragma message("installed, so use of SSE2 intrinsics will be disabled.") -*/ #pragma message("installed, so use of SSE2 intrinsics will be disabled.") +*/ #elif defined(__GNUC__) && defined(__i386__) /* #warning You do not have GCC 3.3 or later, or did not specify the -msse2 \ compiler option. Use of SSE2 intrinsics will be disabled. 
@@ -194,7 +193,7 @@ "a" (a), "rm" (b) : "cc"); #elif defined(__mips64) - __asm__("dmultu %2,%3" : "=h" (r.halfs_.high), "=l" (r.halfs_.low) + __asm__("dmultu %2,%3" : "=d" (r.halfs_.high), "=l" (r.halfs_.low) : "r" (a), "r" (b)); #elif defined(_M_IX86) @@ -282,7 +281,12 @@ word GetHighHalfAsBorrow() const {return 0-halfs_.high;} private: - struct dword_struct + union + { + #ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE + dword whole_; + #endif + struct { #ifdef LITTLE_ENDIAN_ORDER word low; @@ -291,14 +295,7 @@ word high; word low; #endif - }; - - union - { - #ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE - dword whole_; - #endif - struct dword_struct halfs_; + } halfs_; }; }; @@ -1201,24 +1198,20 @@ #define AS1(x) #x ";" #define AS2(x, y) #x ", " #y ";" #define AddPrologue \ - word res; \ __asm__ __volatile__ \ ( \ "push %%ebx;" /* save this manually, in case of -fPIC */ \ - "mov %3, %%ebx;" \ + "mov %2, %%ebx;" \ ".intel_syntax noprefix;" \ "push ebp;" #define AddEpilogue \ "pop ebp;" \ ".att_syntax prefix;" \ "pop %%ebx;" \ - "mov %%eax, %0;" \ - : "=g" (res) \ + : \ : "c" (C), "d" (A), "m" (B), "S" (N) \ : "%edi", "memory", "cc" \ - ); \ - return res; - + ); #define MulPrologue \ __asm__ __volatile__ \ ( \ diff -Nru mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/md5.cpp mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/md5.cpp --- mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/md5.cpp 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/md5.cpp 2014-10-08 13:19:52.000000000 +0000 @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -223,7 +223,7 @@ #ifdef _MSC_VER - __declspec(naked) + __declspec(naked) #else __attribute__ ((noinline)) #endif @@ -242,8 +242,8 @@ #define EPILOG() \ "pop ebp;" \ "pop ebx;" \ - "emms;" \ - ".att_syntax;" \ + "emms;" \ + ".att_syntax;" \ : \ : "c" (this), "D" (data), "a" (times) \ : "%esi", "%edx", "memory", "cc" \ @@ -297,7 +297,7 @@ #ifdef _MSC_VER AS1( loopStart: ) // loopStart #else - AS1( 0: ) // loopStart for some gas (need numeric for jump back + AS1( 0: ) // loopStart for some gas (need numeric for jump back #endif // set up diff -Nru mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/misc.cpp mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/misc.cpp --- mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/misc.cpp 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/misc.cpp 2014-10-08 13:19:52.000000000 +0000 @@ -84,17 +84,7 @@ } -#ifdef __sun - -// Handler for pure virtual functions -namespace __Crun { - void pure_error() { - } -} - -#endif - -#if defined(__ICC) || defined(__INTEL_COMPILER) || (__GNUC__ > 2) +#if defined(__ICC) || defined(__INTEL_COMPILER) extern "C" { diff -Nru mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/rabbit.cpp mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/rabbit.cpp --- mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/rabbit.cpp 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/rabbit.cpp 2014-10-08 13:19:52.000000000 +0000 @@ -1,15 +1,15 @@ /* - Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved. - + Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. 
- + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, @@ -236,7 +236,7 @@ NextState(Work); /* Generate 16 bytes of pseudo-random data */ - tmp[0] = LITTLE32(workCtx_.x[0] ^ + tmp[0] = LITTLE32(workCtx_.x[0] ^ (workCtx_.x[5]>>16) ^ U32V(workCtx_.x[3]<<16)); tmp[1] = LITTLE32(workCtx_.x[2] ^ (workCtx_.x[7]>>16) ^ U32V(workCtx_.x[5]<<16)); diff -Nru mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/random.cpp mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/random.cpp --- mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/random.cpp 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/random.cpp 2014-10-08 13:19:52.000000000 +0000 @@ -1,5 +1,5 @@ /* - Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,6 +27,7 @@ #include #if defined(_WIN32) + #define _WIN32_WINNT 0x0400 #include #include #else diff -Nru mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/ripemd.cpp mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/ripemd.cpp --- mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/ripemd.cpp 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/ripemd.cpp 2014-10-08 13:19:52.000000000 +0000 @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -507,6 +507,8 @@ #ifdef _MSC_VER __declspec(naked) +#else + __attribute__ ((noinline)) #endif void RIPEMD160::AsmTransform(const byte* data, word32 times) { @@ -520,12 +522,11 @@ ".intel_syntax noprefix;" \ "push ebx;" \ "push ebp;" - #define EPILOG() \ "pop ebp;" \ "pop ebx;" \ - "emms;" \ - ".att_syntax;" \ + "emms;" \ + ".att_syntax;" \ : \ : "c" (this), "D" (data), "d" (times) \ : "%esi", "%eax", "memory", "cc" \ @@ -571,7 +572,7 @@ #ifdef _MSC_VER AS1( loopStart: ) // loopStart #else - AS1( 0: ) // loopStart for some gas (need numeric for jump back + AS1( 0: ) // loopStart for some gas (need numeric for jump back #endif AS2( movd mm2, edx ) // store times_ @@ -830,7 +831,7 @@ AS1( jnz 0b ) // loopStart #endif - // inline adjust + // inline adjust AS2( add esp, 24 ) // fix room on stack EPILOG() diff -Nru mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/sha.cpp mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/sha.cpp --- mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/sha.cpp 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/sha.cpp 2014-10-08 13:19:52.000000000 +0000 @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -776,12 +776,11 @@ ".intel_syntax noprefix;" \ "push ebx;" \ "push ebp;" - #define EPILOG() \ "pop ebp;" \ "pop ebx;" \ - "emms;" \ - ".att_syntax;" \ + "emms;" \ + ".att_syntax;" \ : \ : "c" (this), "D" (data), "a" (times) \ : "%esi", "%edx", "memory", "cc" \ @@ -830,7 +829,7 @@ #ifdef _MSC_VER AS1( loopStart: ) // loopStart #else - AS1( 0: ) // loopStart for some gas (need numeric for jump back + AS1( 0: ) // loopStart for some gas (need numeric for jump back #endif // byte reverse 16 words of input, 4 at a time, put on stack for W[] @@ -1022,7 +1021,7 @@ AS1( jnz 0b ) // loopStart #endif - // inline adjust + // inline adjust AS2( add esp, 68 ) // fix room on stack EPILOG() diff -Nru mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/twofish.cpp mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/twofish.cpp --- mariadb-5.5-5.5.39/extra/yassl/taocrypt/src/twofish.cpp 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/extra/yassl/taocrypt/src/twofish.cpp 2014-10-08 13:19:52.000000000 +0000 @@ -285,12 +285,11 @@ "push ebp;" \ "movd mm3, eax;" \ "movd mm6, ebp;" - #define EPILOG() \ "pop ebp;" \ "pop ebx;" \ - "emms;" \ - ".att_syntax;" \ + "emms;" \ + ".att_syntax;" \ : \ : "D" (this), "S" (inBlock), "a" (outBlock) \ : "%ecx", "%edx", "memory", "cc" \ @@ -479,7 +478,7 @@ AS2( movd ebp, mm6 ) AS2( movd esi, mm0 ) // k_ #ifdef __GNUC__ - AS2( movd edi, mm3 ) // outBlock + AS2( movd edi, mm3 ) // outBlock #else AS2( mov edi, [ebp + 12] ) // outBlock #endif @@ -500,7 +499,7 @@ #ifdef _MSC_VER - __declspec(naked) + __declspec(naked) #else __attribute__ ((noinline)) #endif @@ -551,7 +550,7 @@ AS2( movd ebp, mm6 ) AS2( movd esi, mm0 ) // k_ #ifdef __GNUC__ - AS2( movd edi, mm3 ) // outBlock + AS2( movd edi, mm3 ) // outBlock #else AS2( mov edi, [ebp + 12] ) // outBlock #endif diff -Nru mariadb-5.5-5.5.39/include/my_bitmap.h 
mariadb-5.5-5.5.40/include/my_bitmap.h --- mariadb-5.5-5.5.39/include/my_bitmap.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/include/my_bitmap.h 2014-10-08 13:19:51.000000000 +0000 @@ -42,6 +42,7 @@ extern "C" { #endif extern void create_last_word_mask(MY_BITMAP *map); +#define bitmap_init(A,B,C,D) my_bitmap_init(A,B,C,D) extern my_bool bitmap_init(MY_BITMAP *map, my_bitmap_map *buf, uint n_bits, my_bool thread_safe); extern my_bool bitmap_is_clear_all(const MY_BITMAP *map); diff -Nru mariadb-5.5-5.5.39/include/my_cpu.h mariadb-5.5-5.5.40/include/my_cpu.h --- mariadb-5.5-5.5.39/include/my_cpu.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/include/my_cpu.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,44 @@ +/* Copyright (c) 2013, MariaDB foundation Ab and SkySQL + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1307 USA +*/ + +/* instructions for specific cpu's */ + +/* + Macros for adjusting thread priority (hardware multi-threading) + The defines are the same ones used by the linux kernel +*/ + +#if defined(__powerpc__) +/* Very low priority */ +#define HMT_very_low() asm volatile("or 31,31,31") +/* Low priority */ +#define HMT_low() asm volatile("or 1,1,1") +/* Medium low priority */ +#define HMT_medium_low() asm volatile("or 6,6,6") +/* Medium priority */ +#define HMT_medium() asm volatile("or 2,2,2") +/* Medium high priority */ +#define HMT_medium_high() asm volatile("or 5,5,5") +/* High priority */ +#define HMT_high() asm volatile("or 3,3,3") +#else +#define HMT_very_low() +#define HMT_low() +#define HMT_medium_low() +#define HMT_medium() +#define HMT_medium_high() +#define HMT_high() +#endif diff -Nru mariadb-5.5-5.5.39/include/myisam.h mariadb-5.5-5.5.40/include/myisam.h --- mariadb-5.5-5.5.39/include/myisam.h 2014-08-03 12:00:43.000000000 +0000 +++ mariadb-5.5-5.5.40/include/myisam.h 2014-10-08 13:19:52.000000000 +0000 @@ -40,6 +40,12 @@ #endif #define MI_MAX_POSSIBLE_KEY_BUFF HA_MAX_POSSIBLE_KEY_BUFF +/* + The following defines can be increased if necessary. + But beware the dependency of MI_MAX_POSSIBLE_KEY_BUFF and MI_MAX_KEY_LENGTH. 
+*/ +#define MI_MAX_KEY_LENGTH 1000 /* Max length in bytes */ +#define MI_MAX_KEY_SEG 16 /* Max segments for key */ #define MI_NAME_IEXT ".MYI" #define MI_NAME_DEXT ".MYD" diff -Nru mariadb-5.5-5.5.39/libmysql/errmsg.c mariadb-5.5-5.5.40/libmysql/errmsg.c --- mariadb-5.5-5.5.39/libmysql/errmsg.c 2014-08-03 12:00:33.000000000 +0000 +++ mariadb-5.5-5.5.40/libmysql/errmsg.c 2014-10-08 13:19:51.000000000 +0000 @@ -81,7 +81,7 @@ "Attempt to read a row while there is no result set associated with the statement", "This feature is not implemented yet", "Lost connection to MySQL server at '%s', system error: %d", - "Statement closed indirectly because of a preceeding %s() call", + "Statement closed indirectly because of a preceding %s() call", "The number of columns in the result set differs from the number of bound buffers. You must reset the statement, rebind the result set columns, and execute the statement again", "This handle is already connected. Use a separate handle for each connection.", "Authentication plugin '%s' cannot be loaded: %s", diff -Nru mariadb-5.5-5.5.39/mysql-test/disabled.def mariadb-5.5-5.5.40/mysql-test/disabled.def --- mariadb-5.5-5.5.39/mysql-test/disabled.def 2014-08-03 12:00:37.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/disabled.def 2014-10-08 13:19:52.000000000 +0000 @@ -16,3 +16,4 @@ archive-big : Bug#11817185 2011-03-10 Anitha Disabled since this leads to timeout on Solaris Sparc log_tables-big : Bug#11756699 2010-11-15 mattiasj report already exists mysql_embedded : Bug#12561297 2011-05-14 Anitha Dependent on PB2 changes - eventum#41836 +file_contents : MDEV-6526 these files are not installed anymore diff -Nru mariadb-5.5-5.5.39/mysql-test/extra/rpl_tests/rpl_ddl.test mariadb-5.5-5.5.40/mysql-test/extra/rpl_tests/rpl_ddl.test --- mariadb-5.5-5.5.39/mysql-test/extra/rpl_tests/rpl_ddl.test 2014-08-03 12:00:43.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/extra/rpl_tests/rpl_ddl.test 2014-10-08 13:19:51.000000000 +0000 @@ -98,8 +98,8 
@@ # --> less switching of AUTOCOMMIT mode on master side. # # 4. Never use a test object, which was direct or indirect affected by a -# preceeding test sequence again. -# If one preceeding test sequence hits a (sometimes not visible, +# preceding test sequence again. +# If one preceding test sequence hits a (sometimes not visible, # because the sql error code of the statement might be 0) bug # and these rules are ignored, a following test sequence might earn ugly # effects like failing 'sync_slave_with_master', crashes of the slave or diff -Nru mariadb-5.5-5.5.39/mysql-test/extra/rpl_tests/rpl_row_basic.test mariadb-5.5-5.5.40/mysql-test/extra/rpl_tests/rpl_row_basic.test --- mariadb-5.5-5.5.39/mysql-test/extra/rpl_tests/rpl_row_basic.test 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/extra/rpl_tests/rpl_row_basic.test 2014-10-08 13:19:51.000000000 +0000 @@ -221,7 +221,7 @@ SELECT * FROM t7 ORDER BY C1; # since bug#31552/31609 idempotency is not default any longer. In order -# the preceeding test INSERT INTO t7 to pass the mode is switched +# the preceding test INSERT INTO t7 to pass the mode is switched # temprorarily set @@global.slave_exec_mode= 'IDEMPOTENT'; @@ -260,7 +260,7 @@ SELECT * FROM t8 ORDER BY a; # since bug#31552/31609 idempotency is not default any longer. 
In order -# the preceeding test INSERT INTO t8 to pass the mode is switched +# the preceding test INSERT INTO t8 to pass the mode is switched # temprorarily set @@global.slave_exec_mode= 'IDEMPOTENT'; diff -Nru mariadb-5.5-5.5.39/mysql-test/include/mysqlhotcopy.inc mariadb-5.5-5.5.40/mysql-test/include/mysqlhotcopy.inc --- mariadb-5.5-5.5.39/mysql-test/include/mysqlhotcopy.inc 2014-08-03 12:00:44.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/include/mysqlhotcopy.inc 2014-10-08 13:19:52.000000000 +0000 @@ -109,7 +109,7 @@ --replace_result $MYSQLD_DATADIR MYSQLD_DATADIR --list_files $MYSQLD_DATADIR/hotcopy_save --replace_result $MASTER_MYSOCK MASTER_MYSOCK ---error 9,11,2304 +--error 1 --exec $MYSQLHOTCOPY --quiet -S $MASTER_MYSOCK -u root hotcopy_test hotcopy_save --replace_result $MASTER_MYSOCK MASTER_MYSOCK --exec $MYSQLHOTCOPY --quiet --allowold -S $MASTER_MYSOCK -u root hotcopy_test hotcopy_save diff -Nru mariadb-5.5-5.5.39/mysql-test/include/wait_until_count_sessions.inc mariadb-5.5-5.5.40/mysql-test/include/wait_until_count_sessions.inc --- mariadb-5.5-5.5.39/mysql-test/include/wait_until_count_sessions.inc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/include/wait_until_count_sessions.inc 2014-10-08 13:19:51.000000000 +0000 @@ -10,7 +10,7 @@ # 1. We wait for $current_sessions <= $count_sessions because in the use case # with count_sessions.inc before and wait_until_count_sessions.inc after # the core of the test it could happen that the disconnects of sessions -# belonging to the preceeding test are not finished. +# belonging to the preceding test are not finished. 
# sessions at test begin($count_sessions) = m + n # sessions of the previous test which will be soon disconnected = n (n >= 0) # sessions at test end ($current sessions, assuming the test disconnects diff -Nru mariadb-5.5-5.5.39/mysql-test/lib/My/Platform.pm mariadb-5.5-5.5.40/mysql-test/lib/My/Platform.pm --- mariadb-5.5-5.5.39/mysql-test/lib/My/Platform.pm 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/lib/My/Platform.pm 2014-10-08 13:19:51.000000000 +0000 @@ -110,6 +110,8 @@ # This may not be true, but we can't test for it on AIX due to Perl bug # See Bug #45771 return 0 if ($^O eq 'aix'); + # See Debian bug #670722 - failing on kFreeBSD even after setting short path + return 0 if $^O eq 'gnukfreebsd' and length $path < 40; require IO::Socket::UNIX; diff -Nru mariadb-5.5-5.5.39/mysql-test/r/ctype_cp932.result mariadb-5.5-5.5.40/mysql-test/r/ctype_cp932.result --- mariadb-5.5-5.5.39/mysql-test/r/ctype_cp932.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/r/ctype_cp932.result 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,35 @@ +# +# Bug #11755818 LIKE DOESN'T MATCH WHEN CP932_BIN/SJIS_BIN COLLATIONS ARE +# USED. 
+# +SET @old_character_set_client= @@character_set_client; +SET @old_character_set_connection= @@character_set_connection; +SET @old_character_set_results= @@character_set_results; +SET character_set_client= 'utf8'; +SET character_set_connection= 'utf8'; +SET character_set_results= 'utf8'; +CREATE TABLE t1 (a VARCHAR(10) COLLATE cp932_bin); +INSERT INTO t1 VALUES('カカ'); +SELECT * FROM t1 WHERE a LIKE '%カ'; +a +カカ +SELECT * FROM t1 WHERE a LIKE '_カ'; +a +カカ +SELECT * FROM t1 WHERE a LIKE '%_カ'; +a +カカ +ALTER TABLE t1 MODIFY a VARCHAR(100) COLLATE sjis_bin; +SELECT * FROM t1 WHERE a LIKE '%カ'; +a +カカ +SELECT * FROM t1 WHERE a LIKE '_カ'; +a +カカ +SELECT * FROM t1 WHERE a LIKE '%_カ'; +a +カカ +DROP TABLE t1; +SET @@character_set_client= @old_character_set_client; +SET @@character_set_connection= @old_character_set_connection; +SET @@character_set_results= @old_character_set_results; diff -Nru mariadb-5.5-5.5.39/mysql-test/r/derived_view.result mariadb-5.5-5.5.40/mysql-test/r/derived_view.result --- mariadb-5.5-5.5.39/mysql-test/r/derived_view.result 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/r/derived_view.result 2014-10-08 13:19:51.000000000 +0000 @@ -2406,6 +2406,94 @@ drop table t1,t2; set optimizer_switch=@save_optimizer_switch5740; # +# Bug mdev-5721: possible long key access to a materialized derived table +# (see also the test case for Bug#13261277 that is actually the same bug) +# +CREATE TABLE t1 ( +id varchar(255) NOT NULL DEFAULT '', +familyid int(11) DEFAULT NULL, +withdrawndate date DEFAULT NULL, +KEY index_td_familyid_id (familyid,id) +) ENGINE=MyISAM DEFAULT CHARSET=utf8; +CREATE TABLE t2 ( +id int(11) NOT NULL AUTO_INCREMENT, +activefromts datetime NOT NULL DEFAULT '0000-00-00 00:00:00', +shortdescription text, +useraccessfamily varchar(512) DEFAULT NULL, +serialized longtext, +PRIMARY KEY (id) +) ENGINE=MyISAM DEFAULT CHARSET=utf8; +insert into t1 values ('picture/89/1369722032695.pmd',89,NULL); +insert into t1 values 
('picture/90/1369832057370.pmd',90,NULL); +insert into t2 values (38,'2013-03-04 07:49:22','desc','CODE','string'); +EXPLAIN +SELECT * FROM t2 x, +(SELECT t2.useraccessfamily, t2.serialized AS picturesubuser, COUNT(*) +FROM t2, t1 GROUP BY t2.useraccessfamily, picturesubuser) y +WHERE x.useraccessfamily = y.useraccessfamily; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY x system NULL NULL NULL NULL 1 +1 PRIMARY ALL NULL NULL NULL NULL 2 Using where +2 DERIVED t2 system NULL NULL NULL NULL 1 +2 DERIVED t1 index NULL index_td_familyid_id 772 NULL 2 Using index +SELECT * FROM t2 x, +(SELECT t2.useraccessfamily, t2.serialized AS picturesubuser, COUNT(*) +FROM t2, t1 GROUP BY t2.useraccessfamily, picturesubuser) y +WHERE x.useraccessfamily = y.useraccessfamily; +id activefromts shortdescription useraccessfamily serialized useraccessfamily picturesubuser COUNT(*) +38 2013-03-04 07:49:22 desc CODE string CODE string 2 +DROP TABLE t1,t2; +# +# Bug#13261277: Unchecked key length caused missing records. 
+# +CREATE TABLE t1 ( +col_varchar varchar(1024) CHARACTER SET utf8 DEFAULT NULL, +stub1 varchar(1024) CHARACTER SET utf8 DEFAULT NULL, +stub2 varchar(1024) CHARACTER SET utf8 DEFAULT NULL, +stub3 varchar(1024) CHARACTER SET utf8 DEFAULT NULL +); +INSERT INTO t1 VALUES +('d','d','l','ther'), +(NULL,'s','NJBIQ','trzetuchv'), +(-715390976,'coul','MYWFB','cfhtrzetu'), +(1696792576,'f','i\'s','c'), + (1,'i','ltpemcfhtr','gsltpemcf'), + (-663027712,'mgsltpemcf','sa','amgsltpem'), + (-1686700032,'JPRVK','i','vamgsltpe'), + (NULL,'STUNB','UNVJV','u'), + (5,'oka','qyihvamgsl','AXSMD'), + (NULL,'tqwmqyihva','h','yntqwmqyi'), + (3,'EGMJN','e','e'); +CREATE TABLE t2 ( +col_varchar varchar(10) DEFAULT NULL, +col_int INT DEFAULT NULL +); +INSERT INTO t2 VALUES ('d',9); +set optimizer_switch='derived_merge=off,derived_with_keys=on'; +SET @save_heap_size= @@max_heap_table_size; +SET @@max_heap_table_size= 16384; +SELECT t2.col_int +FROM t2 +RIGHT JOIN ( SELECT * FROM t1 ) AS dt +ON t2.col_varchar = dt.col_varchar +WHERE t2.col_int IS NOT NULL ; +col_int +9 +# Shouldn't use auto_key0 for derived table +EXPLAIN +SELECT t2.col_int +FROM t2 +RIGHT JOIN ( SELECT * FROM t1 ) AS dt +ON t2.col_varchar = dt.col_varchar +WHERE t2.col_int IS NOT NULL ; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2 system NULL NULL NULL NULL 1 +1 PRIMARY ALL NULL NULL NULL NULL 11 Using where +2 DERIVED t1 ALL NULL NULL NULL NULL 11 +SET @@max_heap_table_size= @save_heap_size; +SET optimizer_switch=@save_optimizer_switch; +DROP TABLE t1,t2; +# # end of 5.3 tests # set optimizer_switch=@exit_optimizer_switch; diff -Nru mariadb-5.5-5.5.39/mysql-test/r/func_group.result mariadb-5.5-5.5.40/mysql-test/r/func_group.result --- mariadb-5.5-5.5.39/mysql-test/r/func_group.result 2014-08-03 12:00:43.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/r/func_group.result 2014-10-08 13:19:52.000000000 +0000 @@ -1,4 +1,4 @@ -drop table if exists t1,t2; +drop table if exists 
t1,t2,t3,t4,t5,t6; set @sav_dpi= @@div_precision_increment; set div_precision_increment= 5; show variables like 'div_precision_increment'; @@ -2239,3 +2239,34 @@ id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Select tables optimized away DROP TABLE t1; +# +# MDEV-6743 crash in GROUP_CONCAT(IF () ORDER BY 1) +# +CREATE TABLE t1 (pk INT, t2_id INT, t5_id INT, PRIMARY KEY (pk)); +INSERT INTO t1 VALUES (1,3,12),(2,3,15); +CREATE TABLE t2 (pk INT, PRIMARY KEY (pk)); +INSERT INTO t2 VALUES (4),(5); +CREATE TABLE t3 (t2_id INT, t4_id INT); +INSERT INTO t3 VALUES (6,11),(7,12); +CREATE TABLE t4 (id INT); +INSERT INTO t4 VALUES (13),(14); +CREATE TABLE t5 (pk INT, f VARCHAR(50), t6_id INT, PRIMARY KEY (pk)); +INSERT INTO t5 VALUES (9,'FOO',NULL); +CREATE TABLE t6 (pk INT, f VARCHAR(120), b TINYINT(4), PRIMARY KEY (pk)); +PREPARE stmt FROM " + SELECT t1.t2_id, GROUP_CONCAT(IF (t6.b, t6.f, t5.f) ORDER BY 1) + FROM t1 + JOIN t2 ON t1.t2_id = t2.pk + JOIN t3 ON t2.pk = t3.t2_id + JOIN t4 ON t4.id = t3.t4_id + JOIN t5 ON t1.t5_id = t5.pk + LEFT JOIN t6 ON t6.pk = t5.t6_id + GROUP BY t1.t2_id +"; +EXECUTE stmt; +t2_id GROUP_CONCAT(IF (t6.b, t6.f, t5.f) ORDER BY 1) +EXECUTE stmt; +t2_id GROUP_CONCAT(IF (t6.b, t6.f, t5.f) ORDER BY 1) +EXECUTE stmt; +t2_id GROUP_CONCAT(IF (t6.b, t6.f, t5.f) ORDER BY 1) +DROP TABLE t1,t2,t3,t4,t5,t6; diff -Nru mariadb-5.5-5.5.39/mysql-test/r/join_cache.result mariadb-5.5-5.5.40/mysql-test/r/join_cache.result --- mariadb-5.5-5.5.39/mysql-test/r/join_cache.result 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/r/join_cache.result 2014-10-08 13:19:51.000000000 +0000 @@ -5656,4 +5656,153 @@ 1 SIMPLE t1 ALL NULL NULL NULL NULL 10 1 SIMPLE t2 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) drop table t0,t1,t2; +# +# mdev-6292: huge performance degradation for a sequence +# of LEFT JOIN operations when using join buffer +# +CREATE TABLE t1 ( +id 
int(11) NOT NULL AUTO_INCREMENT, +col1 varchar(255) NOT NULL DEFAULT '', +PRIMARY KEY (id) +) ENGINE=INNODB; +CREATE TABLE t2 ( +id int(11) NOT NULL AUTO_INCREMENT, +parent_id smallint(3) NOT NULL DEFAULT '0', +col2 varchar(25) NOT NULL DEFAULT '', +PRIMARY KEY (id) +) ENGINE=INNODB; +set join_buffer_size=8192; +set join_cache_level=0; +set @init_time:=now(); +SELECT t.* +FROM +t1 t +LEFT JOIN t2 c1 ON c1.parent_id = t.id AND c1.col2 = "val" + LEFT JOIN t2 c2 ON c2.parent_id = t.id AND c2.col2 = "val" + LEFT JOIN t2 c3 ON c3.parent_id = t.id AND c3.col2 = "val" + LEFT JOIN t2 c4 ON c4.parent_id = t.id AND c4.col2 = "val" + LEFT JOIN t2 c5 ON c5.parent_id = t.id AND c5.col2 = "val" + LEFT JOIN t2 c6 ON c6.parent_id = t.id AND c6.col2 = "val" + LEFT JOIN t2 c7 ON c7.parent_id = t.id AND c7.col2 = "val" + LEFT JOIN t2 c8 ON c8.parent_id = t.id AND c8.col2 = "val" + LEFT JOIN t2 c9 ON c9.parent_id = t.id AND c9.col2 = "val" + LEFT JOIN t2 c10 ON c10.parent_id = t.id AND c10.col2 = "val" + LEFT JOIN t2 c11 ON c11.parent_id = t.id AND c11.col2 = "val" + LEFT JOIN t2 c12 ON c12.parent_id = t.id AND c12.col2 = "val" + LEFT JOIN t2 c13 ON c13.parent_id = t.id AND c13.col2 = "val" + LEFT JOIN t2 c14 ON c14.parent_id = t.id AND c14.col2 = "val" + LEFT JOIN t2 c15 ON c15.parent_id = t.id AND c15.col2 = "val" + LEFT JOIN t2 c16 ON c16.parent_id = t.id AND c16.col2 = "val" + LEFT JOIN t2 c17 ON c17.parent_id = t.id AND c17.col2 = "val" + LEFT JOIN t2 c18 ON c18.parent_id = t.id AND c18.col2 = "val" + LEFT JOIN t2 c19 ON c19.parent_id = t.id AND c19.col2 = "val" + LEFT JOIN t2 c20 ON c20.parent_id = t.id AND c20.col2 = "val" + LEFT JOIN t2 c21 ON c21.parent_id = t.id AND c21.col2 = "val" + LEFT JOIN t2 c22 ON c22.parent_id = t.id AND c22.col2 = "val" + LEFT JOIN t2 c23 ON c23.parent_id = t.id AND c23.col2 = "val" + LEFT JOIN t2 c24 ON c24.parent_id = t.id AND c24.col2 = "val" + LEFT JOIN t2 c25 ON c25.parent_id = t.id AND c25.col2 = "val" +ORDER BY +col1; +id col1 +select 
timestampdiff(second, @init_time, now()) <= 1; +timestampdiff(second, @init_time, now()) <= 1 +1 +set join_cache_level=2; +set @init_time:=now(); +SELECT t.* +FROM +t1 t +LEFT JOIN t2 c1 ON c1.parent_id = t.id AND c1.col2 = "val" + LEFT JOIN t2 c2 ON c2.parent_id = t.id AND c2.col2 = "val" + LEFT JOIN t2 c3 ON c3.parent_id = t.id AND c3.col2 = "val" + LEFT JOIN t2 c4 ON c4.parent_id = t.id AND c4.col2 = "val" + LEFT JOIN t2 c5 ON c5.parent_id = t.id AND c5.col2 = "val" + LEFT JOIN t2 c6 ON c6.parent_id = t.id AND c6.col2 = "val" + LEFT JOIN t2 c7 ON c7.parent_id = t.id AND c7.col2 = "val" + LEFT JOIN t2 c8 ON c8.parent_id = t.id AND c8.col2 = "val" + LEFT JOIN t2 c9 ON c9.parent_id = t.id AND c9.col2 = "val" + LEFT JOIN t2 c10 ON c10.parent_id = t.id AND c10.col2 = "val" + LEFT JOIN t2 c11 ON c11.parent_id = t.id AND c11.col2 = "val" + LEFT JOIN t2 c12 ON c12.parent_id = t.id AND c12.col2 = "val" + LEFT JOIN t2 c13 ON c13.parent_id = t.id AND c13.col2 = "val" + LEFT JOIN t2 c14 ON c14.parent_id = t.id AND c14.col2 = "val" + LEFT JOIN t2 c15 ON c15.parent_id = t.id AND c15.col2 = "val" + LEFT JOIN t2 c16 ON c16.parent_id = t.id AND c16.col2 = "val" + LEFT JOIN t2 c17 ON c17.parent_id = t.id AND c17.col2 = "val" + LEFT JOIN t2 c18 ON c18.parent_id = t.id AND c18.col2 = "val" + LEFT JOIN t2 c19 ON c19.parent_id = t.id AND c19.col2 = "val" + LEFT JOIN t2 c20 ON c20.parent_id = t.id AND c20.col2 = "val" + LEFT JOIN t2 c21 ON c21.parent_id = t.id AND c21.col2 = "val" + LEFT JOIN t2 c22 ON c22.parent_id = t.id AND c22.col2 = "val" + LEFT JOIN t2 c23 ON c23.parent_id = t.id AND c23.col2 = "val" + LEFT JOIN t2 c24 ON c24.parent_id = t.id AND c24.col2 = "val" + LEFT JOIN t2 c25 ON c25.parent_id = t.id AND c25.col2 = "val" +ORDER BY +col1; +id col1 +select timestampdiff(second, @init_time, now()) <= 1; +timestampdiff(second, @init_time, now()) <= 1 +1 +EXPLAIN +SELECT t.* +FROM +t1 t +LEFT JOIN t2 c1 ON c1.parent_id = t.id AND c1.col2 = "val" + LEFT JOIN t2 c2 ON c2.parent_id 
= t.id AND c2.col2 = "val" + LEFT JOIN t2 c3 ON c3.parent_id = t.id AND c3.col2 = "val" + LEFT JOIN t2 c4 ON c4.parent_id = t.id AND c4.col2 = "val" + LEFT JOIN t2 c5 ON c5.parent_id = t.id AND c5.col2 = "val" + LEFT JOIN t2 c6 ON c6.parent_id = t.id AND c6.col2 = "val" + LEFT JOIN t2 c7 ON c7.parent_id = t.id AND c7.col2 = "val" + LEFT JOIN t2 c8 ON c8.parent_id = t.id AND c8.col2 = "val" + LEFT JOIN t2 c9 ON c9.parent_id = t.id AND c9.col2 = "val" + LEFT JOIN t2 c10 ON c10.parent_id = t.id AND c10.col2 = "val" + LEFT JOIN t2 c11 ON c11.parent_id = t.id AND c11.col2 = "val" + LEFT JOIN t2 c12 ON c12.parent_id = t.id AND c12.col2 = "val" + LEFT JOIN t2 c13 ON c13.parent_id = t.id AND c13.col2 = "val" + LEFT JOIN t2 c14 ON c14.parent_id = t.id AND c14.col2 = "val" + LEFT JOIN t2 c15 ON c15.parent_id = t.id AND c15.col2 = "val" + LEFT JOIN t2 c16 ON c16.parent_id = t.id AND c16.col2 = "val" + LEFT JOIN t2 c17 ON c17.parent_id = t.id AND c17.col2 = "val" + LEFT JOIN t2 c18 ON c18.parent_id = t.id AND c18.col2 = "val" + LEFT JOIN t2 c19 ON c19.parent_id = t.id AND c19.col2 = "val" + LEFT JOIN t2 c20 ON c20.parent_id = t.id AND c20.col2 = "val" + LEFT JOIN t2 c21 ON c21.parent_id = t.id AND c21.col2 = "val" + LEFT JOIN t2 c22 ON c22.parent_id = t.id AND c22.col2 = "val" + LEFT JOIN t2 c23 ON c23.parent_id = t.id AND c23.col2 = "val" + LEFT JOIN t2 c24 ON c24.parent_id = t.id AND c24.col2 = "val" + LEFT JOIN t2 c25 ON c25.parent_id = t.id AND c25.col2 = "val" +ORDER BY +col1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t ALL NULL NULL NULL NULL 1 Using temporary; Using filesort +1 SIMPLE c1 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (flat, BNL join) +1 SIMPLE c2 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c3 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c4 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL 
join) +1 SIMPLE c5 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c6 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c7 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c8 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c9 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c10 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c11 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c12 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c13 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c14 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c15 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c16 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c17 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c18 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c19 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c20 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c21 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c22 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c23 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c24 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE c25 ALL NULL NULL NULL NULL 1 Using where; Using join buffer (incremental, BNL join) 
+set join_buffer_size=default; +set join_cache_level = default; +DROP TABLE t1,t2; set @@optimizer_switch=@save_optimizer_switch; diff -Nru mariadb-5.5-5.5.39/mysql-test/r/join_nested_jcl6.result mariadb-5.5-5.5.40/mysql-test/r/join_nested_jcl6.result --- mariadb-5.5-5.5.39/mysql-test/r/join_nested_jcl6.result 2014-08-03 12:00:37.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/r/join_nested_jcl6.result 2014-10-08 13:19:51.000000000 +0000 @@ -705,18 +705,18 @@ (t8.b=t9.b OR t8.c IS NULL) AND (t9.a=1); a b a b a b a b a b a b a b a b a b a b -1 2 3 2 4 2 1 2 3 2 2 2 6 2 2 2 0 2 1 2 -1 2 3 2 4 2 1 2 4 2 2 2 6 2 2 2 0 2 1 2 1 2 3 2 4 2 1 2 3 2 3 1 6 2 1 1 NULL NULL 1 1 1 2 3 2 4 2 1 2 4 2 3 1 6 2 1 1 NULL NULL 1 1 +1 2 3 2 4 2 1 2 3 2 2 2 6 2 2 2 0 2 1 2 +1 2 3 2 4 2 1 2 4 2 2 2 6 2 2 2 0 2 1 2 1 2 3 2 4 2 1 2 3 2 3 1 6 2 1 1 NULL NULL 1 2 1 2 3 2 4 2 1 2 4 2 3 1 6 2 1 1 NULL NULL 1 2 1 2 3 2 4 2 1 2 3 2 3 3 NULL NULL NULL NULL NULL NULL 1 1 1 2 3 2 4 2 1 2 4 2 3 3 NULL NULL NULL NULL NULL NULL 1 1 1 2 3 2 4 2 1 2 3 2 3 3 NULL NULL NULL NULL NULL NULL 1 2 1 2 3 2 4 2 1 2 4 2 3 3 NULL NULL NULL NULL NULL NULL 1 2 -1 2 3 2 5 3 NULL NULL NULL NULL 2 2 6 2 2 2 0 2 1 2 1 2 3 2 5 3 NULL NULL NULL NULL 3 1 6 2 1 1 NULL NULL 1 1 +1 2 3 2 5 3 NULL NULL NULL NULL 2 2 6 2 2 2 0 2 1 2 1 2 3 2 5 3 NULL NULL NULL NULL 3 1 6 2 1 1 NULL NULL 1 2 1 2 3 2 5 3 NULL NULL NULL NULL 3 3 NULL NULL NULL NULL NULL NULL 1 1 1 2 3 2 5 3 NULL NULL NULL NULL 3 3 NULL NULL NULL NULL NULL NULL 1 2 diff -Nru mariadb-5.5-5.5.39/mysql-test/r/join_outer_jcl6.result mariadb-5.5-5.5.40/mysql-test/r/join_outer_jcl6.result --- mariadb-5.5-5.5.39/mysql-test/r/join_outer_jcl6.result 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/r/join_outer_jcl6.result 2014-10-08 13:19:51.000000000 +0000 @@ -696,9 +696,9 @@ insert into t3 values (2,4), (3,4); select * from t1 left join t2 on b1 = a1 left join t3 on c1 = a1 and b1 is null; a1 a2 b1 b2 c1 c2 +3 2 NULL NULL 3 4 1 2 1 3 NULL NULL 2 2 2 3 
NULL NULL -3 2 NULL NULL 3 4 explain select * from t1 left join t2 on b1 = a1 left join t3 on c1 = a1 and b1 is null; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 3 diff -Nru mariadb-5.5-5.5.39/mysql-test/r/kill_processlist-6619.result mariadb-5.5-5.5.40/mysql-test/r/kill_processlist-6619.result --- mariadb-5.5-5.5.39/mysql-test/r/kill_processlist-6619.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/r/kill_processlist-6619.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,14 @@ +connect con1,localhost,root,,; +SHOW PROCESSLIST; +Id User Host db Command Time State Info Progress +# root # test Sleep # # NULL 0.000 +# root # test Query # # SHOW PROCESSLIST 0.000 +connection default; +KILL QUERY con_id; +connection con1; +SHOW PROCESSLIST; +ERROR 70100: Query execution was interrupted +SHOW PROCESSLIST; +Id User Host db Command Time State Info Progress +# root # test Sleep # # NULL 0.000 +# root # test Query # # SHOW PROCESSLIST 0.000 diff -Nru mariadb-5.5-5.5.39/mysql-test/r/log_errchk.result mariadb-5.5-5.5.40/mysql-test/r/log_errchk.result --- mariadb-5.5-5.5.39/mysql-test/r/log_errchk.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/r/log_errchk.result 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,10 @@ +call mtr.add_suppression("Could not use"); +# Case 1: Setting fife file to general_log_file and slow_query_log_file +# system variable. +SET GLOBAL general_log_file="MYSQLTEST_VARDIR/tmp/general_log.fifo";; +ERROR 42000: Variable 'general_log_file' can't be set to the value of 'MYSQLTEST_VARDIR/tmp/general_log.fifo' +SET GLOBAL slow_query_log_file="MYSQLTEST_VARDIR/tmp/slow_log.fifo";; +ERROR 42000: Variable 'slow_query_log_file' can't be set to the value of 'MYSQLTEST_VARDIR/tmp/slow_log.fifo' +# Case 2: Starting server with fifo file as general log file +# and slow query log file. +Setting fifo file as general log file and slow query log failed. 
diff -Nru mariadb-5.5-5.5.39/mysql-test/r/show_bad_definer-5553.result mariadb-5.5-5.5.40/mysql-test/r/show_bad_definer-5553.result --- mariadb-5.5-5.5.39/mysql-test/r/show_bad_definer-5553.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/r/show_bad_definer-5553.result 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,13 @@ +create database mysqltest1; +use mysqltest1; +create table t1(id int primary key); +create definer=unknownuser@'%' sql security definer view v1 as select t1.id from t1 group by t1.id; +Warnings: +Note 1449 The user specified as a definer ('unknownuser'@'%') does not exist +show table status; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MyISAM 10 Fixed 0 0 0 # 1024 0 NULL # # NULL latin1_swedish_ci NULL +v1 NULL NULL NULL NULL NULL NULL # NULL NULL NULL # # NULL NULL NULL NULL VIEW +Warnings: +Note 1449 The user specified as a definer ('unknownuser'@'%') does not exist +drop database mysqltest1; diff -Nru mariadb-5.5-5.5.39/mysql-test/r/sp-bugs.result mariadb-5.5-5.5.40/mysql-test/r/sp-bugs.result --- mariadb-5.5-5.5.39/mysql-test/r/sp-bugs.result 2014-08-03 12:00:43.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/r/sp-bugs.result 2014-10-08 13:19:52.000000000 +0000 @@ -268,3 +268,9 @@ CALL test_5531(1); DROP PROCEDURE test_5531; DROP TABLE t1; +create procedure sp() begin +commit; +end| +start transaction; +call sp(); +drop procedure sp; diff -Nru mariadb-5.5-5.5.39/mysql-test/r/type_newdecimal.result mariadb-5.5-5.5.40/mysql-test/r/type_newdecimal.result --- mariadb-5.5-5.5.39/mysql-test/r/type_newdecimal.result 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/r/type_newdecimal.result 2014-10-08 13:19:51.000000000 +0000 @@ -1988,3 +1988,12 @@ d1 * d2 0 DROP TABLE t1; +select 0.000000000000000000000000000000000000000000000000001 mod 1; 
+0.000000000000000000000000000000000000000000000000001 mod 1 +0.000000000000000000000000000000 +select 0.0000000001 mod 1; +0.0000000001 mod 1 +0.0000000001 +select 0.01 mod 1; +0.01 mod 1 +0.01 diff -Nru mariadb-5.5-5.5.39/mysql-test/r/type_time.result mariadb-5.5-5.5.40/mysql-test/r/type_time.result --- mariadb-5.5-5.5.39/mysql-test/r/type_time.result 2014-08-03 12:00:33.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/r/type_time.result 2014-10-08 13:19:53.000000000 +0000 @@ -352,3 +352,14 @@ '-24:00:00' = (SELECT f1 FROM t1) 1 DROP TABLE t1; +# +# MDEV-6592 Assertion `ltime->day == 0' failed with TIMESTAMP, MAKETIME +# +CREATE TABLE t1 (d DATE, c VARCHAR(10), KEY(d)) engine=myisam; +INSERT INTO t1 VALUES ('2008-10-02','2008-10-02'), ('2008-10-02','2008-10-02'); +SELECT * FROM t1 WHERE TIMESTAMP(c,'02:04:42') AND d <=> MAKETIME(97,0,7); +d c +DROP TABLE t1; +# +# End of 5.5 tests +# diff -Nru mariadb-5.5-5.5.39/mysql-test/r/variables.result mariadb-5.5-5.5.40/mysql-test/r/variables.result --- mariadb-5.5-5.5.39/mysql-test/r/variables.result 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/r/variables.result 2014-10-08 13:19:52.000000000 +0000 @@ -1804,4 +1804,13 @@ ERROR 42000: Variable 'rand_seed1' doesn't have a default value set autocommit = values(v); ERROR 42S22: Unknown column 'v' in 'field list' +set session sql_mode=ansi_quotes; +select * from information_schema.session_variables where variable_name='sql_mode'; +VARIABLE_NAME VARIABLE_VALUE +SQL_MODE ANSI_QUOTES +show global status like 'foobar'; +Variable_name Value +select * from information_schema.session_variables where variable_name='sql_mode'; +VARIABLE_NAME VARIABLE_VALUE +SQL_MODE ANSI_QUOTES End of 5.5 tests diff -Nru mariadb-5.5-5.5.39/mysql-test/suite/funcs_1/views/func_view.inc mariadb-5.5-5.5.40/mysql-test/suite/funcs_1/views/func_view.inc --- mariadb-5.5-5.5.39/mysql-test/suite/funcs_1/views/func_view.inc 2014-08-03 12:00:34.000000000 +0000 +++ 
mariadb-5.5-5.5.40/mysql-test/suite/funcs_1/views/func_view.inc 2014-10-08 13:19:51.000000000 +0000 @@ -282,7 +282,7 @@ # other interesting value # numbers -> 0 # strings, blobs, binaries -> not full length of used data type, "exotic" -# characters and preceeding and trailing spaces +# characters and preceding and trailing spaces # FIXME enum, set ?? INSERT INTO t1_values SET my_char_30 = ' ---äÖüß@µ*$-- ', diff -Nru mariadb-5.5-5.5.39/mysql-test/suite/funcs_1/views/views_master.inc mariadb-5.5-5.5.40/mysql-test/suite/funcs_1/views/views_master.inc --- mariadb-5.5-5.5.39/mysql-test/suite/funcs_1/views/views_master.inc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/suite/funcs_1/views/views_master.inc 2014-10-08 13:19:51.000000000 +0000 @@ -545,7 +545,7 @@ # view names are accepted, at creation time, alteration time, # and drop time. ############################################################################### -# Note(mleich): non-qualified view name means a view name without preceeding +# Note(mleich): non-qualified view name means a view name without preceding # database name --disable_warnings DROP VIEW IF EXISTS v1 ; diff -Nru mariadb-5.5-5.5.39/mysql-test/suite/heap/btree_varchar_null.result mariadb-5.5-5.5.40/mysql-test/suite/heap/btree_varchar_null.result --- mariadb-5.5-5.5.39/mysql-test/suite/heap/btree_varchar_null.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/suite/heap/btree_varchar_null.result 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,6 @@ +create table t1 (f1 varchar(128), f2 varchar(128), key (f2,f1) using btree) engine=memory; +insert into t1 values (null,'not'),('one',null),('two',null),('three',''); +select * from t1 where f1 = 'one' and f2 is null; +f1 f2 +one NULL +drop table t1; diff -Nru mariadb-5.5-5.5.39/mysql-test/suite/heap/btree_varchar_null.test mariadb-5.5-5.5.40/mysql-test/suite/heap/btree_varchar_null.test --- mariadb-5.5-5.5.39/mysql-test/suite/heap/btree_varchar_null.test 
1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/suite/heap/btree_varchar_null.test 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,7 @@ +# +# MDEV-4813 Replication fails on updating a MEMORY table with an index using btree +# +create table t1 (f1 varchar(128), f2 varchar(128), key (f2,f1) using btree) engine=memory; +insert into t1 values (null,'not'),('one',null),('two',null),('three',''); +select * from t1 where f1 = 'one' and f2 is null; +drop table t1; diff -Nru mariadb-5.5-5.5.39/mysql-test/suite/innodb/r/foreign-keys.result mariadb-5.5-5.5.40/mysql-test/suite/innodb/r/foreign-keys.result --- mariadb-5.5-5.5.39/mysql-test/suite/innodb/r/foreign-keys.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/suite/innodb/r/foreign-keys.result 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,16 @@ +# +# Bug #19471516 SERVER CRASHES WHEN EXECUTING ALTER TABLE +# ADD FOREIGN KEY +# +CREATE TABLE `department` (`department_id` INT, `department_people_fk` INT, +PRIMARY KEY (`department_id`)) engine=innodb; +CREATE TABLE `title` (`title_id` INT, `title_manager_fk` INT, +`title_reporter_fk` INT, PRIMARY KEY (`title_id`)) engine=innodb; +CREATE TABLE `people` (`people_id` INT, PRIMARY KEY (`people_id`)) engine=innodb; +ALTER TABLE `department` ADD FOREIGN KEY (`department_people_fk`) REFERENCES +`people` (`people_id`); +ALTER TABLE `title` ADD FOREIGN KEY (`title_manager_fk`) REFERENCES `people` +(`people_id`); +ALTER TABLE `title` ADD FOREIGN KEY (`title_reporter_fk`) REFERENCES `people` +(`people_id`); +drop table title, department, people; diff -Nru mariadb-5.5-5.5.39/mysql-test/suite/innodb/r/innodb_simulate_comp_failures_small.result mariadb-5.5-5.5.40/mysql-test/suite/innodb/r/innodb_simulate_comp_failures_small.result --- mariadb-5.5-5.5.39/mysql-test/suite/innodb/r/innodb_simulate_comp_failures_small.result 2014-08-03 12:00:38.000000000 +0000 +++ 
mariadb-5.5-5.5.40/mysql-test/suite/innodb/r/innodb_simulate_comp_failures_small.result 2014-10-08 13:19:51.000000000 +0000 @@ -5,4 +5,4 @@ SET GLOBAL innodb_simulate_comp_failures = 25; SELECT COUNT(*) FROM t1; COUNT(*) -10000 +1000 diff -Nru mariadb-5.5-5.5.39/mysql-test/suite/innodb/t/foreign-keys.test mariadb-5.5-5.5.40/mysql-test/suite/innodb/t/foreign-keys.test --- mariadb-5.5-5.5.39/mysql-test/suite/innodb/t/foreign-keys.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/suite/innodb/t/foreign-keys.test 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,31 @@ +--source include/have_innodb.inc +--source include/have_debug.inc + +if (`select plugin_auth_version <= "5.5.39-MariaDB-36.0" from information_schema.plugins where plugin_name='innodb'`) +{ + --skip Not fixed in XtraDB as of 5.5.39-MariaDB-36.0 or earlier +} + +--echo # +--echo # Bug #19471516 SERVER CRASHES WHEN EXECUTING ALTER TABLE +--echo # ADD FOREIGN KEY +--echo # + +CREATE TABLE `department` (`department_id` INT, `department_people_fk` INT, +PRIMARY KEY (`department_id`)) engine=innodb; + +CREATE TABLE `title` (`title_id` INT, `title_manager_fk` INT, +`title_reporter_fk` INT, PRIMARY KEY (`title_id`)) engine=innodb; + +CREATE TABLE `people` (`people_id` INT, PRIMARY KEY (`people_id`)) engine=innodb; + +ALTER TABLE `department` ADD FOREIGN KEY (`department_people_fk`) REFERENCES +`people` (`people_id`); + +ALTER TABLE `title` ADD FOREIGN KEY (`title_manager_fk`) REFERENCES `people` +(`people_id`); + +ALTER TABLE `title` ADD FOREIGN KEY (`title_reporter_fk`) REFERENCES `people` +(`people_id`); + +drop table title, department, people; diff -Nru mariadb-5.5-5.5.39/mysql-test/suite/innodb/t/innodb_simulate_comp_failures_small.test mariadb-5.5-5.5.40/mysql-test/suite/innodb/t/innodb_simulate_comp_failures_small.test --- mariadb-5.5-5.5.39/mysql-test/suite/innodb/t/innodb_simulate_comp_failures_small.test 2014-08-03 12:00:38.000000000 +0000 +++ 
mariadb-5.5-5.5.40/mysql-test/suite/innodb/t/innodb_simulate_comp_failures_small.test 2014-10-08 13:19:51.000000000 +0000 @@ -1,5 +1,5 @@ ---let $num_inserts = 10000 ---let $num_ops = 3000 +--let $num_inserts = 1000 +--let $num_ops = 30 --source suite/innodb/include/innodb_simulate_comp_failures.inc # clean exit --exit diff -Nru mariadb-5.5-5.5.39/mysql-test/suite/rpl/t/rpl_ddl.test mariadb-5.5-5.5.40/mysql-test/suite/rpl/t/rpl_ddl.test --- mariadb-5.5-5.5.39/mysql-test/suite/rpl/t/rpl_ddl.test 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/suite/rpl/t/rpl_ddl.test 2014-10-08 13:19:51.000000000 +0000 @@ -13,10 +13,10 @@ # sequences start. # # 2. Never use a test object, which was direct or indirect affected by a -# preceeding test sequence again. +# preceding test sequence again. # Except table d1.t1 where ONLY DML is allowed. # -# If one preceeding test sequence hits a (sometimes not good visible, +# If one preceding test sequence hits a (sometimes not good visible, # because the sql error code of the statement might be 0) bug # and these rules are ignored, a following test sequence might earn ugly # effects like failing 'sync_slave_with_master', crashes of the slave or diff -Nru mariadb-5.5-5.5.39/mysql-test/suite/rpl/t/rpl_row_basic_11bugs.test mariadb-5.5-5.5.40/mysql-test/suite/rpl/t/rpl_row_basic_11bugs.test --- mariadb-5.5-5.5.39/mysql-test/suite/rpl/t/rpl_row_basic_11bugs.test 2014-08-03 12:00:45.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/suite/rpl/t/rpl_row_basic_11bugs.test 2014-10-08 13:19:51.000000000 +0000 @@ -244,7 +244,7 @@ UPDATE t1 SET a = 5, b = 'slave' WHERE a = 1; SELECT * FROM t1 ORDER BY a; # since bug#31552/31609 idempotency is not default any longer. 
In -# order for the preceeding test UPDATE t1 to pass, the mode is switched +# order for the preceding test UPDATE t1 to pass, the mode is switched # temprorarily set @@global.slave_exec_mode= 'IDEMPOTENT'; --echo **** On Master **** diff -Nru mariadb-5.5-5.5.39/mysql-test/t/ctype_cp932.test mariadb-5.5-5.5.40/mysql-test/t/ctype_cp932.test --- mariadb-5.5-5.5.39/mysql-test/t/ctype_cp932.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/t/ctype_cp932.test 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,29 @@ +-- source include/have_cp932.inc +--echo # +--echo # Bug #11755818 LIKE DOESN'T MATCH WHEN CP932_BIN/SJIS_BIN COLLATIONS ARE +--echo # USED. +--echo # + +SET @old_character_set_client= @@character_set_client; +SET @old_character_set_connection= @@character_set_connection; +SET @old_character_set_results= @@character_set_results; +SET character_set_client= 'utf8'; +SET character_set_connection= 'utf8'; +SET character_set_results= 'utf8'; + +CREATE TABLE t1 (a VARCHAR(10) COLLATE cp932_bin); +INSERT INTO t1 VALUES('カカ'); +SELECT * FROM t1 WHERE a LIKE '%カ'; +SELECT * FROM t1 WHERE a LIKE '_カ'; +SELECT * FROM t1 WHERE a LIKE '%_カ'; + +ALTER TABLE t1 MODIFY a VARCHAR(100) COLLATE sjis_bin; +SELECT * FROM t1 WHERE a LIKE '%カ'; +SELECT * FROM t1 WHERE a LIKE '_カ'; +SELECT * FROM t1 WHERE a LIKE '%_カ'; +DROP TABLE t1; + +## Reset to initial values +SET @@character_set_client= @old_character_set_client; +SET @@character_set_connection= @old_character_set_connection; +SET @@character_set_results= @old_character_set_results; diff -Nru mariadb-5.5-5.5.39/mysql-test/t/derived_view.test mariadb-5.5-5.5.40/mysql-test/t/derived_view.test --- mariadb-5.5-5.5.39/mysql-test/t/derived_view.test 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/t/derived_view.test 2014-10-08 13:19:51.000000000 +0000 @@ -1731,6 +1731,99 @@ set optimizer_switch=@save_optimizer_switch5740; --echo # +--echo # Bug mdev-5721: possible long key access to a 
materialized derived table +--echo # (see also the test case for Bug#13261277 that is actually the same bug) +--echo # + +CREATE TABLE t1 ( + id varchar(255) NOT NULL DEFAULT '', + familyid int(11) DEFAULT NULL, + withdrawndate date DEFAULT NULL, + KEY index_td_familyid_id (familyid,id) +) ENGINE=MyISAM DEFAULT CHARSET=utf8; + +CREATE TABLE t2 ( + id int(11) NOT NULL AUTO_INCREMENT, + activefromts datetime NOT NULL DEFAULT '0000-00-00 00:00:00', + shortdescription text, + useraccessfamily varchar(512) DEFAULT NULL, + serialized longtext, + PRIMARY KEY (id) +) ENGINE=MyISAM DEFAULT CHARSET=utf8; + +insert into t1 values ('picture/89/1369722032695.pmd',89,NULL); +insert into t1 values ('picture/90/1369832057370.pmd',90,NULL); +insert into t2 values (38,'2013-03-04 07:49:22','desc','CODE','string'); + +EXPLAIN +SELECT * FROM t2 x, +(SELECT t2.useraccessfamily, t2.serialized AS picturesubuser, COUNT(*) + FROM t2, t1 GROUP BY t2.useraccessfamily, picturesubuser) y +WHERE x.useraccessfamily = y.useraccessfamily; + +SELECT * FROM t2 x, +(SELECT t2.useraccessfamily, t2.serialized AS picturesubuser, COUNT(*) + FROM t2, t1 GROUP BY t2.useraccessfamily, picturesubuser) y +WHERE x.useraccessfamily = y.useraccessfamily; + +DROP TABLE t1,t2; + +--echo # +--echo # Bug#13261277: Unchecked key length caused missing records. 
+--echo # + +CREATE TABLE t1 ( + col_varchar varchar(1024) CHARACTER SET utf8 DEFAULT NULL, + stub1 varchar(1024) CHARACTER SET utf8 DEFAULT NULL, + stub2 varchar(1024) CHARACTER SET utf8 DEFAULT NULL, + stub3 varchar(1024) CHARACTER SET utf8 DEFAULT NULL +); + +INSERT INTO t1 VALUES + ('d','d','l','ther'), + (NULL,'s','NJBIQ','trzetuchv'), + (-715390976,'coul','MYWFB','cfhtrzetu'), + (1696792576,'f','i\'s','c'), + (1,'i','ltpemcfhtr','gsltpemcf'), + (-663027712,'mgsltpemcf','sa','amgsltpem'), + (-1686700032,'JPRVK','i','vamgsltpe'), + (NULL,'STUNB','UNVJV','u'), + (5,'oka','qyihvamgsl','AXSMD'), + (NULL,'tqwmqyihva','h','yntqwmqyi'), + (3,'EGMJN','e','e'); + +CREATE TABLE t2 ( + col_varchar varchar(10) DEFAULT NULL, + col_int INT DEFAULT NULL +); + +INSERT INTO t2 VALUES ('d',9); + +set optimizer_switch='derived_merge=off,derived_with_keys=on'; + +SET @save_heap_size= @@max_heap_table_size; +SET @@max_heap_table_size= 16384; + +SELECT t2.col_int +FROM t2 + RIGHT JOIN ( SELECT * FROM t1 ) AS dt + ON t2.col_varchar = dt.col_varchar +WHERE t2.col_int IS NOT NULL ; + +--echo # Shouldn't use auto_key0 for derived table +EXPLAIN +SELECT t2.col_int +FROM t2 + RIGHT JOIN ( SELECT * FROM t1 ) AS dt + ON t2.col_varchar = dt.col_varchar +WHERE t2.col_int IS NOT NULL ; + +SET @@max_heap_table_size= @save_heap_size; +SET optimizer_switch=@save_optimizer_switch; + +DROP TABLE t1,t2; + +--echo # --echo # end of 5.3 tests --echo # diff -Nru mariadb-5.5-5.5.39/mysql-test/t/func_group.test mariadb-5.5-5.5.40/mysql-test/t/func_group.test --- mariadb-5.5-5.5.39/mysql-test/t/func_group.test 2014-08-03 12:00:43.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/t/func_group.test 2014-10-08 13:19:52.000000000 +0000 @@ -3,7 +3,7 @@ # --disable_warnings -drop table if exists t1,t2; +drop table if exists t1,t2,t3,t4,t5,t6; --enable_warnings set @sav_dpi= @@div_precision_increment; @@ -1528,3 +1528,40 @@ explain select MIN(b) from t1 where b >= inet_aton('192.168.119.32'); DROP TABLE t1; 
+--echo # +--echo # MDEV-6743 crash in GROUP_CONCAT(IF () ORDER BY 1) +--echo # + +CREATE TABLE t1 (pk INT, t2_id INT, t5_id INT, PRIMARY KEY (pk)); +INSERT INTO t1 VALUES (1,3,12),(2,3,15); + +CREATE TABLE t2 (pk INT, PRIMARY KEY (pk)); +INSERT INTO t2 VALUES (4),(5); + +CREATE TABLE t3 (t2_id INT, t4_id INT); +INSERT INTO t3 VALUES (6,11),(7,12); + +CREATE TABLE t4 (id INT); +INSERT INTO t4 VALUES (13),(14); + +CREATE TABLE t5 (pk INT, f VARCHAR(50), t6_id INT, PRIMARY KEY (pk)); +INSERT INTO t5 VALUES (9,'FOO',NULL); + +CREATE TABLE t6 (pk INT, f VARCHAR(120), b TINYINT(4), PRIMARY KEY (pk)); + +PREPARE stmt FROM " + SELECT t1.t2_id, GROUP_CONCAT(IF (t6.b, t6.f, t5.f) ORDER BY 1) + FROM t1 + JOIN t2 ON t1.t2_id = t2.pk + JOIN t3 ON t2.pk = t3.t2_id + JOIN t4 ON t4.id = t3.t4_id + JOIN t5 ON t1.t5_id = t5.pk + LEFT JOIN t6 ON t6.pk = t5.t6_id + GROUP BY t1.t2_id +"; + +EXECUTE stmt; +EXECUTE stmt; +EXECUTE stmt; + +DROP TABLE t1,t2,t3,t4,t5,t6; diff -Nru mariadb-5.5-5.5.39/mysql-test/t/join_cache.test mariadb-5.5-5.5.40/mysql-test/t/join_cache.test --- mariadb-5.5-5.5.39/mysql-test/t/join_cache.test 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/t/join_cache.test 2014-10-08 13:19:51.000000000 +0000 @@ -3656,5 +3656,134 @@ drop table t0,t1,t2; +--echo # +--echo # mdev-6292: huge performance degradation for a sequence +--echo # of LEFT JOIN operations when using join buffer +--echo # + +--source include/have_innodb.inc + +CREATE TABLE t1 ( + id int(11) NOT NULL AUTO_INCREMENT, + col1 varchar(255) NOT NULL DEFAULT '', + PRIMARY KEY (id) +) ENGINE=INNODB; + +CREATE TABLE t2 ( + id int(11) NOT NULL AUTO_INCREMENT, + parent_id smallint(3) NOT NULL DEFAULT '0', + col2 varchar(25) NOT NULL DEFAULT '', + PRIMARY KEY (id) +) ENGINE=INNODB; + +set join_buffer_size=8192; + +set join_cache_level=0; + +set @init_time:=now(); +SELECT t.* +FROM + t1 t + LEFT JOIN t2 c1 ON c1.parent_id = t.id AND c1.col2 = "val" + LEFT JOIN t2 c2 ON c2.parent_id = t.id AND 
c2.col2 = "val" + LEFT JOIN t2 c3 ON c3.parent_id = t.id AND c3.col2 = "val" + LEFT JOIN t2 c4 ON c4.parent_id = t.id AND c4.col2 = "val" + LEFT JOIN t2 c5 ON c5.parent_id = t.id AND c5.col2 = "val" + LEFT JOIN t2 c6 ON c6.parent_id = t.id AND c6.col2 = "val" + LEFT JOIN t2 c7 ON c7.parent_id = t.id AND c7.col2 = "val" + LEFT JOIN t2 c8 ON c8.parent_id = t.id AND c8.col2 = "val" + LEFT JOIN t2 c9 ON c9.parent_id = t.id AND c9.col2 = "val" + LEFT JOIN t2 c10 ON c10.parent_id = t.id AND c10.col2 = "val" + LEFT JOIN t2 c11 ON c11.parent_id = t.id AND c11.col2 = "val" + LEFT JOIN t2 c12 ON c12.parent_id = t.id AND c12.col2 = "val" + LEFT JOIN t2 c13 ON c13.parent_id = t.id AND c13.col2 = "val" + LEFT JOIN t2 c14 ON c14.parent_id = t.id AND c14.col2 = "val" + LEFT JOIN t2 c15 ON c15.parent_id = t.id AND c15.col2 = "val" + LEFT JOIN t2 c16 ON c16.parent_id = t.id AND c16.col2 = "val" + LEFT JOIN t2 c17 ON c17.parent_id = t.id AND c17.col2 = "val" + LEFT JOIN t2 c18 ON c18.parent_id = t.id AND c18.col2 = "val" + LEFT JOIN t2 c19 ON c19.parent_id = t.id AND c19.col2 = "val" + LEFT JOIN t2 c20 ON c20.parent_id = t.id AND c20.col2 = "val" + LEFT JOIN t2 c21 ON c21.parent_id = t.id AND c21.col2 = "val" + LEFT JOIN t2 c22 ON c22.parent_id = t.id AND c22.col2 = "val" + LEFT JOIN t2 c23 ON c23.parent_id = t.id AND c23.col2 = "val" + LEFT JOIN t2 c24 ON c24.parent_id = t.id AND c24.col2 = "val" + LEFT JOIN t2 c25 ON c25.parent_id = t.id AND c25.col2 = "val" +ORDER BY + col1; +select timestampdiff(second, @init_time, now()) <= 1; + +set join_cache_level=2; + +set @init_time:=now(); +SELECT t.* +FROM + t1 t + LEFT JOIN t2 c1 ON c1.parent_id = t.id AND c1.col2 = "val" + LEFT JOIN t2 c2 ON c2.parent_id = t.id AND c2.col2 = "val" + LEFT JOIN t2 c3 ON c3.parent_id = t.id AND c3.col2 = "val" + LEFT JOIN t2 c4 ON c4.parent_id = t.id AND c4.col2 = "val" + LEFT JOIN t2 c5 ON c5.parent_id = t.id AND c5.col2 = "val" + LEFT JOIN t2 c6 ON c6.parent_id = t.id AND c6.col2 = "val" + LEFT JOIN t2 
c7 ON c7.parent_id = t.id AND c7.col2 = "val" + LEFT JOIN t2 c8 ON c8.parent_id = t.id AND c8.col2 = "val" + LEFT JOIN t2 c9 ON c9.parent_id = t.id AND c9.col2 = "val" + LEFT JOIN t2 c10 ON c10.parent_id = t.id AND c10.col2 = "val" + LEFT JOIN t2 c11 ON c11.parent_id = t.id AND c11.col2 = "val" + LEFT JOIN t2 c12 ON c12.parent_id = t.id AND c12.col2 = "val" + LEFT JOIN t2 c13 ON c13.parent_id = t.id AND c13.col2 = "val" + LEFT JOIN t2 c14 ON c14.parent_id = t.id AND c14.col2 = "val" + LEFT JOIN t2 c15 ON c15.parent_id = t.id AND c15.col2 = "val" + LEFT JOIN t2 c16 ON c16.parent_id = t.id AND c16.col2 = "val" + LEFT JOIN t2 c17 ON c17.parent_id = t.id AND c17.col2 = "val" + LEFT JOIN t2 c18 ON c18.parent_id = t.id AND c18.col2 = "val" + LEFT JOIN t2 c19 ON c19.parent_id = t.id AND c19.col2 = "val" + LEFT JOIN t2 c20 ON c20.parent_id = t.id AND c20.col2 = "val" + LEFT JOIN t2 c21 ON c21.parent_id = t.id AND c21.col2 = "val" + LEFT JOIN t2 c22 ON c22.parent_id = t.id AND c22.col2 = "val" + LEFT JOIN t2 c23 ON c23.parent_id = t.id AND c23.col2 = "val" + LEFT JOIN t2 c24 ON c24.parent_id = t.id AND c24.col2 = "val" + LEFT JOIN t2 c25 ON c25.parent_id = t.id AND c25.col2 = "val" +ORDER BY + col1; +select timestampdiff(second, @init_time, now()) <= 1; + +EXPLAIN +SELECT t.* +FROM + t1 t + LEFT JOIN t2 c1 ON c1.parent_id = t.id AND c1.col2 = "val" + LEFT JOIN t2 c2 ON c2.parent_id = t.id AND c2.col2 = "val" + LEFT JOIN t2 c3 ON c3.parent_id = t.id AND c3.col2 = "val" + LEFT JOIN t2 c4 ON c4.parent_id = t.id AND c4.col2 = "val" + LEFT JOIN t2 c5 ON c5.parent_id = t.id AND c5.col2 = "val" + LEFT JOIN t2 c6 ON c6.parent_id = t.id AND c6.col2 = "val" + LEFT JOIN t2 c7 ON c7.parent_id = t.id AND c7.col2 = "val" + LEFT JOIN t2 c8 ON c8.parent_id = t.id AND c8.col2 = "val" + LEFT JOIN t2 c9 ON c9.parent_id = t.id AND c9.col2 = "val" + LEFT JOIN t2 c10 ON c10.parent_id = t.id AND c10.col2 = "val" + LEFT JOIN t2 c11 ON c11.parent_id = t.id AND c11.col2 = "val" + LEFT JOIN t2 c12 ON 
c12.parent_id = t.id AND c12.col2 = "val" + LEFT JOIN t2 c13 ON c13.parent_id = t.id AND c13.col2 = "val" + LEFT JOIN t2 c14 ON c14.parent_id = t.id AND c14.col2 = "val" + LEFT JOIN t2 c15 ON c15.parent_id = t.id AND c15.col2 = "val" + LEFT JOIN t2 c16 ON c16.parent_id = t.id AND c16.col2 = "val" + LEFT JOIN t2 c17 ON c17.parent_id = t.id AND c17.col2 = "val" + LEFT JOIN t2 c18 ON c18.parent_id = t.id AND c18.col2 = "val" + LEFT JOIN t2 c19 ON c19.parent_id = t.id AND c19.col2 = "val" + LEFT JOIN t2 c20 ON c20.parent_id = t.id AND c20.col2 = "val" + LEFT JOIN t2 c21 ON c21.parent_id = t.id AND c21.col2 = "val" + LEFT JOIN t2 c22 ON c22.parent_id = t.id AND c22.col2 = "val" + LEFT JOIN t2 c23 ON c23.parent_id = t.id AND c23.col2 = "val" + LEFT JOIN t2 c24 ON c24.parent_id = t.id AND c24.col2 = "val" + LEFT JOIN t2 c25 ON c25.parent_id = t.id AND c25.col2 = "val" +ORDER BY + col1; + +set join_buffer_size=default; +set join_cache_level = default; + +DROP TABLE t1,t2; + # this must be the last command in the file set @@optimizer_switch=@save_optimizer_switch; diff -Nru mariadb-5.5-5.5.39/mysql-test/t/kill_processlist-6619.test mariadb-5.5-5.5.40/mysql-test/t/kill_processlist-6619.test --- mariadb-5.5-5.5.39/mysql-test/t/kill_processlist-6619.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/t/kill_processlist-6619.test 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,17 @@ +# +# MDEV-6619 SHOW PROCESSLIST returns empty result set after KILL QUERY +# +--source include/not_embedded.inc +--enable_connect_log +--connect (con1,localhost,root,,) +--let $con_id = `SELECT CONNECTION_ID()` +--replace_column 1 # 3 # 6 # 7 # +SHOW PROCESSLIST; +--connection default +--replace_result $con_id con_id +eval KILL QUERY $con_id; +--connection con1 +--error ER_QUERY_INTERRUPTED +SHOW PROCESSLIST; +--replace_column 1 # 3 # 6 # 7 # +SHOW PROCESSLIST; diff -Nru mariadb-5.5-5.5.39/mysql-test/t/log_errchk.test mariadb-5.5-5.5.40/mysql-test/t/log_errchk.test --- 
mariadb-5.5-5.5.39/mysql-test/t/log_errchk.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/t/log_errchk.test 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,64 @@ +# +--source include/not_windows.inc +--source include/not_embedded.inc + +# +# Bug#14757009 : WHEN THE GENERAL_LOG IS A SOCKET AND THE READER GOES AWAY, +# MYSQL QUITS WORKING. +# +call mtr.add_suppression("Could not use"); + +--let $gen_log_file= $MYSQLTEST_VARDIR/tmp/general_log.fifo +--let $slow_query_log_file= $MYSQLTEST_VARDIR/tmp/slow_log.fifo +--let GREP_FILE=$MYSQLTEST_VARDIR/log/mysqld.1.err + +--exec mkfifo $gen_log_file +--exec mkfifo $slow_query_log_file + +--echo # Case 1: Setting fife file to general_log_file and slow_query_log_file +--echo # system variable. +# Only regular files can be set to general log. Setting fifo file to general log +# reports an error. +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--error ER_WRONG_VALUE_FOR_VAR +--eval SET GLOBAL general_log_file="$gen_log_file"; + +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--error ER_WRONG_VALUE_FOR_VAR +--eval SET GLOBAL slow_query_log_file="$slow_query_log_file"; + +--echo # Case 2: Starting server with fifo file as general log file +--echo # and slow query log file. +# Restart server with fifo file as general log file. +--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--shutdown_server 60 +--source include/wait_until_disconnected.inc +--enable_reconnect +# Write file to make mysql-test-run.pl start up the server again +--exec echo "restart: --general-log-file=$gen_log_file --slow-query-log-file=$slow_query_log_file" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--source include/wait_until_connected_again.inc + +# With fix error should be reported in the error log file if file is not a +# regular file. 
+--perl + my $file= $ENV{'GREP_FILE'}; + my $pattern= "Turning logging off for the whole duration"; + open(FILE, "$file") or die("Unable to open $file: $!\n"); + my $count = 0; + while () { + if ($_ =~ m/$pattern/) { + $count++; + break; + } + } + if ($count >= 2){ + print "Setting fifo file as general log file and slow query log failed.\n"; + } else { + print "test failed.\n"; + } + close(FILE); +EOF + +# Cleanup +--remove_file $gen_log_file +--remove_file $slow_query_log_file diff -Nru mariadb-5.5-5.5.39/mysql-test/t/show_bad_definer-5553.test mariadb-5.5-5.5.40/mysql-test/t/show_bad_definer-5553.test --- mariadb-5.5-5.5.39/mysql-test/t/show_bad_definer-5553.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/t/show_bad_definer-5553.test 2014-10-08 13:19:53.000000000 +0000 @@ -0,0 +1,12 @@ +--source include/not_embedded.inc +# +# MDEV-5553 A view or procedure with a non existing definer can block "SHOW TABLE STATUS" with an unclear error message +# + +create database mysqltest1; # all-open privileges on test db desroy the test +use mysqltest1; +create table t1(id int primary key); +create definer=unknownuser@'%' sql security definer view v1 as select t1.id from t1 group by t1.id; +--replace_column 8 # 12 # 13 # +show table status; +drop database mysqltest1; diff -Nru mariadb-5.5-5.5.39/mysql-test/t/sp-bugs.test mariadb-5.5-5.5.40/mysql-test/t/sp-bugs.test --- mariadb-5.5-5.5.39/mysql-test/t/sp-bugs.test 2014-08-03 12:00:43.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/t/sp-bugs.test 2014-10-08 13:19:52.000000000 +0000 @@ -285,3 +285,16 @@ CALL test_5531(1); DROP PROCEDURE test_5531; DROP TABLE t1; + +# +# MDEV-6601 Assertion `!thd->in_active_multi_stmt_transa ction() || thd->in_multi_stmt_transaction_mode()' failed on executing a stored procedure with commit +# +delimiter |; +create procedure sp() begin + commit; +end| +delimiter ;| +start transaction; +call sp(); +drop procedure sp; + diff -Nru 
mariadb-5.5-5.5.39/mysql-test/t/type_newdecimal.test mariadb-5.5-5.5.40/mysql-test/t/type_newdecimal.test --- mariadb-5.5-5.5.39/mysql-test/t/type_newdecimal.test 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/t/type_newdecimal.test 2014-10-08 13:19:51.000000000 +0000 @@ -1570,3 +1570,14 @@ DROP TABLE t1; +# +# Test for Bug#18469276: MOD FOR SMALL DECIMALS FAILS +# +select 0.000000000000000000000000000000000000000000000000001 mod 1; + +# +# incorrect result +# +select 0.0000000001 mod 1; +select 0.01 mod 1; + diff -Nru mariadb-5.5-5.5.39/mysql-test/t/type_time.test mariadb-5.5-5.5.40/mysql-test/t/type_time.test --- mariadb-5.5-5.5.39/mysql-test/t/type_time.test 2014-08-03 12:00:33.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/t/type_time.test 2014-10-08 13:19:53.000000000 +0000 @@ -239,3 +239,14 @@ SELECT '-24:00:00' = (SELECT f1 FROM t1); DROP TABLE t1; +--echo # +--echo # MDEV-6592 Assertion `ltime->day == 0' failed with TIMESTAMP, MAKETIME +--echo # +CREATE TABLE t1 (d DATE, c VARCHAR(10), KEY(d)) engine=myisam; +INSERT INTO t1 VALUES ('2008-10-02','2008-10-02'), ('2008-10-02','2008-10-02'); +SELECT * FROM t1 WHERE TIMESTAMP(c,'02:04:42') AND d <=> MAKETIME(97,0,7); +DROP TABLE t1; + +--echo # +--echo # End of 5.5 tests +--echo # diff -Nru mariadb-5.5-5.5.39/mysql-test/t/variables.test mariadb-5.5-5.5.40/mysql-test/t/variables.test --- mariadb-5.5-5.5.39/mysql-test/t/variables.test 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/mysql-test/t/variables.test 2014-10-08 13:19:52.000000000 +0000 @@ -1553,4 +1553,12 @@ --error ER_BAD_FIELD_ERROR set autocommit = values(v); +# +# MDEV-6673 I_S.SESSION_VARIABLES shows global values +# +set session sql_mode=ansi_quotes; +select * from information_schema.session_variables where variable_name='sql_mode'; +show global status like 'foobar'; +select * from information_schema.session_variables where variable_name='sql_mode'; + --echo End of 5.5 tests diff -Nru 
mariadb-5.5-5.5.39/packaging/rpm-oel/mysql.spec.in mariadb-5.5-5.5.40/packaging/rpm-oel/mysql.spec.in --- mariadb-5.5-5.5.39/packaging/rpm-oel/mysql.spec.in 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/packaging/rpm-oel/mysql.spec.in 2014-10-08 13:19:52.000000000 +0000 @@ -85,7 +85,7 @@ Summary: A very fast and reliable SQL database server Group: Applications/Databases Version: @VERSION@ -Release: 4%{?commercial:.1}%{?dist} +Release: 2%{?commercial:.1}%{?dist} License: Copyright (c) 2000, @MYSQL_COPYRIGHT_YEAR@, %{mysql_vendor}. All rights reserved. Under %{?license_type} license as shown in the Description field. Source0: https://cdn.mysql.com/Downloads/MySQL-@MYSQL_BASE_VERSION@/%{src_dir}.tar.gz URL: http://www.mysql.com/ @@ -156,8 +156,8 @@ Provides: MySQL-server-advanced%{?_isa} = %{version}-%{release} Obsoletes: MySQL-server-advanced < %{version}-%{release} Obsoletes: mysql-community-server < %{version}-%{release} -Requires: mysql-enterprise-client%{?_isa} = %{version}-%{release} -Requires: mysql-enterprise-common%{?_isa} = %{version}-%{release} +Requires: mysql-commercial-client%{?_isa} = %{version}-%{release} +Requires: mysql-commercial-common%{?_isa} = %{version}-%{release} %else Provides: MySQL-server%{?_isa} = %{version}-%{release} Requires: mysql-community-client%{?_isa} = %{version}-%{release} @@ -169,6 +169,8 @@ Obsoletes: mariadb-galera-server Provides: mysql-server = %{version}-%{release} Provides: mysql-server%{?_isa} = %{version}-%{release} +Provides: mysql-compat-server = %{version}-%{release} +Provides: mysql-compat-server%{?_isa} = %{version}-%{release} %if 0%{?systemd} Requires(post): systemd Requires(preun): systemd @@ -207,7 +209,7 @@ Provides: MySQL-client-advanced%{?_isa} = %{version}-%{release} Obsoletes: MySQL-client-advanced < %{version}-%{release} Obsoletes: mysql-community-client < %{version}-%{release} -Requires: mysql-enterprise-libs%{?_isa} = %{version}-%{release} +Requires: mysql-commercial-libs%{?_isa} = 
%{version}-%{release} %else Provides: MySQL-client%{?_isa} = %{version}-%{release} Requires: mysql-community-libs%{?_isa} = %{version}-%{release} @@ -246,7 +248,7 @@ Provides: MySQL-test-advanced%{?_isa} = %{version}-%{release} Obsoletes: MySQL-test-advanced < %{version}-%{release} Obsoletes: mysql-community-test < %{version}-%{release} -Requires: mysql-enterprise-server%{?_isa} = %{version}-%{release} +Requires: mysql-commercial-server%{?_isa} = %{version}-%{release} %else Provides: MySQL-test%{?_isa} = %{version}-%{release} Requires: mysql-community-server%{?_isa} = %{version}-%{release} @@ -268,7 +270,7 @@ Group: Applications/Databases %if 0%{?commercial} Obsoletes: mysql-community-bench < %{version}-%{release} -Requires: mysql-enterprise-server%{?_isa} = %{version}-%{release} +Requires: mysql-commercial-server%{?_isa} = %{version}-%{release} %else Requires: mysql-community-server%{?_isa} = %{version}-%{release} %endif @@ -289,7 +291,7 @@ Provides: MySQL-devel-advanced%{?_isa} = %{version}-%{release} Obsoletes: MySQL-devel-advanced < %{version}-%{release} Obsoletes: mysql-community-devel < %{version}-%{release} -Requires: mysql-enterprise-libs%{?_isa} = %{version}-%{release} +Requires: mysql-commercial-libs%{?_isa} = %{version}-%{release} %else Provides: MySQL-devel%{?_isa} = %{version}-%{release} Requires: mysql-community-libs%{?_isa} = %{version}-%{release} @@ -311,7 +313,7 @@ Provides: MySQL-shared-advanced%{?_isa} = %{version}-%{release} Obsoletes: MySQL-shared-advanced < %{version}-%{release} Obsoletes: mysql-community-libs < %{version}-%{release} -Requires: mysql-enterprise-common%{?_isa} = %{version}-%{release} +Requires: mysql-commercial-common%{?_isa} = %{version}-%{release} %else Provides: MySQL-shared%{?_isa} = %{version}-%{release} Requires: mysql-community-common%{?_isa} = %{version}-%{release} @@ -337,7 +339,7 @@ Provides: MySQL-shared-compat-advanced%{?_isa} = %{version}-%{release} Obsoletes: MySQL-shared-compat-advanced < %{version}-%{release} 
Obsoletes: mysql-community-libs-compat < %{version}-%{release} -Requires: mysql-enterprise-libs%{?_isa} = %{version}-%{release} +Requires: mysql-commercial-libs%{?_isa} = %{version}-%{release} %else Provides: MySQL-shared-compat%{?_isa} = %{version}-%{release} Requires: mysql-community-libs%{?_isa} = %{version}-%{release} @@ -359,7 +361,7 @@ Provides: MySQL-embedded-advanced%{?_isa} = %{version}-%{release} Obsoletes: MySQL-embedded-advanced < %{version}-%{release} Obsoletes: mysql-community-embedded < %{version}-%{release} -Requires: mysql-enterprise-common%{?_isa} = %{version}-%{release} +Requires: mysql-commercial-common%{?_isa} = %{version}-%{release} %else Provides: MySQL-embedded%{?_isa} = %{version}-%{release} Requires: mysql-community-common%{?_isa} = %{version}-%{release} @@ -387,8 +389,8 @@ Group: Applications/Databases %if 0%{?commercial} Obsoletes: mysql-community-embedded-devel < %{version}-%{release} -Requires: mysql-enterprise-devel%{?_isa} = %{version}-%{release} -Requires: mysql-enterprise-embedded%{?_isa} = %{version}-%{release} +Requires: mysql-commercial-devel%{?_isa} = %{version}-%{release} +Requires: mysql-commercial-embedded%{?_isa} = %{version}-%{release} %else Requires: mysql-community-devel%{?_isa} = %{version}-%{release} Requires: mysql-community-embedded%{?_isa} = %{version}-%{release} @@ -407,9 +409,9 @@ Summary: Convenience package for easy upgrades of MySQL package set Group: Applications/Databases %if 0%{?commercial} -Requires: mysql-enterprise-client%{?_isa} = %{version}-%{release} -Requires: mysql-enterprise-libs%{?_isa} = %{version}-%{release} -Requires: mysql-enterprise-libs-compat%{?_isa} = %{version}-%{release} +Requires: mysql-commercial-client%{?_isa} = %{version}-%{release} +Requires: mysql-commercial-libs%{?_isa} = %{version}-%{release} +Requires: mysql-commercial-libs-compat%{?_isa} = %{version}-%{release} %else Requires: mysql-community-client%{?_isa} = %{version}-%{release} Requires: mysql-community-libs%{?_isa} = 
%{version}-%{release} @@ -911,6 +913,9 @@ %endif %changelog +* Tue Jul 22 2014 Balasubramanian Kandasamy - 5.5.39-5 +- Provide mysql-compat-server dependencies + * Tue Jul 08 2014 Balasubramanian Kandasamy - 5.5.39-4 - Remove perl(GD) and dtrace dependencies diff -Nru mariadb-5.5-5.5.39/plugin/auth_socket/auth_socket.c mariadb-5.5-5.5.40/plugin/auth_socket/auth_socket.c --- mariadb-5.5-5.5.39/plugin/auth_socket/auth_socket.c 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/plugin/auth_socket/auth_socket.c 2014-10-08 13:19:51.000000000 +0000 @@ -27,9 +27,29 @@ #define _GNU_SOURCE 1 /* for struct ucred */ #include -#include -#include #include +#include +#include +#include + +#ifdef HAVE_PEERCRED +#define level SOL_SOCKET + +#elif defined HAVE_SOCKPEERCRED +#define level SOL_SOCKET +#define ucred sockpeercred + +#elif defined HAVE_XUCRED +#include +#include +#define level 0 +#define SO_PEERCRED LOCAL_PEERCRED +#define uid cr_uid +#define ucred xucred + +#else +#error impossible +#endif /** perform the unix socket based authentication @@ -63,7 +83,7 @@ return CR_ERROR; /* get the UID of the client process */ - if (getsockopt(vio_info.socket, SOL_SOCKET, SO_PEERCRED, &cred, &cred_len)) + if (getsockopt(vio_info.socket, level, SO_PEERCRED, &cred, &cred_len)) return CR_ERROR; if (cred_len != sizeof(cred)) diff -Nru mariadb-5.5-5.5.39/plugin/auth_socket/CMakeLists.txt mariadb-5.5-5.5.40/plugin/auth_socket/CMakeLists.txt --- mariadb-5.5-5.5.39/plugin/auth_socket/CMakeLists.txt 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/plugin/auth_socket/CMakeLists.txt 2014-10-08 13:19:51.000000000 +0000 @@ -22,18 +22,49 @@ getsockopt(0, SOL_SOCKET, SO_PEERCRED, &cred, 0); }" HAVE_PEERCRED) -IF (NOT HAVE_PEERCRED) - # Hi, OpenBSD! 
- CHECK_CXX_SOURCE_COMPILES( - "#include - #include - int main() { - struct sockpeercred cred; - getsockopt(0, SOL_SOCKET, SO_PEERCRED, &cred, 0); - }" HAVE_SOCKPEERCRED) - ADD_DEFINITIONS(-Ducred=sockpeercred) +IF (HAVE_PEERCRED) + ADD_DEFINITIONS(-DHAVE_PEERCRED) + SET(ok 1) +ELSE() + +# Hi, OpenBSD! +CHECK_CXX_SOURCE_COMPILES( +"#include +#include +int main() { + struct sockpeercred cred; + getsockopt(0, SOL_SOCKET, SO_PEERCRED, &cred, 0); + }" HAVE_SOCKPEERCRED) + +IF (HAVE_SOCKPEERCRED) + ADD_DEFINITIONS(-DHAVE_SOCKPEERCRED) + SET(ok 1) +ELSE() + +# FreeBSD, is that you? +CHECK_CXX_SOURCE_COMPILES( +"#include +#include +#include +#include +int main() { + struct xucred cred; + getsockopt(0, 0, LOCAL_PEERCRED, &cred, 0); + }" HAVE_XUCRED) + +IF (HAVE_XUCRED) + ADD_DEFINITIONS(-DHAVE_XUCRED) + SET(ok 1) +ELSE() + +# Who else? Anyone? +# C'mon, show your creativity, be different! ifdef's are fun, aren't they? + +ENDIF() +ENDIF() ENDIF() -IF(HAVE_PEERCRED OR HAVE_SOCKPEERCRED) +IF(ok) MYSQL_ADD_PLUGIN(auth_socket auth_socket.c MODULE_ONLY) ENDIF() + diff -Nru mariadb-5.5-5.5.39/plugin/handler_socket/handlersocket/database.cpp mariadb-5.5-5.5.40/plugin/handler_socket/handlersocket/database.cpp --- mariadb-5.5-5.5.39/plugin/handler_socket/handlersocket/database.cpp 2014-08-03 12:00:43.000000000 +0000 +++ mariadb-5.5-5.5.40/plugin/handler_socket/handlersocket/database.cpp 2014-10-08 13:19:51.000000000 +0000 @@ -6,6 +6,8 @@ * See COPYRIGHT.txt for details. */ +#include + #include #include #include diff -Nru mariadb-5.5-5.5.39/plugin/handler_socket/handlersocket/handlersocket.cpp mariadb-5.5-5.5.40/plugin/handler_socket/handlersocket/handlersocket.cpp --- mariadb-5.5-5.5.39/plugin/handler_socket/handlersocket/handlersocket.cpp 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/plugin/handler_socket/handlersocket/handlersocket.cpp 2014-10-08 13:19:51.000000000 +0000 @@ -6,6 +6,8 @@ * See COPYRIGHT.txt for details. 
*/ +#include + #include #include #include diff -Nru mariadb-5.5-5.5.39/plugin/handler_socket/handlersocket/hstcpsvr.cpp mariadb-5.5-5.5.40/plugin/handler_socket/handlersocket/hstcpsvr.cpp --- mariadb-5.5-5.5.39/plugin/handler_socket/handlersocket/hstcpsvr.cpp 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/plugin/handler_socket/handlersocket/hstcpsvr.cpp 2014-10-08 13:19:51.000000000 +0000 @@ -6,6 +6,8 @@ * See COPYRIGHT.txt for details. */ +#include + #include #include #include diff -Nru mariadb-5.5-5.5.39/plugin/handler_socket/libhsclient/hstcpcli.cpp mariadb-5.5-5.5.40/plugin/handler_socket/libhsclient/hstcpcli.cpp --- mariadb-5.5-5.5.39/plugin/handler_socket/libhsclient/hstcpcli.cpp 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/plugin/handler_socket/libhsclient/hstcpcli.cpp 2014-10-08 13:19:51.000000000 +0000 @@ -6,6 +6,8 @@ * See COPYRIGHT.txt for details. */ +#include + #include #include "hstcpcli.hpp" diff -Nru mariadb-5.5-5.5.39/plugin/handler_socket/libhsclient/socket.cpp mariadb-5.5-5.5.40/plugin/handler_socket/libhsclient/socket.cpp --- mariadb-5.5-5.5.39/plugin/handler_socket/libhsclient/socket.cpp 2014-08-03 12:00:44.000000000 +0000 +++ mariadb-5.5-5.5.40/plugin/handler_socket/libhsclient/socket.cpp 2014-10-08 13:19:51.000000000 +0000 @@ -6,6 +6,8 @@ * See COPYRIGHT.txt for details. 
*/ +#include + #include #include #include diff -Nru mariadb-5.5-5.5.39/plugin/server_audit/server_audit.c mariadb-5.5-5.5.40/plugin/server_audit/server_audit.c --- mariadb-5.5-5.5.39/plugin/server_audit/server_audit.c 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/plugin/server_audit/server_audit.c 2014-10-08 13:19:51.000000000 +0000 @@ -17,6 +17,8 @@ #define PLUGIN_VERSION 0x101 #define PLUGIN_STR_VERSION "1.1.7" +#include + #include #include #include diff -Nru mariadb-5.5-5.5.39/scripts/mysqlhotcopy.sh mariadb-5.5-5.5.40/scripts/mysqlhotcopy.sh --- mariadb-5.5-5.5.39/scripts/mysqlhotcopy.sh 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/scripts/mysqlhotcopy.sh 2014-10-08 13:19:52.000000000 +0000 @@ -56,6 +56,9 @@ # Documentation continued at end of file +# fix CORE::GLOBAL::die to return a predictable exit code +BEGIN { *CORE::GLOBAL::die= sub { warn @_; exit 1; }; } + my $VERSION = "1.23"; my $opt_tmpdir = $ENV{TMPDIR} || "/tmp"; diff -Nru mariadb-5.5-5.5.39/scripts/mysql_system_tables_fix.sql mariadb-5.5-5.5.40/scripts/mysql_system_tables_fix.sql --- mariadb-5.5-5.5.39/scripts/mysql_system_tables_fix.sql 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/scripts/mysql_system_tables_fix.sql 2014-10-08 13:19:52.000000000 +0000 @@ -668,6 +668,23 @@ INSERT INTO proxies_priv SELECT * FROM tmp_proxies_priv WHERE @had_proxies_priv_table=0; DROP TABLE tmp_proxies_priv; +-- Checking for any duplicate hostname and username combination are exists. +-- If exits we will throw error. 
+DROP PROCEDURE IF EXISTS mysql.count_duplicate_host_names; +DELIMITER // +CREATE PROCEDURE mysql.count_duplicate_host_names() +BEGIN + SET @duplicate_hosts=(SELECT count(*) FROM mysql.user GROUP BY user, lower(host) HAVING count(*) > 1 LIMIT 1); + IF @duplicate_hosts > 1 THEN + SIGNAL SQLSTATE '45000' SET MESSAGE_TEXT = 'Multiple accounts exist for @user_name, @host_name that differ only in Host lettercase; remove all except one of them'; + END IF; +END // +DELIMITER ; +CALL mysql.count_duplicate_host_names(); +-- Get warnings (if any) +SHOW WARNINGS; +DROP PROCEDURE mysql.count_duplicate_host_names; + # Convering the host name to lower case for existing users UPDATE user SET host=LOWER( host ) WHERE LOWER( host ) <> host; diff -Nru mariadb-5.5-5.5.39/sql/CMakeLists.txt mariadb-5.5-5.5.40/sql/CMakeLists.txt --- mariadb-5.5-5.5.39/sql/CMakeLists.txt 2014-08-03 12:00:42.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/CMakeLists.txt 2014-10-08 13:19:52.000000000 +0000 @@ -32,13 +32,6 @@ ADD_DEFINITIONS(-DMYSQL_SERVER -DHAVE_EVENT_SCHEDULER) -IF (CMAKE_SYSTEM_NAME MATCHES "Linux" OR - CMAKE_SYSTEM_NAME MATCHES "Windows" OR - CMAKE_SYSTEM_NAME MATCHES "SunOS" OR - HAVE_KQUEUE) - ADD_DEFINITIONS(-DHAVE_POOL_OF_THREADS) -ENDIF() - IF(SSL_DEFINES) ADD_DEFINITIONS(${SSL_DEFINES}) ENDIF() @@ -97,10 +90,16 @@ ${MYSYS_LIBWRAP_SOURCE} ) -IF(WIN32) - SET(SQL_SOURCE ${SQL_SOURCE} threadpool_win.cc) -ELSE() - SET(SQL_SOURCE ${SQL_SOURCE} threadpool_unix.cc) +IF (CMAKE_SYSTEM_NAME MATCHES "Linux" OR + CMAKE_SYSTEM_NAME MATCHES "Windows" OR + CMAKE_SYSTEM_NAME MATCHES "SunOS" OR + HAVE_KQUEUE) + ADD_DEFINITIONS(-DHAVE_POOL_OF_THREADS) + IF(WIN32) + SET(SQL_SOURCE ${SQL_SOURCE} threadpool_win.cc) + ELSE() + SET(SQL_SOURCE ${SQL_SOURCE} threadpool_unix.cc) + ENDIF() ENDIF() MYSQL_ADD_PLUGIN(partition ha_partition.cc STORAGE_ENGINE DEFAULT STATIC_ONLY diff -Nru mariadb-5.5-5.5.39/sql/event_scheduler.cc mariadb-5.5-5.5.40/sql/event_scheduler.cc --- mariadb-5.5-5.5.39/sql/event_scheduler.cc 
2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/event_scheduler.cc 2014-10-08 13:19:52.000000000 +0000 @@ -355,14 +355,7 @@ mysql_mutex_init(key_event_scheduler_LOCK_scheduler_state, &LOCK_scheduler_state, MY_MUTEX_INIT_FAST); mysql_cond_init(key_event_scheduler_COND_state, &COND_state, NULL); - -#ifdef SAFE_MUTEX - /* Ensure right mutex order */ - mysql_mutex_lock(&LOCK_scheduler_state); - mysql_mutex_lock(&LOCK_global_system_variables); - mysql_mutex_unlock(&LOCK_global_system_variables); - mysql_mutex_unlock(&LOCK_scheduler_state); -#endif + mysql_mutex_record_order(&LOCK_scheduler_state, &LOCK_global_system_variables); } diff -Nru mariadb-5.5-5.5.39/sql/handler.cc mariadb-5.5-5.5.40/sql/handler.cc --- mariadb-5.5-5.5.39/sql/handler.cc 2014-08-03 12:00:42.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/handler.cc 2014-10-08 13:19:52.000000000 +0000 @@ -2819,15 +2819,10 @@ if (error) { if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND) - { - /* No entry found, start with 1. */ - nr= 1; - } + /* No entry found, that's fine */; else - { - DBUG_ASSERT(0); - nr= ULONGLONG_MAX; - } + print_error(error, MYF(0)); + nr= 1; } else nr= ((ulonglong) table->next_number_field-> diff -Nru mariadb-5.5-5.5.39/sql/ha_partition.cc mariadb-5.5-5.5.40/sql/ha_partition.cc --- mariadb-5.5-5.5.39/sql/ha_partition.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/ha_partition.cc 2014-10-08 13:19:52.000000000 +0000 @@ -7854,8 +7854,7 @@ ulonglong first_value_part, max_first_value; handler **file= m_file; first_value_part= max_first_value= *first_value; - /* Must lock and find highest value among all partitions. */ - lock_auto_increment(); + /* Must find highest value among all partitions. 
*/ do { /* Only nb_desired_values = 1 makes sense */ @@ -7866,7 +7865,6 @@ *first_value= first_value_part; /* log that the error was between table/partition handler */ sql_print_error("Partition failed to reserve auto_increment value"); - unlock_auto_increment(); DBUG_VOID_RETURN; } DBUG_PRINT("info", ("first_value_part: %lu", (ulong) first_value_part)); @@ -7874,7 +7872,6 @@ } while (*(++file)); *first_value= max_first_value; *nb_reserved_values= 1; - unlock_auto_increment(); } else { diff -Nru mariadb-5.5-5.5.39/sql/item.cc mariadb-5.5-5.5.40/sql/item.cc --- mariadb-5.5-5.5.39/sql/item.cc 2014-08-03 12:00:42.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/item.cc 2014-10-08 13:19:52.000000000 +0000 @@ -1,6 +1,6 @@ /* - Copyright (c) 2000, 2013, Oracle and/or its affiliates. - Copyright (c) 2010, 2013, Monty Program Ab. + Copyright (c) 2000, 2014, Oracle and/or its affiliates. + Copyright (c) 2010, 2014, Monty Program Ab. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -5638,6 +5638,18 @@ } +/** + Verifies that the input string is well-formed according to its character set. + @param send_error If true, call my_error if string is not well-formed. + + Will truncate input string if it is not well-formed. + + @return + If well-formed: input string. 
+ If not well-formed: + if strict mode: NULL pointer and we set this Item's value to NULL + if not strict mode: input string truncated up to last good character + */ String *Item::check_well_formed_result(String *str, bool send_error) { /* Check whether we got a well-formed string */ @@ -9015,17 +9027,11 @@ int Item_cache_temporal::save_in_field(Field *field, bool no_conversions) { - int error; - if (!has_value()) + MYSQL_TIME ltime; + if (get_date(<ime, 0)) return set_field_to_null_with_conversions(field, no_conversions); - field->set_notnull(); - - MYSQL_TIME ltime; - unpack_time(value, <ime); - ltime.time_type= mysql_type_to_time_type(field_type()); - error= field->store_time_dec(<ime, decimals); - + int error= field->store_time_dec(<ime, decimals); return error ? error : field->table->in_use->is_error() ? 1 : 0; } diff -Nru mariadb-5.5-5.5.39/sql/item_sum.cc mariadb-5.5-5.5.40/sql/item_sum.cc --- mariadb-5.5-5.5.39/sql/item_sum.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/item_sum.cc 2014-10-08 13:19:51.000000000 +0000 @@ -3181,19 +3181,13 @@ /* We need to allocate: args - arg_count_field+arg_count_order - (for possible order items in temporare tables) + (for possible order items in temporary tables) order - arg_count_order */ - if (!(args= (Item**) sql_alloc(sizeof(Item*) * arg_count + + if (!(args= (Item**) sql_alloc(sizeof(Item*) * arg_count * 2 + sizeof(ORDER*)*arg_count_order))) return; - if (!(orig_args= (Item **) sql_alloc(sizeof(Item *) * arg_count))) - { - args= NULL; - return; - } - order= (ORDER**)(args + arg_count); /* fill args items of show and sort */ @@ -3214,6 +3208,9 @@ order_item->item= arg_ptr++; } } + + /* orig_args is only used for print() */ + orig_args= (Item**) (order + arg_count_order); memcpy(orig_args, args, sizeof(Item*) * arg_count); } @@ -3297,6 +3294,7 @@ } DBUG_ASSERT(tree == 0); } + DBUG_VOID_RETURN; } diff -Nru mariadb-5.5-5.5.39/sql/log.cc mariadb-5.5-5.5.40/sql/log.cc --- mariadb-5.5-5.5.39/sql/log.cc 
2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/log.cc 2014-10-08 13:19:52.000000000 +0000 @@ -2357,6 +2357,7 @@ const char *new_name, enum cache_type io_cache_type_arg) { char buff[FN_REFLEN]; + MY_STAT f_stat; File file= -1; int open_flags= O_CREAT | O_BINARY; DBUG_ENTER("MYSQL_LOG::open"); @@ -2374,6 +2375,10 @@ log_type_arg, io_cache_type_arg)) goto err; + /* File is regular writable file */ + if (my_stat(log_file_name, &f_stat, MYF(0)) && !MY_S_ISREG(f_stat.st_mode)) + goto err; + if (io_cache_type == SEQ_READ_APPEND) open_flags |= O_RDWR | O_APPEND; else diff -Nru mariadb-5.5-5.5.39/sql/log_event.cc mariadb-5.5-5.5.40/sql/log_event.cc --- mariadb-5.5-5.5.39/sql/log_event.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/log_event.cc 2014-10-08 13:19:51.000000000 +0000 @@ -3649,7 +3649,7 @@ if ((error= rows_event_stmt_cleanup(const_cast(rli), thd))) { const_cast(rli)->report(ERROR_LEVEL, error, - "Error in cleaning up after an event preceeding the commit; " + "Error in cleaning up after an event preceding the commit; " "the group log file/position: %s %s", const_cast(rli)->group_master_log_name, llstr(const_cast(rli)->group_master_log_pos, diff -Nru mariadb-5.5-5.5.39/sql/mysqld.cc mariadb-5.5-5.5.40/sql/mysqld.cc --- mariadb-5.5-5.5.39/sql/mysqld.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/mysqld.cc 2014-10-08 13:19:51.000000000 +0000 @@ -363,7 +363,8 @@ /* Global variables */ bool opt_bin_log, opt_bin_log_used=0, opt_ignore_builtin_innodb= 0; -my_bool opt_log, opt_slow_log, debug_assert_if_crashed_table= 0, opt_help= 0, opt_abort; +my_bool opt_log, opt_slow_log, debug_assert_if_crashed_table= 0, opt_help= 0; +static my_bool opt_abort; ulonglong log_output_options; my_bool opt_userstat_running; my_bool opt_log_queries_not_using_indexes= 0; @@ -2948,9 +2949,6 @@ sa.sa_flags = 0; sa.sa_handler = print_signal_warning; sigaction(SIGHUP, &sa, (struct sigaction*) 0); -#ifdef SIGTSTP - sigaddset(&set,SIGTSTP); 
-#endif if (thd_lib_detected != THD_LIB_LT) sigaddset(&set,THR_SERVER_ALARM); if (test_flags & TEST_SIGINT) @@ -2960,7 +2958,12 @@ sigdelset(&set, SIGINT); } else + { sigaddset(&set,SIGINT); +#ifdef SIGTSTP + sigaddset(&set,SIGTSTP); +#endif + } sigprocmask(SIG_SETMASK,&set,NULL); pthread_sigmask(SIG_SETMASK,&set,NULL); @@ -7585,6 +7588,9 @@ test_flags= argument ? (uint) atoi(argument) : 0; opt_endinfo=1; break; + case OPT_THREAD_CONCURRENCY: + WARN_DEPRECATED_NO_REPLACEMENT(NULL, "THREAD_CONCURRENCY"); + break; case (int) OPT_ISAM_LOG: opt_myisam_log=1; break; diff -Nru mariadb-5.5-5.5.39/sql/mysqld.h mariadb-5.5-5.5.40/sql/mysqld.h --- mariadb-5.5-5.5.39/sql/mysqld.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/mysqld.h 2014-10-08 13:19:52.000000000 +0000 @@ -406,6 +406,7 @@ OPT_SSL_CERT, OPT_SSL_CIPHER, OPT_SSL_KEY, + OPT_THREAD_CONCURRENCY, OPT_UPDATE_LOG, OPT_WANT_CORE, OPT_which_is_always_the_last diff -Nru mariadb-5.5-5.5.39/sql/slave.h mariadb-5.5-5.5.40/sql/slave.h --- mariadb-5.5-5.5.39/sql/slave.h 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/slave.h 2014-10-08 13:19:52.000000000 +0000 @@ -230,22 +230,16 @@ bool net_request_file(NET* net, const char* fname); extern bool volatile abort_loop; -extern Master_info main_mi, *active_mi; /* active_mi for multi-master */ -extern LIST master_list; +extern Master_info *active_mi; /* active_mi for multi-master */ extern my_bool replicate_same_server_id; extern int disconnect_slave_event_count, abort_slave_event_count ; /* the master variables are defaults read from my.cnf or command line */ -extern uint master_port, master_connect_retry, report_port; -extern char * master_user, *master_password, *master_host; +extern uint report_port; extern char *master_info_file, *report_user; extern char *report_host, *report_password; -extern my_bool master_ssl; -extern char *master_ssl_ca, *master_ssl_capath, *master_ssl_cert; -extern char *master_ssl_cipher, *master_ssl_key; - extern I_List 
threads; #else diff -Nru mariadb-5.5-5.5.39/sql/sp_head.cc mariadb-5.5-5.5.40/sql/sp_head.cc --- mariadb-5.5-5.5.39/sql/sp_head.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/sp_head.cc 2014-10-08 13:19:52.000000000 +0000 @@ -1224,6 +1224,8 @@ Item_change_list old_change_list; String old_packet; uint old_server_status; + const uint status_backup_mask= SERVER_STATUS_CURSOR_EXISTS | + SERVER_STATUS_LAST_ROW_SENT; Reprepare_observer *save_reprepare_observer= thd->m_reprepare_observer; Object_creation_ctx *saved_creation_ctx; Warning_info *saved_warning_info; @@ -1358,7 +1360,7 @@ It is probably safe to use same thd->convert_buff everywhere. */ old_packet.swap(thd->packet); - old_server_status= thd->server_status; + old_server_status= thd->server_status & status_backup_mask; /* Switch to per-instruction arena here. We can do it since we cleanup @@ -1488,7 +1490,7 @@ thd->spcont->pop_all_cursors(); // To avoid memory leaks after an error /* Restore all saved */ - thd->server_status= old_server_status; + thd->server_status= (thd->server_status & ~status_backup_mask) | old_server_status; old_packet.swap(thd->packet); DBUG_ASSERT(thd->change_list.is_empty()); old_change_list.move_elements_to(&thd->change_list); diff -Nru mariadb-5.5-5.5.39/sql/sql_acl.cc mariadb-5.5-5.5.40/sql/sql_acl.cc --- mariadb-5.5-5.5.39/sql/sql_acl.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/sql_acl.cc 2014-10-08 13:19:52.000000000 +0000 @@ -9224,9 +9224,9 @@ mpvio.auth_info.authenticated_as, TRUE); if (!acl_proxy_user) { + mysql_mutex_unlock(&acl_cache->lock); if (!thd->is_error()) login_failed_error(thd); - mysql_mutex_unlock(&acl_cache->lock); DBUG_RETURN(1); } acl_user= acl_proxy_user->copy(thd->mem_root); diff -Nru mariadb-5.5-5.5.39/sql/sql_cache.cc mariadb-5.5-5.5.40/sql/sql_cache.cc --- mariadb-5.5-5.5.39/sql/sql_cache.cc 2014-08-03 12:00:42.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/sql_cache.cc 2014-10-08 13:19:51.000000000 +0000 @@ -3280,7 +3280,7 
@@ There are not callback function for for VIEWs */ if (!insert_table(key_length, key, (*block_table), - tables_used->view_db.length + 1, + tables_used->view_db.length, HA_CACHE_TBL_NONTRANSACT, 0, 0, TRUE)) DBUG_RETURN(0); /* diff -Nru mariadb-5.5-5.5.39/sql/sql_join_cache.cc mariadb-5.5-5.5.40/sql/sql_join_cache.cc --- mariadb-5.5-5.5.39/sql/sql_join_cache.cc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/sql_join_cache.cc 2014-10-08 13:19:52.000000000 +0000 @@ -2088,7 +2088,7 @@ goto finish; if (outer_join_first_inner) { - if (next_cache) + if (next_cache && join_tab != join_tab->last_inner) { /* Ensure that all matches for outer records from join buffer are to be diff -Nru mariadb-5.5-5.5.39/sql/sql_priv.h mariadb-5.5-5.5.40/sql/sql_priv.h --- mariadb-5.5-5.5.39/sql/sql_priv.h 2014-08-03 12:00:33.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/sql_priv.h 2014-10-08 13:19:52.000000000 +0000 @@ -1,5 +1,5 @@ -/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. - Copyright (c) 2010-2011 Monty Program Ab +/* Copyright (c) 2000, 2014, Oracle and/or its affiliates. + Copyright (c) 2010, 2014, Monty Program Ab. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -60,6 +60,33 @@ (Old), (New)); \ } while(0) + +/* + Generates a warning that a feature is deprecated and there is no replacement. + + Using it as + + WARN_DEPRECATED_NO_REPLACEMENT(thd, "BAD"); + + Will result in a warning + + "'BAD' is deprecated and will be removed in a future release." + + Note that in macro arguments BAD is not quoted. 
+*/ + +#define WARN_DEPRECATED_NO_REPLACEMENT(Thd,Old) \ + do { \ + if (((THD *) Thd) != NULL) \ + push_warning_printf(((THD *) Thd), MYSQL_ERROR::WARN_LEVEL_WARN, \ + ER_WARN_DEPRECATED_SYNTAX_NO_REPLACEMENT, \ + ER(ER_WARN_DEPRECATED_SYNTAX_NO_REPLACEMENT), \ + (Old)); \ + else \ + sql_print_warning("'%s' is deprecated and will be removed " \ + "in a future release.", (Old)); \ + } while(0) + /*************************************************************************/ #endif diff -Nru mariadb-5.5-5.5.39/sql/sql_select.cc mariadb-5.5-5.5.40/sql/sql_select.cc --- mariadb-5.5-5.5.39/sql/sql_select.cc 2014-08-03 12:00:42.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/sql_select.cc 2014-10-08 13:19:52.000000000 +0000 @@ -9202,6 +9202,25 @@ } +static +uint get_next_field_for_derived_key_simple(uchar *arg) +{ + KEYUSE *keyuse= *(KEYUSE **) arg; + if (!keyuse) + return (uint) (-1); + TABLE *table= keyuse->table; + uint key= keyuse->key; + uint fldno= keyuse->keypart; + for ( ; + keyuse->table == table && keyuse->key == key && keyuse->keypart == fldno; + keyuse++) + ; + if (keyuse->key != key) + keyuse= 0; + *((KEYUSE **) arg)= keyuse; + return fldno; +} + static bool generate_derived_keys_for_table(KEYUSE *keyuse, uint count, uint keys) { @@ -9232,12 +9251,28 @@ } else { - if (table->add_tmp_key(table->s->keys, parts, - get_next_field_for_derived_key, - (uchar *) &first_keyuse, - FALSE)) - return TRUE; - table->reginfo.join_tab->keys.set_bit(table->s->keys); + KEYUSE *save_first_keyuse= first_keyuse; + if (table->check_tmp_key(table->s->keys, parts, + get_next_field_for_derived_key_simple, + (uchar *) &first_keyuse)) + + { + first_keyuse= save_first_keyuse; + if (table->add_tmp_key(table->s->keys, parts, + get_next_field_for_derived_key, + (uchar *) &first_keyuse, + FALSE)) + return TRUE; + table->reginfo.join_tab->keys.set_bit(table->s->keys); + } + else + { + /* Mark keyuses for this key to be excluded */ + for (KEYUSE *curr=save_first_keyuse; curr < first_keyuse; curr++) + 
{ + curr->key= MAX_KEY; + } + } first_keyuse= keyuse; key_count++; parts= 0; @@ -20390,7 +20425,7 @@ order_item->full_name(), thd->where); return TRUE; } - order->item= ref_pointer_array + count - 1; + thd->change_item_tree((Item**)&order->item, (Item*)(ref_pointer_array + count - 1)); order->in_field_list= 1; order->counter= count; order->counter_used= 1; @@ -20423,7 +20458,7 @@ order_item_type == Item::REF_ITEM) { from_field= find_field_in_tables(thd, (Item_ident*) order_item, tables, - NULL, &view_ref, IGNORE_ERRORS, TRUE, + NULL, &view_ref, IGNORE_ERRORS, FALSE, FALSE); if (!from_field) from_field= (Field*) not_found_field; diff -Nru mariadb-5.5-5.5.39/sql/sql_show.cc mariadb-5.5-5.5.40/sql/sql_show.cc --- mariadb-5.5-5.5.39/sql/sql_show.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/sql_show.cc 2014-10-08 13:19:52.000000000 +0000 @@ -2178,77 +2178,77 @@ Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) DBUG_VOID_RETURN; + if (thd->killed) + DBUG_VOID_RETURN; + mysql_mutex_lock(&LOCK_thread_count); // For unlink from list - if (!thd->killed) + I_List_iterator it(threads); + THD *tmp; + while ((tmp=it++)) { - I_List_iterator it(threads); - THD *tmp; - while ((tmp=it++)) - { - Security_context *tmp_sctx= tmp->security_ctx; - struct st_my_thread_var *mysys_var; - if ((tmp->vio_ok() || tmp->system_thread) && - (!user || (tmp_sctx->user && !strcmp(tmp_sctx->user, user)))) + Security_context *tmp_sctx= tmp->security_ctx; + struct st_my_thread_var *mysys_var; + if ((tmp->vio_ok() || tmp->system_thread) && + (!user || (tmp_sctx->user && !strcmp(tmp_sctx->user, user)))) + { + thread_info *thd_info= new thread_info; + + thd_info->thread_id=tmp->thread_id; + thd_info->user= thd->strdup(tmp_sctx->user ? tmp_sctx->user : + (tmp->system_thread ? 
+ "system user" : "unauthenticated user")); + if (tmp->peer_port && (tmp_sctx->host || tmp_sctx->ip) && + thd->security_ctx->host_or_ip[0]) { - thread_info *thd_info= new thread_info; - - thd_info->thread_id=tmp->thread_id; - thd_info->user= thd->strdup(tmp_sctx->user ? tmp_sctx->user : - (tmp->system_thread ? - "system user" : "unauthenticated user")); - if (tmp->peer_port && (tmp_sctx->host || tmp_sctx->ip) && - thd->security_ctx->host_or_ip[0]) - { - if ((thd_info->host= (char*) thd->alloc(LIST_PROCESS_HOST_LEN+1))) - my_snprintf((char *) thd_info->host, LIST_PROCESS_HOST_LEN, - "%s:%u", tmp_sctx->host_or_ip, tmp->peer_port); - } - else - thd_info->host= thd->strdup(tmp_sctx->host_or_ip[0] ? - tmp_sctx->host_or_ip : - tmp_sctx->host ? tmp_sctx->host : ""); - thd_info->command=(int) tmp->command; - mysql_mutex_lock(&tmp->LOCK_thd_data); - if ((thd_info->db= tmp->db)) // Safe test - thd_info->db= thd->strdup(thd_info->db); - if ((mysys_var= tmp->mysys_var)) - mysql_mutex_lock(&mysys_var->mutex); - thd_info->proc_info= (char*) (tmp->killed >= KILL_QUERY ? - "Killed" : 0); - thd_info->state_info= thread_state_info(tmp); - if (mysys_var) - mysql_mutex_unlock(&mysys_var->mutex); + if ((thd_info->host= (char*) thd->alloc(LIST_PROCESS_HOST_LEN+1))) + my_snprintf((char *) thd_info->host, LIST_PROCESS_HOST_LEN, + "%s:%u", tmp_sctx->host_or_ip, tmp->peer_port); + } + else + thd_info->host= thd->strdup(tmp_sctx->host_or_ip[0] ? + tmp_sctx->host_or_ip : + tmp_sctx->host ? tmp_sctx->host : ""); + thd_info->command=(int) tmp->command; + mysql_mutex_lock(&tmp->LOCK_thd_data); + if ((thd_info->db= tmp->db)) // Safe test + thd_info->db= thd->strdup(thd_info->db); + if ((mysys_var= tmp->mysys_var)) + mysql_mutex_lock(&mysys_var->mutex); + thd_info->proc_info= (char*) (tmp->killed >= KILL_QUERY ? + "Killed" : 0); + thd_info->state_info= thread_state_info(tmp); + if (mysys_var) + mysql_mutex_unlock(&mysys_var->mutex); - /* Lock THD mutex that protects its data when looking at it. 
*/ - if (tmp->query()) - { - uint length= min(max_query_length, tmp->query_length()); - char *q= thd->strmake(tmp->query(),length); - /* Safety: in case strmake failed, we set length to 0. */ - thd_info->query_string= - CSET_STRING(q, q ? length : 0, tmp->query_charset()); - } + /* Lock THD mutex that protects its data when looking at it. */ + if (tmp->query()) + { + uint length= min(max_query_length, tmp->query_length()); + char *q= thd->strmake(tmp->query(),length); + /* Safety: in case strmake failed, we set length to 0. */ + thd_info->query_string= + CSET_STRING(q, q ? length : 0, tmp->query_charset()); + } - /* - Progress report. We need to do this under a lock to ensure that all - is from the same stage. - */ - if (tmp->progress.max_counter) - { - uint max_stage= max(tmp->progress.max_stage, 1); - thd_info->progress= (((tmp->progress.stage / (double) max_stage) + - ((tmp->progress.counter / - (double) tmp->progress.max_counter) / - (double) max_stage)) * - 100.0); - set_if_smaller(thd_info->progress, 100); - } - else - thd_info->progress= 0.0; - thd_info->start_time= tmp->start_time; - mysql_mutex_unlock(&tmp->LOCK_thd_data); - thread_infos.append(thd_info); + /* + Progress report. We need to do this under a lock to ensure that all + is from the same stage. 
+ */ + if (tmp->progress.max_counter) + { + uint max_stage= max(tmp->progress.max_stage, 1); + thd_info->progress= (((tmp->progress.stage / (double) max_stage) + + ((tmp->progress.counter / + (double) tmp->progress.max_counter) / + (double) max_stage)) * + 100.0); + set_if_smaller(thd_info->progress, 100); } + else + thd_info->progress= 0.0; + thd_info->start_time= tmp->start_time; + mysql_mutex_unlock(&tmp->LOCK_thd_data); + thread_infos.append(thd_info); } } mysql_mutex_unlock(&LOCK_thread_count); @@ -2648,12 +2648,11 @@ char *value=var->value; const char *pos, *end; // We assign a lot of const's - mysql_mutex_lock(&LOCK_global_system_variables); - if (show_type == SHOW_SYS) { sys_var *var= ((sys_var *) value); show_type= var->show_type(); + mysql_mutex_lock(&LOCK_global_system_variables); value= (char*) var->value_ptr(thd, value_type, &null_lex_str); charset= var->charset(thd); } @@ -2754,7 +2753,8 @@ thd->count_cuted_fields= CHECK_FIELD_IGNORE; table->field[1]->set_notnull(); - mysql_mutex_unlock(&LOCK_global_system_variables); + if (var->type == SHOW_SYS) + mysql_mutex_unlock(&LOCK_global_system_variables); if (schema_table_store_record(thd, table)) { @@ -6934,7 +6934,7 @@ bool upper_case_names= (schema_table_idx != SCH_VARIABLES); bool sorted_vars= (schema_table_idx == SCH_VARIABLES); - if (lex->option_type == OPT_GLOBAL || + if ((sorted_vars && lex->option_type == OPT_GLOBAL) || schema_table_idx == SCH_GLOBAL_VARIABLES) option_type= OPT_GLOBAL; diff -Nru mariadb-5.5-5.5.39/sql/sys_vars.cc mariadb-5.5-5.5.40/sql/sys_vars.cc --- mariadb-5.5-5.5.39/sql/sys_vars.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/sys_vars.cc 2014-10-08 13:19:52.000000000 +0000 @@ -1772,7 +1772,8 @@ "the desired number of threads that should be run at the same time." "This variable has no effect, and is deprecated. 
" "It will be removed in a future release.", - READ_ONLY GLOBAL_VAR(concurrency), CMD_LINE(REQUIRED_ARG), + READ_ONLY GLOBAL_VAR(concurrency), + CMD_LINE(REQUIRED_ARG, OPT_THREAD_CONCURRENCY), VALID_RANGE(1, 512), DEFAULT(DEFAULT_CONCURRENCY), BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(0), DEPRECATED("")); diff -Nru mariadb-5.5-5.5.39/sql/table.cc mariadb-5.5-5.5.40/sql/table.cc --- mariadb-5.5-5.5.39/sql/table.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/table.cc 2014-10-08 13:19:52.000000000 +0000 @@ -5996,6 +5996,52 @@ /** @brief + Check validity of a possible key for the derived table + + @param key the number of the key + @param key_parts number of components of the key + @param next_field_no the call-back function that returns the number of + the field used as the next component of the key + @param arg the argument for the above function + + @details + The function checks whether a possible key satisfies the constraints + imposed on the keys of any temporary table. + + @return TRUE if the key is valid + @return FALSE otherwise +*/ + +bool TABLE::check_tmp_key(uint key, uint key_parts, + uint (*next_field_no) (uchar *), uchar *arg) +{ + Field **reg_field; + uint i; + uint key_len= 0; + + for (i= 0; i < key_parts; i++) + { + uint fld_idx= next_field_no(arg); + reg_field= field + fld_idx; + uint fld_store_len= (uint16) (*reg_field)->key_length(); + if ((*reg_field)->real_maybe_null()) + fld_store_len+= HA_KEY_NULL_LENGTH; + if ((*reg_field)->type() == MYSQL_TYPE_BLOB || + (*reg_field)->real_type() == MYSQL_TYPE_VARCHAR || + (*reg_field)->type() == MYSQL_TYPE_GEOMETRY) + fld_store_len+= HA_KEY_BLOB_LENGTH; + key_len+= fld_store_len; + } + /* + We use MI_MAX_KEY_LENGTH (myisam's default) below because it is + smaller than MAX_KEY_LENGTH (heap's default) and it's unknown whether + myisam or heap will be used for the temporary table. 
+ */ + return key_len <= MI_MAX_KEY_LENGTH; +} + +/** + @brief Add one key to a temporary table @param key the number of the key @@ -6025,6 +6071,7 @@ KEY* keyinfo; Field **reg_field; uint i; + bool key_start= TRUE; KEY_PART_INFO* key_part_info= (KEY_PART_INFO*) alloc_root(&mem_root, sizeof(KEY_PART_INFO)*key_parts); diff -Nru mariadb-5.5-5.5.39/sql/table.h mariadb-5.5-5.5.40/sql/table.h --- mariadb-5.5-5.5.39/sql/table.h 2014-08-03 12:00:33.000000000 +0000 +++ mariadb-5.5-5.5.40/sql/table.h 2014-10-08 13:19:52.000000000 +0000 @@ -1264,6 +1264,8 @@ { return !db_stat || m_needs_reopen; } bool alloc_keys(uint key_count); + bool check_tmp_key(uint key, uint key_parts, + uint (*next_field_no) (uchar *), uchar *arg); bool add_tmp_key(uint key, uint key_parts, uint (*next_field_no) (uchar *), uchar *arg, bool unique); diff -Nru mariadb-5.5-5.5.39/sql-common/client.c mariadb-5.5-5.5.40/sql-common/client.c --- mariadb-5.5-5.5.39/sql-common/client.c 2014-08-03 12:00:33.000000000 +0000 +++ mariadb-5.5-5.5.40/sql-common/client.c 2014-10-08 13:19:52.000000000 +0000 @@ -1906,6 +1906,12 @@ DBUG_RETURN(1); } + if (X509_V_OK != SSL_get_verify_result(ssl)) + { + *errptr= "Failed to verify the server certificate"; + X509_free(server_cert); + DBUG_RETURN(1); + } /* We already know that the certificate exchanged was valid; the SSL library handled that. 
Now we need to verify that the contents of the certificate diff -Nru mariadb-5.5-5.5.39/storage/example/ha_example.cc mariadb-5.5-5.5.40/storage/example/ha_example.cc --- mariadb-5.5-5.5.39/storage/example/ha_example.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/example/ha_example.cc 2014-10-08 13:19:51.000000000 +0000 @@ -98,6 +98,7 @@ #pragma implementation // gcc: Class implementation #endif +#include #include #include "ha_example.h" #include "sql_class.h" diff -Nru mariadb-5.5-5.5.39/storage/federatedx/federatedx_io.cc mariadb-5.5-5.5.40/storage/federatedx/federatedx_io.cc --- mariadb-5.5-5.5.39/storage/federatedx/federatedx_io.cc 2014-08-03 12:00:44.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/federatedx/federatedx_io.cc 2014-10-08 13:19:51.000000000 +0000 @@ -28,6 +28,7 @@ /*#define MYSQL_SERVER 1*/ +#include #include "sql_priv.h" #include diff -Nru mariadb-5.5-5.5.39/storage/federatedx/federatedx_io_mysql.cc mariadb-5.5-5.5.40/storage/federatedx/federatedx_io_mysql.cc --- mariadb-5.5-5.5.39/storage/federatedx/federatedx_io_mysql.cc 2014-08-03 12:00:43.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/federatedx/federatedx_io_mysql.cc 2014-10-08 13:19:51.000000000 +0000 @@ -28,6 +28,7 @@ #define MYSQL_SERVER 1 +#include #include "sql_priv.h" #include diff -Nru mariadb-5.5-5.5.39/storage/federatedx/federatedx_io_null.cc mariadb-5.5-5.5.40/storage/federatedx/federatedx_io_null.cc --- mariadb-5.5-5.5.39/storage/federatedx/federatedx_io_null.cc 2014-08-03 12:00:44.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/federatedx/federatedx_io_null.cc 2014-10-08 13:19:51.000000000 +0000 @@ -28,6 +28,7 @@ /*#define MYSQL_SERVER 1*/ +#include #include "sql_priv.h" #include diff -Nru mariadb-5.5-5.5.39/storage/federatedx/federatedx_txn.cc mariadb-5.5-5.5.40/storage/federatedx/federatedx_txn.cc --- mariadb-5.5-5.5.39/storage/federatedx/federatedx_txn.cc 2014-08-03 12:00:44.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/federatedx/federatedx_txn.cc 
2014-10-08 13:19:51.000000000 +0000 @@ -31,6 +31,7 @@ #endif #define MYSQL_SERVER 1 +#include #include "sql_priv.h" #include diff -Nru mariadb-5.5-5.5.39/storage/federatedx/ha_federatedx.cc mariadb-5.5-5.5.40/storage/federatedx/ha_federatedx.cc --- mariadb-5.5-5.5.39/storage/federatedx/ha_federatedx.cc 2014-08-03 12:00:43.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/federatedx/ha_federatedx.cc 2014-10-08 13:19:51.000000000 +0000 @@ -312,6 +312,7 @@ #endif #define MYSQL_SERVER 1 +#include #include #include "ha_federatedx.h" #include "sql_servers.h" diff -Nru mariadb-5.5-5.5.39/storage/heap/hp_hash.c mariadb-5.5-5.5.40/storage/heap/hp_hash.c --- mariadb-5.5-5.5.39/storage/heap/hp_hash.c 2014-08-03 12:00:37.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/heap/hp_hash.c 2014-10-08 13:19:52.000000000 +0000 @@ -876,8 +876,13 @@ if (seg->null_bit) { if (!(*key++= (char) 1 - *old++)) + { + /* Add key pack length (2) to key for VARCHAR segments */ + if (seg->type == HA_KEYTYPE_VARTEXT1) + old+= 2; continue; } + } if (seg->flag & HA_SWAP_KEY) { uint length= seg->length; diff -Nru mariadb-5.5-5.5.39/storage/innobase/buf/buf0buf.c mariadb-5.5-5.5.40/storage/innobase/buf/buf0buf.c --- mariadb-5.5-5.5.39/storage/innobase/buf/buf0buf.c 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/innobase/buf/buf0buf.c 2014-10-08 13:19:52.000000000 +0000 @@ -3616,6 +3616,7 @@ " because of" " a corrupt database page.\n", stderr); + ut_error; } } diff -Nru mariadb-5.5-5.5.39/storage/innobase/CMakeLists.txt mariadb-5.5-5.5.40/storage/innobase/CMakeLists.txt --- mariadb-5.5-5.5.39/storage/innobase/CMakeLists.txt 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/innobase/CMakeLists.txt 2014-10-08 13:19:51.000000000 +0000 @@ -85,12 +85,39 @@ }" HAVE_IB_GCC_ATOMIC_BUILTINS ) + CHECK_C_SOURCE_RUNS( + "#include + int main() + { + __sync_synchronize(); + return(0); + }" + HAVE_IB_GCC_SYNC_SYNCHRONISE + ) + CHECK_C_SOURCE_RUNS( + "#include + int main() + { + 
__atomic_thread_fence(__ATOMIC_ACQUIRE); + __atomic_thread_fence(__ATOMIC_RELEASE); + return(0); + }" + HAVE_IB_GCC_ATOMIC_THREAD_FENCE + ) ENDIF() IF(HAVE_IB_GCC_ATOMIC_BUILTINS) ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS=1) ENDIF() +IF(HAVE_IB_GCC_SYNC_SYNCHRONISE) + ADD_DEFINITIONS(-DHAVE_IB_GCC_SYNC_SYNCHRONISE=1) +ENDIF() + +IF(HAVE_IB_GCC_ATOMIC_THREAD_FENCE) + ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_THREAD_FENCE=1) +ENDIF() + # either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not IF(NOT CMAKE_CROSSCOMPILING) CHECK_C_SOURCE_RUNS( @@ -169,10 +196,21 @@ return(0); } " HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS) + CHECK_C_SOURCE_COMPILES( + "#include + int main() { + __machine_r_barrier(); + __machine_w_barrier(); + return(0); + }" + HAVE_IB_MACHINE_BARRIER_SOLARIS) ENDIF() IF(HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS) ADD_DEFINITIONS(-DHAVE_IB_ATOMIC_PTHREAD_T_SOLARIS=1) ENDIF() + IF(HAVE_IB_MACHINE_BARRIER_SOLARIS) + ADD_DEFINITIONS(-DHAVE_IB_MACHINE_BARRIER_SOLARIS=1) + ENDIF() ENDIF() @@ -190,6 +228,7 @@ IF(MSVC) ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS) + ADD_DEFINITIONS(-DHAVE_WINDOWS_MM_FENCE) ENDIF() diff -Nru mariadb-5.5-5.5.39/storage/innobase/dict/dict0dict.c mariadb-5.5-5.5.40/storage/innobase/dict/dict0dict.c --- mariadb-5.5-5.5.39/storage/innobase/dict/dict0dict.c 2014-08-03 12:00:33.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/innobase/dict/dict0dict.c 2014-10-08 13:19:52.000000000 +0000 @@ -1123,6 +1123,11 @@ /* The id will be changed. 
So remove old one */ rbt_delete(foreign->foreign_table->foreign_rbt, foreign->id); + if (foreign->referenced_table) { + rbt_delete(foreign->referenced_table->referenced_rbt, + foreign->id); + } + if (ut_strlen(foreign->foreign_table_name) < ut_strlen(table->name)) { /* Allocate a longer name buffer; @@ -1273,6 +1278,11 @@ rbt_insert(foreign->foreign_table->foreign_rbt, foreign->id, &foreign); + if (foreign->referenced_table) { + rbt_insert(foreign->referenced_table->referenced_rbt, + foreign->id, &foreign); + } + foreign = UT_LIST_GET_NEXT(foreign_list, foreign); } @@ -5364,6 +5374,11 @@ ut_ad(space_id > 0); + if (dict_sys == NULL) { + /* This could happen when it's in redo processing. */ + return(NULL); + } + table = UT_LIST_GET_FIRST(dict_sys->table_LRU); num_item = UT_LIST_GET_LEN(dict_sys->table_LRU); diff -Nru mariadb-5.5-5.5.39/storage/innobase/include/log0log.h mariadb-5.5-5.5.40/storage/innobase/include/log0log.h --- mariadb-5.5-5.5.39/storage/innobase/include/log0log.h 2014-08-03 12:00:43.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/innobase/include/log0log.h 2014-10-08 13:19:51.000000000 +0000 @@ -154,6 +154,13 @@ ib_uint64_t log_get_lsn(void); /*=============*/ +/************************************************************//** +Gets the current lsn. +@return current lsn */ +UNIV_INLINE +lsn_t +log_get_lsn_nowait(void); +/*=============*/ /**************************************************************** Gets the log group capacity. It is OK to read the value without holding log_sys->mutex because it is constant. 
diff -Nru mariadb-5.5-5.5.39/storage/innobase/include/log0log.ic mariadb-5.5-5.5.40/storage/innobase/include/log0log.ic --- mariadb-5.5-5.5.39/storage/innobase/include/log0log.ic 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/innobase/include/log0log.ic 2014-10-08 13:19:51.000000000 +0000 @@ -411,6 +411,26 @@ return(lsn); } +/************************************************************//** +Gets the current lsn with a trylock +@return current lsn or 0 if false*/ +UNIV_INLINE +lsn_t +log_get_lsn_nowait(void) +/*=============*/ +{ + lsn_t lsn; + + if (mutex_enter_nowait(&(log_sys->mutex))) + return 0; + + lsn = log_sys->lsn; + + mutex_exit(&(log_sys->mutex)); + + return(lsn); +} + /**************************************************************** Gets the log group capacity. It is OK to read the value without holding log_sys->mutex because it is constant. diff -Nru mariadb-5.5-5.5.39/storage/innobase/include/os0sync.h mariadb-5.5-5.5.40/storage/innobase/include/os0sync.h --- mariadb-5.5-5.5.39/storage/innobase/include/os0sync.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/innobase/include/os0sync.h 2014-10-08 13:19:51.000000000 +0000 @@ -310,6 +310,9 @@ # define os_atomic_test_and_set_byte(ptr, new_val) \ __sync_lock_test_and_set(ptr, (byte) new_val) +# define os_atomic_lock_release_byte(ptr) \ + __sync_lock_release(ptr) + #elif defined(HAVE_IB_SOLARIS_ATOMICS) #define HAVE_ATOMIC_BUILTINS @@ -363,6 +366,9 @@ # define os_atomic_test_and_set_byte(ptr, new_val) \ atomic_swap_uchar(ptr, new_val) +# define os_atomic_lock_release_byte(ptr) \ + (void) atomic_swap_uchar(ptr, 0) + #elif defined(HAVE_WINDOWS_ATOMICS) #define HAVE_ATOMIC_BUILTINS @@ -416,6 +422,57 @@ "Mutexes and rw_locks use InnoDB's own implementation" #endif +/** barrier definitions for memory ordering */ +#ifdef HAVE_IB_GCC_ATOMIC_THREAD_FENCE +# define HAVE_MEMORY_BARRIER +# define os_rmb __atomic_thread_fence(__ATOMIC_ACQUIRE) +# define os_wmb 
__atomic_thread_fence(__ATOMIC_RELEASE) +#ifdef __powerpc__ +# define os_isync __asm __volatile ("isync":::"memory") +#else +#define os_isync do { } while(0) +#endif + +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "GCC builtin __atomic_thread_fence() is used for memory barrier" + +#elif defined(HAVE_IB_GCC_SYNC_SYNCHRONISE) +# define HAVE_MEMORY_BARRIER +# define os_rmb __sync_synchronize() +# define os_wmb __sync_synchronize() +# define os_isync __sync_synchronize() +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "GCC builtin __sync_synchronize() is used for memory barrier" + +#elif defined(HAVE_IB_MACHINE_BARRIER_SOLARIS) +# define HAVE_MEMORY_BARRIER +# include +# define os_rmb __machine_r_barrier() +# define os_wmb __machine_w_barrier() +# define os_isync os_rmb; os_wmb +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "Soralis memory ordering functions are used for memory barrier" + +#elif defined(HAVE_WINDOWS_MM_FENCE) +# define HAVE_MEMORY_BARRIER +# include +# define os_rmb _mm_lfence() +# define os_wmb _mm_sfence() +# define os_isync os_rmb; os_wmb +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "_mm_lfence() and _mm_sfence() are used for memory barrier" + +# define os_atomic_lock_release_byte(ptr) \ + (void) InterlockedExchange(ptr, 0) + +#else +# define os_rmb do { } while(0) +# define os_wmb do { } while(0) +# define os_isync do { } while(0) +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "Memory barrier is not used" +#endif + #ifndef UNIV_NONINL #include "os0sync.ic" #endif diff -Nru mariadb-5.5-5.5.39/storage/innobase/include/sync0rw.h mariadb-5.5-5.5.40/storage/innobase/include/sync0rw.h --- mariadb-5.5-5.5.39/storage/innobase/include/sync0rw.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/innobase/include/sync0rw.h 2014-10-08 13:19:51.000000000 +0000 @@ -70,14 +70,8 @@ #ifdef UNIV_SYNC_DEBUG /* The global mutex which protects debug info lists of all rw-locks. 
To modify the debug info list of an rw-lock, this mutex has to be - acquired in addition to the mutex protecting the lock. */ -extern mutex_t rw_lock_debug_mutex; -extern os_event_t rw_lock_debug_event; /*!< If deadlock detection does - not get immediately the mutex it - may wait for this event */ -extern ibool rw_lock_debug_waiters; /*!< This is set to TRUE, if - there may be waiters for the event */ +extern os_fast_mutex_t rw_lock_debug_mutex; #endif /* UNIV_SYNC_DEBUG */ /** number of spin waits on rw-latches, diff -Nru mariadb-5.5-5.5.39/storage/innobase/include/sync0rw.ic mariadb-5.5-5.5.40/storage/innobase/include/sync0rw.ic --- mariadb-5.5-5.5.39/storage/innobase/include/sync0rw.ic 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/innobase/include/sync0rw.ic 2014-10-08 13:19:51.000000000 +0000 @@ -200,14 +200,14 @@ ulint amount) /*!< in: amount to decrement */ { #ifdef INNODB_RW_LOCKS_USE_ATOMICS - lint local_lock_word = lock->lock_word; - while (local_lock_word > 0) { + lint local_lock_word; + os_rmb; + while ((local_lock_word= lock->lock_word) > 0) { if (os_compare_and_swap_lint(&lock->lock_word, local_lock_word, local_lock_word - amount)) { return(TRUE); } - local_lock_word = lock->lock_word; } return(FALSE); #else /* INNODB_RW_LOCKS_USE_ATOMICS */ diff -Nru mariadb-5.5-5.5.39/storage/innobase/include/sync0sync.ic mariadb-5.5-5.5.40/storage/innobase/include/sync0sync.ic --- mariadb-5.5-5.5.39/storage/innobase/include/sync0sync.ic 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/innobase/include/sync0sync.ic 2014-10-08 13:19:51.000000000 +0000 @@ -92,6 +92,7 @@ ut_a(mutex->lock_word == 0); mutex->lock_word = 1; + os_wmb; } return((byte)ret); @@ -108,10 +109,7 @@ mutex_t* mutex) /*!< in: mutex */ { #if defined(HAVE_ATOMIC_BUILTINS) - /* In theory __sync_lock_release should be used to release the lock. - Unfortunately, it does not work properly alone. 
The workaround is - that more conservative __sync_lock_test_and_set is used instead. */ - os_atomic_test_and_set_byte(&mutex->lock_word, 0); + os_atomic_lock_release_byte(&mutex->lock_word); #else mutex->lock_word = 0; @@ -147,6 +145,7 @@ ptr = &(mutex->waiters); + os_rmb; return(*ptr); /* Here we assume that the read of a single word from memory is atomic */ } @@ -181,6 +180,7 @@ to wake up possible hanging threads if they are missed in mutex_signal_object. */ + os_isync; if (mutex_get_waiters(mutex) != 0) { mutex_signal_object(mutex); diff -Nru mariadb-5.5-5.5.39/storage/innobase/row/row0ins.c mariadb-5.5-5.5.40/storage/innobase/row/row0ins.c --- mariadb-5.5-5.5.39/storage/innobase/row/row0ins.c 2014-08-03 12:00:33.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/innobase/row/row0ins.c 2014-10-08 13:19:52.000000000 +0000 @@ -1702,7 +1702,7 @@ do { const rec_t* rec = btr_pcur_get_rec(&pcur); const buf_block_t* block = btr_pcur_get_block(&pcur); - ulint lock_type; + const ulint lock_type = LOCK_ORDINARY; if (page_rec_is_infimum(rec)) { @@ -1712,16 +1712,6 @@ offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - /* If the transaction isolation level is no stronger than - READ COMMITTED, then avoid gap locks. 
*/ - if (!page_rec_is_supremum(rec) - && thr_get_trx(thr)->isolation_level - <= TRX_ISO_READ_COMMITTED) { - lock_type = LOCK_REC_NOT_GAP; - } else { - lock_type = LOCK_ORDINARY; - } - if (allow_duplicates) { /* If the SQL-query will update or replace diff -Nru mariadb-5.5-5.5.39/storage/innobase/srv/srv0srv.c mariadb-5.5-5.5.40/storage/innobase/srv/srv0srv.c --- mariadb-5.5-5.5.39/storage/innobase/srv/srv0srv.c 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/innobase/srv/srv0srv.c 2014-10-08 13:19:51.000000000 +0000 @@ -411,7 +411,12 @@ UNIV_INTERN ulong srv_replication_delay = 0; /*-------------------------------------------*/ +#ifdef HAVE_MEMORY_BARRIER +/* No idea to wait long with memory barriers */ +UNIV_INTERN ulong srv_n_spin_wait_rounds = 15; +#else UNIV_INTERN ulong srv_n_spin_wait_rounds = 30; +#endif UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500; UNIV_INTERN ulong srv_thread_sleep_delay = 10000; UNIV_INTERN ulong srv_spin_wait_delay = 6; @@ -2459,9 +2464,10 @@ /* Try to track a strange bug reported by Harald Fuchs and others, where the lsn seems to decrease at times */ - new_lsn = log_get_lsn(); + /* We have to use nowait to ensure we don't block */ + new_lsn= log_get_lsn_nowait(); - if (new_lsn < old_lsn) { + if (new_lsn && new_lsn < old_lsn) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Error: old log sequence number %llu" @@ -2473,7 +2479,8 @@ ut_ad(0); } - old_lsn = new_lsn; + if (new_lsn) + old_lsn = new_lsn; if (difftime(time(NULL), srv_last_monitor_time) > 60) { /* We referesh InnoDB Monitor values so that averages are diff -Nru mariadb-5.5-5.5.39/storage/innobase/sync/sync0arr.c mariadb-5.5-5.5.40/storage/innobase/sync/sync0arr.c --- mariadb-5.5-5.5.39/storage/innobase/sync/sync0arr.c 2014-08-03 12:00:42.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/innobase/sync/sync0arr.c 2014-10-08 13:19:51.000000000 +0000 @@ -791,6 +791,7 @@ lock = cell->wait_object; + os_rmb; if (lock->lock_word > 0) { /* Either unlocked 
or only read locked. */ @@ -802,6 +803,7 @@ lock = cell->wait_object; /* lock_word == 0 means all readers have left */ + os_rmb; if (lock->lock_word == 0) { return(TRUE); @@ -810,6 +812,7 @@ lock = cell->wait_object; /* lock_word > 0 means no writer or reserved writer */ + os_rmb; if (lock->lock_word > 0) { return(TRUE); diff -Nru mariadb-5.5-5.5.39/storage/innobase/sync/sync0rw.c mariadb-5.5-5.5.40/storage/innobase/sync/sync0rw.c --- mariadb-5.5-5.5.39/storage/innobase/sync/sync0rw.c 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/innobase/sync/sync0rw.c 2014-10-08 13:19:51.000000000 +0000 @@ -40,6 +40,7 @@ #include "srv0srv.h" #include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */ #include "ha_prototypes.h" +#include "my_cpu.h" /* IMPLEMENTATION OF THE RW_LOCK @@ -179,18 +180,12 @@ To modify the debug info list of an rw-lock, this mutex has to be acquired in addition to the mutex protecting the lock. */ -UNIV_INTERN mutex_t rw_lock_debug_mutex; +UNIV_INTERN os_fast_mutex_t rw_lock_debug_mutex; # ifdef UNIV_PFS_MUTEX UNIV_INTERN mysql_pfs_key_t rw_lock_debug_mutex_key; # endif -/* If deadlock detection does not get immediately the mutex, -it may wait for this event */ -UNIV_INTERN os_event_t rw_lock_debug_event; -/* This is set to TRUE, if there may be waiters for the event */ -UNIV_INTERN ibool rw_lock_debug_waiters; - /******************************************************************//** Creates a debug info struct. 
*/ static @@ -390,15 +385,19 @@ lock_loop: /* Spin waiting for the writer field to become free */ + os_rmb; + HMT_low(); while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) { if (srv_spin_wait_delay) { ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); } i++; + os_rmb; } - - if (i == SYNC_SPIN_ROUNDS) { + HMT_medium(); + if (lock->lock_word <= 0) + { os_thread_yield(); } @@ -498,16 +497,19 @@ ulint index; ulint i = 0; + os_rmb; ut_ad(lock->lock_word <= 0); - + HMT_low(); while (lock->lock_word < 0) { if (srv_spin_wait_delay) { ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); } if(i < SYNC_SPIN_ROUNDS) { i++; + os_rmb; continue; } + HMT_medium(); /* If there is still a reader, then go to sleep.*/ rw_x_spin_round_count += i; @@ -544,7 +546,9 @@ sync_array_free_cell(sync_primary_wait_array, index); } + HMT_low(); } + HMT_medium(); rw_x_spin_round_count += i; } @@ -582,6 +586,8 @@ file_name, line); } else { + if (!pass) + os_rmb; /* Decrement failed: relock or failed lock */ if (!pass && lock->recursive && os_thread_eq(lock->writer_thread, curr_thread)) { @@ -647,6 +653,8 @@ } /* Spin waiting for the lock_word to become free */ + os_rmb; + HMT_low(); while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) { if (srv_spin_wait_delay) { @@ -655,7 +663,9 @@ } i++; + os_rmb; } + HMT_medium(); if (i == SYNC_SPIN_ROUNDS) { os_thread_yield(); } else { @@ -720,22 +730,7 @@ rw_lock_debug_mutex_enter(void) /*===========================*/ { -loop: - if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) { - return; - } - - os_event_reset(rw_lock_debug_event); - - rw_lock_debug_waiters = TRUE; - - if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) { - return; - } - - os_event_wait(rw_lock_debug_event); - - goto loop; + os_fast_mutex_lock(&rw_lock_debug_mutex); } /******************************************************************//** @@ -745,12 +740,7 @@ rw_lock_debug_mutex_exit(void) /*==========================*/ { - mutex_exit(&rw_lock_debug_mutex); - - if (rw_lock_debug_waiters) { - 
rw_lock_debug_waiters = FALSE; - os_event_set(rw_lock_debug_event); - } + os_fast_mutex_unlock(&rw_lock_debug_mutex); } /******************************************************************//** diff -Nru mariadb-5.5-5.5.39/storage/innobase/sync/sync0sync.c mariadb-5.5-5.5.40/storage/innobase/sync/sync0sync.c --- mariadb-5.5-5.5.39/storage/innobase/sync/sync0sync.c 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/innobase/sync/sync0sync.c 2014-10-08 13:19:51.000000000 +0000 @@ -47,6 +47,7 @@ # include "srv0start.h" /* srv_is_being_started */ #endif /* UNIV_SYNC_DEBUG */ #include "ha_prototypes.h" +#include "my_cpu.h" /* REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX @@ -473,6 +474,8 @@ ptr = &(mutex->waiters); + os_wmb; + *ptr = n; /* Here we assume that the write of a single word in memory is atomic */ } @@ -520,13 +523,15 @@ spin_loop: ut_d(mutex->count_spin_loop++); + HMT_low(); while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) { if (srv_spin_wait_delay) { ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); } - + os_rmb; // Ensure future reads sees new values i++; } + HMT_medium(); if (i == SYNC_SPIN_ROUNDS) { #ifdef UNIV_DEBUG @@ -1530,11 +1535,7 @@ SYNC_NO_ORDER_CHECK); #ifdef UNIV_SYNC_DEBUG - mutex_create(rw_lock_debug_mutex_key, &rw_lock_debug_mutex, - SYNC_NO_ORDER_CHECK); - - rw_lock_debug_event = os_event_create(NULL); - rw_lock_debug_waiters = FALSE; + os_fast_mutex_init(rw_lock_debug_mutex_key, &rw_lock_debug_mutex); #endif /* UNIV_SYNC_DEBUG */ } @@ -1602,6 +1603,7 @@ sync_order_checks_on = FALSE; sync_thread_level_arrays_free(); + os_fast_mutex_free(&rw_lock_debug_mutex); #endif /* UNIV_SYNC_DEBUG */ sync_initialized = FALSE; diff -Nru mariadb-5.5-5.5.39/storage/maria/ha_maria.cc mariadb-5.5-5.5.40/storage/maria/ha_maria.cc --- mariadb-5.5-5.5.39/storage/maria/ha_maria.cc 2014-08-03 12:00:42.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/maria/ha_maria.cc 2014-10-08 13:19:51.000000000 +0000 @@ -21,6 +21,7 @@ #endif #define 
MYSQL_SERVER 1 +#include #include #include #include diff -Nru mariadb-5.5-5.5.39/storage/myisam/mi_rnext.c mariadb-5.5-5.5.40/storage/myisam/mi_rnext.c --- mariadb-5.5-5.5.39/storage/myisam/mi_rnext.c 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/myisam/mi_rnext.c 2014-10-08 13:19:51.000000000 +0000 @@ -66,7 +66,7 @@ Normally SQL layer would never request "search next" if "search first" failed. But HANDLER may do anything. - As mi_rnext() without preceeding mi_rkey()/mi_rfirst() + As mi_rnext() without preceding mi_rkey()/mi_rfirst() equals to mi_rfirst(), we must restore original state as if failing mi_rfirst() was not called. */ diff -Nru mariadb-5.5-5.5.39/storage/sphinx/snippets_udf.cc mariadb-5.5-5.5.40/storage/sphinx/snippets_udf.cc --- mariadb-5.5-5.5.39/storage/sphinx/snippets_udf.cc 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/sphinx/snippets_udf.cc 2014-10-08 13:19:51.000000000 +0000 @@ -13,6 +13,7 @@ // did not, you can find it at http://www.gnu.org/ // +#include #include #include #include diff -Nru mariadb-5.5-5.5.39/storage/tokudb/CMakeLists.txt mariadb-5.5-5.5.40/storage/tokudb/CMakeLists.txt --- mariadb-5.5-5.5.39/storage/tokudb/CMakeLists.txt 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/CMakeLists.txt 2014-10-08 13:19:51.000000000 +0000 @@ -13,13 +13,14 @@ RETURN() ENDIF() +IF(NOT LIBJEMALLOC) + MESSAGE(WARNING "TokuDB is enabled, but jemalloc is not. 
This configuration is not supported") +ENDIF() + ############################################ -SET(TOKUDB_VERSION "7.1.6") +SET(TOKUDB_VERSION "7.5.0") SET(TOKUDB_DEB_FILES "usr/lib/mysql/plugin/ha_tokudb.so\netc/mysql/conf.d/tokudb.cnf\nusr/bin/tokuftdump\nusr/share/doc/mariadb-server-5.5/README-TOKUDB\nusr/share/doc/mariadb-server-5.5/README.md" PARENT_SCOPE) SET(USE_BDB OFF CACHE BOOL "") -SET(USE_VALGRIND OFF CACHE BOOL "") -SET(BUILD_TESTING OFF CACHE BOOL "") -SET(TOKU_DEBUG_PARANOID OFF CACHE BOOL "") MARK_AS_ADVANCED(BUILDNAME) MARK_AS_ADVANCED(BUILD_TESTING) MARK_AS_ADVANCED(CMAKE_TOKUDB_REVISION) @@ -32,6 +33,10 @@ MARK_AS_ADVANCED(XZ_SOURCE_DIR) ############################################ +SET(BUILD_TESTING OFF CACHE BOOL "") +SET(USE_VALGRIND OFF CACHE BOOL "") +SET(TOKU_DEBUG_PARANOID OFF CACHE BOOL "") + IF(NOT DEFINED TOKUDB_VERSION) IF(DEFINED ENV{TOKUDB_VERSION}) SET(TOKUDB_VERSION $ENV{TOKUDB_VERSION}) @@ -49,6 +54,25 @@ ADD_DEFINITIONS("-DTOKUDB_CHECK_JEMALLOC=${TOKUDB_CHECK_JEMALLOC}") ENDIF() +## adds a compiler flag if the compiler supports it +include(CheckCCompilerFlag) +include(CheckCXXCompilerFlag) + +macro(set_cflags_if_supported) + foreach(flag ${ARGN}) + check_c_compiler_flag(${flag} HAVE_C_${flag}) + if (HAVE_C_${flag}) + set(CMAKE_C_FLAGS "${flag} ${CMAKE_C_FLAGS}") + endif () + check_cxx_compiler_flag(${flag} HAVE_CXX_${flag}) + if (HAVE_CXX_${flag}) + set(CMAKE_CXX_FLAGS "${flag} ${CMAKE_CXX_FLAGS}") + endif () + endforeach(flag) +endmacro(set_cflags_if_supported) + +set_cflags_if_supported(-Wno-missing-field-initializers) + ADD_SUBDIRECTORY(ft-index) INCLUDE_DIRECTORIES(ft-index) diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/buildheader/CMakeLists.txt mariadb-5.5-5.5.40/storage/tokudb/ft-index/buildheader/CMakeLists.txt --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/buildheader/CMakeLists.txt 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/buildheader/CMakeLists.txt 2014-10-08 
13:19:51.000000000 +0000 @@ -26,4 +26,4 @@ DESTINATION include COMPONENT tokukv_headers ) -endif () +endif () \ No newline at end of file diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/buildheader/make_tdb.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/buildheader/make_tdb.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/buildheader/make_tdb.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/buildheader/make_tdb.cc 2014-10-08 13:19:51.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -291,6 +291,7 @@ printf("#define DB_IS_HOT_INDEX 0x00100000\n"); // private tokudb printf("#define DBC_DISABLE_PREFETCHING 0x20000000\n"); // private tokudb printf("#define DB_UPDATE_CMP_DESCRIPTOR 0x40000000\n"); // private tokudb + printf("#define TOKUFT_DIRTY_SHUTDOWN %x\n", 1<<31); { //dbt flags @@ -340,8 +341,8 @@ dodefine_from_track(txn_flags, DB_TXN_READ_ONLY); } - /* TOKUDB specific error codes*/ - printf("/* TOKUDB specific error codes */\n"); + /* TokuFT specific error codes*/ + printf("/* TokuFT specific error codes */\n"); dodefine(TOKUDB_OUT_OF_LOCKS); dodefine(TOKUDB_SUCCEEDED_EARLY); dodefine(TOKUDB_FOUND_BUT_REJECTED); @@ -421,7 +422,7 @@ "int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invokation. 0 means disabled. */", "int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invokation. 0 means disabled. 
*/", "int (*checkpointing_postpone) (DB_ENV*) /* Use for 'rename table' or any other operation that must be disjoint from a checkpoint */", - "int (*checkpointing_resume) (DB_ENV*) /* Alert tokudb 'postpone' is no longer necessary */", + "int (*checkpointing_resume) (DB_ENV*) /* Alert tokuft that 'postpone' is no longer necessary */", "int (*checkpointing_begin_atomic_operation) (DB_ENV*) /* Begin a set of operations (that must be atomic as far as checkpoints are concerned). i.e. inserting into every index in one table */", "int (*checkpointing_end_atomic_operation) (DB_ENV*) /* End a set of operations (that must be atomic as far as checkpoints are concerned). */", "int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */", @@ -545,6 +546,7 @@ "int (*change_fanout)(DB *db, uint32_t fanout)", "int (*get_fanout)(DB *db, uint32_t *fanout)", "int (*set_fanout)(DB *db, uint32_t fanout)", + "int (*set_memcmp_magic)(DB *db, uint8_t magic)", "int (*set_indexer)(DB*, DB_INDEXER*)", "void (*get_indexer)(DB*, DB_INDEXER**)", "int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going)", @@ -571,8 +573,9 @@ STRUCT_SETUP(DB_TXN, api_internal,"void *%s"); STRUCT_SETUP(DB_TXN, commit, "int (*%s) (DB_TXN*, uint32_t)"); STRUCT_SETUP(DB_TXN, prepare, "int (*%s) (DB_TXN*, uint8_t gid[DB_GID_SIZE])"); + STRUCT_SETUP(DB_TXN, discard, "int (*%s) (DB_TXN*, uint32_t)"); STRUCT_SETUP(DB_TXN, id, "uint32_t (*%s) (DB_TXN *)"); - STRUCT_SETUP(DB_TXN, mgrp, "DB_ENV *%s /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/"); + STRUCT_SETUP(DB_TXN, mgrp, "DB_ENV *%s /* In TokuFT, mgrp is a DB_ENV, not a DB_TXNMGR */"); STRUCT_SETUP(DB_TXN, parent, "DB_TXN *%s"); const char *extra[] = { "int (*txn_stat)(DB_TXN *, struct txn_stat **)", @@ -612,6 
+615,7 @@ "int (*c_set_bounds)(DBC*, const DBT*, const DBT*, bool pre_acquire, int out_of_range_error)", "void (*c_set_check_interrupt_callback)(DBC*, bool (*)(void*), void *)", "void (*c_remove_restriction)(DBC*)", + "char _internal[512]", NULL}; sort_and_dump_fields("dbc", false, extra); } @@ -635,9 +639,9 @@ printf("#define DB_VERSION_MAJOR %d\n", DB_VERSION_MAJOR); printf("#define DB_VERSION_MINOR %d\n", DB_VERSION_MINOR); - printf("/* As of r40364 (post TokuDB 5.2.7), the patch version number is 100+ the BDB header patch version number.*/\n"); + printf("/* As of r40364 (post TokuFT 5.2.7), the patch version number is 100+ the BDB header patch version number.*/\n"); printf("#define DB_VERSION_PATCH %d\n", 100+DB_VERSION_PATCH); - printf("#define DB_VERSION_STRING \"Tokutek: TokuDB %d.%d.%d\"\n", DB_VERSION_MAJOR, DB_VERSION_MINOR, 100+DB_VERSION_PATCH); + printf("#define DB_VERSION_STRING \"Tokutek: TokuFT %d.%d.%d\"\n", DB_VERSION_MAJOR, DB_VERSION_MINOR, 100+DB_VERSION_PATCH); #ifndef DB_GID_SIZE #define DB_GID_SIZE DB_XIDDATASIZE diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/cmake/merge_archives_unix.cmake.in mariadb-5.5-5.5.40/storage/tokudb/ft-index/cmake/merge_archives_unix.cmake.in --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/cmake/merge_archives_unix.cmake.in 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/cmake/merge_archives_unix.cmake.in 2014-10-08 13:19:51.000000000 +0000 @@ -43,7 +43,9 @@ LIST(LENGTH LIB_OBJ_LIST LENGTH_WITH_DUPS) SET(LIB_OBJ_LIST_NO_DUPS ${LIB_OBJ_LIST}) - LIST(REMOVE_DUPLICATES LIB_OBJ_LIST_NO_DUPS) + IF (LENGTH_WITH_DUPS GREATER 0) + LIST(REMOVE_DUPLICATES LIB_OBJ_LIST_NO_DUPS) + ENDIF () LIST(LENGTH LIB_OBJ_LIST_NO_DUPS LENGTH_WITHOUT_DUPS) IF(LENGTH_WITH_DUPS EQUAL LENGTH_WITHOUT_DUPS) diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/CMakeLists.txt mariadb-5.5-5.5.40/storage/tokudb/ft-index/CMakeLists.txt --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/CMakeLists.txt 2014-08-03 
12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/CMakeLists.txt 2014-10-08 13:19:51.000000000 +0000 @@ -76,9 +76,6 @@ add_subdirectory(src) add_subdirectory(tools) -## subdirectories that just install things -#add_subdirectory(examples) - INSTALL_DOCUMENTATION(README.md README-TOKUDB COMPONENT Server) ## build tags diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/cmake_modules/FindBDB.cmake mariadb-5.5-5.5.40/storage/tokudb/ft-index/cmake_modules/FindBDB.cmake --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/cmake_modules/FindBDB.cmake 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/cmake_modules/FindBDB.cmake 1970-01-01 00:00:00.000000000 +0000 @@ -1,27 +0,0 @@ -# - Try to find BDB -# Once done this will define -# BDB_FOUND - System has BDB -# BDB_INCLUDE_DIRS - The BDB include directories -# BDB_LIBRARIES - The libraries needed to use BDB -# BDB_DEFINITIONS - Compiler switches required for using BDB - -find_path(BDB_INCLUDE_DIR db.h) - -find_library(BDB_LIBRARY NAMES db libdb) - -include(CheckSymbolExists) -## check if the found bdb has DB_TXN_SNAPSHOT -set(CMAKE_REQUIRED_INCLUDES ${BDB_INCLUDE_DIR}) -check_symbol_exists(DB_TXN_SNAPSHOT "db.h" HAVE_DB_TXN_SNAPSHOT) -if(HAVE_DB_TXN_SNAPSHOT) - set(BDB_INCLUDE_DIRS ${BDB_INCLUDE_DIR}) - set(BDB_LIBRARIES ${BDB_LIBRARY}) - - include(FindPackageHandleStandardArgs) - # handle the QUIETLY and REQUIRED arguments and set BDB_FOUND to TRUE - # if all listed variables are TRUE - find_package_handle_standard_args(BDB DEFAULT_MSG - BDB_LIBRARY BDB_INCLUDE_DIR) - - mark_as_advanced(BDB_INCLUDE_DIR BDB_LIBRARY) -endif() diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/cmake_modules/TokuBuildTagDatabases.cmake mariadb-5.5-5.5.40/storage/tokudb/ft-index/cmake_modules/TokuBuildTagDatabases.cmake --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/cmake_modules/TokuBuildTagDatabases.cmake 2014-08-03 12:00:34.000000000 +0000 +++ 
mariadb-5.5-5.5.40/storage/tokudb/ft-index/cmake_modules/TokuBuildTagDatabases.cmake 1970-01-01 00:00:00.000000000 +0000 @@ -1,126 +0,0 @@ -## set up lists of sources and headers for tags -file(GLOB_RECURSE all_srcs - buildheader/*.cc - db-benchmark-test/*.cc - ft/*.cc - include/*.cc - locktree/*.cc - portability/*.cc - src/*.cc - utils/*.cc - util/*.cc - db-benchmark-test/*.cc - ) -list(APPEND all_srcs - ${CMAKE_CURRENT_BINARY_DIR}/ft/log_code.cc - ${CMAKE_CURRENT_BINARY_DIR}/ft/log_print.cc - ) -file(GLOB_RECURSE all_hdrs - buildheader/*.h - db-benchmark-test/*.h - ft/*.h - include/*.h - locktree/*.h - portability/*.h - src/*.h - utils/*.h - util/*.h - db-benchmark-test/*.h - ) -list(APPEND all_hdrs - ${CMAKE_CURRENT_BINARY_DIR}/portability/toku_config.h - ${CMAKE_CURRENT_BINARY_DIR}/buildheader/db.h - ${CMAKE_CURRENT_BINARY_DIR}/ft/log_header.h - ) - -option(USE_CTAGS "Build the ctags database." ON) -if (USE_CTAGS AND - # Macs by default are not case-sensitive, so tags and TAGS clobber each other. Do etags and not ctags in that case, because Emacs is superior. :P - (NOT APPLE OR NOT USE_ETAGS)) - find_program(CTAGS "ctags") - if (NOT CTAGS MATCHES NOTFOUND) - add_custom_command( - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/tags" - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/ctags-stamp" - COMMAND ${CTAGS} -o tags ${all_srcs} ${all_hdrs} - COMMAND touch "${CMAKE_CURRENT_BINARY_DIR}/ctags-stamp" - DEPENDS ${all_srcs} ${all_hdrs} install_tdb_h generate_config_h generate_log_code - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}") - add_custom_target(build_ctags ALL DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/tags" ctags-stamp) - endif () -endif () - -option(USE_ETAGS "Build the etags database." 
ON) -if (USE_ETAGS) - find_program(ETAGS "etags") - if (NOT ETAGS MATCHES NOTFOUND) - add_custom_command( - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/TAGS" - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/etags-stamp" - COMMAND ${ETAGS} -o TAGS ${all_srcs} ${all_hdrs} - COMMAND touch "${CMAKE_CURRENT_BINARY_DIR}/etags-stamp" - DEPENDS ${all_srcs} ${all_hdrs} install_tdb_h generate_config_h generate_log_code - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}") - add_custom_target(build_etags ALL DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/TAGS" etags-stamp) - endif () -endif () - -option(USE_CSCOPE "Build the cscope database." ON) -if (USE_CSCOPE) - find_program(CSCOPE "cscope") - if (NOT CSCOPE MATCHES NOTFOUND) - file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/cscope.files" "") - foreach(file ${all_srcs} ${all_hdrs}) - file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/cscope.files" "${file}\n") - endforeach(file) - add_custom_command( - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/cscope.out" - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/cscope.in.out" - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/cscope.po.out" - COMMAND ${CSCOPE} -b -q -R -i"${CMAKE_CURRENT_BINARY_DIR}/cscope.files" -I"${CMAKE_CURRENT_SOURCE_DIR}" -I"${CMAKE_CURRENT_SOURCE_DIR}/include" -I"${CMAKE_CURRENT_SOURCE_DIR}/portability" -I"${CMAKE_CURRENT_SOURCE_DIR}/portability" -I"${CMAKE_CURRENT_SOURCE_DIR}/ft" -I"${CMAKE_CURRENT_SOURCE_DIR}/src" -I"${CMAKE_CURRENT_SOURCE_DIR}/locktree" -I"${CMAKE_CURRENT_SOURCE_DIR}/utils" -I"${CMAKE_CURRENT_SOURCE_DIR}/db-benchmark-test" -I"${CMAKE_CURRENT_BINARY_DIR}" -I"${CMAKE_CURRENT_BINARY_DIR}/portability" -I"${CMAKE_CURRENT_BINARY_DIR}/buildheader" - DEPENDS ${all_srcs} ${all_hdrs} install_tdb_h generate_config_h generate_log_code - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}") - add_custom_target(build_cscope.out ALL DEPENDS - "${CMAKE_CURRENT_SOURCE_DIR}/cscope.out" - "${CMAKE_CURRENT_SOURCE_DIR}/cscope.in.out" - "${CMAKE_CURRENT_SOURCE_DIR}/cscope.po.out") - endif () -endif () - -option(USE_GTAGS "Build the gtags 
database." ON) -if (USE_GTAGS) - find_program(GTAGS "gtags") - if (NOT GTAGS MATCHES NOTFOUND) - file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/gtags.files" "") - foreach(file ${all_srcs} ${all_hdrs}) - file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/gtags.files" "${file}\n") - endforeach(file) - add_custom_command( - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/GTAGS" - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/GRTAGS" - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/GPATH" - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/GSYMS" - COMMAND ${GTAGS} -f "${CMAKE_CURRENT_BINARY_DIR}/gtags.files" - DEPENDS ${all_srcs} ${all_hdrs} install_tdb_h generate_config_h generate_log_code - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}") - add_custom_target(build_GTAGS ALL DEPENDS - "${CMAKE_CURRENT_SOURCE_DIR}/GTAGS" - "${CMAKE_CURRENT_SOURCE_DIR}/GRTAGS" - "${CMAKE_CURRENT_SOURCE_DIR}/GPATH" - "${CMAKE_CURRENT_SOURCE_DIR}/GSYMS") - endif () -endif () - -option(USE_MKID "Build the idutils database." ON) -if (USE_MKID) - find_program(MKID "mkid") - if (NOT MKID MATCHES NOTFOUND) - add_custom_command( - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/ID" - COMMAND ${MKID} ${all_srcs} ${all_hdrs} - DEPENDS ${all_srcs} ${all_hdrs} install_tdb_h generate_config_h generate_log_code - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}") - add_custom_target(build_MKID ALL DEPENDS - "${CMAKE_CURRENT_SOURCE_DIR}/ID") - endif () -endif () diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/cmake_modules/TokuFeatureDetection.cmake mariadb-5.5-5.5.40/storage/tokudb/ft-index/cmake_modules/TokuFeatureDetection.cmake --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/cmake_modules/TokuFeatureDetection.cmake 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/cmake_modules/TokuFeatureDetection.cmake 2014-10-08 13:19:51.000000000 +0000 @@ -2,11 +2,6 @@ find_package(Threads) find_package(ZLIB REQUIRED) -option(USE_BDB "Build some tools and tests with bdb (requires a proper BerkeleyDB include directory and library)." 
ON) -if(USE_BDB) - find_package(BDB REQUIRED) -endif() - option(USE_VALGRIND "Build to run safely under valgrind (often slower)." ON) if(USE_VALGRIND) find_package(Valgrind REQUIRED) diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/CTestCustom.cmake mariadb-5.5-5.5.40/storage/tokudb/ft-index/CTestCustom.cmake --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/CTestCustom.cmake 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/CTestCustom.cmake 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,241 @@ +cmake_policy(SET CMP0012 NEW) + +## these tests shouldn't run with valgrind +list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE + ft/bnc-insert-benchmark + ft/ft_loader-test-extractor-1 + ft/ft_loader-test-extractor-2 + ft/ft_loader-test-extractor-3 + ft/upgrade_test_simple + portability/test-cache-line-boundary-fails + portability/try-leak-lost + portability/try-leak-reachable + portability/try-leak-uninit + util/helgrind_test_circular_buffer + util/helgrind_test_partitioned_counter + util/helgrind_test_partitioned_counter_5833 + ydb/diskfull.tdb + ydb/drd_test_4015.tdb + ydb/drd_test_groupcommit_count.tdb + ydb/filesize.tdb + ydb/helgrind_helgrind1.tdb + ydb/helgrind_helgrind2.tdb + ydb/helgrind_helgrind3.tdb + ydb/helgrind_test_groupcommit_count.tdb + ydb/hot-optimize-table-tests.tdb + ydb/insert-dup-prelock.tdb + ydb/loader-cleanup-test2.tdb + ydb/loader-cleanup-test3.tdb + ydb/loader-stress-test4.tdb + ydb/maxsize-for-loader-B.tdb + ydb/openlimit17.tdb + ydb/openlimit17-locktree.tdb + ydb/preload-db-nested.tdb + ydb/stress-gc.tdb + ydb/stress-gc2.tdb + ydb/stress-test.tdb + ydb/test-5138.tdb + ydb/test-prepare.tdb + ydb/test-prepare2.tdb + ydb/test-prepare3.tdb + ydb/test-recover1.tdb + ydb/test-recover2.tdb + ydb/test-recover3.tdb + ydb/test-xa-prepare.tdb + ydb/test4573-logtrim.tdb + ydb/test_3645.tdb + ydb/test_groupcommit_perf.tdb + ydb/test_large_update_broadcast_small_cachetable.tdb + ydb/test_update_broadcast_stress.tdb + 
ydb/test_update_stress.tdb + ydb/upgrade-test-4.tdb + ) + +if (NOT @RUN_HELGRIND_TESTS@) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE + util/helgrind_test_circular_buffer + util/helgrind_test_partitioned_counter + util/helgrind_test_partitioned_counter_5833 + ydb/helgrind_helgrind1.tdb + ydb/helgrind_helgrind2.tdb + ydb/helgrind_helgrind3.tdb + ydb/helgrind_test_groupcommit_count.tdb + ) +endif () + +if (NOT @RUN_DRD_TESTS@) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE + ydb/drd_test_groupcommit_count.tdb + ydb/drd_test_4015.tdb + ) +endif () + +## osx's pthreads prefer writers, so this test will deadlock +if (@CMAKE_SYSTEM_NAME@ STREQUAL Darwin) + list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE portability/test-pthread-rwlock-rwr) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE portability/test-pthread-rwlock-rwr) +endif () + +## tests that are supposed to crash will generate memcheck failures +set(tests_that_should_fail + ft/test-assertA + ft/test-assertB + portability/try-assert-zero + portability/try-assert0 + ydb/recover-missing-dbfile-2.abortrecover + ydb/recover-missing-dbfile.abortrecover + ydb/test_db_no_env.tdb + ydb/test_truncate_txn_abort.tdb + ) +list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${tests_that_should_fail}) + +## don't run drd stress tests with valgrind either (because that would do valgrind twice) +set(stress_tests + test_stress0.tdb + test_stress1.tdb + test_stress2.tdb + test_stress3.tdb + test_stress4.tdb + test_stress5.tdb + test_stress6.tdb + test_stress7.tdb + test_stress_hot_indexing.tdb + test_stress_openclose.tdb + test_stress_with_verify.tdb + ) +foreach(test ${stress_tests}) + list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE + ydb/drd_tiny_${test} + ydb/drd_mid_${test} + ydb/drd_large_${test} + ) + if(NOT @RUN_LONG_TESTS@) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE + ydb/drd_large_${test} + ) + endif() + if (NOT @RUN_DRD_TESTS@) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE + ydb/drd_tiny_${test} + ydb/drd_mid_${test} + ydb/drd_large_${test} + ) + endif () +endforeach(test) + 
+## upgrade stress tests are 5 minutes long, don't need to run them always +if(NOT @RUN_LONG_TESTS@) + foreach(test ${stress_tests}) + if (NOT ${test} MATCHES test_stress_openclose) + foreach(oldver 4.2.0 5.0.8 5.2.7 6.0.0 6.1.0 6.5.1 6.6.3) + foreach(p_or_s pristine stressed) + if (NOT (${test} MATCHES test_stress4 AND ${p_or_s} MATCHES stressed)) + foreach(size 2000) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE ydb/${test}/upgrade/${oldver}/${p_or_s}/${size}) + endforeach(size) + endif () + endforeach(p_or_s) + endforeach(oldver) + endif () + endforeach(test) +endif() + +set(tdb_tests_that_should_fail "ydb/${stress_tests}") +string(REGEX REPLACE ";" ";ydb/" stress_tests "${stress_tests}") + +set(recover_stress_tests + ydb/recover-test_stress1.abortrecover + ydb/recover-test_stress2.abortrecover + ydb/recover-test_stress3.abortrecover + ydb/recover-test_stress_openclose.abortrecover + ) + +## we run stress tests separately, only run them if asked to +if(NOT @RUN_STRESS_TESTS@) + list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${stress_tests} ${recover_stress_tests}) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE ${stress_tests} ${recover_stress_tests}) +endif() + +set(perf_tests + ydb/perf_checkpoint_var.tdb + ydb/perf_cursor_nop.tdb + ydb/perf_malloc_free.tdb + ydb/perf_nop.tdb + ydb/perf_ptquery.tdb + ydb/perf_ptquery2.tdb + ydb/perf_read_write.tdb + ydb/perf_xmalloc_free.tdb + ) + +## we also don't need to run perf tests every time +if(NOT @RUN_PERF_TESTS@) + list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${perf_tests}) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE ${perf_tests}) +endif() + +## don't run perf tests with valgrind (that's slow) +file(GLOB perf_test_srcs RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/src/tests" perf_*.cc) +string(REGEX REPLACE "\\.cc(;|$)" ".tdb\\1" perf_tests "${perf_test_srcs}") +set(tdb_tests_that_should_fail "ydb/${perf_tests}") +string(REGEX REPLACE ";" ";ydb/" perf_tests "${perf_tests}") +list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${perf_tests}) + +## these tests 
fail often and aren't helpful +set(known_failing_tests + ydb/diskfull.tdb + ) +list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${known_failing_tests}) +list(APPEND CTEST_CUSTOM_TESTS_IGNORE ${known_failing_tests}) + +## these tests take a long time, only run them if asked to +set(long_running_tests + ft/is_empty + ft/upgrade_test_simple + ydb/checkpoint_1.tdb + ydb/checkpoint_stress.tdb + ydb/hotindexer-with-queries.tdb + ydb/hot-optimize-table-tests.tdb + ydb/loader-cleanup-test0.tdb + ydb/loader-cleanup-test0z.tdb + ydb/loader-cleanup-test2.tdb + ydb/loader-cleanup-test2z.tdb + ydb/loader-stress-test4.tdb + ydb/loader-stress-test4z.tdb + ydb/manyfiles.tdb + ydb/preload-db-nested.tdb + ydb/recover_stress.tdb + ydb/root_fifo_1.tdb + ydb/root_fifo_2.tdb + ydb/root_fifo_31.tdb + ydb/root_fifo_32.tdb + ydb/stress-gc.tdb + ydb/stress-test.tdb + ydb/test3529.tdb + ydb/test_logmax.tdb + ydb/test_txn_nested2.tdb + ydb/test_update_broadcast_stress.tdb + ydb/test_update_stress.tdb + ) +if(NOT @RUN_LONG_TESTS@) + list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${long_running_tests}) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE ${long_running_tests}) +endif() + +## ignore log_print.cc in coverage report +list(APPEND CTEST_CUSTOM_COVERAGE_EXCLUDE "log_print.cc") + +list(APPEND CTEST_CUSTOM_WARNING_EXCEPTION + # don't complain about warnings in xz source + "xz-4.999.9beta/src/liblzma" + # don't complain about clang missing warnings from xz code + "clang: warning: unknown warning option" + # don't complain about warnings in jemalloc source + "jemalloc/src" + "jemalloc/internal" + # don't complain about valgrind headers leaving things unused + "valgrind/valgrind.h" + "valgrind/memcheck.h" + # don't complain about ranlib or libtool on empty archive + "has no symbols" + "the table of contents is empty" + ) diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/CTestCustom.cmake.in mariadb-5.5-5.5.40/storage/tokudb/ft-index/CTestCustom.cmake.in --- 
mariadb-5.5-5.5.39/storage/tokudb/ft-index/CTestCustom.cmake.in 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/CTestCustom.cmake.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,244 +0,0 @@ -cmake_policy(SET CMP0012 NEW) - -## these tests shouldn't run with valgrind -list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE - ft/bnc-insert-benchmark - ft/brt-serialize-benchmark - ft/ft_loader-test-extractor-1 - ft/ft_loader-test-extractor-2 - ft/ft_loader-test-extractor-3 - ft/upgrade_test_simple - portability/test-cache-line-boundary-fails - portability/try-leak-lost - portability/try-leak-reachable - portability/try-leak-uninit - util/helgrind_test_circular_buffer - util/helgrind_test_partitioned_counter - util/helgrind_test_partitioned_counter_5833 - ydb/diskfull.tdb - ydb/drd_test_4015.tdb - ydb/drd_test_groupcommit_count.tdb - ydb/filesize.tdb - ydb/helgrind_helgrind1.tdb - ydb/helgrind_helgrind2.tdb - ydb/helgrind_helgrind3.tdb - ydb/helgrind_test_groupcommit_count.tdb - ydb/hot-optimize-table-tests.tdb - ydb/insert-dup-prelock.tdb - ydb/loader-cleanup-test2.tdb - ydb/loader-cleanup-test3.tdb - ydb/loader-stress-test4.tdb - ydb/maxsize-for-loader-B.tdb - ydb/openlimit17.tdb - ydb/openlimit17-locktree.tdb - ydb/preload-db-nested.tdb - ydb/stress-gc.tdb - ydb/stress-gc2.tdb - ydb/stress-test.bdb - ydb/stress-test.tdb - ydb/test-5138.tdb - ydb/test-prepare.tdb - ydb/test-prepare2.tdb - ydb/test-prepare3.tdb - ydb/test-recover1.tdb - ydb/test-recover2.tdb - ydb/test-recover3.tdb - ydb/test-xa-prepare.tdb - ydb/test4573-logtrim.tdb - ydb/test_3645.tdb - ydb/test_groupcommit_perf.bdb - ydb/test_groupcommit_perf.tdb - ydb/test_large_update_broadcast_small_cachetable.tdb - ydb/test_update_broadcast_stress.tdb - ydb/test_update_stress.tdb - ydb/upgrade-test-4.tdb - ) - -if (NOT @RUN_HELGRIND_TESTS@) - list(APPEND CTEST_CUSTOM_TESTS_IGNORE - util/helgrind_test_circular_buffer - util/helgrind_test_partitioned_counter - 
util/helgrind_test_partitioned_counter_5833 - ydb/helgrind_helgrind1.tdb - ydb/helgrind_helgrind2.tdb - ydb/helgrind_helgrind3.tdb - ydb/helgrind_test_groupcommit_count.tdb - ) -endif () - -if (NOT @RUN_DRD_TESTS@) - list(APPEND CTEST_CUSTOM_TESTS_IGNORE - ydb/drd_test_groupcommit_count.tdb - ydb/drd_test_4015.tdb - ) -endif () - -## osx's pthreads prefer writers, so this test will deadlock -if (@CMAKE_SYSTEM_NAME@ STREQUAL Darwin) - list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE portability/test-pthread-rwlock-rwr) - list(APPEND CTEST_CUSTOM_TESTS_IGNORE portability/test-pthread-rwlock-rwr) -endif () - -## tests that are supposed to crash will generate memcheck failures -set(tests_that_should_fail - ft/test-assertA - ft/test-assertB - portability/try-assert-zero - portability/try-assert0 - ydb/recover-missing-dbfile-2.abortrecover - ydb/recover-missing-dbfile.abortrecover - ydb/test_db_no_env.tdb - ydb/test_truncate_txn_abort.tdb - ) -list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${tests_that_should_fail}) - -## don't run drd stress tests with valgrind either (because that would do valgrind twice) -set(stress_tests - test_stress0.tdb - test_stress1.tdb - test_stress2.tdb - test_stress3.tdb - test_stress4.tdb - test_stress5.tdb - test_stress6.tdb - test_stress7.tdb - test_stress_hot_indexing.tdb - test_stress_openclose.tdb - test_stress_with_verify.tdb - ) -foreach(test ${stress_tests}) - list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE - ydb/drd_tiny_${test} - ydb/drd_mid_${test} - ydb/drd_large_${test} - ) - if(NOT @RUN_LONG_TESTS@) - list(APPEND CTEST_CUSTOM_TESTS_IGNORE - ydb/drd_large_${test} - ) - endif() - if (NOT @RUN_DRD_TESTS@) - list(APPEND CTEST_CUSTOM_TESTS_IGNORE - ydb/drd_tiny_${test} - ydb/drd_mid_${test} - ydb/drd_large_${test} - ) - endif () -endforeach(test) - -## upgrade stress tests are 5 minutes long, don't need to run them always -if(NOT @RUN_LONG_TESTS@) - foreach(test ${stress_tests}) - if (NOT ${test} MATCHES test_stress_openclose) - foreach(oldver 4.2.0 
5.0.8 5.2.7 6.0.0 6.1.0 6.5.1 6.6.3) - foreach(p_or_s pristine stressed) - if (NOT (${test} MATCHES test_stress4 AND ${p_or_s} MATCHES stressed)) - foreach(size 2000) - list(APPEND CTEST_CUSTOM_TESTS_IGNORE ydb/${test}/upgrade/${oldver}/${p_or_s}/${size}) - endforeach(size) - endif () - endforeach(p_or_s) - endforeach(oldver) - endif () - endforeach(test) -endif() - -set(tdb_tests_that_should_fail "ydb/${stress_tests}") -string(REGEX REPLACE ";" ";ydb/" stress_tests "${stress_tests}") - -set(recover_stress_tests - ydb/recover-test_stress1.abortrecover - ydb/recover-test_stress2.abortrecover - ydb/recover-test_stress3.abortrecover - ydb/recover-test_stress_openclose.abortrecover - ) - -## we run stress tests separately, only run them if asked to -if(NOT @RUN_STRESS_TESTS@) - list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${stress_tests} ${recover_stress_tests}) - list(APPEND CTEST_CUSTOM_TESTS_IGNORE ${stress_tests} ${recover_stress_tests}) -endif() - -set(perf_tests - ydb/perf_checkpoint_var.tdb - ydb/perf_cursor_nop.tdb - ydb/perf_malloc_free.tdb - ydb/perf_nop.tdb - ydb/perf_ptquery.tdb - ydb/perf_ptquery2.tdb - ydb/perf_read_write.tdb - ydb/perf_xmalloc_free.tdb - ) - -## we also don't need to run perf tests every time -if(NOT @RUN_PERF_TESTS@) - list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${perf_tests}) - list(APPEND CTEST_CUSTOM_TESTS_IGNORE ${perf_tests}) -endif() - -## don't run perf tests with valgrind (that's slow) -file(GLOB perf_test_srcs RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/src/tests" perf_*.cc) -string(REGEX REPLACE "\\.cc(;|$)" ".tdb\\1" perf_tests "${perf_test_srcs}") -set(tdb_tests_that_should_fail "ydb/${perf_tests}") -string(REGEX REPLACE ";" ";ydb/" perf_tests "${perf_tests}") -list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${perf_tests}) - -## these tests fail often and aren't helpful -set(known_failing_tests - ydb/diskfull.tdb - ) -list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${known_failing_tests}) -list(APPEND CTEST_CUSTOM_TESTS_IGNORE ${known_failing_tests}) 
- -## these tests take a long time, only run them if asked to -set(long_running_tests - ft/is_empty - ft/upgrade_test_simple - ydb/checkpoint_1.tdb - ydb/checkpoint_stress.tdb - ydb/hotindexer-with-queries.tdb - ydb/hot-optimize-table-tests.tdb - ydb/loader-cleanup-test0.tdb - ydb/loader-cleanup-test0z.tdb - ydb/loader-cleanup-test2.tdb - ydb/loader-cleanup-test2z.tdb - ydb/loader-stress-test4.tdb - ydb/loader-stress-test4z.tdb - ydb/manyfiles.tdb - ydb/preload-db-nested.tdb - ydb/recover_stress.tdb - ydb/root_fifo_1.tdb - ydb/root_fifo_2.tdb - ydb/root_fifo_31.tdb - ydb/root_fifo_32.tdb - ydb/stress-gc.tdb - ydb/stress-test.tdb - ydb/test3529.tdb - ydb/test_logmax.tdb - ydb/test_txn_nested2.tdb - ydb/test_update_broadcast_stress.tdb - ydb/test_update_stress.tdb - ) -if(NOT @RUN_LONG_TESTS@) - list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${long_running_tests}) - list(APPEND CTEST_CUSTOM_TESTS_IGNORE ${long_running_tests}) -endif() - -## ignore log_print.cc in coverage report -list(APPEND CTEST_CUSTOM_COVERAGE_EXCLUDE "log_print.cc") - -list(APPEND CTEST_CUSTOM_WARNING_EXCEPTION - # don't complain about warnings in xz source - "xz-4.999.9beta/src/liblzma" - # don't complain about clang missing warnings from xz code - "clang: warning: unknown warning option" - # don't complain about warnings in jemalloc source - "jemalloc/src" - "jemalloc/internal" - # don't complain about valgrind headers leaving things unused - "valgrind/valgrind.h" - "valgrind/memcheck.h" - # don't complain about ranlib or libtool on empty archive - "has no symbols" - "the table of contents is empty" - ) diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/examples/CMakeLists.txt mariadb-5.5-5.5.40/storage/tokudb/ft-index/examples/CMakeLists.txt --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/examples/CMakeLists.txt 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/examples/CMakeLists.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,16 +0,0 @@ -# detect when we are being 
built as a subproject -if (NOT DEFINED MYSQL_PROJECT_NAME_DOCSTRING) - install( - FILES - db-insert.c - db-insert-multiple.c - db-scan.c - db-update.c - Makefile - README.examples - DESTINATION - examples - COMPONENT - tokukv_examples - ) -endif () \ No newline at end of file diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/examples/db-insert.c mariadb-5.5-5.5.40/storage/tokudb/ft-index/examples/db-insert.c --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/examples/db-insert.c 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/examples/db-insert.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,610 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include -#include -#include -// Define BDB if you want to compile this to use Berkeley DB -#include -#include -#ifdef BDB -#include -#include -#define DIRSUF bdb -#else -#include -#define DIRSUF tokudb -#endif - -#include -#include -#include -#include -#include - -static inline float toku_tdiff (struct timeval *a, struct timeval *b) { - return (a->tv_sec - b->tv_sec) +1e-6*(a->tv_usec - b->tv_usec); -} - -#if !defined(DB_PRELOCKED_WRITE) -#define NO_DB_PRELOCKED -#define DB_PRELOCKED_WRITE 0 -#endif - -int verbose=1; - -enum { SERIAL_SPACING = 1<<6 }; -enum { DEFAULT_ITEMS_TO_INSERT_PER_ITERATION = 1<<20 }; -enum { DEFAULT_ITEMS_PER_TRANSACTION = 1<<14 }; - -static void insert (long long v); -#define CKERR(r) ({ int __r = r; if (__r!=0) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, __r, db_strerror(r)); assert(__r==0); }) -#define CKERR2(r,rexpect) if (r!=rexpect) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, r, db_strerror(r)); assert(r==rexpect); - -/* default test parameters */ -int keysize = sizeof (long long); -int valsize = sizeof (long long); -int pagesize = 0; -long long cachesize = 1000000000; // 1GB -int dupflags = 0; -int noserial = 0; // Don't do the serial stuff -int norandom = 0; // Don't do the random stuff -int prelock = 0; -int prelockflag = 0; -int items_per_transaction = DEFAULT_ITEMS_PER_TRANSACTION; -int items_per_iteration = DEFAULT_ITEMS_TO_INSERT_PER_ITERATION; -int finish_child_first = 0; // Commit or abort child first (before doing so to the parent). No effect if child does not exist. 
-int singlex_child = 0; // Do a single transaction, but do all work with a child -int singlex = 0; // Do a single transaction -int singlex_create = 0; // Create the db using the single transaction (only valid if singlex) -int insert1first = 0; // insert 1 before doing the rest -int do_transactions = 0; -int if_transactions_do_logging = DB_INIT_LOG; // set this to zero if we want no logging when transactions are used -int do_abort = 0; -int n_insertions_since_txn_began=0; -int env_open_flags = DB_CREATE|DB_PRIVATE|DB_INIT_MPOOL; -u_int32_t put_flags = 0; -double compressibility = -1; // -1 means make it very compressible. 1 means use random bits everywhere. 2 means half the bits are random. -int do_append = 0; -u_int32_t checkpoint_period = 60; - -static void do_prelock(DB* db, DB_TXN* txn) { - if (prelock) { -#if !defined(NO_DB_PRELOCKED) - int r = db->pre_acquire_table_lock(db, txn); - assert(r==0); -#else - (void) db; (void) txn; -#endif - } -} - -#define STRINGIFY2(s) #s -#define STRINGIFY(s) STRINGIFY2(s) -const char *dbdir = "./bench." 
STRINGIFY(DIRSUF); -char *dbfilename = "bench.db"; -char *dbname; - -DB_ENV *dbenv; -DB *db; -DB_TXN *parenttid=0; -DB_TXN *tid=0; - - -static void benchmark_setup (void) { - int r; - - if (!do_append) { - char unlink_cmd[strlen(dbdir) + strlen("rm -rf ") + 1]; - snprintf(unlink_cmd, sizeof(unlink_cmd), "rm -rf %s", dbdir); - //printf("unlink_cmd=%s\n", unlink_cmd); - system(unlink_cmd); - - if (strcmp(dbdir, ".") != 0) { - r = mkdir(dbdir,S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH); - assert(r == 0); - } - } - - r = db_env_create(&dbenv, 0); - assert(r == 0); - -#if !defined(TOKUDB) -#if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR <= 4 - if (dbenv->set_lk_max) { - r = dbenv->set_lk_max(dbenv, items_per_transaction*2); - assert(r==0); - } -#elif (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR <= 7) || DB_VERSION_MAJOR >= 5 - if (dbenv->set_lk_max_locks) { - r = dbenv->set_lk_max_locks(dbenv, items_per_transaction*2); - assert(r==0); - } - if (dbenv->set_lk_max_lockers) { - r = dbenv->set_lk_max_lockers(dbenv, items_per_transaction*2); - assert(r==0); - } - if (dbenv->set_lk_max_objects) { - r = dbenv->set_lk_max_objects(dbenv, items_per_transaction*2); - assert(r==0); - } -#else -#error -#endif -#endif - - if (dbenv->set_cachesize) { - r = dbenv->set_cachesize(dbenv, cachesize / (1024*1024*1024), cachesize % (1024*1024*1024), 1); - if (r != 0) - printf("WARNING: set_cachesize %d\n", r); - } - { - r = dbenv->open(dbenv, dbdir, env_open_flags, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH); - assert(r == 0); - } - -#if defined(TOKUDB) - if (checkpoint_period) { - printf("set checkpoint_period %u\n", checkpoint_period); - r = dbenv->checkpointing_set_period(dbenv, checkpoint_period); assert(r == 0); - u_int32_t period; - r = dbenv->checkpointing_get_period(dbenv, &period); assert(r == 0 && period == checkpoint_period); - } -#endif - - r = db_create(&db, dbenv, 0); - assert(r == 0); - - if (do_transactions) { - r=dbenv->txn_begin(dbenv, 0, &tid, 0); CKERR(r); - } - if (pagesize && 
db->set_pagesize) { - r = db->set_pagesize(db, pagesize); - assert(r == 0); - } - if (dupflags) { - r = db->set_flags(db, dupflags); - assert(r == 0); - } - r = db->open(db, tid, dbfilename, NULL, DB_BTREE, DB_CREATE, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH); - if (r!=0) fprintf(stderr, "errno=%d, %s\n", errno, strerror(errno)); - assert(r == 0); - if (insert1first) { - if (do_transactions) { - r=tid->commit(tid, 0); - assert(r==0); - tid = NULL; - r=dbenv->txn_begin(dbenv, 0, &tid, 0); CKERR(r); - } - insert(-1); - if (singlex) { - r=tid->commit(tid, 0); - assert(r==0); - tid = NULL; - r=dbenv->txn_begin(dbenv, 0, &tid, 0); CKERR(r); - } - } - else if (singlex && !singlex_create) { - r=tid->commit(tid, 0); - assert(r==0); - tid = NULL; - r=dbenv->txn_begin(dbenv, 0, &tid, 0); CKERR(r); - } - if (do_transactions) { - if (singlex) - do_prelock(db, tid); - else { - r=tid->commit(tid, 0); - assert(r==0); - tid = NULL; - } - } - if (singlex_child) { - parenttid = tid; - tid = NULL; - r=dbenv->txn_begin(dbenv, parenttid, &tid, 0); CKERR(r); - } - -} - -static void benchmark_shutdown (void) { - int r; - - if (do_transactions && singlex && !insert1first && (singlex_create || prelock)) { -#if defined(TOKUDB) - //There should be a single 'truncate' in the rollback instead of many 'insert' entries. - struct txn_stat *s; - r = tid->txn_stat(tid, &s); - assert(r==0); - //TODO: #1125 Always do the test after performance testing is done. - if (singlex_child) fprintf(stderr, "SKIPPED 'small rollback' test for child txn\n"); - else - assert(s->rollback_raw_count < 100); // gross test, not worth investigating details - free(s); - //system("ls -l bench.tokudb"); -#endif - } - if (do_transactions && singlex) { - if (!singlex_child || finish_child_first) { - assert(tid); - r = (do_abort ? tid->abort(tid) : tid->commit(tid, 0)); assert(r==0); - tid = NULL; - } - if (singlex_child) { - assert(parenttid); - r = (do_abort ? 
parenttid->abort(parenttid) : parenttid->commit(parenttid, 0)); assert(r==0); - parenttid = NULL; - } - else - assert(!parenttid); - } - assert(!tid); - assert(!parenttid); - - r = db->close(db, 0); - assert(r == 0); - r = dbenv->close(dbenv, 0); - assert(r == 0); -} - -static void long_long_to_array (unsigned char *a, int array_size, unsigned long long l) { - int i; - for (i=0; i<8 && i>(56-8*i))&0xff; -} - -static DBT *fill_dbt(DBT *dbt, const void *data, int size) { - memset(dbt, 0, sizeof *dbt); - dbt->size = size; - dbt->data = (void *) data; - return dbt; -} - -// Fill array with 0's if compressibilty==-1, otherwise fill array with data that is likely to compress by a factor of compressibility. -static void fill_array (unsigned char *data, int size) { - memset(data, 0, size); - if (compressibility>0) { - int i; - for (i=0; iput(db, tid, fill_dbt(&kt, kc, keysize), fill_dbt(&vt, vc, valsize), put_flags); - CKERR(r); - if (do_transactions) { - if (n_insertions_since_txn_began>=items_per_transaction && !singlex) { - n_insertions_since_txn_began=0; - r = tid->commit(tid, 0); assert(r==0); - tid = NULL; - r=dbenv->txn_begin(dbenv, 0, &tid, 0); assert(r==0); - do_prelock(db, tid); - n_insertions_since_txn_began=0; - } - n_insertions_since_txn_began++; - } -} - -static void serial_insert_from (long long from) { - long long i; - if (do_transactions && !singlex) { - int r = dbenv->txn_begin(dbenv, 0, &tid, 0); assert(r==0); - do_prelock(db, tid); - { - DBT k,v; - r=db->put(db, tid, fill_dbt(&k, "a", 1), fill_dbt(&v, "b", 1), put_flags); - CKERR(r); - } - } - for (i=0; icommit(tid, 0); assert(r==0); - tid=NULL; - } -} - -static long long llrandom (void) { - return (((long long)(random()))<<32) + random(); -} - -static void random_insert_below (long long below) { - long long i; - if (do_transactions && !singlex) { - int r = dbenv->txn_begin(dbenv, 0, &tid, 0); assert(r==0); - do_prelock(db, tid); - } - for (i=0; icommit(tid, 0); assert(r==0); - tid=NULL; - } -} - 
-static void biginsert (long long n_elements, struct timeval *starttime) { - long long i; - struct timeval t1,t2; - int iteration; - for (i=0, iteration=0; i= argc) return print_usage(argv[0]); - items_per_transaction = strtoll(argv[++i], &endptr, 10); assert(*endptr == 0); - } else if (strcmp(arg, "--abort") == 0) { - do_abort = 1; - } else if (strcmp(arg, "--periter") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - items_per_iteration = strtoll(argv[++i], &endptr, 10); assert(*endptr == 0); - } else if (strcmp(arg, "--cachesize") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - cachesize = strtoll(argv[++i], &endptr, 10); assert(*endptr == 0); - } else if (strcmp(arg, "--keysize") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - keysize = atoi(argv[++i]); - } else if (strcmp(arg, "--valsize") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - valsize = atoi(argv[++i]); - } else if (strcmp(arg, "--pagesize") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - pagesize = atoi(argv[++i]); - } else if (strcmp(arg, "--env") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - dbdir = argv[++i]; - } else if (strcmp(arg, "--prelock") == 0) { - prelock=1; - } else if (strcmp(arg, "--prelockflag") == 0) { - prelock=1; - prelockflag=1; - } else if (strcmp(arg, "--srandom") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - srandom(atoi(argv[++i])); - } else if (strcmp(arg, "--append") == 0) { - do_append = 1; - } else if (strcmp(arg, "--checkpoint-period") == 0) { - if (i+1 >= argc) return print_usage(argv[9]); - checkpoint_period = (u_int32_t) atoi(argv[++i]); - } else if (strcmp(arg, "--unique_checks") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - int unique_checks = atoi(argv[++i]); - if (unique_checks) - put_flags = DB_NOOVERWRITE; - else - put_flags = 0; - } else { - return print_usage(argv[0]); - } - } - if (do_transactions) { - env_open_flags |= DB_INIT_TXN | if_transactions_do_logging | DB_INIT_LOCK; 
- } - if (do_transactions && prelockflag) { - put_flags |= DB_PRELOCKED_WRITE; - } - if (iput_multiple -// the table schema is t(a bigint, b bigint, c bigint, d bigint, primary key(a), key(b), key(c,d), clustering key(d)) -// the primary key(a) is represented with key=a and value=b,c,d -// the key(b) index is represented with key=b,a and no value -// the key(c,d) index is represented with key=c,d,a and no value -// the clustering key(d) is represented with key=d,a and value=b,c -// a is auto increment -// b, c and d are random - -#include "../include/toku_config.h" -#include -#include -#include -#include -#include -#include -#include -#if defined(HAVE_BYTESWAP_H) -# include -#elif defined(HAVE_LIBKERN_OSBYTEORDER_H) -# include -# define bswap_64 OSSwapInt64 -#endif -#include -#include "db.h" - -static int force_multiple = 1; - -struct table { - int ndbs; - DB **dbs; -#if defined(TOKUDB) - DBT *mult_keys; - DBT *mult_vals; - uint32_t *mult_flags; -#endif -}; - -#if defined(TOKUDB) -static void table_init_dbt(DBT *dbt, size_t length) { - dbt->flags = DB_DBT_USERMEM; - dbt->data = malloc(length); - dbt->ulen = length; - dbt->size = 0; -} - -static void table_destroy_dbt(DBT *dbt) { - free(dbt->data); -} -#endif - -static void table_init(struct table *t, int ndbs, DB **dbs, size_t key_length __attribute__((unused)), size_t val_length __attribute__((unused))) { - t->ndbs = ndbs; - t->dbs = dbs; -#if defined(TOKUDB) - t->mult_keys = calloc(ndbs, sizeof (DBT)); - int i; - for (i = 0; i < ndbs; i++) - table_init_dbt(&t->mult_keys[i], key_length); - t->mult_vals = calloc(ndbs, sizeof (DBT)); - for (i = 0; i < ndbs; i++) - table_init_dbt(&t->mult_vals[i], val_length); - t->mult_flags = calloc(ndbs, sizeof (uint32_t)); - for (i = 0; i < ndbs; i++) - t->mult_flags[i] = 0; -#endif -} - -static void table_destroy(struct table *t) { -#if defined(TOKUDB) - int i; - for (i = 0; i < t->ndbs; i++) - table_destroy_dbt(&t->mult_keys[i]); - free(t->mult_keys); - for (i = 0; i < t->ndbs; 
i++) - table_destroy_dbt(&t->mult_vals[i]); - free(t->mult_vals); - free(t->mult_flags); -#else - assert(t); -#endif -} - -static int verbose = 0; - -static long random64(void) { - return ((long)random() << 32LL) + (long)random(); -} - -static long htonl64(long x) { -#if BYTE_ORDER == LITTLE_ENDIAN - return bswap_64(x); -#else -#error -#endif -} - -#if defined(TOKUDB) -static int my_generate_row_for_put(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, const DBT *src_key, const DBT *src_val) { - assert(src_db); - assert(dest_key->flags == DB_DBT_USERMEM && dest_key->ulen >= 4 * 8); - assert(dest_val->flags == DB_DBT_USERMEM && dest_val->ulen >= 4 * 8); - int index_num; - assert(dest_db->descriptor->dbt.size == sizeof index_num); - memcpy(&index_num, dest_db->descriptor->dbt.data, sizeof index_num); - switch (htonl(index_num) % 4) { - case 0: - // dest_key = src_key - dest_key->size = src_key->size; - memcpy(dest_key->data, src_key->data, src_key->size); - // dest_val = src_val - dest_val->size = src_val->size; - memcpy(dest_val->data, src_val->data, src_val->size); - break; - case 1: - // dest_key = b,a - dest_key->size = 2 * 8; - memcpy((char *)dest_key->data + 0, (char *)src_val->data + 0, 8); - memcpy((char *)dest_key->data + 8, (char *)src_key->data + 0, 8); - // dest_val = null - dest_val->size = 0; - break; - case 2: - // dest_key = c,d,a - dest_key->size = 3 * 8; - memcpy((char *)dest_key->data + 0, (char *)src_val->data + 8, 8); - memcpy((char *)dest_key->data + 8, (char *)src_val->data + 16, 8); - memcpy((char *)dest_key->data + 16, (char *)src_key->data + 0, 8); - // dest_val = null - dest_val->size = 0; - break; - case 3: - // dest_key = d,a - dest_key->size = 2 * 8; - memcpy((char *)dest_key->data + 0, (char *)src_val->data + 16, 8); - memcpy((char *)dest_key->data + 8, (char *)src_key->data + 0, 8); - // dest_val = b,c - dest_val->size = 2 * 8; - memcpy((char *)dest_val->data + 0, (char *)src_val->data + 0, 8); - memcpy((char *)dest_val->data + 8, 
(char *)src_val->data + 8, 8); - break; - default: - assert(0); - } - return 0; -} - -#else - -static int my_secondary_key(DB *db, const DBT *src_key, const DBT *src_val, DBT *dest_key) { - assert(dest_key->flags == 0 && dest_key->data == NULL); - dest_key->flags = DB_DBT_APPMALLOC; - dest_key->data = malloc(4 * 8); assert(dest_key->data); - switch ((intptr_t)db->app_private % 4) { - case 0: - // dest_key = src_key - dest_key->size = src_key->size; - memcpy(dest_key->data, src_key->data, src_key->size); - break; - case 1: - // dest_key = b,a - dest_key->size = 2 * 8; - memcpy((char *)dest_key->data + 0, (char *)src_val->data + 0, 8); - memcpy((char *)dest_key->data + 8, (char *)src_key->data + 0, 8); - break; - case 2: - // dest_key = c,d,a - dest_key->size = 3 * 8; - memcpy((char *)dest_key->data + 0, (char *)src_val->data + 8, 8); - memcpy((char *)dest_key->data + 8, (char *)src_val->data + 16, 8); - memcpy((char *)dest_key->data + 16, (char *)src_key->data + 0, 8); - break; - case 3: - // dest_key = d,a,b,c - dest_key->size = 4 * 8; - memcpy((char *)dest_key->data + 0, (char *)src_val->data + 16, 8); - memcpy((char *)dest_key->data + 8, (char *)src_key->data + 0, 8); - memcpy((char *)dest_key->data + 16, (char *)src_val->data + 0, 8); - memcpy((char *)dest_key->data + 24, (char *)src_val->data + 8, 8); - break; - default: - assert(0); - } - return 0; -} -#endif - -static void insert_row(DB_ENV *db_env, struct table *t, DB_TXN *txn, long a, long b, long c, long d) { - int r; - - // generate the primary key - char key_buffer[8]; - a = htonl64(a); - memcpy(key_buffer, &a, sizeof a); - - // generate the primary value - char val_buffer[3*8]; - b = htonl64(b); - memcpy(val_buffer+0, &b, sizeof b); - c = htonl64(c); - memcpy(val_buffer+8, &c, sizeof c); - d = htonl64(d); - memcpy(val_buffer+16, &d, sizeof d); - - DBT key = { .data = key_buffer, .size = sizeof key_buffer }; - DBT value = { .data = val_buffer, .size = sizeof val_buffer }; -#if defined(TOKUDB) - if 
(!force_multiple && t->ndbs == 1) { - r = t->dbs[0]->put(t->dbs[0], txn, &key, &value, t->mult_flags[0]); assert(r == 0); - } else { - r = db_env->put_multiple(db_env, t->dbs[0], txn, &key, &value, t->ndbs, &t->dbs[0], t->mult_keys, t->mult_vals, t->mult_flags); assert(r == 0); - } -#else - assert(db_env); - r = t->dbs[0]->put(t->dbs[0], txn, &key, &value, 0); assert(r == 0); -#endif -} - -static inline float tdiff (struct timeval *a, struct timeval *b) { - return (a->tv_sec - b->tv_sec) +1e-6*(a->tv_usec - b->tv_usec); -} - -static void insert_all(DB_ENV *db_env, struct table *t, long nrows, long max_rows_per_txn, long key_range, long rows_per_report, bool do_txn) { - int r; - - struct timeval tstart; - r = gettimeofday(&tstart, NULL); assert(r == 0); - struct timeval tlast = tstart; - DB_TXN *txn = NULL; - if (do_txn) { - r = db_env->txn_begin(db_env, NULL, &txn, 0); assert(r == 0); - } - long n_rows_per_txn = 0; - long rowi; - for (rowi = 0; rowi < nrows; rowi++) { - long a = rowi; - long b = random64() % key_range; - long c = random64() % key_range; - long d = random64() % key_range; - insert_row(db_env, t, txn, a, b, c, d); - n_rows_per_txn++; - - // maybe commit - if (do_txn && n_rows_per_txn == max_rows_per_txn) { - r = txn->commit(txn, 0); assert(r == 0); - r = db_env->txn_begin(db_env, NULL, &txn, 0); assert(r == 0); - n_rows_per_txn = 0; - } - - // maybe report performance - if (((rowi + 1) % rows_per_report) == 0) { - struct timeval tnow; - r = gettimeofday(&tnow, NULL); assert(r == 0); - float last_time = tdiff(&tnow, &tlast); - float total_time = tdiff(&tnow, &tstart); - printf("%ld %.3f %.0f/s %.0f/s\n", rowi + 1, last_time, rows_per_report/last_time, rowi/total_time); fflush(stdout); - tlast = tnow; - } - } - - if (do_txn) { - r = txn->commit(txn, 0); assert(r == 0); - } - struct timeval tnow; - r = gettimeofday(&tnow, NULL); assert(r == 0); - printf("total %ld %.3f %.0f/s\n", nrows, tdiff(&tnow, &tstart), nrows/tdiff(&tnow, &tstart)); 
fflush(stdout); -} - -int main(int argc, char *argv[]) { -#if defined(TOKDUB) - char *db_env_dir = "insertm.env.tokudb"; -#else - char *db_env_dir = "insertm.env.bdb"; -#endif - int db_env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG; - long rows = 100000000; - long rows_per_txn = 1000; - long rows_per_report = 100000; - long key_range = 100000; - bool do_txn = true; - u_int32_t pagesize = 0; - u_int64_t cachesize = 1000000000; - int ndbs = 4; -#if defined(TOKUDB) - u_int32_t checkpoint_period = 60; -#endif - - int i; - for (i = 1; i < argc; i++) { - char *arg = argv[i]; - if (strcmp(arg, "--verbose") == 0) { - verbose++; - continue; - } - if (strcmp(arg, "--ndbs") == 0 && i+1 < argc) { - ndbs = atoi(argv[++i]); - continue; - } - if (strcmp(arg, "--rows") == 0 && i+1 < argc) { - rows = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--rows_per_txn") == 0 && i+1 < argc) { - rows_per_txn = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--rows_per_report") == 0 && i+1 < argc) { - rows_per_report = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--key_range") == 0 && i+1 < argc) { - key_range = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--txn") == 0 && i+1 < argc) { - do_txn = atoi(argv[++i]); - continue; - } - if (strcmp(arg, "--pagesize") == 0 && i+1 < argc) { - pagesize = atoi(argv[++i]); - continue; - } - if (strcmp(arg, "--cachesize") == 0 && i+1 < argc) { - cachesize = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--force_multiple") == 0 && i+1 < argc) { - force_multiple = atoi(argv[++i]); - continue; - } -#if defined(TOKUDB) - if (strcmp(arg, "--checkpoint_period") == 0 && i+1 < argc) { - checkpoint_period = atoi(argv[++i]); - continue; - } -#endif - - assert(0); - } - - int r; - char rm_cmd[strlen(db_env_dir) + strlen("rm -rf ") + 1]; - snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", db_env_dir); - r = system(rm_cmd); assert(r == 0); - - r = mkdir(db_env_dir, S_IRWXU | S_IRGRP | S_IXGRP 
| S_IROTH | S_IXOTH); assert(r == 0); - - // create and open the env - DB_ENV *db_env = NULL; - r = db_env_create(&db_env, 0); assert(r == 0); - if (!do_txn) - db_env_open_flags &= ~(DB_INIT_TXN | DB_INIT_LOG); - if (cachesize) { - const u_int64_t gig = 1 << 30; - r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); - } -#if defined(TOKUDB) - r = db_env->set_generate_row_callback_for_put(db_env, my_generate_row_for_put); assert(r == 0); -#endif - r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if defined(TOKUDB) - if (checkpoint_period) { - r = db_env->checkpointing_set_period(db_env, checkpoint_period); assert(r == 0); - u_int32_t period; - r = db_env->checkpointing_get_period(db_env, &period); assert(r == 0 && period == checkpoint_period); - } -#endif - - - // create the db - DB *dbs[ndbs]; - for (i = 0; i < ndbs; i++) { - DB *db = NULL; - r = db_create(&db, db_env, 0); assert(r == 0); - DB_TXN *create_txn = NULL; - if (do_txn) { - r = db_env->txn_begin(db_env, NULL, &create_txn, 0); assert(r == 0); - } - if (pagesize) { - r = db->set_pagesize(db, pagesize); assert(r == 0); - } - char db_filename[32]; sprintf(db_filename, "test%d", i); - r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); - -#if defined(TOKUDB) - DESCRIPTOR_S new_descriptor; - int index_num = htonl(i); - new_descriptor.dbt.data = &index_num; - new_descriptor.dbt.size = sizeof i; - r = db->change_descriptor(db, create_txn, &new_descriptor.dbt, 0); assert(r == 0); -#else - db->app_private = (void *) (intptr_t) i; - if (i > 0) { - r = dbs[0]->associate(dbs[0], create_txn, db, my_secondary_key, 0); assert(r == 0); - } -#endif - if (do_txn) { - r = create_txn->commit(create_txn, 0); assert(r == 0); - } - dbs[i] = db; - } - - // insert all rows - struct table table; - table_init(&table, ndbs, dbs, 4 * 8, 4 * 8); - - 
insert_all(db_env, &table, rows, rows_per_txn, key_range, rows_per_report, do_txn); - - table_destroy(&table); - - // shutdown - for (i = 0; i < ndbs; i++) { - DB *db = dbs[i]; - r = db->close(db, 0); assert(r == 0); db = NULL; - } - r = db_env->close(db_env, 0); assert(r == 0); db_env = NULL; - - return 0; -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/examples/db-scan.c mariadb-5.5-5.5.40/storage/tokudb/ft-index/examples/db-scan.c --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/examples/db-scan.c 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/examples/db-scan.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,461 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -/* Scan the bench.tokudb/bench.db over and over. 
*/ -#define DONT_DEPRECATE_MALLOC - -#include -#include -#include -#include -#include -#ifdef BDB -#include -#define DIRSUF bdb -#else -#include -#define DIRSUF tokudb -#endif -#include -#include -#include -#include -#include -#include -#include - -static const char *pname; -static enum run_mode { RUN_HWC, RUN_LWC, RUN_VERIFY, RUN_RANGE} run_mode = RUN_HWC; -static int do_txns=1, prelock=0, prelockflag=0; -static u_int32_t lock_flag = 0; -static long limitcount=-1; -static u_int32_t cachesize = 127*1024*1024; -static u_int64_t start_range = 0, end_range = 0; -static int n_experiments = 2; -static int bulk_fetch = 1; - -static int print_usage (const char *argv0) { - fprintf(stderr, "Usage:\n%s [--verify-lwc | --lwc | --nohwc] [--prelock] [--prelockflag] [--prelockwriteflag] [--env DIR]\n", argv0); - fprintf(stderr, " --verify-lwc means to run the light weight cursor and the heavyweight cursor to verify that they get the same answer.\n"); - fprintf(stderr, " --lwc run light weight cursors instead of heavy weight cursors\n"); - fprintf(stderr, " --prelock acquire a read lock on the entire table before running\n"); - fprintf(stderr, " --prelockflag pass DB_PRELOCKED to the the cursor get operation whenever the locks have been acquired\n"); - fprintf(stderr, " --prelockwriteflag pass DB_PRELOCKED_WRITE to the cursor get operation\n"); - fprintf(stderr, " --nox no transactions (no locking)\n"); - fprintf(stderr, " --count COUNT read the first COUNT rows and then stop.\n"); - fprintf(stderr, " --cachesize N set the env cachesize to N bytes\n"); - fprintf(stderr, " --srandom N srandom(N)\n"); - fprintf(stderr, " --env DIR put db files in DIR instead of default\n"); - fprintf(stderr, " --bulk_fetch 0|1 do bulk fetch on lwc operations (default: 1)\n"); - return 1; -} - -static DB_ENV *env; -static DB *db; -static DB_TXN *tid=0; - -#define STRINGIFY2(s) #s -#define STRINGIFY(s) STRINGIFY2(s) -static const char *dbdir = "./bench." 
STRINGIFY(DIRSUF); /* DIRSUF is passed in as a -D argument to the compiler. */ -static int env_open_flags_yesx = DB_CREATE|DB_PRIVATE|DB_INIT_MPOOL|DB_INIT_TXN|DB_INIT_LOG|DB_INIT_LOCK; -static int env_open_flags_nox = DB_CREATE|DB_PRIVATE|DB_INIT_MPOOL; -static char *dbfilename = "bench.db"; - - -static void parse_args (int argc, const char *argv[]) { - pname=argv[0]; - argc--; argv++; - int specified_run_mode=0; - while (argc>0) { - if (strcmp(*argv,"--verify-lwc")==0) { - if (specified_run_mode && run_mode!=RUN_VERIFY) { two_modes: fprintf(stderr, "You specified two run modes\n"); exit(1); } - run_mode = RUN_VERIFY; - } else if (strcmp(*argv, "--lwc")==0) { - if (specified_run_mode && run_mode!=RUN_LWC) goto two_modes; - run_mode = RUN_LWC; - } else if (strcmp(*argv, "--hwc")==0) { - if (specified_run_mode && run_mode!=RUN_VERIFY) goto two_modes; - run_mode = RUN_HWC; - } else if (strcmp(*argv, "--prelock")==0) prelock=1; -#ifdef TOKUDB - else if (strcmp(*argv, "--prelockflag")==0) { prelockflag=1; lock_flag = DB_PRELOCKED; } - else if (strcmp(*argv, "--prelockwriteflag")==0) { prelockflag=1; lock_flag = DB_PRELOCKED_WRITE; } -#endif - else if (strcmp(*argv, "--nox")==0) { do_txns=0; } - else if (strcmp(*argv, "--count")==0) { - char *end; - argc--; argv++; - errno=0; limitcount=strtol(*argv, &end, 10); assert(errno==0); - printf("Limiting count to %ld\n", limitcount); - } else if (strcmp(*argv, "--cachesize")==0 && argc>0) { - char *end; - argc--; argv++; - cachesize=(u_int32_t)strtol(*argv, &end, 10); - } else if (strcmp(*argv, "--env") == 0) { - argc--; argv++; - if (argc==0) exit(print_usage(pname)); - dbdir = *argv; - } else if (strcmp(*argv, "--range") == 0 && argc > 2) { - run_mode = RUN_RANGE; - argc--; argv++; - start_range = strtoll(*argv, NULL, 10); - argc--; argv++; - end_range = strtoll(*argv, NULL, 10); - } else if (strcmp(*argv, "--experiments") == 0 && argc > 1) { - argc--; argv++; - n_experiments = strtol(*argv, NULL, 10); - } else if 
(strcmp(*argv, "--srandom") == 0 && argc > 1) { - argc--; argv++; - srandom(atoi(*argv)); - } else if (strcmp(*argv, "--bulk_fetch") == 0 && argc > 1) { - argc--; argv++; - bulk_fetch = atoi(*argv); - } else { - exit(print_usage(pname)); - } - argc--; argv++; - } - //Prelocking is meaningless without transactions - if (do_txns==0) { - prelockflag=0; - lock_flag=0; - prelock=0; - } -} - -static void scanscan_setup (void) { - int r; - r = db_env_create(&env, 0); assert(r==0); - r = env->set_cachesize(env, 0, cachesize, 1); assert(r==0); - r = env->open(env, dbdir, do_txns? env_open_flags_yesx : env_open_flags_nox, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH); assert(r==0); - r = db_create(&db, env, 0); assert(r==0); - if (do_txns) { - r = env->txn_begin(env, 0, &tid, 0); assert(r==0); - } - r = db->open(db, tid, dbfilename, NULL, DB_BTREE, 0, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH); assert(r==0); -#ifdef TOKUDB - if (prelock) { - r = db->pre_acquire_table_lock(db, tid); - assert(r==0); - } -#endif -} - -static void scanscan_shutdown (void) { - int r; - r = db->close(db, 0); assert(r==0); - if (do_txns) { - r = tid->commit(tid, 0); assert(r==0); - } - r = env->close(env, 0); assert(r==0); -} - -static double gettime (void) { - struct timeval tv; - int r = gettimeofday(&tv, 0); - assert(r==0); - return tv.tv_sec + 1e-6*tv.tv_usec; -} - -static void scanscan_hwc (void) { - int r; - int counter=0; - for (counter=0; countercursor(db, tid, &dbc, 0); assert(r==0); - memset(&k, 0, sizeof(k)); - memset(&v, 0, sizeof(v)); - u_int32_t c_get_flags = DB_NEXT; - if (prelockflag && (counter || prelock)) { - c_get_flags |= lock_flag; - } - while (0 == (r = dbc->c_get(dbc, &k, &v, c_get_flags))) { - - //printf("r=%d\n", r); - - totalbytes += k.size + v.size; - rowcounter++; - if (limitcount>0 && rowcounter>=limitcount) break; - } - assert(r==DB_NOTFOUND); - r = dbc->c_close(dbc); assert(r==0); - double thistime = gettime(); - double tdiff = thistime-prevtime; - printf("Scan %lld bytes (%d rows) in 
%9.6fs at %9fMB/s\n", totalbytes, rowcounter, tdiff, 1e-6*totalbytes/tdiff); - } -} - -#ifdef TOKUDB - -struct extra_count { - long long totalbytes; - int rowcounter; -}; - -static int counttotalbytes (DBT const *key, DBT const *data, void *extrav) { - struct extra_count *e=extrav; - e->totalbytes += key->size + data->size; - e->rowcounter++; - return bulk_fetch ? TOKUDB_CURSOR_CONTINUE : 0; -} - -static void scanscan_lwc (void) { - int r; - int counter=0; - for (counter=0; countercursor(db, tid, &dbc, 0); assert(r==0); - u_int32_t f_flags = 0; - if (prelockflag && (counter || prelock)) { - f_flags |= lock_flag; - } - long rowcounter=0; - while (0 == (r = dbc->c_getf_next(dbc, f_flags, counttotalbytes, &e))) { - rowcounter++; - if (limitcount>0 && rowcounter>=limitcount) break; - } - r = dbc->c_close(dbc); assert(r==0); - double thistime = gettime(); - double tdiff = thistime-prevtime; - printf("LWC Scan %lld bytes (%d rows) in %9.6fs at %9fMB/s\n", e.totalbytes, e.rowcounter, tdiff, 1e-6*e.totalbytes/tdiff); - } -} -#endif - -static void scanscan_range (void) { - int r; - - double texperiments[n_experiments]; - u_int64_t k = 0; - char kv[8]; - DBT key, val; - - int counter; - for (counter = 0; counter < n_experiments; counter++) { - - if (1) { //if ((counter&1) == 0) { - makekey: - // generate a random key in the key range - k = (start_range + (random() % (end_range - start_range))) * (1<<6); - int i; - for (i = 0; i < 8; i++) - kv[i] = k >> (56-8*i); - } - memset(&key, 0, sizeof key); key.data = &kv, key.size = sizeof kv; - memset(&val, 0, sizeof val); - - double tstart = gettime(); - - DBC *dbc; - r = db->cursor(db, tid, &dbc, 0); assert(r==0); - - // set the cursor to the random key - r = dbc->c_get(dbc, &key, &val, DB_SET_RANGE+lock_flag); - if (r != 0) { - assert(r == DB_NOTFOUND); - printf("%s:%d %" PRIu64 "\n", __FUNCTION__, __LINE__, k); - goto makekey; - } - -#ifdef TOKUDB - // do the range scan - long rowcounter = 0; - struct extra_count e = {0,0}; - 
while (limitcount > 0 && rowcounter < limitcount) { - r = dbc->c_getf_next(dbc, prelockflag ? lock_flag : 0, counttotalbytes, &e); - if (r != 0) - break; - rowcounter++; - } -#endif - - r = dbc->c_close(dbc); - assert(r==0); - - texperiments[counter] = gettime() - tstart; - printf("%" PRIu64 " %f\n", k, texperiments[counter]); fflush(stdout); - } - - // print the times - double tsum = 0.0, tmin = 0.0, tmax = 0.0; - for (counter = 0; counter < n_experiments; counter++) { - if (counter==0 || texperiments[counter] < tmin) - tmin = texperiments[counter]; - if (counter==0 || texperiments[counter] > tmax) - tmax = texperiments[counter]; - tsum += texperiments[counter]; - } - printf("%f %f %f/%d = %f\n", tmin, tmax, tsum, n_experiments, tsum / n_experiments); -} - -#ifdef TOKUDB - -struct extra_verify { - long long totalbytes; - int rowcounter; - DBT k,v; // the k and v are gotten using the old cursor -}; - -static int -checkbytes (DBT const *key, DBT const *data, void *extrav) { - struct extra_verify *e=extrav; - e->totalbytes += key->size + data->size; - e->rowcounter++; - assert(e->k.size == key->size); - assert(e->v.size == data->size); - assert(memcmp(e->k.data, key->data, key->size)==0); - assert(memcmp(e->v.data, data->data, data->size)==0); - assert(e->k.data != key->data); - assert(e->v.data != data->data); - return 0; -} - - -static void scanscan_verify (void) { - int r; - int counter=0; - for (counter=0; countercursor(db, tid, &dbc1, 0); assert(r==0); - r = db->cursor(db, tid, &dbc2, 0); assert(r==0); - memset(&v.k, 0, sizeof(v.k)); - memset(&v.v, 0, sizeof(v.v)); - u_int32_t f_flags = 0; - u_int32_t c_get_flags = DB_NEXT; - if (prelockflag && (counter || prelock)) { - f_flags |= lock_flag; - c_get_flags |= lock_flag; - } - while (1) { - int r1,r2; - r2 = dbc1->c_get(dbc1, &v.k, &v.v, c_get_flags); - r1 = dbc2->c_getf_next(dbc2, f_flags, checkbytes, &v); - assert(r1==r2); - if (r1) break; - } - r = dbc1->c_close(dbc1); assert(r==0); - r = dbc2->c_close(dbc2); 
assert(r==0); - double thistime = gettime(); - double tdiff = thistime-prevtime; - printf("verify %lld bytes (%d rows) in %9.6fs at %9fMB/s\n", v.totalbytes, v.rowcounter, tdiff, 1e-6*v.totalbytes/tdiff); - } -} - -#endif - -int main (int argc, const char *argv[]) { - - parse_args(argc,argv); - - scanscan_setup(); - switch (run_mode) { - case RUN_HWC: scanscan_hwc(); break; -#ifdef TOKUDB - case RUN_LWC: scanscan_lwc(); break; - case RUN_VERIFY: scanscan_verify(); break; -#endif - case RUN_RANGE: scanscan_range(); break; - default: assert(0); break; - } - scanscan_shutdown(); - - return 0; -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/examples/db-update.c mariadb-5.5-5.5.40/storage/tokudb/ft-index/examples/db-update.c --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/examples/db-update.c 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/examples/db-update.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,379 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -// measure the performance of a simulated "insert on duplicate key update" operation -// the table schema is t(a int, b int, c int, d int, primary key(a, b)) -// a and b are random -// c is the sum of the observations -// d is the first observation - -#include -#include -#include -#include -#include -#include -#include -#include -#include "db.h" - -static size_t key_size = 8; -static size_t val_size = 8; -static int verbose = 0; - -static void db_error(const DB_ENV *env, const char *prefix, const char *msg) { - printf("%s: %p %s %s\n", __FUNCTION__, env, prefix, msg); -} - -static int get_int(void *p) { - int v; - memcpy(&v, p, sizeof v); - return htonl(v); -} - -#if defined(TOKUDB) -static int my_update_callback(DB *db, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra) { - assert(db); - assert(key); - if (old_val == NULL) { - // insert new_val = extra - set_val(extra, set_extra); - } else { - if (verbose) printf("u"); - // update new_val = old_val + extra - assert(old_val->size == val_size && extra->size == val_size); - char new_val_buffer[val_size]; - memcpy(new_val_buffer, old_val->data, sizeof new_val_buffer); - int newc = htonl(get_int(old_val->data) + get_int(extra->data)); // newc = oldc + newc - memcpy(new_val_buffer, &newc, sizeof newc); - DBT new_val = { .data = new_val_buffer, .size = sizeof new_val_buffer }; - set_val(&new_val, set_extra); - } - return 0; -} -#endif - -static void insert_and_update(DB *db, DB_TXN *txn, int a, int b, int c, int d, bool do_update_callback) { -#if !defined(TOKUDB) - assert(!do_update_callback); -#endif - int r; - - // generate the key - assert(key_size >= 8); - char key_buffer[key_size]; - int newa = htonl(a); - memcpy(key_buffer, &newa, sizeof newa); - int newb = htonl(b); - memcpy(key_buffer+4, &newb, sizeof newb); - - // generate the value - assert(val_size >= 8); - char val_buffer[val_size]; - int newc = htonl(c); - memcpy(val_buffer, &newc, 
sizeof newc); - int newd = htonl(d); - memcpy(val_buffer+4, &newd, sizeof newd); - -#if defined(TOKUDB) - if (do_update_callback) { - // extra = value_buffer, implicit combine column c update function - DBT key = { .data = key_buffer, .size = sizeof key_buffer }; - DBT extra = { .data = val_buffer, .size = sizeof val_buffer }; - r = db->update(db, txn, &key, &extra, 0); assert(r == 0); - } else -#endif - { - DBT key = { .data = key_buffer, .size = sizeof key_buffer }; - DBT value = { .data = val_buffer, .size = sizeof val_buffer }; - DBT oldvalue = { }; - r = db->get(db, txn, &key, &oldvalue, 0); - assert(r == 0 || r == DB_NOTFOUND); - if (r == 0) { - // update it - if (verbose) printf("U"); - int oldc = get_int(oldvalue.data); - newc = htonl(oldc + c); // newc = oldc + newc - memcpy(val_buffer, &newc, sizeof newc); - r = db->put(db, txn, &key, &value, 0); - assert(r == 0); - } else if (r == DB_NOTFOUND) { - r = db->put(db, txn, &key, &value, 0); - assert(r == 0); - } - } -} - -static inline float tdiff (struct timeval *a, struct timeval *b) { - return (a->tv_sec - b->tv_sec) +1e-6*(a->tv_usec - b->tv_usec); -} - -static void insert_and_update_all(DB_ENV *db_env, DB *db, long nrows, long max_rows_per_txn, int key_range, long rows_per_report, bool do_update_callback, bool do_txn) { - int r; - struct timeval tstart; - r = gettimeofday(&tstart, NULL); assert(r == 0); - struct timeval tlast = tstart; - DB_TXN *txn = NULL; - if (do_txn) { - r = db_env->txn_begin(db_env, NULL, &txn, 0); assert(r == 0); - } - long n_rows_per_txn = 0; - long rowi; - for (rowi = 0; rowi < nrows; rowi++) { - int a = random() % key_range; - int b = random() % key_range; - int c = 1; - int d = 0; // timestamp - insert_and_update(db, txn, a, b, c, d, do_update_callback); - n_rows_per_txn++; - - // maybe commit - if (do_txn && n_rows_per_txn == max_rows_per_txn) { - r = txn->commit(txn, 0); assert(r == 0); - r = db_env->txn_begin(db_env, NULL, &txn, 0); assert(r == 0); - n_rows_per_txn = 0; - } 
- - // maybe report performance - if (((rowi + 1) % rows_per_report) == 0) { - struct timeval tnow; - r = gettimeofday(&tnow, NULL); assert(r == 0); - float last_time = tdiff(&tnow, &tlast); - float total_time = tdiff(&tnow, &tstart); - printf("%ld %.3f %.0f/s %.0f/s\n", rowi + 1, last_time, rows_per_report/last_time, rowi/total_time); fflush(stdout); - tlast = tnow; - } - } - - if (do_txn) { - r = txn->commit(txn, 0); assert(r == 0); - } - struct timeval tnow; - r = gettimeofday(&tnow, NULL); assert(r == 0); - printf("total %ld %.3f %.0f/s\n", nrows, tdiff(&tnow, &tstart), nrows/tdiff(&tnow, &tstart)); fflush(stdout); -} - -int main(int argc, char *argv[]) { -#if defined(TOKUDB) - char *db_env_dir = "update.env.tokudb"; -#else - char *db_env_dir = "update.env.bdb"; -#endif - int db_env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG; - char *db_filename = "update.db"; - long rows = 1000000000; - long rows_per_txn = 100; - long rows_per_report = 100000; - int key_range = 1000000; -#if defined(TOKUDB) - bool do_update_callback = true; -#else - bool do_update_callback = false; -#endif - bool do_txn = false; - u_int64_t cachesize = 1000000000; - u_int32_t pagesize = 0; -#if defined(TOKUDB) - u_int32_t checkpoint_period = 60; -#endif - - int i; - for (i = 1; i < argc; i++) { - char *arg = argv[i]; - if (strcmp(arg, "--verbose") == 0) { - verbose++; - continue; - } - if (strcmp(arg, "--rows") == 0 && i+1 < argc) { - rows = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--rows_per_txn") == 0 && i+1 < argc) { - rows_per_txn = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--rows_per_report") == 0 && i+1 < argc) { - rows_per_report = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--key_range") == 0 && i+1 < argc) { - key_range = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--txn") == 0 && i+1 < argc) { - do_txn = atoi(argv[++i]) != 0; - continue; - } - if (strcmp(arg, "--pagesize") == 0 && i+1 < argc) { - 
pagesize = atoi(argv[++i]); - continue; - } - if (strcmp(arg, "--cachesize") == 0 && i+1 < argc) { - cachesize = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--update_callback") == 0 && i+1 < argc) { - do_update_callback = atoi(argv[++i]) != 0; - continue; - } - if (strcmp(arg, "--key_size") == 0 && i+1 < argc) { - key_size = atoi(argv[++i]); - continue; - } - if (strcmp(arg, "--val_size") == 0 && i+1 < argc) { - val_size = atoi(argv[++i]); - continue; - } -#if defined(TOKUDB) - if (strcmp(arg, "--checkpoint_period") == 0 && i+1 < argc) { - checkpoint_period = atoi(argv[++i]); - continue; - } -#endif - - assert(0); - } - - int r; - char rm_cmd[strlen(db_env_dir) + strlen("rm -rf ") + 1]; - snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", db_env_dir); - r = system(rm_cmd); assert(r == 0); - - r = mkdir(db_env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert(r == 0); - - // create and open the env - DB_ENV *db_env = NULL; - r = db_env_create(&db_env, 0); assert(r == 0); -#if defined(TOKUDB) - db_env->set_update(db_env, my_update_callback); -#endif - if (cachesize) { - if (verbose) printf("cachesize %llu\n", (unsigned long long)cachesize); - const u_int64_t gig = 1 << 30; - r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); - } - if (!do_txn) - db_env_open_flags &= ~(DB_INIT_TXN | DB_INIT_LOG); - db_env->set_errcall(db_env, db_error); - if (verbose) printf("env %s\n", db_env_dir); - r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if defined(TOKUDB) - if (checkpoint_period) { - r = db_env->checkpointing_set_period(db_env, checkpoint_period); assert(r == 0); - u_int32_t period; - r = db_env->checkpointing_get_period(db_env, &period); assert(r == 0 && period == checkpoint_period); - } -#endif - - // create the db - DB *db = NULL; - r = db_create(&db, db_env, 0); assert(r == 0); - DB_TXN *create_txn = NULL; - if (do_txn) { - r = db_env->txn_begin(db_env, 
NULL, &create_txn, 0); assert(r == 0); - } - if (pagesize) { - r = db->set_pagesize(db, pagesize); assert(r == 0); - } - r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); - if (do_txn) { - r = create_txn->commit(create_txn, 0); assert(r == 0); - } - - // insert on duplicate key update - insert_and_update_all(db_env, db, rows, rows_per_txn, key_range, rows_per_report, do_update_callback, do_txn); - - // shutdown - r = db->close(db, 0); assert(r == 0); db = NULL; - r = db_env->close(db_env, 0); assert(r == 0); db_env = NULL; - - return 0; -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/examples/Makefile mariadb-5.5-5.5.40/storage/tokudb/ft-index/examples/Makefile --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/examples/Makefile 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/examples/Makefile 1970-01-01 00:00:00.000000000 +0000 @@ -1,29 +0,0 @@ -SRCS = $(wildcard *.c) -TARGETS = $(patsubst %.c,%,$(SRCS)) $(patsubst %.c,%-bdb,$(SRCS)) -CPPFLAGS = -I../include -D_GNU_SOURCE -CFLAGS = -g -std=c99 -Wall -Wextra -Werror -Wno-missing-field-initializers -ifeq ($(USE_STATIC_LIBS),1) -LIBTOKUDB = tokufractaltree_static -LIBTOKUPORTABILITY = tokuportability_static -else -LIBTOKUDB = tokufractaltree -LIBTOKUPORTABILITY = tokuportability -endif -LDFLAGS = -L../lib -l$(LIBTOKUDB) -l$(LIBTOKUPORTABILITY) -Wl,-rpath,../lib -lpthread -lz -ldl - -default local: $(TARGETS) - -%: %.c - $(CC) $(CPPFLAGS) $(CFLAGS) $^ -o $@ $(LDFLAGS) - -%-bdb: %.c - $(CC) -D_GNU_SOURCE -DBDB $(CFLAGS) $^ -o $@ -ldb - -check: $(TARGETS) - ./db-insert -x && ./db-scan --lwc --prelock --prelockflag - -checknox: $(TARGETS) - ./db-insert && ./db-scan --nox --lwc --prelock --prelockflag - -clean: - rm -rf $(TARGETS) bench.* update.env.* insertm.env.* diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/examples/README.examples mariadb-5.5-5.5.40/storage/tokudb/ft-index/examples/README.examples 
--- mariadb-5.5-5.5.39/storage/tokudb/ft-index/examples/README.examples 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/examples/README.examples 1970-01-01 00:00:00.000000000 +0000 @@ -1,85 +0,0 @@ -The examples includes a pair of programs that can be compiled to use either the Berkeley DB library or the Tokutek Fractal Tree index library. - -Note: The file formats are different from TokuDB and Berkley DB. Thus -you cannot access a database created by Berkeley DB using the Tokutek -DB, or vice-versa. - -db-insert is a program that inserts random key-value pairs into a database. - -db-scan is a program that scans through the key-value pairs, reading every row, from a database. - -db-update is a program that upserts key-value pairs into a database. If the key already exists it increment a count in the value. - -db-insert-multiple is a program and inserts key-value pairs into multiple databases. This is is now TokuDB maintains consistent -secondary databases. - -To build it and run it (it's been tested on Fedora 10): -$ make (Makes the binaries) -Run the insertion workload under TokuDB: -$ ./db-insert -Run the insertion workload under BDB: -$ ./db-insert-bdb - -Here is what the output looks like (this on a Thinkpad X61s laptop -running Fedora 10). BDB is a little faster for sequential insertions -(the first three columns), but much much slower for random insertions -(the next 3 columns), so that TokuDB is faster on combined workload. 
- -$ ./db-insert -serial and random insertions of 1048576 per batch -serial 2.609965s 401759/s random 10.983798s 95466/s cumulative 13.593869s 154272/s -serial 3.053433s 343409/s random 12.008670s 87318/s cumulative 28.656115s 146367/s -serial 5.198312s 201715/s random 15.087426s 69500/s cumulative 48.954605s 128516/s -serial 6.096396s 171999/s random 13.550688s 77382/s cumulative 68.638321s 122215/s -Shutdown 4.025110s -Total time 72.677498s for 8388608 insertions = 115422/s -$ ./db-insert-bdb -serial and random insertions of 1048576 per batch -serial 2.623888s 399627/s random 8.770850s 119552/s cumulative 11.394805s 184045/s -serial 3.081946s 340232/s random 21.046589s 49822/s cumulative 35.523434s 118071/s -serial 14.160498s 74049/s random 497.117523s 2109/s cumulative 546.804504s 11506/s -serial 1.534212s 683462/s random 1128.525146s 929/s cumulative 1676.863892s 5003/s -Shutdown 195.879242s -Total time 1872.746582s for 8388608 insertions = 4479/s - -The files are smaller for TokuDB than BDB. - -$ ls -lh bench.tokudb/ -total 39M --rwxrwxr-x 1 bradley bradley 39M 2009-07-28 15:36 bench.db -$ ls -lh bench.bdb/ -total 322M --rw-r--r-- 1 bradley bradley 322M 2009-07-28 16:14 bench.db - -When scanning the table, one can run out of locks with BDB. There are ways around it (increase the lock table size). - -$ ./db-scan-bdb --nox -Lock table is out of available object entries -db-scan-bdb: db-scan.c:177: scanscan_hwc: Assertion `r==(-30988)' failed. -Aborted - -TokuDB is fine on a big table scan. - -$ ./db-scan --nox -Scan 33162304 bytes (2072644 rows) in 7.924463s at 4.184801MB/s -Scan 33162304 bytes (2072644 rows) in 3.062239s at 10.829431MB/s -0:3 1:53 2:56 -miss=3 hit=53 wait_reading=0 wait=0 -VmPeak: 244668 kB -VmHWM: 68096 kB -VmRSS: 1232 kB - -The update-bdb program upserts 1B rows into a BDB database. When the database gets larger than memory, the throughput -should tank since every update needs to read a block from the storage system. 
The storage system becomes the performance -bottleneck. The program uses 1 1GB cache in front of the kernel's file system buffer cache. The program should hit the wall -at about 300M rows on a machine with 16GB of memory since keys are 8 bytes and values are 8 bytes in size. - -$ ./db-update-bdb - -The update program upserts 1B rows into a TokuDB database. Throughput should be not degrade significantly since the cost -of the storage system reads is amortized over 1000's of update operations. One should expect TokuDB to be at least 50 times -faster than BDB. - -$ ./db-update - -There isn't much documentation for the Tokutek Fractal Tree index library, but most of the API is like Berkeley DB's. diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/background_job_manager.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/background_job_manager.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/background_job_manager.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/background_job_manager.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,159 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2011-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include -#include -#include - -#include "background_job_manager.h" - -struct background_job_manager_struct { - bool accepting_jobs; - uint32_t num_jobs; - toku_cond_t jobs_wait; - toku_mutex_t jobs_lock; -}; - -void bjm_init(BACKGROUND_JOB_MANAGER* pbjm) { - BACKGROUND_JOB_MANAGER XCALLOC(bjm); - toku_mutex_init(&bjm->jobs_lock, 0); - toku_cond_init(&bjm->jobs_wait, NULL); - bjm->accepting_jobs = true; - bjm->num_jobs = 0; - *pbjm = bjm; -} - -void bjm_destroy(BACKGROUND_JOB_MANAGER bjm) { - assert(bjm->num_jobs == 0); - toku_cond_destroy(&bjm->jobs_wait); - toku_mutex_destroy(&bjm->jobs_lock); - toku_free(bjm); -} - -void bjm_reset(BACKGROUND_JOB_MANAGER bjm) { - toku_mutex_lock(&bjm->jobs_lock); - assert(bjm->num_jobs == 0); - bjm->accepting_jobs = true; - toku_mutex_unlock(&bjm->jobs_lock); -} - -int bjm_add_background_job(BACKGROUND_JOB_MANAGER bjm) { - int ret_val; - toku_mutex_lock(&bjm->jobs_lock); - if (bjm->accepting_jobs) { - bjm->num_jobs++; - ret_val = 0; - } - else { - ret_val = -1; - } - toku_mutex_unlock(&bjm->jobs_lock); - return ret_val; -} -void bjm_remove_background_job(BACKGROUND_JOB_MANAGER bjm){ - toku_mutex_lock(&bjm->jobs_lock); - assert(bjm->num_jobs > 0); - bjm->num_jobs--; - if (bjm->num_jobs == 0 && !bjm->accepting_jobs) { - toku_cond_broadcast(&bjm->jobs_wait); - } - toku_mutex_unlock(&bjm->jobs_lock); -} - -void bjm_wait_for_jobs_to_finish(BACKGROUND_JOB_MANAGER bjm) { - toku_mutex_lock(&bjm->jobs_lock); - bjm->accepting_jobs = false; - while (bjm->num_jobs > 0) { - toku_cond_wait(&bjm->jobs_wait, &bjm->jobs_lock); - } - toku_mutex_unlock(&bjm->jobs_lock); -} - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/background_job_manager.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/background_job_manager.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/background_job_manager.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/background_job_manager.h 1970-01-01 00:00:00.000000000 +0000 @@ 
-1,134 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef BACKGROUND_JOB_MANAGER_H -#define BACKGROUND_JOB_MANAGER_H -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 
11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. 
-*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - - -// -// The background job manager keeps track of the existence of -// background jobs running. We use the background job manager -// to allow threads to perform background jobs on various pieces -// of the system (e.g. cachefiles and cloned pairs being written out -// for checkpoint) -// - -typedef struct background_job_manager_struct *BACKGROUND_JOB_MANAGER; - - -void bjm_init(BACKGROUND_JOB_MANAGER* bjm); -void bjm_destroy(BACKGROUND_JOB_MANAGER bjm); - -// -// Re-allows a background job manager to accept background jobs -// -void bjm_reset(BACKGROUND_JOB_MANAGER bjm); - -// -// add a background job. If return value is 0, then the addition of the job -// was successful and the user may perform the background job. If return -// value is non-zero, then adding of the background job failed and the user -// may not perform the background job. -// -int bjm_add_background_job(BACKGROUND_JOB_MANAGER bjm); - -// -// remove a background job -// -void bjm_remove_background_job(BACKGROUND_JOB_MANAGER bjm); - -// -// This function waits for all current background jobs to be removed. If the user -// calls bjm_add_background_job while this function is running, or after this function -// has completed, bjm_add_background_job returns an error. 
-// -void bjm_wait_for_jobs_to_finish(BACKGROUND_JOB_MANAGER bjm); - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/block_allocator.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/block_allocator.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/block_allocator.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/block_allocator.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,473 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ident "$Id$" - -#include "block_allocator.h" -#include -#include -#include -#include -#include - -// Here's a very simple implementation. -// It's not very fast at allocating or freeing. -// Previous implementation used next_fit, but now use first_fit since we are moving blocks around to reduce file size. - -struct block_allocator { - uint64_t reserve_at_beginning; // How much to reserve at the beginning - uint64_t alignment; // Block alignment - uint64_t n_blocks; // How many blocks - uint64_t blocks_array_size; // How big is the blocks_array. Must be >= n_blocks. - struct block_allocator_blockpair *blocks_array; // These blocks are sorted by address. 
- uint64_t n_bytes_in_use; // including the reserve_at_beginning -}; - -void -block_allocator_validate (BLOCK_ALLOCATOR ba) { - uint64_t i; - uint64_t n_bytes_in_use = ba->reserve_at_beginning; - for (i=0; in_blocks; i++) { - n_bytes_in_use += ba->blocks_array[i].size; - if (i>0) { - assert(ba->blocks_array[i].offset > ba->blocks_array[i-1].offset); - assert(ba->blocks_array[i].offset >= ba->blocks_array[i-1].offset + ba->blocks_array[i-1].size ); - } - } - assert(n_bytes_in_use == ba->n_bytes_in_use); -} - -#if 0 -#define VALIDATE(b) block_allocator_validate(b) -#else -#define VALIDATE(b) ((void)0) -#endif - -#if 0 -void -block_allocator_print (BLOCK_ALLOCATOR ba) { - uint64_t i; - for (i=0; in_blocks; i++) { - printf("%" PRId64 ":%" PRId64 " ", ba->blocks_array[i].offset, ba->blocks_array[i].size); - } - printf("\n"); - VALIDATE(ba); -} -#endif - -void -create_block_allocator (BLOCK_ALLOCATOR *ba, uint64_t reserve_at_beginning, uint64_t alignment) { - assert(alignment>=512 && 0==(alignment%512)); // the alignment must be at least 512 and aligned with 512 to make DIRECT_IO happy. 
- BLOCK_ALLOCATOR XMALLOC(result); - result->reserve_at_beginning = reserve_at_beginning; - result->alignment = alignment; - result->n_blocks = 0; - result->blocks_array_size = 1; - XMALLOC_N(result->blocks_array_size, result->blocks_array); - result->n_bytes_in_use = reserve_at_beginning; - *ba = result; - VALIDATE(result); -} - -void -destroy_block_allocator (BLOCK_ALLOCATOR *bap) { - BLOCK_ALLOCATOR ba = *bap; - *bap = 0; - toku_free(ba->blocks_array); - toku_free(ba); -} - -static void -grow_blocks_array_by (BLOCK_ALLOCATOR ba, uint64_t n_to_add) { - if (ba->n_blocks + n_to_add > ba->blocks_array_size) { - uint64_t new_size = ba->n_blocks + n_to_add; - uint64_t at_least = ba->blocks_array_size * 2; - if (at_least > new_size) { - new_size = at_least; - } - ba->blocks_array_size = new_size; - XREALLOC_N(ba->blocks_array_size, ba->blocks_array); - } -} - - -static void -grow_blocks_array (BLOCK_ALLOCATOR ba) { - grow_blocks_array_by(ba, 1); -} - -void -block_allocator_merge_blockpairs_into (uint64_t d, struct block_allocator_blockpair dst[/*d*/], - uint64_t s, const struct block_allocator_blockpair src[/*s*/]) -{ - uint64_t tail = d+s; - while (d>0 && s>0) { - struct block_allocator_blockpair *dp = &dst[d-1]; - struct block_allocator_blockpair const *sp = &src[s-1]; - struct block_allocator_blockpair *tp = &dst[tail-1]; - assert(tail>0); - if (dp->offset > sp->offset) { - *tp = *dp; - d--; - tail--; - } else { - *tp = *sp; - s--; - tail--; - } - } - while (d>0) { - struct block_allocator_blockpair *dp = &dst[d-1]; - struct block_allocator_blockpair *tp = &dst[tail-1]; - *tp = *dp; - d--; - tail--; - } - while (s>0) { - struct block_allocator_blockpair const *sp = &src[s-1]; - struct block_allocator_blockpair *tp = &dst[tail-1]; - *tp = *sp; - s--; - tail--; - } -} - -static int -compare_blockpairs (const void *av, const void *bv) { - const struct block_allocator_blockpair *a = (const struct block_allocator_blockpair *) av; - const struct block_allocator_blockpair 
*b = (const struct block_allocator_blockpair *) bv; - if (a->offset < b->offset) return -1; - if (a->offset > b->offset) return +1; - return 0; -} - -void -block_allocator_alloc_blocks_at (BLOCK_ALLOCATOR ba, uint64_t n_blocks, struct block_allocator_blockpair pairs[/*n_blocks*/]) -// See the documentation in block_allocator.h -{ - VALIDATE(ba); - qsort(pairs, n_blocks, sizeof(*pairs), compare_blockpairs); - for (uint64_t i=0; i= ba->reserve_at_beginning); - assert(pairs[i].offset%ba->alignment == 0); - ba->n_bytes_in_use += pairs[i].size; - invariant(pairs[i].size > 0); //Allocator does not support size 0 blocks. See block_allocator_free_block. - } - grow_blocks_array_by(ba, n_blocks); - block_allocator_merge_blockpairs_into(ba->n_blocks, ba->blocks_array, - n_blocks, pairs); - ba->n_blocks += n_blocks; - VALIDATE(ba); -} - -void -block_allocator_alloc_block_at (BLOCK_ALLOCATOR ba, uint64_t size, uint64_t offset) { - struct block_allocator_blockpair p = {.offset = offset, .size=size}; - // Just do a linear search for the block. - // This data structure is a sorted array (no gaps or anything), so the search isn't really making this any slower than the insertion. - // To speed up the insertion when opening a file, we provide the block_allocator_alloc_blocks_at function. - block_allocator_alloc_blocks_at(ba, 1, &p); -} - -static inline uint64_t -align (uint64_t value, BLOCK_ALLOCATOR ba) -// Effect: align a value by rounding up. -{ - return ((value+ba->alignment-1)/ba->alignment)*ba->alignment; -} - -void block_allocator_alloc_block(BLOCK_ALLOCATOR ba, uint64_t size, uint64_t *offset) -// Effect: Allocate a block. The resulting block must be aligned on the ba->alignment (which to make direct_io happy must be a positive multiple of 512). -{ - invariant(size > 0); //Allocator does not support size 0 blocks. See block_allocator_free_block. 
- grow_blocks_array(ba); - ba->n_bytes_in_use += size; - if (ba->n_blocks==0) { - assert(ba->n_bytes_in_use == ba->reserve_at_beginning + size); // we know exactly how many are in use - ba->blocks_array[0].offset = align(ba->reserve_at_beginning, ba); - ba->blocks_array[0].size = size; - *offset = ba->blocks_array[0].offset; - ba->n_blocks++; - return; - } - // Implement first fit. - { - uint64_t end_of_reserve = align(ba->reserve_at_beginning, ba); - if (end_of_reserve + size <= ba->blocks_array[0].offset ) { - // Check to see if the space immediately after the reserve is big enough to hold the new block. - struct block_allocator_blockpair *bp = &ba->blocks_array[0]; - memmove(bp+1, bp, (ba->n_blocks)*sizeof(*bp)); - bp[0].offset = end_of_reserve; - bp[0].size = size; - ba->n_blocks++; - *offset = end_of_reserve; - VALIDATE(ba); - return; - } - } - for (uint64_t blocknum = 0; blocknum +1 < ba->n_blocks; blocknum ++) { - // Consider the space after blocknum - struct block_allocator_blockpair *bp = &ba->blocks_array[blocknum]; - uint64_t this_offset = bp[0].offset; - uint64_t this_size = bp[0].size; - uint64_t answer_offset = align(this_offset + this_size, ba); - if (answer_offset + size > bp[1].offset) continue; // The block we want doesn't fit after this block. - // It fits, so allocate it here. - memmove(bp+2, bp+1, (ba->n_blocks - blocknum -1)*sizeof(*bp)); - bp[1].offset = answer_offset; - bp[1].size = size; - ba->n_blocks++; - *offset = answer_offset; - VALIDATE(ba); - return; - } - // It didn't fit anywhere, so fit it on the end. - assert(ba->n_blocks < ba->blocks_array_size); - struct block_allocator_blockpair *bp = &ba->blocks_array[ba->n_blocks]; - uint64_t answer_offset = align(bp[-1].offset+bp[-1].size, ba); - bp->offset = answer_offset; - bp->size = size; - ba->n_blocks++; - *offset = answer_offset; - VALIDATE(ba); -} - -static int64_t -find_block (BLOCK_ALLOCATOR ba, uint64_t offset) -// Find the index in the blocks array that has a particular offset. 
Requires that the block exist. -// Use binary search so it runs fast. -{ - VALIDATE(ba); - if (ba->n_blocks==1) { - assert(ba->blocks_array[0].offset == offset); - return 0; - } - uint64_t lo = 0; - uint64_t hi = ba->n_blocks; - while (1) { - assert(loblocks_array[mid].offset; - //printf("lo=%" PRId64 " hi=%" PRId64 " mid=%" PRId64 " thisoff=%" PRId64 " offset=%" PRId64 "\n", lo, hi, mid, thisoff, offset); - if (thisoff < offset) { - lo = mid+1; - } else if (thisoff > offset) { - hi = mid; - } else { - return mid; - } - } -} - -// To support 0-sized blocks, we need to include size as an input to this function. -// All 0-sized blocks at the same offset can be considered identical, but -// a 0-sized block can share offset with a non-zero sized block. -// The non-zero sized block is not exchangable with a zero sized block (or vice versa), -// so inserting 0-sized blocks can cause corruption here. -void -block_allocator_free_block (BLOCK_ALLOCATOR ba, uint64_t offset) { - VALIDATE(ba); - int64_t bn = find_block(ba, offset); - assert(bn>=0); // we require that there is a block with that offset. Might as well abort if no such block exists. - ba->n_bytes_in_use -= ba->blocks_array[bn].size; - memmove(&ba->blocks_array[bn], &ba->blocks_array[bn+1], (ba->n_blocks-bn-1) * sizeof(struct block_allocator_blockpair)); - ba->n_blocks--; - VALIDATE(ba); -} - -uint64_t -block_allocator_block_size (BLOCK_ALLOCATOR ba, uint64_t offset) { - int64_t bn = find_block(ba, offset); - assert(bn>=0); // we require that there is a block with that offset. Might as well abort if no such block exists. 
- return ba->blocks_array[bn].size; -} - -uint64_t -block_allocator_allocated_limit (BLOCK_ALLOCATOR ba) { - if (ba->n_blocks==0) return ba->reserve_at_beginning; - else { - struct block_allocator_blockpair *last = &ba->blocks_array[ba->n_blocks-1]; - return last->offset + last->size; - } -} - -int -block_allocator_get_nth_block_in_layout_order (BLOCK_ALLOCATOR ba, uint64_t b, uint64_t *offset, uint64_t *size) -// Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth. -// Return the offset and size of the block with that number. -// Return 0 if there is a block that big, return nonzero if b is too big. -{ - if (b==0) { - *offset=0; - *size =ba->reserve_at_beginning; - return 0; - } else if (b > ba->n_blocks) { - return -1; - } else { - *offset=ba->blocks_array[b-1].offset; - *size =ba->blocks_array[b-1].size; - return 0; - } -} - -void -block_allocator_get_unused_statistics(BLOCK_ALLOCATOR ba, TOKU_DB_FRAGMENTATION report) { - //Requires: report->file_size_bytes is filled in - //Requires: report->data_bytes is filled in - //Requires: report->checkpoint_bytes_additional is filled in - - assert(ba->n_bytes_in_use == report->data_bytes + report->checkpoint_bytes_additional); - - report->unused_bytes = 0; - report->unused_blocks = 0; - report->largest_unused_block = 0; - if (ba->n_blocks > 0) { - //Deal with space before block 0 and after reserve: - { - struct block_allocator_blockpair *bp = &ba->blocks_array[0]; - assert(bp->offset >= align(ba->reserve_at_beginning, ba)); - uint64_t free_space = bp->offset - align(ba->reserve_at_beginning, ba); - if (free_space > 0) { - report->unused_bytes += free_space; - report->unused_blocks++; - if (free_space > report->largest_unused_block) { - report->largest_unused_block = free_space; - } - } - } - - //Deal with space between blocks: - for (uint64_t blocknum = 0; blocknum +1 < ba->n_blocks; blocknum ++) { - // Consider the space after blocknum - 
struct block_allocator_blockpair *bp = &ba->blocks_array[blocknum]; - uint64_t this_offset = bp[0].offset; - uint64_t this_size = bp[0].size; - uint64_t end_of_this_block = align(this_offset+this_size, ba); - uint64_t next_offset = bp[1].offset; - uint64_t free_space = next_offset - end_of_this_block; - if (free_space > 0) { - report->unused_bytes += free_space; - report->unused_blocks++; - if (free_space > report->largest_unused_block) { - report->largest_unused_block = free_space; - } - } - } - - //Deal with space after last block - { - struct block_allocator_blockpair *bp = &ba->blocks_array[ba->n_blocks-1]; - uint64_t this_offset = bp[0].offset; - uint64_t this_size = bp[0].size; - uint64_t end_of_this_block = align(this_offset+this_size, ba); - if (end_of_this_block < report->file_size_bytes) { - uint64_t free_space = report->file_size_bytes - end_of_this_block; - assert(free_space > 0); - report->unused_bytes += free_space; - report->unused_blocks++; - if (free_space > report->largest_unused_block) { - report->largest_unused_block = free_space; - } - } - } - } - else { - //No blocks. Just the reserve. 
- uint64_t end_of_this_block = align(ba->reserve_at_beginning, ba); - if (end_of_this_block < report->file_size_bytes) { - uint64_t free_space = report->file_size_bytes - end_of_this_block; - assert(free_space > 0); - report->unused_bytes += free_space; - report->unused_blocks++; - if (free_space > report->largest_unused_block) { - report->largest_unused_block = free_space; - } - } - } -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/block_allocator.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/block_allocator.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/block_allocator.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/block_allocator.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,230 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef BLOCK_ALLOCATOR_H -#define BLOCK_ALLOCATOR_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "fttypes.h" - - -#define BLOCK_ALLOCATOR_ALIGNMENT 4096 -// How much must be reserved at the beginning for the block? -// The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 pointer for each root. -// So 4096 should be enough. -#define BLOCK_ALLOCATOR_HEADER_RESERVE 4096 -#if (BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT) != 0 -#error -#endif - -// Block allocator. 
-// Overview: A block allocator manages the allocation of variable-sized blocks. -// The translation of block numbers to addresses is handled elsewhere. -// The allocation of block numbers is handled elsewhere. - -// We can create a block allocator. -// When creating a block allocator we also specify a certain-sized -// block at the beginning that is preallocated (and cannot be allocated -// or freed) - -// We can allocate blocks of a particular size at a particular location. -// We can allocate blocks of a particular size at a location chosen by the allocator. -// We can free blocks. -// We can determine the size of a block. - - -#define BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE (2*BLOCK_ALLOCATOR_HEADER_RESERVE) - -typedef struct block_allocator *BLOCK_ALLOCATOR; - -void create_block_allocator (BLOCK_ALLOCATOR * ba, uint64_t reserve_at_beginning, uint64_t alignment); -// Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block. -// All blocks be start on a multiple of ALIGNMENT. -// Aborts if we run out of memory. -// Parameters -// ba (OUT): Result stored here. -// reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned. -// alignment (IN) Block alignment. - -void destroy_block_allocator (BLOCK_ALLOCATOR *ba); -// Effect: Destroy a block allocator at *ba. -// Also, set *ba=NULL. -// Rationale: If there was only one copy of the pointer, this kills that copy too. -// Paramaters: -// ba (IN/OUT): - - -void block_allocator_alloc_block_at (BLOCK_ALLOCATOR ba, uint64_t size, uint64_t offset); -// Effect: Allocate a block of the specified size at a particular offset. -// Aborts if anything goes wrong. -// The performance of this function may be as bad as Theta(N), where N is the number of blocks currently in use. -// Usage note: To allocate several blocks (e.g., when opening a BRT), use block_allocator_alloc_blocks_at(). 
-// Requires: The resulting block may not overlap any other allocated block. -// And the offset must be a multiple of the block alignment. -// Parameters: -// ba (IN/OUT): The block allocator. (Modifies ba.) -// size (IN): The size of the block. -// offset (IN): The location of the block. - - -struct block_allocator_blockpair { - uint64_t offset; - uint64_t size; -}; -void block_allocator_alloc_blocks_at (BLOCK_ALLOCATOR ba, uint64_t n_blocks, struct block_allocator_blockpair *pairs); -// Effect: Take pairs in any order, and add them all, as if we did block_allocator_alloc_block() on each pair. -// This should run in time O(N + M log M) where N is the number of blocks in ba, and M is the number of new blocks. -// Modifies: pairs (sorts them). - -void block_allocator_alloc_block (BLOCK_ALLOCATOR ba, uint64_t size, uint64_t *offset); -// Effect: Allocate a block of the specified size at an address chosen by the allocator. -// Aborts if anything goes wrong. -// The block address will be a multiple of the alignment. -// Parameters: -// ba (IN/OUT): The block allocator. (Modifies ba.) -// size (IN): The size of the block. (The size does not have to be aligned.) -// offset (OUT): The location of the block. - -void block_allocator_free_block (BLOCK_ALLOCATOR ba, uint64_t offset); -// Effect: Free the block at offset. -// Requires: There must be a block currently allocated at that offset. -// Parameters: -// ba (IN/OUT): The block allocator. (Modifies ba.) -// offset (IN): The offset of the block. - - -uint64_t block_allocator_block_size (BLOCK_ALLOCATOR ba, uint64_t offset); -// Effect: Return the size of the block that starts at offset. -// Requires: There must be a block currently allocated at that offset. -// Parameters: -// ba (IN/OUT): The block allocator. (Modifies ba.) -// offset (IN): The offset of the block. - -void block_allocator_validate (BLOCK_ALLOCATOR ba); -// Effect: Check to see if the block allocator is OK. This may take a long time. 
-// Usage Hints: Probably only use this for unit tests. - -void block_allocator_print (BLOCK_ALLOCATOR ba); -// Effect: Print information about the block allocator. -// Rationale: This is probably useful only for debugging. - -uint64_t block_allocator_allocated_limit (BLOCK_ALLOCATOR ba); -// Effect: Return the unallocated block address of "infinite" size. -// That is, return the smallest address that is above all the allocated blocks. -// Rationale: When writing the root FIFO we don't know how big the block is. -// So we start at the "infinite" block, write the fifo, and then -// allocate_block_at of the correct size and offset to account for the root FIFO. - -int block_allocator_get_nth_block_in_layout_order (BLOCK_ALLOCATOR ba, uint64_t b, uint64_t *offset, uint64_t *size); -// Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth. -// Return the offset and size of the block with that number. -// Return 0 if there is a block that big, return nonzero if b is too big. -// Rationale: This is probably useful only for tests. - -void block_allocator_get_unused_statistics(BLOCK_ALLOCATOR ba, TOKU_DB_FRAGMENTATION report); -// Effect: Fill in report to indicate how the file is used. -// Requires: -// report->file_size_bytes is filled in -// report->data_bytes is filled in -// report->checkpoint_bytes_additional is filled in - -void block_allocator_merge_blockpairs_into (uint64_t d, struct block_allocator_blockpair dst[/*d*/], - uint64_t s, const struct block_allocator_blockpair src[/*s*/]); -// Effect: Merge dst[d] and src[s] into dst[d+s], merging in place. -// Initially dst and src hold sorted arrays (sorted by increasing offset). -// Finally dst contains all d+s elements sorted in order. -// Requires: -// dst and src are sorted. -// dst must be large enough. -// No blocks may overlap. -// Rationale: This is exposed so it can be tested by a glass box tester. 
Otherwise it would be static (file-scope) function inside block_allocator.c - - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/block_table.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/block_table.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/block_table.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/block_table.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,1199 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include -#include "ft-internal.h" // ugly but pragmatic, need access to dirty bits while holding translation lock -#include "fttypes.h" -#include "block_table.h" -#include "memory.h" -#include "toku_assert.h" -#include -#include "block_allocator.h" -#include "rbuf.h" -#include "wbuf.h" -#include - -//When the translation (btt) is stored on disk: -// In Header: -// size_on_disk -// location_on_disk -// In block translation table (in order): -// smallest_never_used_blocknum -// blocknum_freelist_head -// array -// a checksum -struct translation { //This is the BTT (block translation table) - enum translation_type type; - int64_t length_of_array; //Number of elements in array (block_translation). 
always >= smallest_never_used_blocknum - BLOCKNUM smallest_never_used_blocknum; - BLOCKNUM blocknum_freelist_head; // next (previously used) unused blocknum (free list) - struct block_translation_pair *block_translation; - - // Where and how big is the block translation vector stored on disk. - // size_on_disk is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].size - // location_on is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff -}; - -static const BLOCKNUM freelist_null = {-1}; // in a freelist, this indicates end of list -static const DISKOFF size_is_free = (DISKOFF)-1; // value of block_translation_pair.size if blocknum is unused -static const DISKOFF diskoff_unused = (DISKOFF)-2; // value of block_translation_pair.u.diskoff if blocknum is used but does not yet have a diskblock - -/******** - * There are three copies of the translation table (btt) in the block table: - * - * checkpointed Is initialized by deserializing from disk, - * and is the only version ever read from disk. - * When read from disk it is copied to current. - * It is immutable. It can be replaced by an inprogress btt. - * - * inprogress Is only filled by copying from current, - * and is the only version ever serialized to disk. - * (It is serialized to disk on checkpoint and clean shutdown.) - * At end of checkpoint it replaces 'checkpointed'. - * During a checkpoint, any 'pending' dirty writes will update - * inprogress. - * - * current Is initialized by copying from checkpointed, - * is the only version ever modified while the database is in use, - * and is the only version ever copied to inprogress. - * It is never stored on disk. - ********/ - - -struct block_table { - struct translation current; // The current translation is the one used by client threads. It is not represented on disk. - struct translation inprogress; // the translation used by the checkpoint currently in progress. 
If the checkpoint thread allocates a block, it must also update the current translation. - struct translation checkpointed; // the translation for the data that shall remain inviolate on disk until the next checkpoint finishes, after which any blocks used only in this translation can be freed. - - // The in-memory data structure for block allocation. There is no on-disk data structure for block allocation. - // Note: This is *allocation* not *translation*. The block_allocator is unaware of which blocks are used for which translation, but simply allocates and deallocates blocks. - BLOCK_ALLOCATOR block_allocator; - toku_mutex_t mutex; - struct nb_mutex safe_file_size_lock; - bool checkpoint_skipped; - uint64_t safe_file_size; -}; - -//forward decls -static int64_t calculate_size_on_disk (struct translation *t); -static inline bool translation_prevents_freeing (struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair); -static inline void lock_for_blocktable (BLOCK_TABLE bt); -static inline void unlock_for_blocktable (BLOCK_TABLE bt); - - - -static void -ft_set_dirty(FT ft, bool for_checkpoint){ - toku_mutex_assert_locked(&ft->blocktable->mutex); - paranoid_invariant(ft->h->type == FT_CURRENT); - if (for_checkpoint) { - paranoid_invariant(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS); - ft->checkpoint_header->dirty = 1; - } - else { - ft->h->dirty = 1; - } -} - -static void -maybe_truncate_file(BLOCK_TABLE bt, int fd, uint64_t size_needed_before) { - toku_mutex_assert_locked(&bt->mutex); - uint64_t new_size_needed = block_allocator_allocated_limit(bt->block_allocator); - //Save a call to toku_os_get_file_size (kernel call) if unlikely to be useful. - if (new_size_needed < size_needed_before && new_size_needed < bt->safe_file_size) { - nb_mutex_lock(&bt->safe_file_size_lock, &bt->mutex); - - // Must hold safe_file_size_lock to change safe_file_size. 
- if (new_size_needed < bt->safe_file_size) { - int64_t safe_file_size_before = bt->safe_file_size; - // Not safe to use the 'to-be-truncated' portion until truncate is done. - bt->safe_file_size = new_size_needed; - unlock_for_blocktable(bt); - - uint64_t size_after; - toku_maybe_truncate_file(fd, new_size_needed, safe_file_size_before, &size_after); - lock_for_blocktable(bt); - - bt->safe_file_size = size_after; - } - nb_mutex_unlock(&bt->safe_file_size_lock); - } -} - -void -toku_maybe_truncate_file_on_open(BLOCK_TABLE bt, int fd) { - lock_for_blocktable(bt); - maybe_truncate_file(bt, fd, bt->safe_file_size); - unlock_for_blocktable(bt); -} - - -static void -copy_translation(struct translation * dst, struct translation * src, enum translation_type newtype) { - paranoid_invariant(src->length_of_array >= src->smallest_never_used_blocknum.b); //verify invariant - paranoid_invariant(newtype==TRANSLATION_DEBUG || - (src->type == TRANSLATION_CURRENT && newtype == TRANSLATION_INPROGRESS) || - (src->type == TRANSLATION_CHECKPOINTED && newtype == TRANSLATION_CURRENT)); - dst->type = newtype; - dst->smallest_never_used_blocknum = src->smallest_never_used_blocknum; - dst->blocknum_freelist_head = src->blocknum_freelist_head; - // destination btt is of fixed size. Allocate+memcpy the exact length necessary. - dst->length_of_array = dst->smallest_never_used_blocknum.b; - XMALLOC_N(dst->length_of_array, dst->block_translation); - memcpy(dst->block_translation, - src->block_translation, - dst->length_of_array * sizeof(*dst->block_translation)); - //New version of btt is not yet stored on disk. - dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size = 0; - dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff = diskoff_unused; -} - -int64_t -toku_block_get_blocks_in_use_unlocked(BLOCK_TABLE bt) { - BLOCKNUM b; - struct translation *t = &bt->current; - int64_t num_blocks = 0; - { - //Reserved blocknums do not get upgraded; They are part of the header. 
- for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) { - if (t->block_translation[b.b].size != size_is_free) { - num_blocks++; - } - } - } - return num_blocks; -} - -static void -maybe_optimize_translation(struct translation *t) { - //Reduce 'smallest_never_used_blocknum.b' (completely free blocknums instead of just - //on a free list. Doing so requires us to regenerate the free list. - //This is O(n) work, so do it only if you're already doing that. - - BLOCKNUM b; - paranoid_invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS); - //Calculate how large the free suffix is. - int64_t freed; - { - for (b.b = t->smallest_never_used_blocknum.b; b.b > RESERVED_BLOCKNUMS; b.b--) { - if (t->block_translation[b.b-1].size != size_is_free) { - break; - } - } - freed = t->smallest_never_used_blocknum.b - b.b; - } - if (freed>0) { - t->smallest_never_used_blocknum.b = b.b; - if (t->length_of_array/4 > t->smallest_never_used_blocknum.b) { - //We're using more memory than necessary to represent this now. Reduce. - uint64_t new_length = t->smallest_never_used_blocknum.b * 2; - XREALLOC_N(new_length, t->block_translation); - t->length_of_array = new_length; - //No need to zero anything out. - } - - //Regenerate free list. - t->blocknum_freelist_head.b = freelist_null.b; - for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) { - if (t->block_translation[b.b].size == size_is_free) { - t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head; - t->blocknum_freelist_head = b; - } - } - } -} - -// block table must be locked by caller of this function -void -toku_block_translation_note_start_checkpoint_unlocked (BLOCK_TABLE bt) { - toku_mutex_assert_locked(&bt->mutex); - // Copy current translation to inprogress translation. 
- paranoid_invariant(bt->inprogress.block_translation == NULL); - //We're going to do O(n) work to copy the translation, so we - //can afford to do O(n) work by optimizing the translation - maybe_optimize_translation(&bt->current); - copy_translation(&bt->inprogress, &bt->current, TRANSLATION_INPROGRESS); - - bt->checkpoint_skipped = false; -} - -//#define PRNTF(str, b, siz, ad, bt) printf("%s[%d] %s %" PRId64 " %" PRId64 " %" PRId64 "\n", __FUNCTION__, __LINE__, str, b, siz, ad); fflush(stdout); if (bt) block_allocator_validate(((BLOCK_TABLE)(bt))->block_allocator); -//Debugging function -#define PRNTF(str, b, siz, ad, bt) - -void toku_block_translation_note_skipped_checkpoint (BLOCK_TABLE bt) { - //Purpose, alert block translation that the checkpoint was skipped, e.x. for a non-dirty header - lock_for_blocktable(bt); - paranoid_invariant_notnull(bt->inprogress.block_translation); - bt->checkpoint_skipped = true; - unlock_for_blocktable(bt); -} - -// Purpose: free any disk space used by previous checkpoint that isn't in use by either -// - current state -// - in-progress checkpoint -// capture inprogress as new checkpointed. 
-// For each entry in checkpointBTT -// if offset does not match offset in inprogress -// assert offset does not match offset in current -// free (offset,len) from checkpoint -// move inprogress to checkpoint (resetting type) -// inprogress = NULL -void -toku_block_translation_note_end_checkpoint (BLOCK_TABLE bt, int fd) { - // Free unused blocks - lock_for_blocktable(bt); - uint64_t allocated_limit_at_start = block_allocator_allocated_limit(bt->block_allocator); - paranoid_invariant_notnull(bt->inprogress.block_translation); - if (bt->checkpoint_skipped) { - toku_free(bt->inprogress.block_translation); - memset(&bt->inprogress, 0, sizeof(bt->inprogress)); - goto end; - } - - //Make certain inprogress was allocated space on disk - assert(bt->inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].size > 0); - assert(bt->inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff > 0); - - { - int64_t i; - struct translation *t = &bt->checkpointed; - - for (i = 0; i < t->length_of_array; i++) { - struct block_translation_pair *pair = &t->block_translation[i]; - if (pair->size > 0 && !translation_prevents_freeing(&bt->inprogress, make_blocknum(i), pair)) { - assert(!translation_prevents_freeing(&bt->current, make_blocknum(i), pair)); - PRNTF("free", i, pair->size, pair->u.diskoff, bt); - block_allocator_free_block(bt->block_allocator, pair->u.diskoff); - } - } - toku_free(bt->checkpointed.block_translation); - bt->checkpointed = bt->inprogress; - bt->checkpointed.type = TRANSLATION_CHECKPOINTED; - memset(&bt->inprogress, 0, sizeof(bt->inprogress)); - maybe_truncate_file(bt, fd, allocated_limit_at_start); - } -end: - unlock_for_blocktable(bt); -} - -__attribute__((nonnull,const)) -static inline bool -is_valid_blocknum(struct translation *t, BLOCKNUM b) { - //Sanity check: Verify invariant - paranoid_invariant(t->length_of_array >= t->smallest_never_used_blocknum.b); - return b.b >= 0 && b.b < t->smallest_never_used_blocknum.b; -} - -static inline void 
-verify_valid_blocknum (struct translation *UU(t), BLOCKNUM UU(b)) { - paranoid_invariant(is_valid_blocknum(t, b)); -} - -__attribute__((nonnull,const)) -static inline bool -is_valid_freeable_blocknum(struct translation *t, BLOCKNUM b) { - //Sanity check: Verify invariant - paranoid_invariant(t->length_of_array >= t->smallest_never_used_blocknum.b); - return b.b >= RESERVED_BLOCKNUMS && b.b < t->smallest_never_used_blocknum.b; -} - -//Can be freed -static inline void -verify_valid_freeable_blocknum (struct translation *UU(t), BLOCKNUM UU(b)) { - paranoid_invariant(is_valid_freeable_blocknum(t, b)); -} - -static void -blocktable_lock_init (BLOCK_TABLE bt) { - memset(&bt->mutex, 0, sizeof(bt->mutex)); - toku_mutex_init(&bt->mutex, NULL); -} - -static void -blocktable_lock_destroy (BLOCK_TABLE bt) { - toku_mutex_destroy(&bt->mutex); -} - -static inline void -lock_for_blocktable (BLOCK_TABLE bt) { - // Locks the blocktable_mutex. - toku_mutex_lock(&bt->mutex); -} - -static inline void -unlock_for_blocktable (BLOCK_TABLE bt) { - toku_mutex_unlock(&bt->mutex); -} - -void -toku_ft_lock (FT ft) { - BLOCK_TABLE bt = ft->blocktable; - lock_for_blocktable(bt); -} - -void -toku_ft_unlock (FT ft) { - BLOCK_TABLE bt = ft->blocktable; - toku_mutex_assert_locked(&bt->mutex); - unlock_for_blocktable(bt); -} - -// Also used only in ft-serialize-test. -void -toku_block_free(BLOCK_TABLE bt, uint64_t offset) { - lock_for_blocktable(bt); -PRNTF("freeSOMETHINGunknown", 0L, 0L, offset, bt); - block_allocator_free_block(bt->block_allocator, offset); - unlock_for_blocktable(bt); -} - -static int64_t -calculate_size_on_disk (struct translation *t) { - int64_t r = (8 + // smallest_never_used_blocknum - 8 + // blocknum_freelist_head - t->smallest_never_used_blocknum.b * 16 + // Array - 4); // 4 for checksum - return r; -} - -// We cannot free the disk space allocated to this blocknum if it is still in use by the given translation table. 
-static inline bool -translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair) { - return (t->block_translation && - b.b < t->smallest_never_used_blocknum.b && - old_pair->u.diskoff == t->block_translation[b.b].u.diskoff); -} - -static void -blocknum_realloc_on_disk_internal (BLOCK_TABLE bt, BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, bool for_checkpoint) { - toku_mutex_assert_locked(&bt->mutex); - ft_set_dirty(ft, for_checkpoint); - - struct translation *t = &bt->current; - struct block_translation_pair old_pair = t->block_translation[b.b]; -PRNTF("old", b.b, old_pair.size, old_pair.u.diskoff, bt); - //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint - bool cannot_free = (bool) - ((!for_checkpoint && translation_prevents_freeing(&bt->inprogress, b, &old_pair)) || - translation_prevents_freeing(&bt->checkpointed, b, &old_pair)); - if (!cannot_free && old_pair.u.diskoff!=diskoff_unused) { -PRNTF("Freed", b.b, old_pair.size, old_pair.u.diskoff, bt); - block_allocator_free_block(bt->block_allocator, old_pair.u.diskoff); - } - - uint64_t allocator_offset = diskoff_unused; - t->block_translation[b.b].size = size; - if (size > 0) { - // Allocate a new block if the size is greater than 0, - // if the size is just 0, offset will be set to diskoff_unused - block_allocator_alloc_block(bt->block_allocator, size, &allocator_offset); - } - t->block_translation[b.b].u.diskoff = allocator_offset; - *offset = allocator_offset; - -PRNTF("New", b.b, t->block_translation[b.b].size, t->block_translation[b.b].u.diskoff, bt); - //Update inprogress btt if appropriate (if called because Pending bit is set). 
- if (for_checkpoint) { - paranoid_invariant(b.b < bt->inprogress.length_of_array); - bt->inprogress.block_translation[b.b] = t->block_translation[b.b]; - } -} - -static void -ensure_safe_write_unlocked(BLOCK_TABLE bt, int fd, DISKOFF block_size, DISKOFF block_offset) { - // Requires: holding bt->mutex - uint64_t size_needed = block_size + block_offset; - if (size_needed > bt->safe_file_size) { - // Must hold safe_file_size_lock to change safe_file_size. - nb_mutex_lock(&bt->safe_file_size_lock, &bt->mutex); - if (size_needed > bt->safe_file_size) { - unlock_for_blocktable(bt); - - int64_t size_after; - toku_maybe_preallocate_in_file(fd, size_needed, bt->safe_file_size, &size_after); - - lock_for_blocktable(bt); - bt->safe_file_size = size_after; - } - nb_mutex_unlock(&bt->safe_file_size_lock); - } -} - -void -toku_blocknum_realloc_on_disk (BLOCK_TABLE bt, BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, int fd, bool for_checkpoint) { - lock_for_blocktable(bt); - struct translation *t = &bt->current; - verify_valid_freeable_blocknum(t, b); - blocknum_realloc_on_disk_internal(bt, b, size, offset, ft, for_checkpoint); - - ensure_safe_write_unlocked(bt, fd, size, *offset); - unlock_for_blocktable(bt); -} - -__attribute__((nonnull,const)) -static inline bool -pair_is_unallocated(struct block_translation_pair *pair) { - return pair->size == 0 && pair->u.diskoff == diskoff_unused; -} - -static void blocknum_alloc_translation_on_disk_unlocked(BLOCK_TABLE bt) -// Effect: figure out where to put the inprogress btt on disk, allocate space for it there. -// The space must be 512-byte aligned (both the starting address and the size). -// As a result, the allcoated space may be a little bit bigger (up to the next 512-byte boundary) than the actual btt. 
-{ - toku_mutex_assert_locked(&bt->mutex); - - struct translation *t = &bt->inprogress; - paranoid_invariant_notnull(t->block_translation); - BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION); - //Each inprogress is allocated only once - paranoid_invariant(pair_is_unallocated(&t->block_translation[b.b])); - - //Allocate a new block - int64_t size = calculate_size_on_disk(t); - uint64_t offset; - block_allocator_alloc_block(bt->block_allocator, size, &offset); -PRNTF("blokAllokator", 1L, size, offset, bt); - t->block_translation[b.b].u.diskoff = offset; - t->block_translation[b.b].size = size; -} - -void toku_serialize_translation_to_wbuf(BLOCK_TABLE bt, int fd, struct wbuf *w, - int64_t *address, int64_t *size) -// Effect: Fills wbuf (which starts uninitialized) with bt -// A clean shutdown runs checkpoint start so that current and inprogress are copies. -// The resulting wbuf buffer is guaranteed to be be 512-byte aligned and the total length is a multiple of 512 (so we pad with zeros at the end if needd) -// The address is guaranteed to be 512-byte aligned, but the size is not guaranteed. -// It *is* guaranteed that we can read up to the next 512-byte boundary, however -{ - lock_for_blocktable(bt); - struct translation *t = &bt->inprogress; - - BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION); - blocknum_alloc_translation_on_disk_unlocked(bt); // The allocated block must be 512-byte aligned to make O_DIRECT happy. 
- uint64_t size_translation = calculate_size_on_disk(t); - uint64_t size_aligned = roundup_to_multiple(512, size_translation); - assert((int64_t)size_translation==t->block_translation[b.b].size); - { - //Init wbuf - if (0) - printf("%s:%d writing translation table of size_translation %" PRIu64 " at %" PRId64 "\n", __FILE__, __LINE__, size_translation, t->block_translation[b.b].u.diskoff); - char *XMALLOC_N_ALIGNED(512, size_aligned, buf); - for (uint64_t i=size_translation; ismallest_never_used_blocknum); - wbuf_BLOCKNUM(w, t->blocknum_freelist_head); - int64_t i; - for (i=0; ismallest_never_used_blocknum.b; i++) { - if (0) - printf("%s:%d %" PRId64 ",%" PRId64 "\n", __FILE__, __LINE__, t->block_translation[i].u.diskoff, t->block_translation[i].size); - wbuf_DISKOFF(w, t->block_translation[i].u.diskoff); - wbuf_DISKOFF(w, t->block_translation[i].size); - } - uint32_t checksum = toku_x1764_finish(&w->checksum); - wbuf_int(w, checksum); - *address = t->block_translation[b.b].u.diskoff; - *size = size_translation; - assert((*address)%512 == 0); - - ensure_safe_write_unlocked(bt, fd, size_aligned, *address); - unlock_for_blocktable(bt); -} - - -// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?) -static void -translate_blocknum_to_offset_size_unlocked(BLOCK_TABLE bt, BLOCKNUM b, DISKOFF *offset, DISKOFF *size) { - struct translation *t = &bt->current; - verify_valid_blocknum(t, b); - if (offset) *offset = t->block_translation[b.b].u.diskoff; - if (size) *size = t->block_translation[b.b].size; -} - -// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?) 
-void -toku_translate_blocknum_to_offset_size(BLOCK_TABLE bt, BLOCKNUM b, DISKOFF *offset, DISKOFF *size) { - lock_for_blocktable(bt); - translate_blocknum_to_offset_size_unlocked(bt, b, offset, size); - unlock_for_blocktable(bt); -} - -//Only called by toku_allocate_blocknum -static void -maybe_expand_translation (struct translation *t) { -// Effect: expand the array to maintain size invariant -// given that one more never-used blocknum will soon be used. - if (t->length_of_array <= t->smallest_never_used_blocknum.b) { - //expansion is necessary - uint64_t new_length = t->smallest_never_used_blocknum.b * 2; - XREALLOC_N(new_length, t->block_translation); - uint64_t i; - for (i = t->length_of_array; i < new_length; i++) { - t->block_translation[i].u.next_free_blocknum = freelist_null; - t->block_translation[i].size = size_is_free; - } - t->length_of_array = new_length; - } -} - -void -toku_allocate_blocknum_unlocked(BLOCK_TABLE bt, BLOCKNUM *res, FT ft) { - toku_mutex_assert_locked(&bt->mutex); - BLOCKNUM result; - struct translation * t = &bt->current; - if (t->blocknum_freelist_head.b == freelist_null.b) { - // no previously used blocknums are available - // use a never used blocknum - maybe_expand_translation(t); //Ensure a never used blocknums is available - result = t->smallest_never_used_blocknum; - t->smallest_never_used_blocknum.b++; - } else { // reuse a previously used blocknum - result = t->blocknum_freelist_head; - BLOCKNUM next = t->block_translation[result.b].u.next_free_blocknum; - t->blocknum_freelist_head = next; - } - //Verify the blocknum is free - paranoid_invariant(t->block_translation[result.b].size == size_is_free); - //blocknum is not free anymore - t->block_translation[result.b].u.diskoff = diskoff_unused; - t->block_translation[result.b].size = 0; - verify_valid_freeable_blocknum(t, result); - *res = result; - ft_set_dirty(ft, false); -} - -void -toku_allocate_blocknum(BLOCK_TABLE bt, BLOCKNUM *res, FT ft) { - lock_for_blocktable(bt); - 
toku_allocate_blocknum_unlocked(bt, res, ft); - unlock_for_blocktable(bt); -} - -static void -free_blocknum_in_translation(struct translation *t, BLOCKNUM b) -{ - verify_valid_freeable_blocknum(t, b); - paranoid_invariant(t->block_translation[b.b].size != size_is_free); - - PRNTF("free_blocknum", b.b, t->block_translation[b.b].size, t->block_translation[b.b].u.diskoff, bt); - t->block_translation[b.b].size = size_is_free; - t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head; - t->blocknum_freelist_head = b; -} - -static void -free_blocknum_unlocked(BLOCK_TABLE bt, BLOCKNUM *bp, FT ft, bool for_checkpoint) { -// Effect: Free a blocknum. -// If the blocknum holds the only reference to a block on disk, free that block - toku_mutex_assert_locked(&bt->mutex); - BLOCKNUM b = *bp; - bp->b = 0; //Remove caller's reference. - - struct block_translation_pair old_pair = bt->current.block_translation[b.b]; - - free_blocknum_in_translation(&bt->current, b); - if (for_checkpoint) { - paranoid_invariant(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS); - free_blocknum_in_translation(&bt->inprogress, b); - } - - //If the size is 0, no disk block has ever been assigned to this blocknum. 
- if (old_pair.size > 0) { - //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint - bool cannot_free = (bool) - (translation_prevents_freeing(&bt->inprogress, b, &old_pair) || - translation_prevents_freeing(&bt->checkpointed, b, &old_pair)); - if (!cannot_free) { -PRNTF("free_blocknum_free", b.b, old_pair.size, old_pair.u.diskoff, bt); - block_allocator_free_block(bt->block_allocator, old_pair.u.diskoff); - } - } - else { - paranoid_invariant(old_pair.size==0); - paranoid_invariant(old_pair.u.diskoff == diskoff_unused); - } - ft_set_dirty(ft, for_checkpoint); -} - -void -toku_free_blocknum(BLOCK_TABLE bt, BLOCKNUM *bp, FT ft, bool for_checkpoint) { - lock_for_blocktable(bt); - free_blocknum_unlocked(bt, bp, ft, for_checkpoint); - unlock_for_blocktable(bt); -} - -//Verify there are no free blocks. -void -toku_block_verify_no_free_blocknums(BLOCK_TABLE UU(bt)) { - paranoid_invariant(bt->current.blocknum_freelist_head.b == freelist_null.b); -} - -// Frees blocknums that have a size of 0 and unused diskoff -// Currently used for eliminating unused cached rollback log nodes -void -toku_free_unused_blocknums(BLOCK_TABLE bt, BLOCKNUM root) { - lock_for_blocktable(bt); - int64_t smallest = bt->current.smallest_never_used_blocknum.b; - for (int64_t i=RESERVED_BLOCKNUMS; i < smallest; i++) { - if (i == root.b) { - continue; - } - BLOCKNUM b = make_blocknum(i); - if (bt->current.block_translation[b.b].size == 0) { - invariant(bt->current.block_translation[b.b].u.diskoff == diskoff_unused); - free_blocknum_in_translation(&bt->current, b); - } - } - unlock_for_blocktable(bt); -} - -__attribute__((nonnull,const,unused)) -static inline bool -no_data_blocks_except_root(BLOCK_TABLE bt, BLOCKNUM root) { - bool ok = true; - lock_for_blocktable(bt); - int64_t smallest = bt->current.smallest_never_used_blocknum.b; - if (root.b < RESERVED_BLOCKNUMS) { - ok = false; - goto cleanup; - } - int64_t i; - for (i=RESERVED_BLOCKNUMS; i < 
smallest; i++) { - if (i == root.b) { - continue; - } - BLOCKNUM b = make_blocknum(i); - if (bt->current.block_translation[b.b].size != size_is_free) { - ok = false; - goto cleanup; - } - } - cleanup: - unlock_for_blocktable(bt); - return ok; -} - -//Verify there are no data blocks except root. -// TODO(leif): This actually takes a lock, but I don't want to fix all the callers right now. -void -toku_block_verify_no_data_blocks_except_root(BLOCK_TABLE UU(bt), BLOCKNUM UU(root)) { - paranoid_invariant(no_data_blocks_except_root(bt, root)); -} - -__attribute__((nonnull,const,unused)) -static inline bool -blocknum_allocated(BLOCK_TABLE bt, BLOCKNUM b) { - lock_for_blocktable(bt); - struct translation *t = &bt->current; - verify_valid_blocknum(t, b); - bool ok = t->block_translation[b.b].size != size_is_free; - unlock_for_blocktable(bt); - return ok; -} - -//Verify a blocknum is currently allocated. -void -toku_verify_blocknum_allocated(BLOCK_TABLE UU(bt), BLOCKNUM UU(b)) { - paranoid_invariant(blocknum_allocated(bt, b)); -} - -//Only used by toku_dump_translation table (debug info) -static void -dump_translation(FILE *f, struct translation *t) { - if (t->block_translation) { - BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION); - fprintf(f, " length_of_array[%" PRId64 "]", t->length_of_array); - fprintf(f, " smallest_never_used_blocknum[%" PRId64 "]", t->smallest_never_used_blocknum.b); - fprintf(f, " blocknum_free_list_head[%" PRId64 "]", t->blocknum_freelist_head.b); - fprintf(f, " size_on_disk[%" PRId64 "]", t->block_translation[b.b].size); - fprintf(f, " location_on_disk[%" PRId64 "]\n", t->block_translation[b.b].u.diskoff); - int64_t i; - for (i=0; ilength_of_array; i++) { - fprintf(f, " %" PRId64 ": %" PRId64 " %" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size); - } - fprintf(f, "\n"); - } - else fprintf(f, " does not exist\n"); -} - -//Only used by toku_ft_dump which is only for debugging purposes -// "pretty" just means 
we use tabs so we can parse output easier later -void -toku_dump_translation_table_pretty(FILE *f, BLOCK_TABLE bt) { - lock_for_blocktable(bt); - struct translation *t = &bt->checkpointed; - assert(t->block_translation != nullptr); - for (int64_t i = 0; i < t->length_of_array; ++i) { - fprintf(f, "%" PRId64 "\t%" PRId64 "\t%" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size); - } - unlock_for_blocktable(bt); -} - -//Only used by toku_ft_dump which is only for debugging purposes -void -toku_dump_translation_table(FILE *f, BLOCK_TABLE bt) { - lock_for_blocktable(bt); - fprintf(f, "Current block translation:"); - dump_translation(f, &bt->current); - fprintf(f, "Checkpoint in progress block translation:"); - dump_translation(f, &bt->inprogress); - fprintf(f, "Checkpointed block translation:"); - dump_translation(f, &bt->checkpointed); - unlock_for_blocktable(bt); -} - -//Only used by ftdump -void -toku_blocknum_dump_translation(BLOCK_TABLE bt, BLOCKNUM b) { - lock_for_blocktable(bt); - - struct translation *t = &bt->current; - if (b.b < t->length_of_array) { - struct block_translation_pair *bx = &t->block_translation[b.b]; - printf("%" PRId64 ": %" PRId64 " %" PRId64 "\n", b.b, bx->u.diskoff, bx->size); - } - unlock_for_blocktable(bt); -} - - -//Must not call this function when anything else is using the blocktable. -//No one may use the blocktable afterwards. 
-void -toku_blocktable_destroy(BLOCK_TABLE *btp) { - BLOCK_TABLE bt = *btp; - *btp = NULL; - if (bt->current.block_translation) toku_free(bt->current.block_translation); - if (bt->inprogress.block_translation) toku_free(bt->inprogress.block_translation); - if (bt->checkpointed.block_translation) toku_free(bt->checkpointed.block_translation); - - destroy_block_allocator(&bt->block_allocator); - blocktable_lock_destroy(bt); - nb_mutex_destroy(&bt->safe_file_size_lock); - toku_free(bt); -} - - -static BLOCK_TABLE -blocktable_create_internal (void) { -// Effect: Fill it in, including the translation table, which is uninitialized - BLOCK_TABLE XCALLOC(bt); - blocktable_lock_init(bt); - nb_mutex_init(&bt->safe_file_size_lock); - - //There are two headers, so we reserve space for two. - uint64_t reserve_per_header = BLOCK_ALLOCATOR_HEADER_RESERVE; - - //Must reserve in multiples of BLOCK_ALLOCATOR_ALIGNMENT - //Round up the per-header usage if necessary. - //We want each header aligned. - uint64_t remainder = BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT; - if (remainder!=0) { - reserve_per_header += BLOCK_ALLOCATOR_ALIGNMENT; - reserve_per_header -= remainder; - } - assert(2*reserve_per_header == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - create_block_allocator(&bt->block_allocator, - BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, - BLOCK_ALLOCATOR_ALIGNMENT); - return bt; -} - - - -static void -translation_default(struct translation *t) { // destination into which to create a default translation - t->type = TRANSLATION_CHECKPOINTED; - t->smallest_never_used_blocknum = make_blocknum(RESERVED_BLOCKNUMS); - t->length_of_array = t->smallest_never_used_blocknum.b; - t->blocknum_freelist_head = freelist_null; - XMALLOC_N(t->length_of_array, t->block_translation); - int64_t i; - for (i = 0; i < t->length_of_array; i++) { - t->block_translation[i].size = 0; - t->block_translation[i].u.diskoff = diskoff_unused; - } -} - - -static int -translation_deserialize_from_buffer(struct 
translation *t, // destination into which to deserialize - DISKOFF location_on_disk, //Location of translation_buffer - uint64_t size_on_disk, - unsigned char * translation_buffer) { // buffer with serialized translation - int r = 0; - assert(location_on_disk!=0); - t->type = TRANSLATION_CHECKPOINTED; - { - // check the checksum - uint32_t x1764 = toku_x1764_memory(translation_buffer, size_on_disk - 4); - uint64_t offset = size_on_disk - 4; - //printf("%s:%d read from %ld (x1764 offset=%ld) size=%ld\n", __FILE__, __LINE__, block_translation_address_on_disk, offset, block_translation_size_on_disk); - uint32_t stored_x1764 = toku_dtoh32(*(int*)(translation_buffer + offset)); - if (x1764 != stored_x1764) { - fprintf(stderr, "Translation table checksum failure: calc=0x%08x read=0x%08x\n", x1764, stored_x1764); - r = TOKUDB_BAD_CHECKSUM; - goto exit; - } - } - struct rbuf rt; - rt.buf = translation_buffer; - rt.ndone = 0; - rt.size = size_on_disk-4;//4==checksum - - t->smallest_never_used_blocknum = rbuf_blocknum(&rt); - t->length_of_array = t->smallest_never_used_blocknum.b; - assert(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS); - t->blocknum_freelist_head = rbuf_blocknum(&rt); - XMALLOC_N(t->length_of_array, t->block_translation); - int64_t i; - for (i=0; i < t->length_of_array; i++) { - t->block_translation[i].u.diskoff = rbuf_diskoff(&rt); - t->block_translation[i].size = rbuf_diskoff(&rt); -PRNTF("ReadIn", i, t->block_translation[i].size, t->block_translation[i].u.diskoff, NULL); - } - assert(calculate_size_on_disk(t) == (int64_t)size_on_disk); - assert(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size == (int64_t)size_on_disk); - assert(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff == location_on_disk); -exit: - return r; -} - -// We just initialized a translation, inform block allocator to reserve space for each blocknum in use. 
-static void -blocktable_note_translation (BLOCK_ALLOCATOR allocator, struct translation *t) { - //This is where the space for them will be reserved (in addition to normal blocks). - //See RESERVED_BLOCKNUMS - - // Previously this added blocks one at a time. Now we make an array and pass it in so it can be sorted and merged. See #3218. - struct block_allocator_blockpair *XMALLOC_N(t->smallest_never_used_blocknum.b, pairs); - uint64_t n_pairs = 0; - for (int64_t i=0; ismallest_never_used_blocknum.b; i++) { - struct block_translation_pair pair = t->block_translation[i]; - if (pair.size > 0) { - paranoid_invariant(pair.u.diskoff != diskoff_unused); - int cur_pair = n_pairs++; - pairs[cur_pair] = (struct block_allocator_blockpair) { .offset = (uint64_t) pair.u.diskoff, - .size = (uint64_t) pair.size }; - } - } - block_allocator_alloc_blocks_at(allocator, n_pairs, pairs); - toku_free(pairs); -} - - -// Fill in the checkpointed translation from buffer, and copy checkpointed to current. -// The one read from disk is the last known checkpointed one, so we are keeping it in -// place and then setting current (which is never stored on disk) for current use. -// The translation_buffer has translation only, we create the rest of the block_table. -int -toku_blocktable_create_from_buffer(int fd, - BLOCK_TABLE *btp, - DISKOFF location_on_disk, //Location of translation_buffer - DISKOFF size_on_disk, - unsigned char *translation_buffer) { - BLOCK_TABLE bt = blocktable_create_internal(); - int r = translation_deserialize_from_buffer(&bt->checkpointed, location_on_disk, size_on_disk, translation_buffer); - if (r != 0) { - goto exit; - } - blocktable_note_translation(bt->block_allocator, &bt->checkpointed); - // we just filled in checkpointed, now copy it to current. 
- copy_translation(&bt->current, &bt->checkpointed, TRANSLATION_CURRENT); - - int64_t file_size; - r = toku_os_get_file_size(fd, &file_size); - lazy_assert_zero(r); - invariant(file_size >= 0); - bt->safe_file_size = file_size; - - *btp = bt; -exit: - return r; -} - - -void -toku_blocktable_create_new(BLOCK_TABLE *btp) { - BLOCK_TABLE bt = blocktable_create_internal(); - translation_default(&bt->checkpointed); // create default btt (empty except for reserved blocknums) - blocktable_note_translation(bt->block_allocator, &bt->checkpointed); - // we just created a default checkpointed, now copy it to current. - copy_translation(&bt->current, &bt->checkpointed, TRANSLATION_CURRENT); - - *btp = bt; -} - -int -toku_blocktable_iterate (BLOCK_TABLE bt, enum translation_type type, BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only) { - struct translation *src; - - int r = 0; - switch (type) { - case TRANSLATION_CURRENT: src = &bt->current; break; - case TRANSLATION_INPROGRESS: src = &bt->inprogress; break; - case TRANSLATION_CHECKPOINTED: src = &bt->checkpointed; break; - default: r = EINVAL; break; - } - struct translation fakecurrent; - struct translation *t = &fakecurrent; - if (r==0) { - lock_for_blocktable(bt); - copy_translation(t, src, TRANSLATION_DEBUG); - t->block_translation[RESERVED_BLOCKNUM_TRANSLATION] = - src->block_translation[RESERVED_BLOCKNUM_TRANSLATION]; - unlock_for_blocktable(bt); - int64_t i; - for (i=0; ismallest_never_used_blocknum.b; i++) { - struct block_translation_pair pair = t->block_translation[i]; - if (data_only && i< RESERVED_BLOCKNUMS) continue; - if (used_only && pair.size <= 0) continue; - r = f(make_blocknum(i), pair.size, pair.u.diskoff, extra); - if (r!=0) break; - } - toku_free(t->block_translation); - } - return r; -} - -typedef struct { - int64_t used_space; - int64_t total_space; -} frag_extra; - -static int -frag_helper(BLOCKNUM UU(b), int64_t size, int64_t address, void *extra) { - frag_extra *info = (frag_extra 
*) extra; - - if (size + address > info->total_space) - info->total_space = size + address; - info->used_space += size; - return 0; -} - -void -toku_blocktable_internal_fragmentation (BLOCK_TABLE bt, int64_t *total_sizep, int64_t *used_sizep) { - frag_extra info = {0,0}; - int r = toku_blocktable_iterate(bt, TRANSLATION_CHECKPOINTED, frag_helper, &info, false, true); - assert_zero(r); - - if (total_sizep) *total_sizep = info.total_space; - if (used_sizep) *used_sizep = info.used_space; -} - -void -toku_realloc_descriptor_on_disk_unlocked(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, FT ft) { - toku_mutex_assert_locked(&bt->mutex); - BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR); - blocknum_realloc_on_disk_internal(bt, b, size, offset, ft, false); -} - -void -toku_realloc_descriptor_on_disk(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, FT ft, int fd) { - lock_for_blocktable(bt); - toku_realloc_descriptor_on_disk_unlocked(bt, size, offset, ft); - - ensure_safe_write_unlocked(bt, fd, size, *offset); - unlock_for_blocktable(bt); -} - -void -toku_get_descriptor_offset_size(BLOCK_TABLE bt, DISKOFF *offset, DISKOFF *size) { - lock_for_blocktable(bt); - BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR); - translate_blocknum_to_offset_size_unlocked(bt, b, offset, size); - unlock_for_blocktable(bt); -} - -void -toku_block_table_get_fragmentation_unlocked(BLOCK_TABLE bt, TOKU_DB_FRAGMENTATION report) { - //Requires: blocktable lock is held. - //Requires: report->file_size_bytes is already filled in. - - //Count the headers. 
- report->data_bytes = BLOCK_ALLOCATOR_HEADER_RESERVE; - report->data_blocks = 1; - report->checkpoint_bytes_additional = BLOCK_ALLOCATOR_HEADER_RESERVE; - report->checkpoint_blocks_additional = 1; - - struct translation *current = &bt->current; - int64_t i; - for (i = 0; i < current->length_of_array; i++) { - struct block_translation_pair *pair = ¤t->block_translation[i]; - if (pair->size > 0) { - report->data_bytes += pair->size; - report->data_blocks++; - } - } - struct translation *checkpointed = &bt->checkpointed; - for (i = 0; i < checkpointed->length_of_array; i++) { - struct block_translation_pair *pair = &checkpointed->block_translation[i]; - if (pair->size > 0 && - !(i < current->length_of_array && - current->block_translation[i].size > 0 && - current->block_translation[i].u.diskoff == pair->u.diskoff) - ) { - report->checkpoint_bytes_additional += pair->size; - report->checkpoint_blocks_additional++; - } - } - struct translation *inprogress = &bt->inprogress; - for (i = 0; i < inprogress->length_of_array; i++) { - struct block_translation_pair *pair = &inprogress->block_translation[i]; - if (pair->size > 0 && - !(i < current->length_of_array && - current->block_translation[i].size > 0 && - current->block_translation[i].u.diskoff == pair->u.diskoff) && - !(i < checkpointed->length_of_array && - checkpointed->block_translation[i].size > 0 && - checkpointed->block_translation[i].u.diskoff == pair->u.diskoff) - ) { - report->checkpoint_bytes_additional += pair->size; - report->checkpoint_blocks_additional++; - } - } - - block_allocator_get_unused_statistics(bt->block_allocator, report); -} - -void -toku_blocktable_get_info64(BLOCK_TABLE bt, struct ftinfo64 *s) { - lock_for_blocktable(bt); - - struct translation *current = &bt->current; - s->num_blocks_allocated = current->length_of_array; - s->num_blocks_in_use = 0; - s->size_allocated = 0; - s->size_in_use = 0; - - for (int64_t i = 0; i < current->length_of_array; ++i) { - struct block_translation_pair 
*block = ¤t->block_translation[i]; - if (block->size != size_is_free) { - ++s->num_blocks_in_use; - s->size_in_use += block->size; - if (block->u.diskoff != diskoff_unused) { - uint64_t limit = block->u.diskoff + block->size; - if (limit > s->size_allocated) { - s->size_allocated = limit; - } - } - } - } - - unlock_for_blocktable(bt); -} - -int -toku_blocktable_iterate_translation_tables(BLOCK_TABLE bt, uint64_t checkpoint_count, - int (*iter)(uint64_t checkpoint_count, - int64_t total_num_rows, - int64_t blocknum, - int64_t diskoff, - int64_t size, - void *extra), - void *iter_extra) { - int error = 0; - lock_for_blocktable(bt); - - int64_t total_num_rows = bt->current.length_of_array + bt->checkpointed.length_of_array; - for (int64_t i = 0; error == 0 && i < bt->current.length_of_array; ++i) { - struct block_translation_pair *block = &bt->current.block_translation[i]; - error = iter(checkpoint_count, total_num_rows, i, block->u.diskoff, block->size, iter_extra); - } - for (int64_t i = 0; error == 0 && i < bt->checkpointed.length_of_array; ++i) { - struct block_translation_pair *block = &bt->checkpointed.block_translation[i]; - error = iter(checkpoint_count - 1, total_num_rows, i, block->u.diskoff, block->size, iter_extra); - } - - unlock_for_blocktable(bt); - return error; -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/block_table.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/block_table.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/block_table.h 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/block_table.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,176 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef BLOCKTABLE_H -#define BLOCKTABLE_H -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as 
- published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. 
- - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include "fttypes.h" - - -typedef struct block_table *BLOCK_TABLE; - -//Needed by tests, ftdump -struct block_translation_pair { - union { // If in the freelist, use next_free_blocknum, otherwise diskoff. - DISKOFF diskoff; - BLOCKNUM next_free_blocknum; - } u; - DISKOFF size; // set to 0xFFFFFFFFFFFFFFFF for free -}; - -void toku_blocktable_create_new(BLOCK_TABLE *btp); -int toku_blocktable_create_from_buffer(int fd, BLOCK_TABLE *btp, DISKOFF location_on_disk, DISKOFF size_on_disk, unsigned char *translation_buffer); -void toku_blocktable_destroy(BLOCK_TABLE *btp); - -void toku_ft_lock(FT h); -void toku_ft_unlock(FT h); - -void toku_block_translation_note_start_checkpoint_unlocked(BLOCK_TABLE bt); -void toku_block_translation_note_end_checkpoint(BLOCK_TABLE bt, int fd); -void toku_block_translation_note_skipped_checkpoint(BLOCK_TABLE bt); -void toku_maybe_truncate_file_on_open(BLOCK_TABLE bt, int fd); - -//Blocknums -void toku_allocate_blocknum(BLOCK_TABLE bt, BLOCKNUM *res, FT h); -void toku_allocate_blocknum_unlocked(BLOCK_TABLE bt, BLOCKNUM *res, FT h); -void toku_free_blocknum(BLOCK_TABLE bt, BLOCKNUM *b, FT h, bool for_checkpoint); -void toku_verify_blocknum_allocated(BLOCK_TABLE bt, BLOCKNUM b); -void toku_block_verify_no_data_blocks_except_root(BLOCK_TABLE bt, BLOCKNUM root); -void toku_free_unused_blocknums(BLOCK_TABLE bt, BLOCKNUM root); -void toku_block_verify_no_free_blocknums(BLOCK_TABLE bt); -void toku_realloc_descriptor_on_disk(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, FT h, int fd); -void toku_realloc_descriptor_on_disk_unlocked(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, FT h); -void toku_get_descriptor_offset_size(BLOCK_TABLE bt, DISKOFF *offset, DISKOFF *size); - -//Blocks and Blocknums -void toku_blocknum_realloc_on_disk(BLOCK_TABLE bt, BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, int fd, bool for_checkpoint); -void toku_translate_blocknum_to_offset_size(BLOCK_TABLE bt, BLOCKNUM b, DISKOFF *offset, DISKOFF *size); - 
-//Serialization -void toku_serialize_translation_to_wbuf(BLOCK_TABLE bt, int fd, struct wbuf *w, int64_t *address, int64_t *size); - -void toku_block_table_swap_for_redirect(BLOCK_TABLE old_bt, BLOCK_TABLE new_bt); - - -//DEBUG ONLY (ftdump included), tests included -void toku_blocknum_dump_translation(BLOCK_TABLE bt, BLOCKNUM b); -void toku_dump_translation_table_pretty(FILE *f, BLOCK_TABLE bt); -void toku_dump_translation_table(FILE *f, BLOCK_TABLE bt); -void toku_block_free(BLOCK_TABLE bt, uint64_t offset); -typedef int(*BLOCKTABLE_CALLBACK)(BLOCKNUM b, int64_t size, int64_t address, void *extra); -enum translation_type {TRANSLATION_NONE=0, - TRANSLATION_CURRENT, - TRANSLATION_INPROGRESS, - TRANSLATION_CHECKPOINTED, - TRANSLATION_DEBUG}; - -int toku_blocktable_iterate(BLOCK_TABLE bt, enum translation_type type, BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only); -void toku_blocktable_internal_fragmentation(BLOCK_TABLE bt, int64_t *total_sizep, int64_t *used_sizep); - -void toku_block_table_get_fragmentation_unlocked(BLOCK_TABLE bt, TOKU_DB_FRAGMENTATION report); -//Requires: blocktable lock is held. -//Requires: report->file_size_bytes is already filled in. - -int64_t toku_block_get_blocks_in_use_unlocked(BLOCK_TABLE bt); - -void toku_blocktable_get_info64(BLOCK_TABLE, struct ftinfo64 *); - -int toku_blocktable_iterate_translation_tables(BLOCK_TABLE, uint64_t, int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *), void *); - -//Unmovable reserved first, then reallocable. -// We reserve one blocknum for the translation table itself. 
-enum {RESERVED_BLOCKNUM_NULL =0, - RESERVED_BLOCKNUM_TRANSLATION=1, - RESERVED_BLOCKNUM_DESCRIPTOR =2, - RESERVED_BLOCKNUMS}; - - -#endif - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/bndata.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/bndata.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/bndata.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/bndata.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,8 +89,10 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include -#include +#include + +#include +#include using namespace toku; uint32_t bn_data::klpair_disksize(const uint32_t klpair_len, const klpair_struct *klpair) const { @@ -129,18 +131,18 @@ uint32_t ndone_before = rb->ndone; init_zero(); invariant(all_keys_same_length); // Until otherwise supported. 
- bytevec keys_src; + const void *keys_src; rbuf_literal_bytes(rb, &keys_src, key_data_size); //Generate dmt this->m_buffer.create_from_sorted_memory_of_fixed_size_elements( keys_src, num_entries, key_data_size, fixed_klpair_length); toku_mempool_construct(&this->m_buffer_mempool, val_data_size); - bytevec vals_src; + const void *vals_src; rbuf_literal_bytes(rb, &vals_src, val_data_size); if (num_entries > 0) { - void *vals_dest = toku_mempool_malloc(&this->m_buffer_mempool, val_data_size, 1); + void *vals_dest = toku_mempool_malloc(&this->m_buffer_mempool, val_data_size); paranoid_invariant_notnull(vals_dest); memcpy(vals_dest, vals_src, val_data_size); } @@ -256,7 +258,7 @@ } } // Version >= 26 and version 25 deserialization are now identical except that <= 25 might allocate too much memory. - bytevec bytes; + const void *bytes; rbuf_literal_bytes(rb, &bytes, data_size); const unsigned char *CAST_FROM_VOIDP(buf, bytes); if (data_size == 0) { @@ -384,7 +386,7 @@ static int move_it (const uint32_t, klpair_struct *klpair, const uint32_t idx UU(), struct dmt_compressor_state * const oc) { LEAFENTRY old_le = oc->bd->get_le_from_klpair(klpair); uint32_t size = leafentry_memsize(old_le); - void* newdata = toku_mempool_malloc(oc->new_kvspace, size, 1); + void* newdata = toku_mempool_malloc(oc->new_kvspace, size); paranoid_invariant_notnull(newdata); // we do this on a fresh mempool, so nothing bad should happen memcpy(newdata, old_le, size); klpair->le_offset = toku_mempool_get_offset_from_pointer_and_base(oc->new_kvspace, newdata); @@ -411,7 +413,7 @@ } else { toku_mempool_construct(&new_kvspace, total_size_needed); size_t old_offset_limit = toku_mempool_get_offset_limit(&m_buffer_mempool); - void *new_mempool_base = toku_mempool_malloc(&new_kvspace, old_offset_limit, 1); + void *new_mempool_base = toku_mempool_malloc(&new_kvspace, old_offset_limit); memcpy(new_mempool_base, old_mempool_base, old_offset_limit); } @@ -428,10 +430,10 @@ // If MAYBE_FREE is nullptr then 
free the old mempool's space. // Otherwise, store the old mempool's space in maybe_free. LEAFENTRY bn_data::mempool_malloc_and_update_dmt(size_t size, void **maybe_free) { - void *v = toku_mempool_malloc(&m_buffer_mempool, size, 1); + void *v = toku_mempool_malloc(&m_buffer_mempool, size); if (v == nullptr) { dmt_compress_kvspace(size, maybe_free, false); - v = toku_mempool_malloc(&m_buffer_mempool, size, 1); + v = toku_mempool_malloc(&m_buffer_mempool, size); paranoid_invariant_notnull(v); } return (LEAFENTRY)v; @@ -441,6 +443,7 @@ uint32_t idx, const void* keyp UU(), uint32_t keylen UU(), + uint32_t old_keylen, uint32_t old_le_size, uint32_t new_size, LEAFENTRY* new_le_space, @@ -455,8 +458,8 @@ int r = m_buffer.fetch(idx, &klpair_len, &klp); invariant_zero(r); paranoid_invariant(klp!=nullptr); - // Key never changes. - paranoid_invariant(keylen_from_klpair_len(klpair_len) == keylen); + // Old key length should be consistent with what is stored in the DMT + invariant(keylen_from_klpair_len(klpair_len) == old_keylen); size_t new_le_offset = toku_mempool_get_offset_from_pointer_and_base(&this->m_buffer_mempool, new_le); paranoid_invariant(new_le_offset <= UINT32_MAX - new_size); // Not using > 4GB @@ -505,7 +508,7 @@ LEAFENTRY old_le = m_left_bn->get_le_from_klpair(&klpair); size_t le_size = leafentry_memsize(old_le); - void *new_le = toku_mempool_malloc(dest_mp, le_size, 1); + void *new_le = toku_mempool_malloc(dest_mp, le_size); paranoid_invariant_notnull(new_le); memcpy(new_le, old_le, le_size); size_t le_offset = toku_mempool_get_offset_from_pointer_and_base(dest_mp, new_le); @@ -658,7 +661,7 @@ dmt_builder.create(num_les, total_key_size); for (uint32_t idx = 0; idx < num_les; idx++) { - void* new_le = toku_mempool_malloc(&m_buffer_mempool, le_sizes[idx], 1); + void* new_le = toku_mempool_malloc(&m_buffer_mempool, le_sizes[idx]); paranoid_invariant_notnull(new_le); memcpy(new_le, old_les[idx], le_sizes[idx]); size_t le_offset = 
toku_mempool_get_offset_from_pointer_and_base(&m_buffer_mempool, new_le); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/bndata.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/bndata.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/bndata.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/bndata.h 2014-10-08 13:19:51.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,13 +88,13 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - #pragma once -#include -#include "wbuf.h" -#include -#include "leafentry.h" +#include "util/dmt.h" +#include "util/mempool.h" + +#include "ft/leafentry.h" +#include "ft/serialize/wbuf.h" // Key/leafentry pair stored in a dmt. The key is inlined, the offset (in leafentry mempool) is stored for the leafentry. struct klpair_struct { @@ -304,7 +304,8 @@ // Allocates space in the mempool to store a new leafentry. // This may require reorganizing the mempool and updating the dmt. 
__attribute__((__nonnull__)) - void get_space_for_overwrite(uint32_t idx, const void* keyp, uint32_t keylen, uint32_t old_size, uint32_t new_size, LEAFENTRY* new_le_space, void **const maybe_free); + void get_space_for_overwrite(uint32_t idx, const void* keyp, uint32_t keylen, uint32_t old_keylen, uint32_t old_size, + uint32_t new_size, LEAFENTRY* new_le_space, void **const maybe_free); // Allocates space in the mempool to store a new leafentry // and inserts a new key into the dmt @@ -383,4 +384,3 @@ uint32_t key_data_size, uint32_t val_data_size, bool all_keys_same_length, uint32_t fixed_klpair_length); }; - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/cachetable/background_job_manager.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/cachetable/background_job_manager.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/cachetable/background_job_manager.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/cachetable/background_job_manager.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,159 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2011-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include +#include +#include + +#include "cachetable/background_job_manager.h" + +struct background_job_manager_struct { + bool accepting_jobs; + uint32_t num_jobs; + toku_cond_t jobs_wait; + toku_mutex_t jobs_lock; +}; + +void bjm_init(BACKGROUND_JOB_MANAGER* pbjm) { + BACKGROUND_JOB_MANAGER XCALLOC(bjm); + toku_mutex_init(&bjm->jobs_lock, 0); + toku_cond_init(&bjm->jobs_wait, NULL); + bjm->accepting_jobs = true; + bjm->num_jobs = 0; + *pbjm = bjm; +} + +void bjm_destroy(BACKGROUND_JOB_MANAGER bjm) { + assert(bjm->num_jobs == 0); + toku_cond_destroy(&bjm->jobs_wait); + toku_mutex_destroy(&bjm->jobs_lock); + toku_free(bjm); +} + +void bjm_reset(BACKGROUND_JOB_MANAGER bjm) { + toku_mutex_lock(&bjm->jobs_lock); + assert(bjm->num_jobs == 0); + bjm->accepting_jobs = true; + toku_mutex_unlock(&bjm->jobs_lock); +} + +int bjm_add_background_job(BACKGROUND_JOB_MANAGER bjm) { + int ret_val; + toku_mutex_lock(&bjm->jobs_lock); + if (bjm->accepting_jobs) { + bjm->num_jobs++; + ret_val = 0; + } + else { + ret_val = -1; + } + toku_mutex_unlock(&bjm->jobs_lock); + return ret_val; +} +void bjm_remove_background_job(BACKGROUND_JOB_MANAGER bjm){ + toku_mutex_lock(&bjm->jobs_lock); + assert(bjm->num_jobs > 0); + bjm->num_jobs--; + if (bjm->num_jobs == 0 && !bjm->accepting_jobs) { + toku_cond_broadcast(&bjm->jobs_wait); + } + toku_mutex_unlock(&bjm->jobs_lock); +} + +void bjm_wait_for_jobs_to_finish(BACKGROUND_JOB_MANAGER bjm) { + toku_mutex_lock(&bjm->jobs_lock); + bjm->accepting_jobs = false; + while (bjm->num_jobs > 0) { + toku_cond_wait(&bjm->jobs_wait, &bjm->jobs_lock); + } + toku_mutex_unlock(&bjm->jobs_lock); +} + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/cachetable/background_job_manager.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/cachetable/background_job_manager.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/cachetable/background_job_manager.h 1970-01-01 00:00:00.000000000 +0000 +++ 
mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/cachetable/background_job_manager.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,132 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 
11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + + +// +// The background job manager keeps track of the existence of +// background jobs running. We use the background job manager +// to allow threads to perform background jobs on various pieces +// of the system (e.g. cachefiles and cloned pairs being written out +// for checkpoint) +// + +typedef struct background_job_manager_struct *BACKGROUND_JOB_MANAGER; + + +void bjm_init(BACKGROUND_JOB_MANAGER* bjm); +void bjm_destroy(BACKGROUND_JOB_MANAGER bjm); + +// +// Re-allows a background job manager to accept background jobs +// +void bjm_reset(BACKGROUND_JOB_MANAGER bjm); + +// +// add a background job. If return value is 0, then the addition of the job +// was successful and the user may perform the background job. If return +// value is non-zero, then adding of the background job failed and the user +// may not perform the background job. +// +int bjm_add_background_job(BACKGROUND_JOB_MANAGER bjm); + +// +// remove a background job +// +void bjm_remove_background_job(BACKGROUND_JOB_MANAGER bjm); + +// +// This function waits for all current background jobs to be removed. If the user +// calls bjm_add_background_job while this function is running, or after this function +// has completed, bjm_add_background_job returns an error. 
+// +void bjm_wait_for_jobs_to_finish(BACKGROUND_JOB_MANAGER bjm); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/cachetable/cachetable.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/cachetable/cachetable.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/cachetable/cachetable.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/cachetable/cachetable.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,4975 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "ft/cachetable/cachetable.h" +#include "ft/cachetable/cachetable-internal.h" +#include "ft/cachetable/checkpoint.h" +#include "ft/logger/log-internal.h" +#include "util/rwlock.h" +#include "util/scoped_malloc.h" +#include "util/status.h" +#include "util/context.h" + +/////////////////////////////////////////////////////////////////////////////////// +// Engine status +// +// Status is intended for display to humans to help understand system behavior. +// It does not need to be perfectly thread-safe. + +// These should be in the cachetable object, but we make them file-wide so that gdb can get them easily. +// They were left here after engine status cleanup (#2949, rather than moved into the status struct) +// so they are still easily available to the debugger and to save lots of typing. 
+static uint64_t cachetable_miss; +static uint64_t cachetable_misstime; // time spent waiting for disk read +static uint64_t cachetable_prefetches; // how many times has a block been prefetched into the cachetable? +static uint64_t cachetable_evictions; +static uint64_t cleaner_executions; // number of times the cleaner thread's loop has executed + +static CACHETABLE_STATUS_S ct_status; + +// Note, toku_cachetable_get_status() is below, after declaration of cachetable. + +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ct_status, k, c, t, "cachetable: " l, inc) + +static void +status_init(void) { + // Note, this function initializes the keyname, type, and legend fields. + // Value fields are initialized to zero by compiler. + + STATUS_INIT(CT_MISS, CACHETABLE_MISS, UINT64, "miss", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CT_MISSTIME, CACHETABLE_MISS_TIME, UINT64, "miss time", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CT_PREFETCHES, CACHETABLE_PREFETCHES, UINT64, "prefetches", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CT_SIZE_CURRENT, CACHETABLE_SIZE_CURRENT, UINT64, "size current", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CT_SIZE_LIMIT, CACHETABLE_SIZE_LIMIT, UINT64, "size limit", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CT_SIZE_WRITING, CACHETABLE_SIZE_WRITING, UINT64, "size writing", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CT_SIZE_NONLEAF, CACHETABLE_SIZE_NONLEAF, UINT64, "size nonleaf", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CT_SIZE_LEAF, CACHETABLE_SIZE_LEAF, UINT64, "size leaf", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CT_SIZE_ROLLBACK, CACHETABLE_SIZE_ROLLBACK, UINT64, "size rollback", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CT_SIZE_CACHEPRESSURE, CACHETABLE_SIZE_CACHEPRESSURE, UINT64, "size cachepressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CT_SIZE_CLONED, CACHETABLE_SIZE_CLONED, UINT64, "size currently cloned data for 
checkpoint", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CT_EVICTIONS, CACHETABLE_EVICTIONS, UINT64, "evictions", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CT_CLEANER_EXECUTIONS, CACHETABLE_CLEANER_EXECUTIONS, UINT64, "cleaner executions", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CT_CLEANER_PERIOD, CACHETABLE_CLEANER_PERIOD, UINT64, "cleaner period", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CT_CLEANER_ITERATIONS, CACHETABLE_CLEANER_ITERATIONS, UINT64, "cleaner iterations", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + + STATUS_INIT(CT_WAIT_PRESSURE_COUNT, CACHETABLE_WAIT_PRESSURE_COUNT, UINT64, "number of waits on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CT_WAIT_PRESSURE_TIME, CACHETABLE_WAIT_PRESSURE_TIME, UINT64, "time waiting on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CT_LONG_WAIT_PRESSURE_COUNT, CACHETABLE_LONG_WAIT_PRESSURE_COUNT, UINT64, "number of long waits on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CT_LONG_WAIT_PRESSURE_TIME, CACHETABLE_LONG_WAIT_PRESSURE_TIME, UINT64, "long time waiting on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + ct_status.initialized = true; +} +#undef STATUS_INIT + +#define STATUS_VALUE(x) ct_status.status[x].value.num + +static void * const zero_value = nullptr; +static PAIR_ATTR const zero_attr = { + .size = 0, + .nonleaf_size = 0, + .leaf_size = 0, + .rollback_size = 0, + .cache_pressure_size = 0, + .is_valid = true +}; + + +static inline void ctpair_destroy(PAIR p) { + p->value_rwlock.deinit(); + paranoid_invariant(p->refcount == 0); + nb_mutex_destroy(&p->disk_nb_mutex); + toku_cond_destroy(&p->refcount_wait); + toku_free(p); +} + +static inline void pair_lock(PAIR p) { + toku_mutex_lock(p->mutex); +} + +static inline void pair_unlock(PAIR p) { + toku_mutex_unlock(p->mutex); +} + +// adds a reference to the PAIR +// on input and output, PAIR mutex is held +static void 
pair_add_ref_unlocked(PAIR p) { + p->refcount++; +} + +// releases a reference to the PAIR +// on input and output, PAIR mutex is held +static void pair_release_ref_unlocked(PAIR p) { + paranoid_invariant(p->refcount > 0); + p->refcount--; + if (p->refcount == 0 && p->num_waiting_on_refs > 0) { + toku_cond_broadcast(&p->refcount_wait); + } +} + +static void pair_wait_for_ref_release_unlocked(PAIR p) { + p->num_waiting_on_refs++; + while (p->refcount > 0) { + toku_cond_wait(&p->refcount_wait, p->mutex); + } + p->num_waiting_on_refs--; +} + +bool toku_ctpair_is_write_locked(PAIR pair) { + return pair->value_rwlock.writers() == 1; +} + +void +toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS statp) { + if (!ct_status.initialized) { + status_init(); + } + STATUS_VALUE(CT_MISS) = cachetable_miss; + STATUS_VALUE(CT_MISSTIME) = cachetable_misstime; + STATUS_VALUE(CT_PREFETCHES) = cachetable_prefetches; + STATUS_VALUE(CT_EVICTIONS) = cachetable_evictions; + STATUS_VALUE(CT_CLEANER_EXECUTIONS) = cleaner_executions; + STATUS_VALUE(CT_CLEANER_PERIOD) = toku_get_cleaner_period_unlocked(ct); + STATUS_VALUE(CT_CLEANER_ITERATIONS) = toku_get_cleaner_iterations_unlocked(ct); + ct->ev.fill_engine_status(); + *statp = ct_status; +} + +// FIXME global with no toku prefix +void remove_background_job_from_cf(CACHEFILE cf) +{ + bjm_remove_background_job(cf->bjm); +} + +// FIXME global with no toku prefix +void cachefile_kibbutz_enq (CACHEFILE cf, void (*f)(void*), void *extra) +// The function f must call remove_background_job_from_cf when it completes +{ + int r = bjm_add_background_job(cf->bjm); + // if client is adding a background job, then it must be done + // at a time when the manager is accepting background jobs, otherwise + // the client is screwing up + assert_zero(r); + toku_kibbutz_enq(cf->cachetable->client_kibbutz, f, extra); +} + +static int +checkpoint_thread (void *checkpointer_v) +// Effect: If checkpoint_period>0 thn periodically run a checkpoint. 
+// If someone changes the checkpoint_period (calling toku_set_checkpoint_period), then the checkpoint will run sooner or later. +// If someone sets the checkpoint_shutdown boolean , then this thread exits. +// This thread notices those changes by waiting on a condition variable. +{ + CHECKPOINTER CAST_FROM_VOIDP(cp, checkpointer_v); + int r = toku_checkpoint(cp, cp->get_logger(), NULL, NULL, NULL, NULL, SCHEDULED_CHECKPOINT); + invariant_zero(r); + return r; +} + +void toku_set_checkpoint_period (CACHETABLE ct, uint32_t new_period) { + ct->cp.set_checkpoint_period(new_period); +} + +uint32_t toku_get_checkpoint_period_unlocked (CACHETABLE ct) { + return ct->cp.get_checkpoint_period(); +} + +void toku_set_cleaner_period (CACHETABLE ct, uint32_t new_period) { + ct->cl.set_period(new_period); +} + +uint32_t toku_get_cleaner_period_unlocked (CACHETABLE ct) { + return ct->cl.get_period_unlocked(); +} + +void toku_set_cleaner_iterations (CACHETABLE ct, uint32_t new_iterations) { + ct->cl.set_iterations(new_iterations); +} + +uint32_t toku_get_cleaner_iterations (CACHETABLE ct) { + return ct->cl.get_iterations(); +} + +uint32_t toku_get_cleaner_iterations_unlocked (CACHETABLE ct) { + return ct->cl.get_iterations(); +} + +// reserve 25% as "unreservable". The loader cannot have it. +#define unreservable_memory(size) ((size)/4) + +int toku_cachetable_create(CACHETABLE *ct_result, long size_limit, LSN UU(initial_lsn), TOKULOGGER logger) { + int result = 0; + int r; + + if (size_limit == 0) { + size_limit = 128*1024*1024; + } + + CACHETABLE XCALLOC(ct); + ct->list.init(); + ct->cf_list.init(); + + int num_processors = toku_os_get_number_active_processors(); + int checkpointing_nworkers = (num_processors/4) ? 
num_processors/4 : 1; + r = toku_kibbutz_create(num_processors, &ct->client_kibbutz); + if (r != 0) { + result = r; + goto cleanup; + } + r = toku_kibbutz_create(2*num_processors, &ct->ct_kibbutz); + if (r != 0) { + result = r; + goto cleanup; + } + r = toku_kibbutz_create(checkpointing_nworkers, &ct->checkpointing_kibbutz); + if (r != 0) { + result = r; + goto cleanup; + } + // must be done after creating ct_kibbutz + r = ct->ev.init(size_limit, &ct->list, &ct->cf_list, ct->ct_kibbutz, EVICTION_PERIOD); + if (r != 0) { + result = r; + goto cleanup; + } + r = ct->cp.init(&ct->list, logger, &ct->ev, &ct->cf_list); + if (r != 0) { + result = r; + goto cleanup; + } + r = ct->cl.init(1, &ct->list, ct); // by default, start with one iteration + if (r != 0) { + result = r; + goto cleanup; + } + ct->env_dir = toku_xstrdup("."); +cleanup: + if (result == 0) { + *ct_result = ct; + } else { + toku_cachetable_close(&ct); + } + return result; +} + +// Returns a pointer to the checkpoint contained within +// the given cachetable. +CHECKPOINTER toku_cachetable_get_checkpointer(CACHETABLE ct) { + return &ct->cp; +} + +uint64_t toku_cachetable_reserve_memory(CACHETABLE ct, double fraction, uint64_t upper_bound) { + uint64_t reserved_memory = ct->ev.reserve_memory(fraction, upper_bound); + return reserved_memory; +} + +void toku_cachetable_release_reserved_memory(CACHETABLE ct, uint64_t reserved_memory) { + ct->ev.release_reserved_memory(reserved_memory); +} + +void +toku_cachetable_set_env_dir(CACHETABLE ct, const char *env_dir) { + toku_free(ct->env_dir); + ct->env_dir = toku_xstrdup(env_dir); +} + +// What cachefile goes with particular iname (iname relative to env)? +// The transaction that is adding the reference might not have a reference +// to the ft, therefore the cachefile might be closing. +// If closing, we want to return that it is not there, but must wait till after +// the close has finished. 
+// Once the close has finished, there must not be a cachefile with that name +// in the cachetable. +int toku_cachefile_of_iname_in_env (CACHETABLE ct, const char *iname_in_env, CACHEFILE *cf) { + return ct->cf_list.cachefile_of_iname_in_env(iname_in_env, cf); +} + +// What cachefile goes with particular fd? +// This function can only be called if the ft is still open, so file must +// still be open +int toku_cachefile_of_filenum (CACHETABLE ct, FILENUM filenum, CACHEFILE *cf) { + return ct->cf_list.cachefile_of_filenum(filenum, cf); +} + +// TEST-ONLY function +// If something goes wrong, close the fd. After this, the caller shouldn't close the fd, but instead should close the cachefile. +int toku_cachetable_openfd (CACHEFILE *cfptr, CACHETABLE ct, int fd, const char *fname_in_env) { + FILENUM filenum = toku_cachetable_reserve_filenum(ct); + bool was_open; + return toku_cachetable_openfd_with_filenum(cfptr, ct, fd, fname_in_env, filenum, &was_open); +} + +// Get a unique filenum from the cachetable +FILENUM +toku_cachetable_reserve_filenum(CACHETABLE ct) { + return ct->cf_list.reserve_filenum(); +} + +static void create_new_cachefile( + CACHETABLE ct, + FILENUM filenum, + uint32_t hash_id, + int fd, + const char *fname_in_env, + struct fileid fileid, + CACHEFILE *cfptr + ) { + // File is not open. Make a new cachefile. 
+ CACHEFILE newcf = NULL; + XCALLOC(newcf); + newcf->cachetable = ct; + newcf->hash_id = hash_id; + newcf->fileid = fileid; + + newcf->filenum = filenum; + newcf->fd = fd; + newcf->fname_in_env = toku_xstrdup(fname_in_env); + bjm_init(&newcf->bjm); + *cfptr = newcf; +} + +int toku_cachetable_openfd_with_filenum (CACHEFILE *cfptr, CACHETABLE ct, int fd, + const char *fname_in_env, + FILENUM filenum, bool* was_open) { + int r; + CACHEFILE newcf; + struct fileid fileid; + + assert(filenum.fileid != FILENUM_NONE.fileid); + r = toku_os_get_unique_file_id(fd, &fileid); + if (r != 0) { + r = get_error_errno(); + close(fd); + return r; + } + ct->cf_list.write_lock(); + CACHEFILE existing_cf = ct->cf_list.find_cachefile_unlocked(&fileid); + if (existing_cf) { + *was_open = true; + // Reuse an existing cachefile and close the caller's fd, whose + // responsibility has been passed to us. + r = close(fd); + assert(r == 0); + *cfptr = existing_cf; + r = 0; + goto exit; + } + *was_open = false; + ct->cf_list.verify_unused_filenum(filenum); + // now let's try to find it in the stale cachefiles + existing_cf = ct->cf_list.find_stale_cachefile_unlocked(&fileid); + // found the stale file, + if (existing_cf) { + // fix up the fields in the cachefile + existing_cf->filenum = filenum; + existing_cf->fd = fd; + existing_cf->fname_in_env = toku_xstrdup(fname_in_env); + bjm_init(&existing_cf->bjm); + + // now we need to move all the PAIRs in it back into the cachetable + ct->list.write_list_lock(); + for (PAIR curr_pair = existing_cf->cf_head; curr_pair; curr_pair = curr_pair->cf_next) { + pair_lock(curr_pair); + ct->list.add_to_cachetable_only(curr_pair); + pair_unlock(curr_pair); + } + ct->list.write_list_unlock(); + // move the cachefile back to the list of active cachefiles + ct->cf_list.remove_stale_cf_unlocked(existing_cf); + ct->cf_list.add_cf_unlocked(existing_cf); + *cfptr = existing_cf; + r = 0; + goto exit; + } + + create_new_cachefile( + ct, + filenum, + 
ct->cf_list.get_new_hash_id_unlocked(), + fd, + fname_in_env, + fileid, + &newcf + ); + + ct->cf_list.add_cf_unlocked(newcf); + + *cfptr = newcf; + r = 0; + exit: + ct->cf_list.write_unlock(); + return r; +} + +static void cachetable_flush_cachefile (CACHETABLE, CACHEFILE cf, bool evict_completely); + +//TEST_ONLY_FUNCTION +int toku_cachetable_openf (CACHEFILE *cfptr, CACHETABLE ct, const char *fname_in_env, int flags, mode_t mode) { + char *fname_in_cwd = toku_construct_full_name(2, ct->env_dir, fname_in_env); + int fd = open(fname_in_cwd, flags+O_BINARY, mode); + int r; + if (fd < 0) { + r = get_error_errno(); + } else { + r = toku_cachetable_openfd (cfptr, ct, fd, fname_in_env); + } + toku_free(fname_in_cwd); + return r; +} + +char * +toku_cachefile_fname_in_env (CACHEFILE cf) { + return cf->fname_in_env; +} + +int +toku_cachefile_get_fd (CACHEFILE cf) { + return cf->fd; +} + +static void cachefile_destroy(CACHEFILE cf) { + if (cf->free_userdata) { + cf->free_userdata(cf, cf->userdata); + } + toku_free(cf); +} + +void toku_cachefile_close(CACHEFILE *cfp, bool oplsn_valid, LSN oplsn) { + CACHEFILE cf = *cfp; + CACHETABLE ct = cf->cachetable; + + bjm_wait_for_jobs_to_finish(cf->bjm); + + // Clients should never attempt to close a cachefile that is being + // checkpointed. We notify clients this is happening in the + // note_pin_by_checkpoint callback. + assert(!cf->for_checkpoint); + + // Flush the cachefile and remove all of its pairs from the cachetable, + // but keep the PAIRs linked in the cachefile. 
We will store the cachefile + // away in case it gets opened immedietely + // + // if we are unlinking on close, then we want to evict completely, + // otherwise, we will keep the PAIRs and cachefile around in case + // a subsequent open comes soon + cachetable_flush_cachefile(ct, cf, cf->unlink_on_close); + + // Call the close userdata callback to notify the client this cachefile + // and its underlying file are going to be closed + if (cf->close_userdata) { + cf->close_userdata(cf, cf->fd, cf->userdata, oplsn_valid, oplsn); + } + // fsync and close the fd. + toku_file_fsync_without_accounting(cf->fd); + int r = close(cf->fd); + assert(r == 0); + cf->fd = -1; + + // destroy the parts of the cachefile + // that do not persist across opens/closes + bjm_destroy(cf->bjm); + cf->bjm = NULL; + + // remove the cf from the list of active cachefiles + ct->cf_list.remove_cf(cf); + cf->filenum = FILENUM_NONE; + + // Unlink the file if the bit was set + if (cf->unlink_on_close) { + char *fname_in_cwd = toku_cachetable_get_fname_in_cwd(cf->cachetable, cf->fname_in_env); + r = unlink(fname_in_cwd); + assert_zero(r); + toku_free(fname_in_cwd); + } + toku_free(cf->fname_in_env); + cf->fname_in_env = NULL; + + // we destroy the cf if the unlink bit was set or if no PAIRs exist + // if no PAIRs exist, there is no sense in keeping the cachefile around + bool destroy_cf = cf->unlink_on_close || (cf->cf_head == NULL); + if (destroy_cf) { + cachefile_destroy(cf); + } + else { + ct->cf_list.add_stale_cf(cf); + } +} + +// This hash function comes from Jenkins: http://burtleburtle.net/bob/c/lookup3.c +// The idea here is to mix the bits thoroughly so that we don't have to do modulo by a prime number. +// Instead we can use a bitmask on a table of size power of two. 
+// This hash function does yield improved performance on ./db-benchmark-test-tokudb and ./scanscan +static inline uint32_t rot(uint32_t x, uint32_t k) { + return (x<>(32-k)); +} +static inline uint32_t final (uint32_t a, uint32_t b, uint32_t c) { + c ^= b; c -= rot(b,14); + a ^= c; a -= rot(c,11); + b ^= a; b -= rot(a,25); + c ^= b; c -= rot(b,16); + a ^= c; a -= rot(c,4); + b ^= a; b -= rot(a,14); + c ^= b; c -= rot(b,24); + return c; +} + +uint32_t toku_cachetable_hash (CACHEFILE cachefile, BLOCKNUM key) +// Effect: Return a 32-bit hash key. The hash key shall be suitable for using with bitmasking for a table of size power-of-two. +{ + return final(cachefile->hash_id, (uint32_t)(key.b>>32), (uint32_t)key.b); +} + +#define CLOCK_SATURATION 15 +#define CLOCK_INITIAL_COUNT 3 + +// Requires pair's mutex to be held +static void pair_touch (PAIR p) { + p->count = (p->count < CLOCK_SATURATION) ? p->count+1 : CLOCK_SATURATION; +} + +// Remove a pair from the cachetable, requires write list lock to be held and p->mutex to be held +// Effects: the pair is removed from the LRU list and from the cachetable's hash table. +// The size of the objects in the cachetable is adjusted by the size of the pair being +// removed. +static void cachetable_remove_pair (pair_list* list, evictor* ev, PAIR p) { + list->evict_completely(p); + ev->remove_pair_attr(p->attr); +} + +static void cachetable_free_pair(PAIR p) { + CACHETABLE_FLUSH_CALLBACK flush_callback = p->flush_callback; + CACHEKEY key = p->key; + void *value = p->value_data; + void* disk_data = p->disk_data; + void *write_extraargs = p->write_extraargs; + PAIR_ATTR old_attr = p->attr; + + cachetable_evictions++; + PAIR_ATTR new_attr = p->attr; + // Note that flush_callback is called with write_me false, so the only purpose of this + // call is to tell the ft layer to evict the node (keep_me is false). 
+ // Also, because we have already removed the PAIR from the cachetable in + // cachetable_remove_pair, we cannot pass in p->cachefile and p->cachefile->fd + // for the first two parameters, as these may be invalid (#5171), so, we + // pass in NULL and -1, dummy values + flush_callback(NULL, -1, key, value, &disk_data, write_extraargs, old_attr, &new_attr, false, false, true, false); + + ctpair_destroy(p); +} + +// assumes value_rwlock and disk_nb_mutex held on entry +// responsibility of this function is to only write a locked PAIR to disk +// and NOTHING else. We do not manipulate the state of the PAIR +// of the cachetable here (with the exception of ct->size_current for clones) +// +// No pair_list lock should be held, and the PAIR mutex should not be held +// +static void cachetable_only_write_locked_data( + evictor* ev, + PAIR p, + bool for_checkpoint, + PAIR_ATTR* new_attr, + bool is_clone + ) +{ + CACHETABLE_FLUSH_CALLBACK flush_callback = p->flush_callback; + CACHEFILE cachefile = p->cachefile; + CACHEKEY key = p->key; + void *value = is_clone ? p->cloned_value_data : p->value_data; + void *disk_data = p->disk_data; + void *write_extraargs = p->write_extraargs; + PAIR_ATTR old_attr; + // we do this for drd. If we are a cloned pair and only + // have the disk_nb_mutex, it is a race to access p->attr. + // Luckily, old_attr here is only used for some test applications, + // so inaccurate non-size fields are ok. + if (is_clone) { + old_attr = make_pair_attr(p->cloned_value_size); + } + else { + old_attr = p->attr; + } + bool dowrite = true; + + // write callback + flush_callback( + cachefile, + cachefile->fd, + key, + value, + &disk_data, + write_extraargs, + old_attr, + new_attr, + dowrite, + is_clone ? 
false : true, // keep_me (only keep if this is not cloned pointer) + for_checkpoint, + is_clone //is_clone + ); + p->disk_data = disk_data; + if (is_clone) { + p->cloned_value_data = NULL; + ev->remove_cloned_data_size(p->cloned_value_size); + p->cloned_value_size = 0; + } +} + + +// +// This function writes a PAIR's value out to disk. Currently, it is called +// by get_and_pin functions that write a PAIR out for checkpoint, by +// evictor threads that evict dirty PAIRS, and by the checkpoint thread +// that needs to write out a dirty node for checkpoint. +// +// Requires on entry for p->mutex to NOT be held, otherwise +// calling cachetable_only_write_locked_data will be very expensive +// +static void cachetable_write_locked_pair( + evictor* ev, + PAIR p, + bool for_checkpoint + ) +{ + PAIR_ATTR old_attr = p->attr; + PAIR_ATTR new_attr = p->attr; + // grabbing the disk_nb_mutex here ensures that + // after this point, no one is writing out a cloned value + // if we grab the disk_nb_mutex inside the if clause, + // then we may try to evict a PAIR that is in the process + // of having its clone be written out + pair_lock(p); + nb_mutex_lock(&p->disk_nb_mutex, p->mutex); + pair_unlock(p); + // make sure that assumption about cloned_value_data is true + // if we have grabbed the disk_nb_mutex, then that means that + // there should be no cloned value data + assert(p->cloned_value_data == NULL); + if (p->dirty) { + cachetable_only_write_locked_data(ev, p, for_checkpoint, &new_attr, false); + // + // now let's update variables + // + if (new_attr.is_valid) { + p->attr = new_attr; + ev->change_pair_attr(old_attr, new_attr); + } + } + // the pair is no longer dirty once written + p->dirty = CACHETABLE_CLEAN; + pair_lock(p); + nb_mutex_unlock(&p->disk_nb_mutex); + pair_unlock(p); +} + +// Worker thread function to writes and evicts a pair from memory to its cachefile +static void cachetable_evicter(void* extra) { + PAIR p = (PAIR)extra; + pair_list* pl = p->list; + 
CACHEFILE cf = p->cachefile; + pl->read_pending_exp_lock(); + bool for_checkpoint = p->checkpoint_pending; + p->checkpoint_pending = false; + // per the contract of evictor::evict_pair, + // the pair's mutex, p->mutex, must be held on entry + pair_lock(p); + p->ev->evict_pair(p, for_checkpoint); + pl->read_pending_exp_unlock(); + bjm_remove_background_job(cf->bjm); +} + +static void cachetable_partial_eviction(void* extra) { + PAIR p = (PAIR)extra; + CACHEFILE cf = p->cachefile; + p->ev->do_partial_eviction(p); + bjm_remove_background_job(cf->bjm); +} + +void toku_cachetable_swap_pair_values(PAIR old_pair, PAIR new_pair) { + void* old_value = old_pair->value_data; + void* new_value = new_pair->value_data; + old_pair->value_data = new_value; + new_pair->value_data = old_value; +} + +void toku_cachetable_maybe_flush_some(CACHETABLE ct) { + // TODO: Maybe move this... + ct->ev.signal_eviction_thread(); +} + +// Initializes a pair's members. +// +void pair_init(PAIR p, + CACHEFILE cachefile, + CACHEKEY key, + void *value, + PAIR_ATTR attr, + enum cachetable_dirty dirty, + uint32_t fullhash, + CACHETABLE_WRITE_CALLBACK write_callback, + evictor *ev, + pair_list *list) +{ + p->cachefile = cachefile; + p->key = key; + p->value_data = value; + p->cloned_value_data = NULL; + p->cloned_value_size = 0; + p->disk_data = NULL; + p->attr = attr; + p->dirty = dirty; + p->fullhash = fullhash; + + p->flush_callback = write_callback.flush_callback; + p->pe_callback = write_callback.pe_callback; + p->pe_est_callback = write_callback.pe_est_callback; + p->cleaner_callback = write_callback.cleaner_callback; + p->clone_callback = write_callback.clone_callback; + p->checkpoint_complete_callback = write_callback.checkpoint_complete_callback; + p->write_extraargs = write_callback.write_extraargs; + + p->count = 0; // Is zero the correct init value? 
+ p->refcount = 0; + p->num_waiting_on_refs = 0; + toku_cond_init(&p->refcount_wait, NULL); + p->checkpoint_pending = false; + + p->mutex = list->get_mutex_for_pair(fullhash); + assert(p->mutex); + p->value_rwlock.init(p->mutex); + nb_mutex_init(&p->disk_nb_mutex); + + p->size_evicting_estimate = 0; // Is zero the correct init value? + + p->ev = ev; + p->list = list; + + p->clock_next = p->clock_prev = NULL; + p->pending_next = p->pending_prev = NULL; + p->cf_next = p->cf_prev = NULL; + p->hash_chain = NULL; +} + +// has ct locked on entry +// This function MUST NOT release and reacquire the cachetable lock +// Its callers (toku_cachetable_put_with_dep_pairs) depend on this behavior. +// +// Requires pair list's write lock to be held on entry. +// the pair's mutex must be held as wel +// +// +static PAIR cachetable_insert_at(CACHETABLE ct, + CACHEFILE cachefile, CACHEKEY key, void *value, + uint32_t fullhash, + PAIR_ATTR attr, + CACHETABLE_WRITE_CALLBACK write_callback, + enum cachetable_dirty dirty) { + PAIR MALLOC(p); + assert(p); + memset(p, 0, sizeof *p); + pair_init(p, + cachefile, + key, + value, + attr, + dirty, + fullhash, + write_callback, + &ct->ev, + &ct->list + ); + + ct->list.put(p); + ct->ev.add_pair_attr(attr); + return p; +} + +// on input, the write list lock must be held AND +// the pair's mutex must be held as wel +static void cachetable_insert_pair_at(CACHETABLE ct, PAIR p, PAIR_ATTR attr) { + ct->list.put(p); + ct->ev.add_pair_attr(attr); +} + + +// has ct locked on entry +// This function MUST NOT release and reacquire the cachetable lock +// Its callers (toku_cachetable_put_with_dep_pairs) depend on this behavior. 
+// +// Requires pair list's write lock to be held on entry +// +static void cachetable_put_internal( + CACHEFILE cachefile, + PAIR p, + void *value, + PAIR_ATTR attr, + CACHETABLE_PUT_CALLBACK put_callback + ) +{ + CACHETABLE ct = cachefile->cachetable; + // + // + // TODO: (Zardosht), make code run in debug only + // + // + //PAIR dummy_p = ct->list.find_pair(cachefile, key, fullhash); + //invariant_null(dummy_p); + cachetable_insert_pair_at(ct, p, attr); + invariant_notnull(put_callback); + put_callback(p->key, value, p); +} + +// Pair mutex (p->mutex) is may or may not be held on entry, +// Holding the pair mutex on entry is not important +// for performance or corrrectness +// Pair is pinned on entry +static void +clone_pair(evictor* ev, PAIR p) { + PAIR_ATTR old_attr = p->attr; + PAIR_ATTR new_attr; + long clone_size = 0; + + // act of cloning should be fast, + // not sure if we have to release + // and regrab the cachetable lock, + // but doing it for now + p->clone_callback( + p->value_data, + &p->cloned_value_data, + &clone_size, + &new_attr, + true, + p->write_extraargs + ); + + // now we need to do the same actions we would do + // if the PAIR had been written to disk + // + // because we hold the value_rwlock, + // it doesn't matter whether we clear + // the pending bit before the clone + // or after the clone + p->dirty = CACHETABLE_CLEAN; + if (new_attr.is_valid) { + p->attr = new_attr; + ev->change_pair_attr(old_attr, new_attr); + } + p->cloned_value_size = clone_size; + ev->add_cloned_data_size(p->cloned_value_size); +} + +static void checkpoint_cloned_pair(void* extra) { + PAIR p = (PAIR)extra; + CACHETABLE ct = p->cachefile->cachetable; + PAIR_ATTR new_attr; + // note that pending lock is not needed here because + // we KNOW we are in the middle of a checkpoint + // and that a begin_checkpoint cannot happen + cachetable_only_write_locked_data( + p->ev, + p, + true, //for_checkpoint + &new_attr, + true //is_clone + ); + pair_lock(p); + 
nb_mutex_unlock(&p->disk_nb_mutex); + pair_unlock(p); + ct->cp.remove_background_job(); +} + +static void +checkpoint_cloned_pair_on_writer_thread(CACHETABLE ct, PAIR p) { + toku_kibbutz_enq(ct->checkpointing_kibbutz, checkpoint_cloned_pair, p); +} + + +// +// Given a PAIR p with the value_rwlock altready held, do the following: +// - If the PAIR needs to be written out to disk for checkpoint: +// - If the PAIR is cloneable, clone the PAIR and place the work +// of writing the PAIR on a background thread. +// - If the PAIR is not cloneable, write the PAIR to disk for checkpoint +// on the current thread +// +// On entry, pair's mutex is NOT held +// +static void +write_locked_pair_for_checkpoint(CACHETABLE ct, PAIR p, bool checkpoint_pending) +{ + if (checkpoint_pending && p->checkpoint_complete_callback) { + p->checkpoint_complete_callback(p->value_data); + } + if (p->dirty && checkpoint_pending) { + if (p->clone_callback) { + pair_lock(p); + nb_mutex_lock(&p->disk_nb_mutex, p->mutex); + pair_unlock(p); + assert(!p->cloned_value_data); + clone_pair(&ct->ev, p); + assert(p->cloned_value_data); + // place it on the background thread and continue + // responsibility of writer thread to release disk_nb_mutex + ct->cp.add_background_job(); + checkpoint_cloned_pair_on_writer_thread(ct, p); + } + else { + // The pair is not cloneable, just write the pair to disk + // we already have p->value_rwlock and we just do the write in our own thread. + cachetable_write_locked_pair(&ct->ev, p, true); // keeps the PAIR's write lock + } + } +} + +// On entry and exit: hold the pair's mutex (p->mutex) +// Method: take write lock +// maybe write out the node +// Else release write lock +// +static void +write_pair_for_checkpoint_thread (evictor* ev, PAIR p) +{ + // Grab an exclusive lock on the pair. + // If we grab an expensive lock, then other threads will return + // TRY_AGAIN rather than waiting. 
In production, the only time + // another thread will check if grabbing a lock is expensive is when + // we have a clone_callback (FTNODEs), so the act of checkpointing + // will be cheap. Also, much of the time we'll just be clearing + // pending bits and that's definitely cheap. (see #5427) + p->value_rwlock.write_lock(false); + if (p->checkpoint_pending && p->checkpoint_complete_callback) { + p->checkpoint_complete_callback(p->value_data); + } + if (p->dirty && p->checkpoint_pending) { + if (p->clone_callback) { + nb_mutex_lock(&p->disk_nb_mutex, p->mutex); + assert(!p->cloned_value_data); + clone_pair(ev, p); + assert(p->cloned_value_data); + } + else { + // The pair is not cloneable, just write the pair to disk + // we already have p->value_rwlock and we just do the write in our own thread. + // this will grab and release disk_nb_mutex + pair_unlock(p); + cachetable_write_locked_pair(ev, p, true); // keeps the PAIR's write lock + pair_lock(p); + } + p->checkpoint_pending = false; + + // now release value_rwlock, before we write the PAIR out + // so that the PAIR is available to client threads + p->value_rwlock.write_unlock(); // didn't call cachetable_evict_pair so we have to unlock it ourselves. + if (p->clone_callback) { + // note that pending lock is not needed here because + // we KNOW we are in the middle of a checkpoint + // and that a begin_checkpoint cannot happen + PAIR_ATTR attr; + pair_unlock(p); + cachetable_only_write_locked_data( + ev, + p, + true, //for_checkpoint + &attr, + true //is_clone + ); + pair_lock(p); + nb_mutex_unlock(&p->disk_nb_mutex); + } + } + else { + // + // we may clear the pending bit here because we have + // both the cachetable lock and the PAIR lock. 
+ // The rule, as mentioned in toku_cachetable_begin_checkpoint, + // is that to clear the bit, we must have both the PAIR lock + // and the pending lock + // + p->checkpoint_pending = false; + p->value_rwlock.write_unlock(); + } +} + +// +// For each PAIR associated with these CACHEFILEs and CACHEKEYs +// if the checkpoint_pending bit is set and the PAIR is dirty, write the PAIR +// to disk. +// We assume the PAIRs passed in have been locked by the client that made calls +// into the cachetable that eventually make it here. +// +static void checkpoint_dependent_pairs( + CACHETABLE ct, + uint32_t num_dependent_pairs, // number of dependent pairs that we may need to checkpoint + PAIR* dependent_pairs, + bool* checkpoint_pending, + enum cachetable_dirty* dependent_dirty // array stating dirty/cleanness of dependent pairs + ) +{ + for (uint32_t i =0; i < num_dependent_pairs; i++) { + PAIR curr_dep_pair = dependent_pairs[i]; + // we need to update the dirtyness of the dependent pair, + // because the client may have dirtied it while holding its lock, + // and if the pair is pending a checkpoint, it needs to be written out + if (dependent_dirty[i]) curr_dep_pair->dirty = CACHETABLE_DIRTY; + if (checkpoint_pending[i]) { + write_locked_pair_for_checkpoint(ct, curr_dep_pair, checkpoint_pending[i]); + } + } +} + +void toku_cachetable_put_with_dep_pairs( + CACHEFILE cachefile, + CACHETABLE_GET_KEY_AND_FULLHASH get_key_and_fullhash, + void *value, + PAIR_ATTR attr, + CACHETABLE_WRITE_CALLBACK write_callback, + void *get_key_and_fullhash_extra, + uint32_t num_dependent_pairs, // number of dependent pairs that we may need to checkpoint + PAIR* dependent_pairs, + enum cachetable_dirty* dependent_dirty, // array stating dirty/cleanness of dependent pairs + CACHEKEY* key, + uint32_t* fullhash, + CACHETABLE_PUT_CALLBACK put_callback + ) +{ + // + // need to get the key and filehash + // + CACHETABLE ct = cachefile->cachetable; + if (ct->ev.should_client_thread_sleep()) { + 
ct->ev.wait_for_cache_pressure_to_subside(); + } + if (ct->ev.should_client_wake_eviction_thread()) { + ct->ev.signal_eviction_thread(); + } + + PAIR p = NULL; + XMALLOC(p); + memset(p, 0, sizeof *p); + + ct->list.write_list_lock(); + get_key_and_fullhash(key, fullhash, get_key_and_fullhash_extra); + pair_init( + p, + cachefile, + *key, + value, + attr, + CACHETABLE_DIRTY, + *fullhash, + write_callback, + &ct->ev, + &ct->list + ); + pair_lock(p); + p->value_rwlock.write_lock(true); + cachetable_put_internal( + cachefile, + p, + value, + attr, + put_callback + ); + pair_unlock(p); + bool checkpoint_pending[num_dependent_pairs]; + ct->list.write_pending_cheap_lock(); + for (uint32_t i = 0; i < num_dependent_pairs; i++) { + checkpoint_pending[i] = dependent_pairs[i]->checkpoint_pending; + dependent_pairs[i]->checkpoint_pending = false; + } + ct->list.write_pending_cheap_unlock(); + ct->list.write_list_unlock(); + + // + // now that we have inserted the row, let's checkpoint the + // dependent nodes, if they need checkpointing + // + checkpoint_dependent_pairs( + ct, + num_dependent_pairs, + dependent_pairs, + checkpoint_pending, + dependent_dirty + ); +} + +void toku_cachetable_put(CACHEFILE cachefile, CACHEKEY key, uint32_t fullhash, void*value, PAIR_ATTR attr, + CACHETABLE_WRITE_CALLBACK write_callback, + CACHETABLE_PUT_CALLBACK put_callback + ) { + CACHETABLE ct = cachefile->cachetable; + if (ct->ev.should_client_thread_sleep()) { + ct->ev.wait_for_cache_pressure_to_subside(); + } + if (ct->ev.should_client_wake_eviction_thread()) { + ct->ev.signal_eviction_thread(); + } + + PAIR p = NULL; + XMALLOC(p); + memset(p, 0, sizeof *p); + + ct->list.write_list_lock(); + pair_init( + p, + cachefile, + key, + value, + attr, + CACHETABLE_DIRTY, + fullhash, + write_callback, + &ct->ev, + &ct->list + ); + pair_lock(p); + p->value_rwlock.write_lock(true); + cachetable_put_internal( + cachefile, + p, + value, + attr, + put_callback + ); + pair_unlock(p); + 
ct->list.write_list_unlock(); +} + +static uint64_t get_tnow(void) { + struct timeval tv; + int r = gettimeofday(&tv, NULL); assert(r == 0); + return tv.tv_sec * 1000000ULL + tv.tv_usec; +} + +// +// cachetable lock and PAIR lock are held on entry +// On exit, cachetable lock is still held, but PAIR lock +// is either released. +// +// No locks are held on entry (besides the rwlock write lock of the PAIR) +// +static void +do_partial_fetch( + CACHETABLE ct, + CACHEFILE cachefile, + PAIR p, + CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, + void *read_extraargs, + bool keep_pair_locked + ) +{ + PAIR_ATTR old_attr = p->attr; + PAIR_ATTR new_attr = zero_attr; + // As of Dr. No, only clean PAIRs may have pieces missing, + // so we do a sanity check here. + assert(!p->dirty); + + pair_lock(p); + invariant(p->value_rwlock.writers()); + nb_mutex_lock(&p->disk_nb_mutex, p->mutex); + pair_unlock(p); + int r = pf_callback(p->value_data, p->disk_data, read_extraargs, cachefile->fd, &new_attr); + lazy_assert_zero(r); + p->attr = new_attr; + ct->ev.change_pair_attr(old_attr, new_attr); + pair_lock(p); + nb_mutex_unlock(&p->disk_nb_mutex); + if (!keep_pair_locked) { + p->value_rwlock.write_unlock(); + } + pair_unlock(p); +} + +void toku_cachetable_pf_pinned_pair( + void* value, + CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, + void* read_extraargs, + CACHEFILE cf, + CACHEKEY key, + uint32_t fullhash + ) +{ + PAIR_ATTR attr; + PAIR p = NULL; + CACHETABLE ct = cf->cachetable; + ct->list.pair_lock_by_fullhash(fullhash); + p = ct->list.find_pair(cf, key, fullhash); + assert(p != NULL); + assert(p->value_data == value); + assert(p->value_rwlock.writers()); + nb_mutex_lock(&p->disk_nb_mutex, p->mutex); + pair_unlock(p); + + int fd = cf->fd; + pf_callback(value, p->disk_data, read_extraargs, fd, &attr); + + pair_lock(p); + nb_mutex_unlock(&p->disk_nb_mutex); + pair_unlock(p); +} + +int toku_cachetable_get_and_pin ( + CACHEFILE cachefile, + CACHEKEY key, + uint32_t fullhash, + 
void**value, + long *sizep, + CACHETABLE_WRITE_CALLBACK write_callback, + CACHETABLE_FETCH_CALLBACK fetch_callback, + CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, + CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, + bool may_modify_value, + void* read_extraargs // parameter for fetch_callback, pf_req_callback, and pf_callback + ) +{ + pair_lock_type lock_type = may_modify_value ? PL_WRITE_EXPENSIVE : PL_READ; + // We have separate parameters of read_extraargs and write_extraargs because + // the lifetime of the two parameters are different. write_extraargs may be used + // long after this function call (e.g. after a flush to disk), whereas read_extraargs + // will not be used after this function returns. As a result, the caller may allocate + // read_extraargs on the stack, whereas write_extraargs must be allocated + // on the heap. + return toku_cachetable_get_and_pin_with_dep_pairs ( + cachefile, + key, + fullhash, + value, + sizep, + write_callback, + fetch_callback, + pf_req_callback, + pf_callback, + lock_type, + read_extraargs, + 0, // number of dependent pairs that we may need to checkpoint + NULL, // array of dependent pairs + NULL // array stating dirty/cleanness of dependent pairs + ); +} + +// Read a pair from a cachefile into memory using the pair's fetch callback +// on entry, pair mutex (p->mutex) is NOT held, but pair is pinned +static void cachetable_fetch_pair( + CACHETABLE ct, + CACHEFILE cf, + PAIR p, + CACHETABLE_FETCH_CALLBACK fetch_callback, + void* read_extraargs, + bool keep_pair_locked + ) +{ + // helgrind + CACHEKEY key = p->key; + uint32_t fullhash = p->fullhash; + + void *toku_value = NULL; + void *disk_data = NULL; + PAIR_ATTR attr; + + // FIXME this should be enum cachetable_dirty, right? 
+ int dirty = 0; + + pair_lock(p); + nb_mutex_lock(&p->disk_nb_mutex, p->mutex); + pair_unlock(p); + + int r; + r = fetch_callback(cf, p, cf->fd, key, fullhash, &toku_value, &disk_data, &attr, &dirty, read_extraargs); + if (dirty) { + p->dirty = CACHETABLE_DIRTY; + } + assert(r == 0); + + p->value_data = toku_value; + p->disk_data = disk_data; + p->attr = attr; + ct->ev.add_pair_attr(attr); + pair_lock(p); + nb_mutex_unlock(&p->disk_nb_mutex); + if (!keep_pair_locked) { + p->value_rwlock.write_unlock(); + } + pair_unlock(p); +} + +static bool get_checkpoint_pending(PAIR p, pair_list* pl) { + bool checkpoint_pending = false; + pl->read_pending_cheap_lock(); + checkpoint_pending = p->checkpoint_pending; + p->checkpoint_pending = false; + pl->read_pending_cheap_unlock(); + return checkpoint_pending; +} + +static void checkpoint_pair_and_dependent_pairs( + CACHETABLE ct, + PAIR p, + bool p_is_pending_checkpoint, + uint32_t num_dependent_pairs, // number of dependent pairs that we may need to checkpoint + PAIR* dependent_pairs, + bool* dependent_pairs_pending_checkpoint, + enum cachetable_dirty* dependent_dirty // array stating dirty/cleanness of dependent pairs + ) +{ + + // + // A checkpoint must not begin while we are checking dependent pairs or pending bits. + // Here is why. + // + // Now that we have all of the locks on the pairs we + // care about, we can take care of the necessary checkpointing. + // For each pair, we simply need to write the pair if it is + // pending a checkpoint. If no pair is pending a checkpoint, + // then all of this work will be done with the cachetable lock held, + // so we don't need to worry about a checkpoint beginning + // in the middle of any operation below. If some pair + // is pending a checkpoint, then the checkpoint thread + // will not complete its current checkpoint until it can + // successfully grab a lock on the pending pair and + // remove it from its list of pairs pending a checkpoint. 
+ // This cannot be done until we release the lock + // that we have, which is not done in this function. + // So, the point is, it is impossible for a checkpoint + // to begin while we write any of these locked pairs + // for checkpoint, even though writing a pair releases + // the cachetable lock. + // + write_locked_pair_for_checkpoint(ct, p, p_is_pending_checkpoint); + + checkpoint_dependent_pairs( + ct, + num_dependent_pairs, + dependent_pairs, + dependent_pairs_pending_checkpoint, + dependent_dirty + ); +} + +static void unpin_pair(PAIR p, bool read_lock_grabbed) { + if (read_lock_grabbed) { + p->value_rwlock.read_unlock(); + } + else { + p->value_rwlock.write_unlock(); + } +} + + +// on input, the pair's mutex is held, +// on output, the pair's mutex is not held. +// if true, we must try again, and pair is not pinned +// if false, we succeeded, the pair is pinned +static bool try_pin_pair( + PAIR p, + CACHETABLE ct, + CACHEFILE cachefile, + pair_lock_type lock_type, + uint32_t num_dependent_pairs, + PAIR* dependent_pairs, + enum cachetable_dirty* dependent_dirty, + CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, + CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, + void* read_extraargs, + bool already_slept + ) +{ + bool dep_checkpoint_pending[num_dependent_pairs]; + bool try_again = true; + bool expensive = (lock_type == PL_WRITE_EXPENSIVE); + if (lock_type != PL_READ) { + p->value_rwlock.write_lock(expensive); + } + else { + p->value_rwlock.read_lock(); + } + pair_touch(p); + pair_unlock(p); + + bool partial_fetch_required = pf_req_callback(p->value_data,read_extraargs); + + if (partial_fetch_required) { + toku::context pf_ctx(CTX_PARTIAL_FETCH); + + if (ct->ev.should_client_thread_sleep() && !already_slept) { + pair_lock(p); + unpin_pair(p, (lock_type == PL_READ)); + pair_unlock(p); + try_again = true; + goto exit; + } + if (ct->ev.should_client_wake_eviction_thread()) { + ct->ev.signal_eviction_thread(); + } + // + // Just because the PAIR 
exists does necessarily mean the all the data the caller requires + // is in memory. A partial fetch may be required, which is evaluated above + // if the variable is true, a partial fetch is required so we must grab the PAIR's write lock + // and then call a callback to retrieve what we need + // + assert(partial_fetch_required); + // As of Dr. No, only clean PAIRs may have pieces missing, + // so we do a sanity check here. + assert(!p->dirty); + + if (lock_type == PL_READ) { + pair_lock(p); + p->value_rwlock.read_unlock(); + p->value_rwlock.write_lock(true); + pair_unlock(p); + } + else if (lock_type == PL_WRITE_CHEAP) { + pair_lock(p); + p->value_rwlock.write_unlock(); + p->value_rwlock.write_lock(true); + pair_unlock(p); + } + + partial_fetch_required = pf_req_callback(p->value_data,read_extraargs); + if (partial_fetch_required) { + do_partial_fetch(ct, cachefile, p, pf_callback, read_extraargs, true); + } + if (lock_type == PL_READ) { + // + // TODO: Zardosht, somehow ensure that a partial eviction cannot happen + // between these two calls + // + pair_lock(p); + p->value_rwlock.write_unlock(); + p->value_rwlock.read_lock(); + pair_unlock(p); + } + else if (lock_type == PL_WRITE_CHEAP) { + pair_lock(p); + p->value_rwlock.write_unlock(); + p->value_rwlock.write_lock(false); + pair_unlock(p); + } + // small hack here for #5439, + // for queries, pf_req_callback does some work for the caller, + // that information may be out of date after a write_unlock + // followed by a relock, so we do it again. 
+ bool pf_required = pf_req_callback(p->value_data,read_extraargs); + assert(!pf_required); + } + + if (lock_type != PL_READ) { + ct->list.read_pending_cheap_lock(); + bool p_checkpoint_pending = p->checkpoint_pending; + p->checkpoint_pending = false; + for (uint32_t i = 0; i < num_dependent_pairs; i++) { + dep_checkpoint_pending[i] = dependent_pairs[i]->checkpoint_pending; + dependent_pairs[i]->checkpoint_pending = false; + } + ct->list.read_pending_cheap_unlock(); + checkpoint_pair_and_dependent_pairs( + ct, + p, + p_checkpoint_pending, + num_dependent_pairs, + dependent_pairs, + dep_checkpoint_pending, + dependent_dirty + ); + } + + try_again = false; +exit: + return try_again; +} + +int toku_cachetable_get_and_pin_with_dep_pairs ( + CACHEFILE cachefile, + CACHEKEY key, + uint32_t fullhash, + void**value, + long *sizep, + CACHETABLE_WRITE_CALLBACK write_callback, + CACHETABLE_FETCH_CALLBACK fetch_callback, + CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, + CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, + pair_lock_type lock_type, + void* read_extraargs, // parameter for fetch_callback, pf_req_callback, and pf_callback + uint32_t num_dependent_pairs, // number of dependent pairs that we may need to checkpoint + PAIR* dependent_pairs, + enum cachetable_dirty* dependent_dirty // array stating dirty/cleanness of dependent pairs + ) +// See cachetable/cachetable.h +{ + CACHETABLE ct = cachefile->cachetable; + bool wait = false; + bool already_slept = false; + bool dep_checkpoint_pending[num_dependent_pairs]; + + // + // If in the process of pinning the node we add data to the cachetable via a partial fetch + // or a full fetch, we may need to first sleep because there is too much data in the + // cachetable. In those cases, we set the bool wait to true and goto try_again, so that + // we can do our sleep and then restart the function. 
+ // +beginning: + if (wait) { + // We shouldn't be holding the read list lock while + // waiting for the evictor to remove pairs. + already_slept = true; + ct->ev.wait_for_cache_pressure_to_subside(); + } + + ct->list.pair_lock_by_fullhash(fullhash); + PAIR p = ct->list.find_pair(cachefile, key, fullhash); + if (p) { + // on entry, holds p->mutex (which is locked via pair_lock_by_fullhash) + // on exit, does not hold p->mutex + bool try_again = try_pin_pair( + p, + ct, + cachefile, + lock_type, + num_dependent_pairs, + dependent_pairs, + dependent_dirty, + pf_req_callback, + pf_callback, + read_extraargs, + already_slept + ); + if (try_again) { + wait = true; + goto beginning; + } + else { + goto got_value; + } + } + else { + toku::context fetch_ctx(CTX_FULL_FETCH); + + ct->list.pair_unlock_by_fullhash(fullhash); + // we only want to sleep once per call to get_and_pin. If we have already + // slept and there is still cache pressure, then we might as + // well just complete the call, because the sleep did not help + // By sleeping only once per get_and_pin, we prevent starvation and ensure + // that we make progress (however slow) on each thread, which allows + // assumptions of the form 'x will eventually happen'. + // This happens in extreme scenarios. + if (ct->ev.should_client_thread_sleep() && !already_slept) { + wait = true; + goto beginning; + } + if (ct->ev.should_client_wake_eviction_thread()) { + ct->ev.signal_eviction_thread(); + } + // Since the pair was not found, we need the write list + // lock to add it. So, we have to release the read list lock + // first. 
+ ct->list.write_list_lock(); + ct->list.pair_lock_by_fullhash(fullhash); + p = ct->list.find_pair(cachefile, key, fullhash); + if (p != NULL) { + ct->list.write_list_unlock(); + // on entry, holds p->mutex, + // on exit, does not hold p->mutex + bool try_again = try_pin_pair( + p, + ct, + cachefile, + lock_type, + num_dependent_pairs, + dependent_pairs, + dependent_dirty, + pf_req_callback, + pf_callback, + read_extraargs, + already_slept + ); + if (try_again) { + wait = true; + goto beginning; + } + else { + goto got_value; + } + } + assert(p == NULL); + + // Insert a PAIR into the cachetable + // NOTE: At this point we still have the write list lock held. + p = cachetable_insert_at( + ct, + cachefile, + key, + zero_value, + fullhash, + zero_attr, + write_callback, + CACHETABLE_CLEAN + ); + invariant_notnull(p); + + // Pin the pair. + p->value_rwlock.write_lock(true); + pair_unlock(p); + + + if (lock_type != PL_READ) { + ct->list.read_pending_cheap_lock(); + invariant(!p->checkpoint_pending); + for (uint32_t i = 0; i < num_dependent_pairs; i++) { + dep_checkpoint_pending[i] = dependent_pairs[i]->checkpoint_pending; + dependent_pairs[i]->checkpoint_pending = false; + } + ct->list.read_pending_cheap_unlock(); + } + // We should release the lock before we perform + // these expensive operations. + ct->list.write_list_unlock(); + + if (lock_type != PL_READ) { + checkpoint_dependent_pairs( + ct, + num_dependent_pairs, + dependent_pairs, + dep_checkpoint_pending, + dependent_dirty + ); + } + uint64_t t0 = get_tnow(); + + // Retrieve the value of the PAIR from disk. + // The pair being fetched will be marked as pending if a checkpoint happens during the + // fetch because begin_checkpoint will mark as pending any pair that is locked even if it is clean. 
+ cachetable_fetch_pair(ct, cachefile, p, fetch_callback, read_extraargs, true); + cachetable_miss++; + cachetable_misstime += get_tnow() - t0; + + // If the lock_type requested was a PL_READ, we downgrade to PL_READ, + // but if the request was for a PL_WRITE_CHEAP, we don't bother + // downgrading, because we would have to possibly resolve the + // checkpointing again, and that would just make this function even + // messier. + // + // TODO(yoni): in case of PL_WRITE_CHEAP, write and use + // p->value_rwlock.write_change_status_to_not_expensive(); (Also name it better) + // to downgrade from an expensive write lock to a cheap one + if (lock_type == PL_READ) { + pair_lock(p); + p->value_rwlock.write_unlock(); + p->value_rwlock.read_lock(); + pair_unlock(p); + // small hack here for #5439, + // for queries, pf_req_callback does some work for the caller, + // that information may be out of date after a write_unlock + // followed by a read_lock, so we do it again. + bool pf_required = pf_req_callback(p->value_data,read_extraargs); + assert(!pf_required); + } + goto got_value; + } +got_value: + *value = p->value_data; + if (sizep) *sizep = p->attr.size; + return 0; +} + +// Lookup a key in the cachetable. If it is found and it is not being written, then +// acquire a read lock on the pair, update the LRU list, and return sucess. +// +// However, if the page is clean or has checkpoint pending, don't return success. +// This will minimize the number of dirty nodes. +// Rationale: maybe_get_and_pin is used when the system has an alternative to modifying a node. +// In the context of checkpointing, we don't want to gratuituously dirty a page, because it causes an I/O. +// For example, imagine that we can modify a bit in a dirty parent, or modify a bit in a clean child, then we should modify +// the dirty parent (which will have to do I/O eventually anyway) rather than incur a full block write to modify one bit. 
+// Similarly, if the checkpoint is actually pending, we don't want to block on it. +int toku_cachetable_maybe_get_and_pin (CACHEFILE cachefile, CACHEKEY key, uint32_t fullhash, pair_lock_type lock_type, void**value) { + CACHETABLE ct = cachefile->cachetable; + int r = -1; + ct->list.pair_lock_by_fullhash(fullhash); + PAIR p = ct->list.find_pair(cachefile, key, fullhash); + if (p) { + const bool lock_is_expensive = (lock_type == PL_WRITE_EXPENSIVE); + bool got_lock = false; + switch (lock_type) { + case PL_READ: + if (p->value_rwlock.try_read_lock()) { + got_lock = p->dirty; + + if (!got_lock) { + p->value_rwlock.read_unlock(); + } + } + break; + case PL_WRITE_CHEAP: + case PL_WRITE_EXPENSIVE: + if (p->value_rwlock.try_write_lock(lock_is_expensive)) { + // we got the lock fast, so continue + ct->list.read_pending_cheap_lock(); + + // if pending a checkpoint, then we don't want to return + // the value to the user, because we are responsible for + // handling the checkpointing, which we do not want to do, + // because it is expensive + got_lock = p->dirty && !p->checkpoint_pending; + + ct->list.read_pending_cheap_unlock(); + if (!got_lock) { + p->value_rwlock.write_unlock(); + } + } + break; + } + if (got_lock) { + pair_touch(p); + *value = p->value_data; + r = 0; + } + } + ct->list.pair_unlock_by_fullhash(fullhash); + return r; +} + +//Used by flusher threads to possibly pin child on client thread if pinning is cheap +//Same as toku_cachetable_maybe_get_and_pin except that we don't care if the node is clean or dirty (return the node regardless). +//All other conditions remain the same. 
+int toku_cachetable_maybe_get_and_pin_clean (CACHEFILE cachefile, CACHEKEY key, uint32_t fullhash, pair_lock_type lock_type, void**value) { + CACHETABLE ct = cachefile->cachetable; + int r = -1; + ct->list.pair_lock_by_fullhash(fullhash); + PAIR p = ct->list.find_pair(cachefile, key, fullhash); + if (p) { + const bool lock_is_expensive = (lock_type == PL_WRITE_EXPENSIVE); + bool got_lock = false; + switch (lock_type) { + case PL_READ: + if (p->value_rwlock.try_read_lock()) { + got_lock = true; + } else if (!p->value_rwlock.read_lock_is_expensive()) { + p->value_rwlock.write_lock(lock_is_expensive); + got_lock = true; + } + if (got_lock) { + pair_touch(p); + } + pair_unlock(p); + break; + case PL_WRITE_CHEAP: + case PL_WRITE_EXPENSIVE: + if (p->value_rwlock.try_write_lock(lock_is_expensive)) { + got_lock = true; + } else if (!p->value_rwlock.write_lock_is_expensive()) { + p->value_rwlock.write_lock(lock_is_expensive); + got_lock = true; + } + if (got_lock) { + pair_touch(p); + } + pair_unlock(p); + if (got_lock) { + bool checkpoint_pending = get_checkpoint_pending(p, &ct->list); + write_locked_pair_for_checkpoint(ct, p, checkpoint_pending); + } + break; + } + if (got_lock) { + *value = p->value_data; + r = 0; + } + } else { + ct->list.pair_unlock_by_fullhash(fullhash); + } + return r; +} + +// +// internal function to unpin a PAIR. +// As of Clayface, this is may be called in two ways: +// - with flush false +// - with flush true +// The first is for when this is run during run_unlockers in +// toku_cachetable_get_and_pin_nonblocking, the second is during +// normal operations. Only during normal operations do we want to possibly +// induce evictions or sleep. 
+// +static int +cachetable_unpin_internal( + CACHEFILE cachefile, + PAIR p, + enum cachetable_dirty dirty, + PAIR_ATTR attr, + bool flush + ) +{ + invariant_notnull(p); + + CACHETABLE ct = cachefile->cachetable; + bool added_data_to_cachetable = false; + + // hack for #3969, only exists in case where we run unlockers + pair_lock(p); + PAIR_ATTR old_attr = p->attr; + PAIR_ATTR new_attr = attr; + if (dirty) { + p->dirty = CACHETABLE_DIRTY; + } + if (attr.is_valid) { + p->attr = attr; + } + bool read_lock_grabbed = p->value_rwlock.readers() != 0; + unpin_pair(p, read_lock_grabbed); + pair_unlock(p); + + if (attr.is_valid) { + if (new_attr.size > old_attr.size) { + added_data_to_cachetable = true; + } + ct->ev.change_pair_attr(old_attr, new_attr); + } + + // see comments above this function to understand this code + if (flush && added_data_to_cachetable) { + if (ct->ev.should_client_thread_sleep()) { + ct->ev.wait_for_cache_pressure_to_subside(); + } + if (ct->ev.should_client_wake_eviction_thread()) { + ct->ev.signal_eviction_thread(); + } + } + return 0; +} + +int toku_cachetable_unpin(CACHEFILE cachefile, PAIR p, enum cachetable_dirty dirty, PAIR_ATTR attr) { + return cachetable_unpin_internal(cachefile, p, dirty, attr, true); +} +int toku_cachetable_unpin_ct_prelocked_no_flush(CACHEFILE cachefile, PAIR p, enum cachetable_dirty dirty, PAIR_ATTR attr) { + return cachetable_unpin_internal(cachefile, p, dirty, attr, false); +} + +static void +run_unlockers (UNLOCKERS unlockers) { + while (unlockers) { + assert(unlockers->locked); + unlockers->locked = false; + unlockers->f(unlockers->extra); + unlockers=unlockers->next; + } +} + +// +// This function tries to pin the pair without running the unlockers. +// If it can pin the pair cheaply, it does so, and returns 0. 
+// If the pin will be expensive, it runs unlockers, +// pins the pair, then releases the pin, +// and then returns TOKUDB_TRY_AGAIN +// +// on entry, pair mutex is held, +// on exit, pair mutex is NOT held +static int +maybe_pin_pair( + PAIR p, + pair_lock_type lock_type, + UNLOCKERS unlockers + ) +{ + int retval = 0; + bool expensive = (lock_type == PL_WRITE_EXPENSIVE); + + // we can pin the PAIR. In each case, we check to see + // if acquiring the pin is expensive. If so, we run the unlockers, set the + // retval to TOKUDB_TRY_AGAIN, pin AND release the PAIR. + // If not, then we pin the PAIR, keep retval at 0, and do not + // run the unlockers, as we intend to return the value to the user + if (lock_type == PL_READ) { + if (p->value_rwlock.read_lock_is_expensive()) { + pair_add_ref_unlocked(p); + pair_unlock(p); + run_unlockers(unlockers); + retval = TOKUDB_TRY_AGAIN; + pair_lock(p); + pair_release_ref_unlocked(p); + } + p->value_rwlock.read_lock(); + } + else if (lock_type == PL_WRITE_EXPENSIVE || lock_type == PL_WRITE_CHEAP){ + if (p->value_rwlock.write_lock_is_expensive()) { + pair_add_ref_unlocked(p); + pair_unlock(p); + run_unlockers(unlockers); + // change expensive to false because + // we will unpin the pair immedietely + // after pinning it + expensive = false; + retval = TOKUDB_TRY_AGAIN; + pair_lock(p); + pair_release_ref_unlocked(p); + } + p->value_rwlock.write_lock(expensive); + } + else { + abort(); + } + + if (retval == TOKUDB_TRY_AGAIN) { + unpin_pair(p, (lock_type == PL_READ)); + } + pair_touch(p); + pair_unlock(p); + return retval; +} + +int toku_cachetable_get_and_pin_nonblocking( + CACHEFILE cf, + CACHEKEY key, + uint32_t fullhash, + void**value, + long* UU(sizep), + CACHETABLE_WRITE_CALLBACK write_callback, + CACHETABLE_FETCH_CALLBACK fetch_callback, + CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, + CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, + pair_lock_type lock_type, + void *read_extraargs, + UNLOCKERS unlockers + ) +// 
See cachetable/cachetable.h. +{ + CACHETABLE ct = cf->cachetable; + assert(lock_type == PL_READ || + lock_type == PL_WRITE_CHEAP || + lock_type == PL_WRITE_EXPENSIVE + ); +try_again: + ct->list.pair_lock_by_fullhash(fullhash); + PAIR p = ct->list.find_pair(cf, key, fullhash); + if (p == NULL) { + toku::context fetch_ctx(CTX_FULL_FETCH); + + // Not found + ct->list.pair_unlock_by_fullhash(fullhash); + ct->list.write_list_lock(); + ct->list.pair_lock_by_fullhash(fullhash); + p = ct->list.find_pair(cf, key, fullhash); + if (p != NULL) { + // we just did another search with the write list lock and + // found the pair this means that in between our + // releasing the read list lock and grabbing the write list lock, + // another thread snuck in and inserted the PAIR into + // the cachetable. For simplicity, we just return + // to the top and restart the function + ct->list.write_list_unlock(); + ct->list.pair_unlock_by_fullhash(fullhash); + goto try_again; + } + + p = cachetable_insert_at( + ct, + cf, + key, + zero_value, + fullhash, + zero_attr, + write_callback, + CACHETABLE_CLEAN + ); + assert(p); + // grab expensive write lock, because we are about to do a fetch + // off disk + // No one can access this pair because + // we hold the write list lock and we just injected + // the pair into the cachetable. Therefore, this lock acquisition + // will not block. + p->value_rwlock.write_lock(true); + pair_unlock(p); + run_unlockers(unlockers); // we hold the write list_lock. 
+ ct->list.write_list_unlock(); + + // at this point, only the pair is pinned, + // and no pair mutex held, and + // no list lock is held + uint64_t t0 = get_tnow(); + cachetable_fetch_pair(ct, cf, p, fetch_callback, read_extraargs, false); + cachetable_miss++; + cachetable_misstime += get_tnow() - t0; + + if (ct->ev.should_client_thread_sleep()) { + ct->ev.wait_for_cache_pressure_to_subside(); + } + if (ct->ev.should_client_wake_eviction_thread()) { + ct->ev.signal_eviction_thread(); + } + + return TOKUDB_TRY_AGAIN; + } + else { + int r = maybe_pin_pair(p, lock_type, unlockers); + if (r == TOKUDB_TRY_AGAIN) { + return TOKUDB_TRY_AGAIN; + } + assert_zero(r); + + if (lock_type != PL_READ) { + bool checkpoint_pending = get_checkpoint_pending(p, &ct->list); + write_locked_pair_for_checkpoint(ct, p, checkpoint_pending); + } + + // At this point, we have pinned the PAIR + // and resolved its checkpointing. The pair's + // mutex is not held. The read list lock IS held. Before + // returning the PAIR to the user, we must + // still check for partial fetch + bool partial_fetch_required = pf_req_callback(p->value_data,read_extraargs); + if (partial_fetch_required) { + toku::context fetch_ctx(CTX_PARTIAL_FETCH); + + run_unlockers(unlockers); + + // we are now getting an expensive write lock, because we + // are doing a partial fetch. So, if we previously have + // either a read lock or a cheap write lock, we need to + // release and reacquire the correct lock type + if (lock_type == PL_READ) { + pair_lock(p); + p->value_rwlock.read_unlock(); + p->value_rwlock.write_lock(true); + pair_unlock(p); + } + else if (lock_type == PL_WRITE_CHEAP) { + pair_lock(p); + p->value_rwlock.write_unlock(); + p->value_rwlock.write_lock(true); + pair_unlock(p); + } + + // Now wait for the I/O to occur. 
+ partial_fetch_required = pf_req_callback(p->value_data,read_extraargs); + if (partial_fetch_required) { + do_partial_fetch(ct, cf, p, pf_callback, read_extraargs, false); + } + else { + pair_lock(p); + p->value_rwlock.write_unlock(); + pair_unlock(p); + } + + if (ct->ev.should_client_thread_sleep()) { + ct->ev.wait_for_cache_pressure_to_subside(); + } + if (ct->ev.should_client_wake_eviction_thread()) { + ct->ev.signal_eviction_thread(); + } + + return TOKUDB_TRY_AGAIN; + } + else { + *value = p->value_data; + return 0; + } + } + // We should not get here. Above code should hit a return in all cases. + abort(); +} + +struct cachefile_prefetch_args { + PAIR p; + CACHETABLE_FETCH_CALLBACK fetch_callback; + void* read_extraargs; +}; + +struct cachefile_partial_prefetch_args { + PAIR p; + CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback; + void *read_extraargs; +}; + +// Worker thread function to read a pair from a cachefile to memory +static void cachetable_reader(void* extra) { + struct cachefile_prefetch_args* cpargs = (struct cachefile_prefetch_args*)extra; + CACHEFILE cf = cpargs->p->cachefile; + CACHETABLE ct = cf->cachetable; + cachetable_fetch_pair( + ct, + cpargs->p->cachefile, + cpargs->p, + cpargs->fetch_callback, + cpargs->read_extraargs, + false + ); + bjm_remove_background_job(cf->bjm); + toku_free(cpargs); +} + +static void cachetable_partial_reader(void* extra) { + struct cachefile_partial_prefetch_args *cpargs = (struct cachefile_partial_prefetch_args*)extra; + CACHEFILE cf = cpargs->p->cachefile; + CACHETABLE ct = cf->cachetable; + do_partial_fetch(ct, cpargs->p->cachefile, cpargs->p, cpargs->pf_callback, cpargs->read_extraargs, false); + bjm_remove_background_job(cf->bjm); + toku_free(cpargs); +} + +int toku_cachefile_prefetch(CACHEFILE cf, CACHEKEY key, uint32_t fullhash, + CACHETABLE_WRITE_CALLBACK write_callback, + CACHETABLE_FETCH_CALLBACK fetch_callback, + CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, + 
CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, + void *read_extraargs, + bool *doing_prefetch) +// Effect: See the documentation for this function in cachetable/cachetable.h +{ + int r = 0; + PAIR p = NULL; + if (doing_prefetch) { + *doing_prefetch = false; + } + CACHETABLE ct = cf->cachetable; + // if cachetable has too much data, don't bother prefetching + if (ct->ev.should_client_thread_sleep()) { + goto exit; + } + ct->list.pair_lock_by_fullhash(fullhash); + // lookup + p = ct->list.find_pair(cf, key, fullhash); + // if not found then create a pair and fetch it + if (p == NULL) { + cachetable_prefetches++; + ct->list.pair_unlock_by_fullhash(fullhash); + ct->list.write_list_lock(); + ct->list.pair_lock_by_fullhash(fullhash); + p = ct->list.find_pair(cf, key, fullhash); + if (p != NULL) { + ct->list.write_list_unlock(); + goto found_pair; + } + + r = bjm_add_background_job(cf->bjm); + assert_zero(r); + p = cachetable_insert_at( + ct, + cf, + key, + zero_value, + fullhash, + zero_attr, + write_callback, + CACHETABLE_CLEAN + ); + assert(p); + p->value_rwlock.write_lock(true); + pair_unlock(p); + ct->list.write_list_unlock(); + + struct cachefile_prefetch_args *MALLOC(cpargs); + cpargs->p = p; + cpargs->fetch_callback = fetch_callback; + cpargs->read_extraargs = read_extraargs; + toku_kibbutz_enq(ct->ct_kibbutz, cachetable_reader, cpargs); + if (doing_prefetch) { + *doing_prefetch = true; + } + goto exit; + } + +found_pair: + // at this point, p is found, pair's mutex is grabbed, and + // no list lock is held + // TODO(leif): should this also just go ahead and wait if all there + // are to wait for are readers? 
+ if (p->value_rwlock.try_write_lock(true)) { + // nobody else is using the node, so we should go ahead and prefetch + pair_touch(p); + pair_unlock(p); + bool partial_fetch_required = pf_req_callback(p->value_data, read_extraargs); + + if (partial_fetch_required) { + r = bjm_add_background_job(cf->bjm); + assert_zero(r); + struct cachefile_partial_prefetch_args *MALLOC(cpargs); + cpargs->p = p; + cpargs->pf_callback = pf_callback; + cpargs->read_extraargs = read_extraargs; + toku_kibbutz_enq(ct->ct_kibbutz, cachetable_partial_reader, cpargs); + if (doing_prefetch) { + *doing_prefetch = true; + } + } + else { + pair_lock(p); + p->value_rwlock.write_unlock(); + pair_unlock(p); + } + } + else { + // Couldn't get the write lock cheaply + pair_unlock(p); + } +exit: + return 0; +} + +void toku_cachefile_verify (CACHEFILE cf) { + toku_cachetable_verify(cf->cachetable); +} + +void toku_cachetable_verify (CACHETABLE ct) { + ct->list.verify(); +} + + + +struct pair_flush_for_close{ + PAIR p; + BACKGROUND_JOB_MANAGER bjm; +}; + +static void cachetable_flush_pair_for_close(void* extra) { + struct pair_flush_for_close *CAST_FROM_VOIDP(args, extra); + PAIR p = args->p; + CACHEFILE cf = p->cachefile; + CACHETABLE ct = cf->cachetable; + PAIR_ATTR attr; + cachetable_only_write_locked_data( + &ct->ev, + p, + false, // not for a checkpoint, as we assert above + &attr, + false // not a clone + ); + p->dirty = CACHETABLE_CLEAN; + bjm_remove_background_job(args->bjm); + toku_free(args); +} + + +static void flush_pair_for_close_on_background_thread( + PAIR p, + BACKGROUND_JOB_MANAGER bjm, + CACHETABLE ct + ) +{ + pair_lock(p); + assert(p->value_rwlock.users() == 0); + assert(nb_mutex_users(&p->disk_nb_mutex) == 0); + assert(!p->cloned_value_data); + if (p->dirty == CACHETABLE_DIRTY) { + int r = bjm_add_background_job(bjm); + assert_zero(r); + struct pair_flush_for_close *XMALLOC(args); + args->p = p; + args->bjm = bjm; + toku_kibbutz_enq(ct->ct_kibbutz, cachetable_flush_pair_for_close, 
args); + } + pair_unlock(p); +} + +static void remove_pair_for_close(PAIR p, CACHETABLE ct, bool completely) { + pair_lock(p); + assert(p->value_rwlock.users() == 0); + assert(nb_mutex_users(&p->disk_nb_mutex) == 0); + assert(!p->cloned_value_data); + assert(p->dirty == CACHETABLE_CLEAN); + assert(p->refcount == 0); + if (completely) { + cachetable_remove_pair(&ct->list, &ct->ev, p); + pair_unlock(p); + // TODO: Eventually, we should not hold the write list lock during free + cachetable_free_pair(p); + } + else { + // if we are not evicting completely, + // we only want to remove the PAIR from the cachetable, + // that is, remove from the hashtable and various linked + // list, but we will keep the PAIRS and the linked list + // in the cachefile intact, as they will be cached away + // in case an open comes soon. + ct->list.evict_from_cachetable(p); + pair_unlock(p); + } +} + +// helper function for cachetable_flush_cachefile, which happens on a close +// writes out the dirty pairs on background threads and returns when +// the writing is done +static void write_dirty_pairs_for_close(CACHETABLE ct, CACHEFILE cf) { + BACKGROUND_JOB_MANAGER bjm = NULL; + bjm_init(&bjm); + ct->list.write_list_lock(); // TODO: (Zardosht), verify that this lock is unnecessary to take here + PAIR p = NULL; + // write out dirty PAIRs + uint32_t i; + if (cf) { + for (i = 0, p = cf->cf_head; + i < cf->num_pairs; + i++, p = p->cf_next) + { + flush_pair_for_close_on_background_thread(p, bjm, ct); + } + } + else { + for (i = 0, p = ct->list.m_checkpoint_head; + i < ct->list.m_n_in_table; + i++, p = p->clock_next) + { + flush_pair_for_close_on_background_thread(p, bjm, ct); + } + } + ct->list.write_list_unlock(); + bjm_wait_for_jobs_to_finish(bjm); + bjm_destroy(bjm); +} + +static void remove_all_pairs_for_close(CACHETABLE ct, CACHEFILE cf, bool evict_completely) { + ct->list.write_list_lock(); + if (cf) { + if (evict_completely) { + // if we are evicting completely, then the PAIRs will + // be 
removed from the linked list managed by the + // cachefile, so this while loop works + while (cf->num_pairs > 0) { + PAIR p = cf->cf_head; + remove_pair_for_close(p, ct, evict_completely); + } + } + else { + // on the other hand, if we are not evicting completely, + // then the cachefile's linked list stays intact, and we must + // iterate like this. + for (PAIR p = cf->cf_head; p; p = p->cf_next) { + remove_pair_for_close(p, ct, evict_completely); + } + } + } + else { + while (ct->list.m_n_in_table > 0) { + PAIR p = ct->list.m_checkpoint_head; + // if there is no cachefile, then we better + // be evicting completely because we have no + // cachefile to save the PAIRs to. At least, + // we have no guarantees that the cachefile + // will remain good + invariant(evict_completely); + remove_pair_for_close(p, ct, true); + } + } + ct->list.write_list_unlock(); +} + +static void verify_cachefile_flushed(CACHETABLE ct UU(), CACHEFILE cf UU()) { +#ifdef TOKU_DEBUG_PARANOID + // assert here that cachefile is flushed by checking + // pair_list and finding no pairs belonging to this cachefile + // Make a list of pairs that belong to this cachefile. + if (cf) { + ct->list.write_list_lock(); + // assert here that cachefile is flushed by checking + // pair_list and finding no pairs belonging to this cachefile + // Make a list of pairs that belong to this cachefile. + uint32_t i; + PAIR p = NULL; + for (i = 0, p = ct->list.m_checkpoint_head; + i < ct->list.m_n_in_table; + i++, p = p->clock_next) + { + assert(p->cachefile != cf); + } + ct->list.write_list_unlock(); + } +#endif +} + +// Flush (write to disk) all of the pairs that belong to a cachefile (or all pairs if +// the cachefile is NULL. +// Must be holding cachetable lock on entry. +// +// This function assumes that no client thread is accessing or +// trying to access the cachefile while this function is executing. +// This implies no client thread will be trying to lock any nodes +// belonging to the cachefile. 
+// +// This function also assumes that the cachefile is not in the process +// of being used by a checkpoint. If a checkpoint is currently happening, +// it does NOT include this cachefile. +// +static void cachetable_flush_cachefile(CACHETABLE ct, CACHEFILE cf, bool evict_completely) { + // + // Because work on a kibbutz is always done by the client thread, + // and this function assumes that no client thread is doing any work + // on the cachefile, we assume that no client thread will be adding jobs + // to this cachefile's kibbutz. + // + // The caller of this function must ensure that there are + // no jobs added to the kibbutz. This implies that the only work other + // threads may be doing is work by the writer threads. + // + // first write out dirty PAIRs + write_dirty_pairs_for_close(ct, cf); + + // now that everything is clean, get rid of everything + remove_all_pairs_for_close(ct, cf, evict_completely); + + verify_cachefile_flushed(ct, cf); +} + +/* Requires that no locks be held that are used by the checkpoint logic */ +void +toku_cachetable_minicron_shutdown(CACHETABLE ct) { + int r = ct->cp.shutdown(); + assert(r==0); + ct->cl.destroy(); +} + +void toku_cachetable_prepare_close(CACHETABLE ct UU()) { + extern bool toku_serialize_in_parallel; + toku_serialize_in_parallel = true; +} + +/* Requires that it all be flushed. 
*/ +void toku_cachetable_close (CACHETABLE *ctp) { + CACHETABLE ct = *ctp; + ct->cp.destroy(); + ct->cl.destroy(); + ct->cf_list.free_stale_data(&ct->ev); + cachetable_flush_cachefile(ct, NULL, true); + ct->ev.destroy(); + ct->list.destroy(); + ct->cf_list.destroy(); + + if (ct->client_kibbutz) + toku_kibbutz_destroy(ct->client_kibbutz); + if (ct->ct_kibbutz) + toku_kibbutz_destroy(ct->ct_kibbutz); + if (ct->checkpointing_kibbutz) + toku_kibbutz_destroy(ct->checkpointing_kibbutz); + toku_free(ct->env_dir); + toku_free(ct); + *ctp = 0; +} + +static PAIR test_get_pair(CACHEFILE cachefile, CACHEKEY key, uint32_t fullhash, bool have_ct_lock) { + CACHETABLE ct = cachefile->cachetable; + + if (!have_ct_lock) { + ct->list.read_list_lock(); + } + + PAIR p = ct->list.find_pair(cachefile, key, fullhash); + assert(p != NULL); + if (!have_ct_lock) { + ct->list.read_list_unlock(); + } + return p; +} + +//test-only wrapper +int toku_test_cachetable_unpin(CACHEFILE cachefile, CACHEKEY key, uint32_t fullhash, enum cachetable_dirty dirty, PAIR_ATTR attr) { + // By default we don't have the lock + PAIR p = test_get_pair(cachefile, key, fullhash, false); + return toku_cachetable_unpin(cachefile, p, dirty, attr); // assume read lock is not grabbed, and that it is a write lock +} + +//test-only wrapper +int toku_test_cachetable_unpin_ct_prelocked_no_flush(CACHEFILE cachefile, CACHEKEY key, uint32_t fullhash, enum cachetable_dirty dirty, PAIR_ATTR attr) { + // We hold the cachetable mutex. 
+ PAIR p = test_get_pair(cachefile, key, fullhash, true); + return toku_cachetable_unpin_ct_prelocked_no_flush(cachefile, p, dirty, attr); +} + +//test-only wrapper +int toku_test_cachetable_unpin_and_remove ( + CACHEFILE cachefile, + CACHEKEY key, + CACHETABLE_REMOVE_KEY remove_key, + void* remove_key_extra) +{ + uint32_t fullhash = toku_cachetable_hash(cachefile, key); + PAIR p = test_get_pair(cachefile, key, fullhash, false); + return toku_cachetable_unpin_and_remove(cachefile, p, remove_key, remove_key_extra); +} + +int toku_cachetable_unpin_and_remove ( + CACHEFILE cachefile, + PAIR p, + CACHETABLE_REMOVE_KEY remove_key, + void* remove_key_extra + ) +{ + invariant_notnull(p); + int r = ENOENT; + CACHETABLE ct = cachefile->cachetable; + + p->dirty = CACHETABLE_CLEAN; // clear the dirty bit. We're just supposed to remove it. + // grab disk_nb_mutex to ensure any background thread writing + // out a cloned value completes + pair_lock(p); + assert(p->value_rwlock.writers()); + nb_mutex_lock(&p->disk_nb_mutex, p->mutex); + pair_unlock(p); + assert(p->cloned_value_data == NULL); + + // + // take care of key removal + // + ct->list.write_list_lock(); + ct->list.read_pending_cheap_lock(); + bool for_checkpoint = p->checkpoint_pending; + // now let's wipe out the pending bit, because we are + // removing the PAIR + p->checkpoint_pending = false; + + // For the PAIR to not be picked by the + // cleaner thread, we mark the cachepressure_size to be 0 + // (This is redundant since we have the write_list_lock) + // This should not be an issue because we call + // cachetable_remove_pair before + // releasing the cachetable lock. 
+ // + CACHEKEY key_to_remove = p->key; + p->attr.cache_pressure_size = 0; + // + // callback for removing the key + // for FTNODEs, this leads to calling + // toku_free_blocknum + // + if (remove_key) { + remove_key( + &key_to_remove, + for_checkpoint, + remove_key_extra + ); + } + ct->list.read_pending_cheap_unlock(); + + pair_lock(p); + p->value_rwlock.write_unlock(); + nb_mutex_unlock(&p->disk_nb_mutex); + // + // As of Clayface (6.5), only these threads may be + // blocked waiting to lock this PAIR: + // - the checkpoint thread (because a checkpoint is in progress + // and the PAIR was in the list of pending pairs) + // - a client thread running get_and_pin_nonblocking, who + // ran unlockers, then waited on the PAIR lock. + // While waiting on a PAIR lock, another thread comes in, + // locks the PAIR, and ends up calling unpin_and_remove, + // all while get_and_pin_nonblocking is waiting on the PAIR lock. + // We did not realize this at first, which caused bug #4357 + // The following threads CANNOT be blocked waiting on + // the PAIR lock: + // - a thread trying to run eviction via run_eviction. + // That cannot happen because run_eviction only + // attempts to lock PAIRS that are not locked, and this PAIR + // is locked. + // - cleaner thread, for the same reason as a thread running + // eviction + // - client thread doing a normal get_and_pin. The client is smart + // enough to not try to lock a PAIR that another client thread + // is trying to unpin and remove. Note that this includes work + // done on kibbutzes. + // - writer thread. Writer threads do not grab PAIR locks. They + // get PAIR locks transferred to them by client threads. + // + + // first thing we do is remove the PAIR from the various + // cachetable data structures, so no other thread can possibly + // access it. We do not want to risk some other thread + // trying to lock this PAIR if we release the write list lock + // below. 
If some thread is already waiting on the lock, + // then we let that thread grab the lock and finish, but + // we don't want any NEW threads to try to grab the PAIR + // lock. + // + // Because we call cachetable_remove_pair and wait, + // the threads that may be waiting + // on this PAIR lock must be careful to do NOTHING with the PAIR + // As per our analysis above, we only need + // to make sure the checkpoint thread and get_and_pin_nonblocking do + // nothing, and looking at those functions, it is clear they do nothing. + // + cachetable_remove_pair(&ct->list, &ct->ev, p); + ct->list.write_list_unlock(); + if (p->refcount > 0) { + pair_wait_for_ref_release_unlocked(p); + } + if (p->value_rwlock.users() > 0) { + // Need to wait for everyone else to leave + // This write lock will be granted only after all waiting + // threads are done. + p->value_rwlock.write_lock(true); + assert(p->refcount == 0); + assert(p->value_rwlock.users() == 1); // us + assert(!p->checkpoint_pending); + assert(p->attr.cache_pressure_size == 0); + p->value_rwlock.write_unlock(); + } + // just a sanity check + assert(nb_mutex_users(&p->disk_nb_mutex) == 0); + assert(p->cloned_value_data == NULL); + //Remove pair. 
+ pair_unlock(p); + cachetable_free_pair(p); + r = 0; + return r; +} + +int set_filenum_in_array(const FT &ft, const uint32_t index, FILENUM *const array); +int set_filenum_in_array(const FT &ft, const uint32_t index, FILENUM *const array) { + array[index] = toku_cachefile_filenum(ft->cf); + return 0; +} + +static int log_open_txn (TOKUTXN txn, void* extra) { + int r; + checkpointer* cp = (checkpointer *)extra; + TOKULOGGER logger = txn->logger; + FILENUMS open_filenums; + uint32_t num_filenums = txn->open_fts.size(); + FILENUM array[num_filenums]; + if (toku_txn_is_read_only(txn)) { + goto cleanup; + } + else { + cp->increment_num_txns(); + } + + open_filenums.num = num_filenums; + open_filenums.filenums = array; + //Fill in open_filenums + r = txn->open_fts.iterate(array); + invariant(r==0); + switch (toku_txn_get_state(txn)) { + case TOKUTXN_LIVE:{ + toku_log_xstillopen(logger, NULL, 0, txn, + toku_txn_get_txnid(txn), + toku_txn_get_txnid(toku_logger_txn_parent(txn)), + txn->roll_info.rollentry_raw_count, + open_filenums, + txn->force_fsync_on_commit, + txn->roll_info.num_rollback_nodes, + txn->roll_info.num_rollentries, + txn->roll_info.spilled_rollback_head, + txn->roll_info.spilled_rollback_tail, + txn->roll_info.current_rollback); + goto cleanup; + } + case TOKUTXN_PREPARING: { + TOKU_XA_XID xa_xid; + toku_txn_get_prepared_xa_xid(txn, &xa_xid); + toku_log_xstillopenprepared(logger, NULL, 0, txn, + toku_txn_get_txnid(txn), + &xa_xid, + txn->roll_info.rollentry_raw_count, + open_filenums, + txn->force_fsync_on_commit, + txn->roll_info.num_rollback_nodes, + txn->roll_info.num_rollentries, + txn->roll_info.spilled_rollback_head, + txn->roll_info.spilled_rollback_tail, + txn->roll_info.current_rollback); + goto cleanup; + } + case TOKUTXN_RETIRED: + case TOKUTXN_COMMITTING: + case TOKUTXN_ABORTING: { + assert(0); + } + } + // default is an error + assert(0); +cleanup: + return 0; +} + +// Requires: All three checkpoint-relevant locks must be held (see 
checkpoint.c). +// Algorithm: Write a checkpoint record to the log, noting the LSN of that record. +// Use the begin_checkpoint callback to take necessary snapshots (header, btt) +// Mark every dirty node as "pending." ("Pending" means that the node must be +// written to disk before it can be modified.) +void toku_cachetable_begin_checkpoint (CHECKPOINTER cp, TOKULOGGER UU(logger)) { + cp->begin_checkpoint(); +} + + +// This is used by the cachetable_race test. +static volatile int toku_checkpointing_user_data_status = 0; +static void toku_cachetable_set_checkpointing_user_data_status (int v) { + toku_checkpointing_user_data_status = v; +} +int toku_cachetable_get_checkpointing_user_data_status (void) { + return toku_checkpointing_user_data_status; +} + +// Requires: The big checkpoint lock must be held (see checkpoint.c). +// Algorithm: Write all pending nodes to disk +// Use checkpoint callback to write snapshot information to disk (header, btt) +// Use end_checkpoint callback to fsync dictionary and log, and to free unused blocks +// Note: If testcallback is null (for testing purposes only), call it after writing dictionary but before writing log +void toku_cachetable_end_checkpoint(CHECKPOINTER cp, TOKULOGGER UU(logger), + void (*testcallback_f)(void*), void* testextra) { + cp->end_checkpoint(testcallback_f, testextra); +} + +TOKULOGGER toku_cachefile_logger (CACHEFILE cf) { + return cf->cachetable->cp.get_logger(); +} + +FILENUM toku_cachefile_filenum (CACHEFILE cf) { + return cf->filenum; +} + +// debug functions + +int toku_cachetable_assert_all_unpinned (CACHETABLE ct) { + uint32_t i; + int some_pinned=0; + ct->list.read_list_lock(); + for (i=0; ilist.m_table_size; i++) { + PAIR p; + for (p=ct->list.m_table[i]; p; p=p->hash_chain) { + pair_lock(p); + if (p->value_rwlock.users()) { + //printf("%s:%d pinned: %" PRId64 " (%p)\n", __FILE__, __LINE__, p->key.b, p->value_data); + some_pinned=1; + } + pair_unlock(p); + } + } + ct->list.read_list_unlock(); + 
return some_pinned; +} + +int toku_cachefile_count_pinned (CACHEFILE cf, int print_them) { + assert(cf != NULL); + int n_pinned=0; + CACHETABLE ct = cf->cachetable; + ct->list.read_list_lock(); + + // Iterate over all the pairs to find pairs specific to the + // given cachefile. + for (uint32_t i = 0; i < ct->list.m_table_size; i++) { + for (PAIR p = ct->list.m_table[i]; p; p = p->hash_chain) { + if (p->cachefile == cf) { + pair_lock(p); + if (p->value_rwlock.users()) { + if (print_them) { + printf("%s:%d pinned: %" PRId64 " (%p)\n", + __FILE__, + __LINE__, + p->key.b, + p->value_data); + } + n_pinned++; + } + pair_unlock(p); + } + } + } + + ct->list.read_list_unlock(); + return n_pinned; +} + +void toku_cachetable_print_state (CACHETABLE ct) { + uint32_t i; + ct->list.read_list_lock(); + for (i=0; ilist.m_table_size; i++) { + PAIR p = ct->list.m_table[i]; + if (p != 0) { + pair_lock(p); + printf("t[%u]=", i); + for (p=ct->list.m_table[i]; p; p=p->hash_chain) { + printf(" {%" PRId64 ", %p, dirty=%d, pin=%d, size=%ld}", p->key.b, p->cachefile, (int) p->dirty, p->value_rwlock.users(), p->attr.size); + } + printf("\n"); + pair_unlock(p); + } + } + ct->list.read_list_unlock(); +} + +void toku_cachetable_get_state (CACHETABLE ct, int *num_entries_ptr, int *hash_size_ptr, long *size_current_ptr, long *size_limit_ptr) { + ct->list.get_state(num_entries_ptr, hash_size_ptr); + ct->ev.get_state(size_current_ptr, size_limit_ptr); +} + +int toku_cachetable_get_key_state (CACHETABLE ct, CACHEKEY key, CACHEFILE cf, void **value_ptr, + int *dirty_ptr, long long *pin_ptr, long *size_ptr) { + int r = -1; + uint32_t fullhash = toku_cachetable_hash(cf, key); + ct->list.read_list_lock(); + PAIR p = ct->list.find_pair(cf, key, fullhash); + if (p) { + pair_lock(p); + if (value_ptr) + *value_ptr = p->value_data; + if (dirty_ptr) + *dirty_ptr = p->dirty; + if (pin_ptr) + *pin_ptr = p->value_rwlock.users(); + if (size_ptr) + *size_ptr = p->attr.size; + r = 0; + pair_unlock(p); + } + 
ct->list.read_list_unlock(); + return r; +} + +void +toku_cachefile_set_userdata (CACHEFILE cf, + void *userdata, + void (*log_fassociate_during_checkpoint)(CACHEFILE, void*), + void (*close_userdata)(CACHEFILE, int, void*, bool, LSN), + void (*free_userdata)(CACHEFILE, void*), + void (*checkpoint_userdata)(CACHEFILE, int, void*), + void (*begin_checkpoint_userdata)(LSN, void*), + void (*end_checkpoint_userdata)(CACHEFILE, int, void*), + void (*note_pin_by_checkpoint)(CACHEFILE, void*), + void (*note_unpin_by_checkpoint)(CACHEFILE, void*)) { + cf->userdata = userdata; + cf->log_fassociate_during_checkpoint = log_fassociate_during_checkpoint; + cf->close_userdata = close_userdata; + cf->free_userdata = free_userdata; + cf->checkpoint_userdata = checkpoint_userdata; + cf->begin_checkpoint_userdata = begin_checkpoint_userdata; + cf->end_checkpoint_userdata = end_checkpoint_userdata; + cf->note_pin_by_checkpoint = note_pin_by_checkpoint; + cf->note_unpin_by_checkpoint = note_unpin_by_checkpoint; +} + +void *toku_cachefile_get_userdata(CACHEFILE cf) { + return cf->userdata; +} + +CACHETABLE +toku_cachefile_get_cachetable(CACHEFILE cf) { + return cf->cachetable; +} + +//Only called by ft_end_checkpoint +//Must have access to cf->fd (must be protected) +void toku_cachefile_fsync(CACHEFILE cf) { + toku_file_fsync(cf->fd); +} + +// Make it so when the cachefile closes, the underlying file is unlinked +void toku_cachefile_unlink_on_close(CACHEFILE cf) { + assert(!cf->unlink_on_close); + cf->unlink_on_close = true; +} + +// is this cachefile marked as unlink on close? +bool toku_cachefile_is_unlink_on_close(CACHEFILE cf) { + return cf->unlink_on_close; +} + +uint64_t toku_cachefile_size(CACHEFILE cf) { + int64_t file_size; + int fd = toku_cachefile_get_fd(cf); + int r = toku_os_get_file_size(fd, &file_size); + assert_zero(r); + return file_size; +} + +char * +toku_construct_full_name(int count, ...) 
{ + va_list ap; + char *name = NULL; + size_t n = 0; + int i; + va_start(ap, count); + for (i=0; ienv_dir, fname_in_env); +} + +static long +cleaner_thread_rate_pair(PAIR p) +{ + return p->attr.cache_pressure_size; +} + +static int const CLEANER_N_TO_CHECK = 8; + +int toku_cleaner_thread_for_test (CACHETABLE ct) { + return ct->cl.run_cleaner(); +} + +int toku_cleaner_thread (void *cleaner_v) { + cleaner* cl = (cleaner *) cleaner_v; + assert(cl); + return cl->run_cleaner(); +} + +///////////////////////////////////////////////////////////////////////// +// +// cleaner methods +// +ENSURE_POD(cleaner); + +int cleaner::init(uint32_t _cleaner_iterations, pair_list* _pl, CACHETABLE _ct) { + // default is no cleaner, for now + m_cleaner_cron_init = false; + int r = toku_minicron_setup(&m_cleaner_cron, 0, toku_cleaner_thread, this); + if (r == 0) { + m_cleaner_cron_init = true; + } + TOKU_VALGRIND_HG_DISABLE_CHECKING(&m_cleaner_iterations, sizeof m_cleaner_iterations); + m_cleaner_iterations = _cleaner_iterations; + m_pl = _pl; + m_ct = _ct; + m_cleaner_init = true; + return r; +} + +// this function is allowed to be called multiple times +void cleaner::destroy(void) { + if (!m_cleaner_init) { + return; + } + if (m_cleaner_cron_init && !toku_minicron_has_been_shutdown(&m_cleaner_cron)) { + // for test code only, production code uses toku_cachetable_minicron_shutdown() + int r = toku_minicron_shutdown(&m_cleaner_cron); + assert(r==0); + } +} + +uint32_t cleaner::get_iterations(void) { + return m_cleaner_iterations; +} + +void cleaner::set_iterations(uint32_t new_iterations) { + m_cleaner_iterations = new_iterations; +} + +uint32_t cleaner::get_period_unlocked(void) { + return toku_minicron_get_period_in_seconds_unlocked(&m_cleaner_cron); +} + +// +// Sets how often the cleaner thread will run, in seconds +// +void cleaner::set_period(uint32_t new_period) { + toku_minicron_change_period(&m_cleaner_cron, new_period*1000); +} + +// Effect: runs a cleaner. 
+// +// We look through some number of nodes, the first N that we see which are +// unlocked and are not involved in a cachefile flush, pick one, and call +// the cleaner callback. While we're picking a node, we have the +// cachetable lock the whole time, so we don't need any extra +// synchronization. Once we have one we want, we lock it and notify the +// cachefile that we're doing some background work (so a flush won't +// start). At this point, we can safely unlock the cachetable, do the +// work (callback), and unlock/release our claim to the cachefile. +int cleaner::run_cleaner(void) { + toku::context cleaner_ctx(CTX_CLEANER); + + int r; + uint32_t num_iterations = this->get_iterations(); + for (uint32_t i = 0; i < num_iterations; ++i) { + cleaner_executions++; + m_pl->read_list_lock(); + PAIR best_pair = NULL; + int n_seen = 0; + long best_score = 0; + const PAIR first_pair = m_pl->m_cleaner_head; + if (first_pair == NULL) { + // nothing in the cachetable, just get out now + m_pl->read_list_unlock(); + break; + } + // here we select a PAIR for cleaning + // look at some number of PAIRS, and + // pick what we think is the best one for cleaning + //***** IMPORTANT ****** + // we MUST not pick a PAIR whose rating is 0. We have + // numerous assumptions in other parts of the code that + // this is the case: + // - this is how rollback nodes and leaf nodes are not selected for cleaning + // - this is how a thread that is calling unpin_and_remove will prevent + // the cleaner thread from picking its PAIR (see comments in that function) + do { + // + // We are already holding onto best_pair, if we run across a pair that + // has the same mutex due to a collision in the hashtable, we need + // to be careful. + // + if (best_pair && m_pl->m_cleaner_head->mutex == best_pair->mutex) { + // Advance the cleaner head. 
+ long score = 0; + // only bother with this pair if it has no current users + if (m_pl->m_cleaner_head->value_rwlock.users() == 0) { + score = cleaner_thread_rate_pair(m_pl->m_cleaner_head); + if (score > best_score) { + best_score = score; + best_pair = m_pl->m_cleaner_head; + } + } + m_pl->m_cleaner_head = m_pl->m_cleaner_head->clock_next; + continue; + } + pair_lock(m_pl->m_cleaner_head); + if (m_pl->m_cleaner_head->value_rwlock.users() > 0) { + pair_unlock(m_pl->m_cleaner_head); + } + else { + n_seen++; + long score = 0; + score = cleaner_thread_rate_pair(m_pl->m_cleaner_head); + if (score > best_score) { + best_score = score; + // Since we found a new best pair, we need to + // free the old best pair. + if (best_pair) { + pair_unlock(best_pair); + } + best_pair = m_pl->m_cleaner_head; + } + else { + pair_unlock(m_pl->m_cleaner_head); + } + } + // Advance the cleaner head. + m_pl->m_cleaner_head = m_pl->m_cleaner_head->clock_next; + } while (m_pl->m_cleaner_head != first_pair && n_seen < CLEANER_N_TO_CHECK); + m_pl->read_list_unlock(); + + // + // at this point, if we have found a PAIR for cleaning, + // that is, best_pair != NULL, we do the clean + // + // if best_pair !=NULL, then best_pair->mutex is held + // no list lock is held + // + if (best_pair) { + CACHEFILE cf = best_pair->cachefile; + // try to add a background job to the manager + // if we can't, that means the cachefile is flushing, so + // we simply continue the for loop and this iteration + // becomes a no-op + r = bjm_add_background_job(cf->bjm); + if (r) { + pair_unlock(best_pair); + continue; + } + best_pair->value_rwlock.write_lock(true); + pair_unlock(best_pair); + // verify a key assumption. 
+ assert(cleaner_thread_rate_pair(best_pair) > 0); + // check the checkpoint_pending bit + m_pl->read_pending_cheap_lock(); + bool checkpoint_pending = best_pair->checkpoint_pending; + best_pair->checkpoint_pending = false; + m_pl->read_pending_cheap_unlock(); + if (checkpoint_pending) { + write_locked_pair_for_checkpoint(m_ct, best_pair, true); + } + + bool cleaner_callback_called = false; + + // it's theoretically possible that after writing a PAIR for checkpoint, the + // PAIR's heuristic tells us nothing needs to be done. It is not possible + // in Dr. Noga, but unit tests verify this behavior works properly. + if (cleaner_thread_rate_pair(best_pair) > 0) { + r = best_pair->cleaner_callback(best_pair->value_data, + best_pair->key, + best_pair->fullhash, + best_pair->write_extraargs); + assert_zero(r); + cleaner_callback_called = true; + } + + // The cleaner callback must have unlocked the pair, so we + // don't need to unlock it if the cleaner callback is called. + if (!cleaner_callback_called) { + pair_lock(best_pair); + best_pair->value_rwlock.write_unlock(); + pair_unlock(best_pair); + } + // We need to make sure the cachefile sticks around so a close + // can't come destroy it. That's the purpose of this + // "add/remove_background_job" business, which means the + // cachefile is still valid here, even though the cleaner + // callback unlocks the pair. + bjm_remove_background_job(cf->bjm); + } + else { + // If we didn't find anything this time around the cachetable, + // we probably won't find anything if we run around again, so + // just break out from the for-loop now and + // we'll try again when the cleaner thread runs again. 
+ break; + } + } + return 0; +} + +static_assert(std::is_pod::value, "pair_list isn't POD"); + +const uint32_t INITIAL_PAIR_LIST_SIZE = 1<<20; +uint32_t PAIR_LOCK_SIZE = 1<<20; + +void toku_pair_list_set_lock_size(uint32_t num_locks) { + PAIR_LOCK_SIZE = num_locks; +} + +static void evict_pair_from_cachefile(PAIR p) { + CACHEFILE cf = p->cachefile; + if (p->cf_next) { + p->cf_next->cf_prev = p->cf_prev; + } + if (p->cf_prev) { + p->cf_prev->cf_next = p->cf_next; + } + else if (p->cachefile->cf_head == p) { + cf->cf_head = p->cf_next; + } + p->cf_prev = p->cf_next = NULL; + cf->num_pairs--; +} + +// Allocates the hash table of pairs inside this pair list. +// +void pair_list::init() { + m_table_size = INITIAL_PAIR_LIST_SIZE; + m_num_locks = PAIR_LOCK_SIZE; + m_n_in_table = 0; + m_clock_head = NULL; + m_cleaner_head = NULL; + m_checkpoint_head = NULL; + m_pending_head = NULL; + m_table = NULL; + + + pthread_rwlockattr_t attr; + pthread_rwlockattr_init(&attr); +#if defined(HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP) + pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP); +#else + // TODO: need to figure out how to make writer-preferential rwlocks + // happen on osx +#endif + toku_pthread_rwlock_init(&m_list_lock, &attr); + toku_pthread_rwlock_init(&m_pending_lock_expensive, &attr); + toku_pthread_rwlock_init(&m_pending_lock_cheap, &attr); + XCALLOC_N(m_table_size, m_table); + XCALLOC_N(m_num_locks, m_mutexes); + for (uint64_t i = 0; i < m_num_locks; i++) { + toku_mutex_init(&m_mutexes[i].aligned_mutex, NULL); + } +} + +// Frees the pair_list hash table. It is expected to be empty by +// the time this is called. Returns an error if there are any +// pairs in any of the hash table slots. +void pair_list::destroy() { + // Check if any entries exist in the hash table. 
+ for (uint32_t i = 0; i < m_table_size; ++i) { + invariant_null(m_table[i]); + } + for (uint64_t i = 0; i < m_num_locks; i++) { + toku_mutex_destroy(&m_mutexes[i].aligned_mutex); + } + toku_pthread_rwlock_destroy(&m_list_lock); + toku_pthread_rwlock_destroy(&m_pending_lock_expensive); + toku_pthread_rwlock_destroy(&m_pending_lock_cheap); + toku_free(m_table); + toku_free(m_mutexes); +} + +// adds a PAIR to the cachetable's structures, +// but does NOT add it to the list maintained by +// the cachefile +void pair_list::add_to_cachetable_only(PAIR p) { + // sanity check to make sure that the PAIR does not already exist + PAIR pp = this->find_pair(p->cachefile, p->key, p->fullhash); + assert(pp == NULL); + + this->add_to_clock(p); + this->add_to_hash_chain(p); + m_n_in_table++; +} + +// This places the given pair inside of the pair list. +// +// requires caller to have grabbed write lock on list. +// requires caller to have p->mutex held as well +// +void pair_list::put(PAIR p) { + this->add_to_cachetable_only(p); + this->add_to_cf_list(p); +} + +// This removes the given pair from completely from the pair list. +// +// requires caller to have grabbed write lock on list, and p->mutex held +// +void pair_list::evict_completely(PAIR p) { + this->evict_from_cachetable(p); + this->evict_from_cachefile(p); +} + +// Removes the PAIR from the cachetable's lists, +// but does NOT impact the list maintained by the cachefile +void pair_list::evict_from_cachetable(PAIR p) { + this->pair_remove(p); + this->pending_pairs_remove(p); + this->remove_from_hash_chain(p); + + assert(m_n_in_table > 0); + m_n_in_table--; +} + +// Removes the PAIR from the cachefile's list of PAIRs +void pair_list::evict_from_cachefile(PAIR p) { + evict_pair_from_cachefile(p); +} + +// +// Remove pair from linked list for cleaner/clock +// +// +// requires caller to have grabbed write lock on list. 
+// +void pair_list::pair_remove (PAIR p) { + if (p->clock_prev == p) { + invariant(m_clock_head == p); + invariant(p->clock_next == p); + invariant(m_cleaner_head == p); + invariant(m_checkpoint_head == p); + m_clock_head = NULL; + m_cleaner_head = NULL; + m_checkpoint_head = NULL; + } + else { + if (p == m_clock_head) { + m_clock_head = m_clock_head->clock_next; + } + if (p == m_cleaner_head) { + m_cleaner_head = m_cleaner_head->clock_next; + } + if (p == m_checkpoint_head) { + m_checkpoint_head = m_checkpoint_head->clock_next; + } + p->clock_prev->clock_next = p->clock_next; + p->clock_next->clock_prev = p->clock_prev; + } + p->clock_prev = p->clock_next = NULL; +} + +//Remove a pair from the list of pairs that were marked with the +//pending bit for the in-progress checkpoint. +// +// requires that if the caller is the checkpoint thread, then a read lock +// is grabbed on the list. Otherwise, must have write lock on list. +// +void pair_list::pending_pairs_remove (PAIR p) { + if (p->pending_next) { + p->pending_next->pending_prev = p->pending_prev; + } + if (p->pending_prev) { + p->pending_prev->pending_next = p->pending_next; + } + else if (m_pending_head==p) { + m_pending_head = p->pending_next; + } + p->pending_prev = p->pending_next = NULL; +} + +void pair_list::remove_from_hash_chain(PAIR p) { + // Remove it from the hash chain. + unsigned int h = p->fullhash&(m_table_size - 1); + paranoid_invariant(m_table[h] != NULL); + if (m_table[h] == p) { + m_table[h] = p->hash_chain; + } + else { + PAIR curr = m_table[h]; + while (curr->hash_chain != p) { + curr = curr->hash_chain; + } + // remove p from the singular linked list + curr->hash_chain = p->hash_chain; + } + p->hash_chain = NULL; +} + +// Returns a pair from the pair list, using the given +// pair. If the pair cannot be found, null is returned. +// +// requires caller to have grabbed either a read lock on the list or +// bucket's mutex. 
+// +PAIR pair_list::find_pair(CACHEFILE file, CACHEKEY key, uint32_t fullhash) { + PAIR found_pair = nullptr; + for (PAIR p = m_table[fullhash&(m_table_size - 1)]; p; p = p->hash_chain) { + if (p->key.b == key.b && p->cachefile == file) { + found_pair = p; + break; + } + } + return found_pair; +} + +// Add PAIR to linked list shared by cleaner thread and clock +// +// requires caller to have grabbed write lock on list. +// +void pair_list::add_to_clock (PAIR p) { + // requires that p is not currently in the table. + // inserts p into the clock list at the tail. + + p->count = CLOCK_INITIAL_COUNT; + //assert either both head and tail are set or they are both NULL + // tail and head exist + if (m_clock_head) { + assert(m_cleaner_head); + assert(m_checkpoint_head); + // insert right before the head + p->clock_next = m_clock_head; + p->clock_prev = m_clock_head->clock_prev; + + p->clock_prev->clock_next = p; + p->clock_next->clock_prev = p; + + } + // this is the first element in the list + else { + m_clock_head = p; + p->clock_next = p->clock_prev = m_clock_head; + m_cleaner_head = p; + m_checkpoint_head = p; + } +} + +// add the pair to the linked list that of PAIRs belonging +// to the same cachefile. This linked list is used +// in cachetable_flush_cachefile. +void pair_list::add_to_cf_list(PAIR p) { + CACHEFILE cf = p->cachefile; + if (cf->cf_head) { + cf->cf_head->cf_prev = p; + } + p->cf_next = cf->cf_head; + p->cf_prev = NULL; + cf->cf_head = p; + cf->num_pairs++; +} + +// Add PAIR to the hashtable +// +// requires caller to have grabbed write lock on list +// and to have grabbed the p->mutex. 
+void pair_list::add_to_hash_chain(PAIR p) { + uint32_t h = p->fullhash & (m_table_size - 1); + p->hash_chain = m_table[h]; + m_table[h] = p; +} + +// test function +// +// grabs and releases write list lock +// +void pair_list::verify() { + this->write_list_lock(); + uint32_t num_found = 0; + + // First clear all the verify flags by going through the hash chains + { + uint32_t i; + for (i = 0; i < m_table_size; i++) { + PAIR p; + for (p = m_table[i]; p; p = p->hash_chain) { + num_found++; + } + } + } + assert(num_found == m_n_in_table); + num_found = 0; + // Now go through the clock chain, make sure everything in the LRU chain is hashed. + { + PAIR p; + bool is_first = true; + for (p = m_clock_head; m_clock_head != NULL && (p != m_clock_head || is_first); p=p->clock_next) { + is_first=false; + PAIR p2; + uint32_t fullhash = p->fullhash; + //assert(fullhash==toku_cachetable_hash(p->cachefile, p->key)); + for (p2 = m_table[fullhash&(m_table_size-1)]; p2; p2=p2->hash_chain) { + if (p2==p) { + /* found it */ + num_found++; + goto next; + } + } + fprintf(stderr, "Something in the clock chain is not hashed\n"); + assert(0); + next:; + } + assert (num_found == m_n_in_table); + } + this->write_list_unlock(); +} + +// If given pointers are not null, assign the hash table size of +// this pair list and the number of pairs in this pair list. 
+// +// +// grabs and releases read list lock +// +void pair_list::get_state(int *num_entries, int *hash_size) { + this->read_list_lock(); + if (num_entries) { + *num_entries = m_n_in_table; + } + if (hash_size) { + *hash_size = m_table_size; + } + this->read_list_unlock(); +} + +void pair_list::read_list_lock() { + toku_pthread_rwlock_rdlock(&m_list_lock); +} + +void pair_list::read_list_unlock() { + toku_pthread_rwlock_rdunlock(&m_list_lock); +} + +void pair_list::write_list_lock() { + toku_pthread_rwlock_wrlock(&m_list_lock); +} + +void pair_list::write_list_unlock() { + toku_pthread_rwlock_wrunlock(&m_list_lock); +} + +void pair_list::read_pending_exp_lock() { + toku_pthread_rwlock_rdlock(&m_pending_lock_expensive); +} + +void pair_list::read_pending_exp_unlock() { + toku_pthread_rwlock_rdunlock(&m_pending_lock_expensive); +} + +void pair_list::write_pending_exp_lock() { + toku_pthread_rwlock_wrlock(&m_pending_lock_expensive); +} + +void pair_list::write_pending_exp_unlock() { + toku_pthread_rwlock_wrunlock(&m_pending_lock_expensive); +} + +void pair_list::read_pending_cheap_lock() { + toku_pthread_rwlock_rdlock(&m_pending_lock_cheap); +} + +void pair_list::read_pending_cheap_unlock() { + toku_pthread_rwlock_rdunlock(&m_pending_lock_cheap); +} + +void pair_list::write_pending_cheap_lock() { + toku_pthread_rwlock_wrlock(&m_pending_lock_cheap); +} + +void pair_list::write_pending_cheap_unlock() { + toku_pthread_rwlock_wrunlock(&m_pending_lock_cheap); +} + +toku_mutex_t* pair_list::get_mutex_for_pair(uint32_t fullhash) { + return &m_mutexes[fullhash&(m_num_locks - 1)].aligned_mutex; +} + +void pair_list::pair_lock_by_fullhash(uint32_t fullhash) { + toku_mutex_lock(&m_mutexes[fullhash&(m_num_locks - 1)].aligned_mutex); +} + +void pair_list::pair_unlock_by_fullhash(uint32_t fullhash) { + toku_mutex_unlock(&m_mutexes[fullhash&(m_num_locks - 1)].aligned_mutex); +} + + +ENSURE_POD(evictor); + +// +// This is the function that runs eviction on its own thread. 
+// +static void *eviction_thread(void *evictor_v) { + evictor* CAST_FROM_VOIDP(evictor, evictor_v); + evictor->run_eviction_thread(); + return evictor_v; +} + +// +// Starts the eviction thread, assigns external object references, +// and initializes all counters and condition variables. +// +int evictor::init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, KIBBUTZ _kibbutz, uint32_t eviction_period) { + TOKU_VALGRIND_HG_DISABLE_CHECKING(&m_ev_thread_is_running, sizeof m_ev_thread_is_running); + TOKU_VALGRIND_HG_DISABLE_CHECKING(&m_size_evicting, sizeof m_size_evicting); + + // set max difference to around 500MB + int64_t max_diff = (1 << 29); + + m_low_size_watermark = _size_limit; + // these values are selected kind of arbitrarily right now as + // being a percentage more than low_size_watermark, which is provided + // by the caller. + m_low_size_hysteresis = (11 * _size_limit)/10; //10% more + if ((m_low_size_hysteresis - m_low_size_watermark) > max_diff) { + m_low_size_hysteresis = m_low_size_watermark + max_diff; + } + m_high_size_hysteresis = (5 * _size_limit)/4; // 20% more + if ((m_high_size_hysteresis - m_low_size_hysteresis) > max_diff) { + m_high_size_hysteresis = m_low_size_hysteresis + max_diff; + } + m_high_size_watermark = (3 * _size_limit)/2; // 50% more + if ((m_high_size_watermark - m_high_size_hysteresis) > max_diff) { + m_high_size_watermark = m_high_size_hysteresis + max_diff; + } + + m_size_reserved = unreservable_memory(_size_limit); + m_size_current = 0; + m_size_cloned_data = 0; + m_size_evicting = 0; + + m_size_nonleaf = create_partitioned_counter(); + m_size_leaf = create_partitioned_counter(); + m_size_rollback = create_partitioned_counter(); + m_size_cachepressure = create_partitioned_counter(); + m_wait_pressure_count = create_partitioned_counter(); + m_wait_pressure_time = create_partitioned_counter(); + m_long_wait_pressure_count = create_partitioned_counter(); + m_long_wait_pressure_time = create_partitioned_counter(); 
+ + m_pl = _pl; + m_cf_list = _cf_list; + m_kibbutz = _kibbutz; + toku_mutex_init(&m_ev_thread_lock, NULL); + toku_cond_init(&m_flow_control_cond, NULL); + toku_cond_init(&m_ev_thread_cond, NULL); + m_num_sleepers = 0; + m_ev_thread_is_running = false; + m_period_in_seconds = eviction_period; + + unsigned int seed = (unsigned int) time(NULL); + int r = myinitstate_r(seed, m_random_statebuf, sizeof m_random_statebuf, &m_random_data); + assert_zero(r); + + // start the background thread + m_run_thread = true; + m_num_eviction_thread_runs = 0; + m_ev_thread_init = false; + r = toku_pthread_create(&m_ev_thread, NULL, eviction_thread, this); + if (r == 0) { + m_ev_thread_init = true; + } + m_evictor_init = true; + return r; +} + +// +// This stops the eviction thread and clears the condition variable. +// +// NOTE: This should only be called if there are no evictions in progress. +// +void evictor::destroy() { + if (!m_evictor_init) { + return; + } + assert(m_size_evicting == 0); + // + // commented out of Ming, because we could not finish + // #5672. Once #5672 is solved, we should restore this + // + //assert(m_size_current == 0); + + // Stop the eviction thread. 
+ if (m_ev_thread_init) { + toku_mutex_lock(&m_ev_thread_lock); + m_run_thread = false; + this->signal_eviction_thread(); + toku_mutex_unlock(&m_ev_thread_lock); + void *ret; + int r = toku_pthread_join(m_ev_thread, &ret); + assert_zero(r); + assert(!m_ev_thread_is_running); + } + destroy_partitioned_counter(m_size_nonleaf); + m_size_nonleaf = NULL; + destroy_partitioned_counter(m_size_leaf); + m_size_leaf = NULL; + destroy_partitioned_counter(m_size_rollback); + m_size_rollback = NULL; + destroy_partitioned_counter(m_size_cachepressure); + m_size_cachepressure = NULL; + + destroy_partitioned_counter(m_wait_pressure_count); m_wait_pressure_count = NULL; + destroy_partitioned_counter(m_wait_pressure_time); m_wait_pressure_time = NULL; + destroy_partitioned_counter(m_long_wait_pressure_count); m_long_wait_pressure_count = NULL; + destroy_partitioned_counter(m_long_wait_pressure_time); m_long_wait_pressure_time = NULL; + + toku_cond_destroy(&m_flow_control_cond); + toku_cond_destroy(&m_ev_thread_cond); + toku_mutex_destroy(&m_ev_thread_lock); +} + +// +// Increases status variables and the current size variable +// of the evictor based on the given pair attribute. +// +void evictor::add_pair_attr(PAIR_ATTR attr) { + assert(attr.is_valid); + add_to_size_current(attr.size); + increment_partitioned_counter(m_size_nonleaf, attr.nonleaf_size); + increment_partitioned_counter(m_size_leaf, attr.leaf_size); + increment_partitioned_counter(m_size_rollback, attr.rollback_size); + increment_partitioned_counter(m_size_cachepressure, attr.cache_pressure_size); +} + +// +// Decreases status variables and the current size variable +// of the evictor based on the given pair attribute. 
+// +void evictor::remove_pair_attr(PAIR_ATTR attr) { + assert(attr.is_valid); + remove_from_size_current(attr.size); + increment_partitioned_counter(m_size_nonleaf, 0 - attr.nonleaf_size); + increment_partitioned_counter(m_size_leaf, 0 - attr.leaf_size); + increment_partitioned_counter(m_size_rollback, 0 - attr.rollback_size); + increment_partitioned_counter(m_size_cachepressure, 0 - attr.cache_pressure_size); +} + +// +// Updates this evictor's stats to match the "new" pair attribute given +// while also removing the given "old" pair attribute. +// +void evictor::change_pair_attr(PAIR_ATTR old_attr, PAIR_ATTR new_attr) { + this->add_pair_attr(new_attr); + this->remove_pair_attr(old_attr); +} + +// +// Adds the given size to the evictor's estimation of +// the size of the cachetable. +// +void evictor::add_to_size_current(long size) { + (void) toku_sync_fetch_and_add(&m_size_current, size); +} + +// +// Subtracts the given size from the evictor's current +// approximation of the cachetable size. 
+// +void evictor::remove_from_size_current(long size) { + (void) toku_sync_fetch_and_sub(&m_size_current, size); +} + +// +// Adds the size of cloned data to necessary variables in the evictor +// +void evictor::add_cloned_data_size(long size) { + (void) toku_sync_fetch_and_add(&m_size_cloned_data, size); + add_to_size_current(size); +} + +// +// Removes the size of cloned data to necessary variables in the evictor +// +void evictor::remove_cloned_data_size(long size) { + (void) toku_sync_fetch_and_sub(&m_size_cloned_data, size); + remove_from_size_current(size); +} + +// +// TODO: (Zardosht) comment this function +// +uint64_t evictor::reserve_memory(double fraction, uint64_t upper_bound) { + toku_mutex_lock(&m_ev_thread_lock); + uint64_t reserved_memory = fraction * (m_low_size_watermark - m_size_reserved); + if (0) { // debug + fprintf(stderr, "%s %" PRIu64 " %" PRIu64 "\n", __PRETTY_FUNCTION__, reserved_memory, upper_bound); + } + if (upper_bound > 0 && reserved_memory > upper_bound) { + reserved_memory = upper_bound; + } + m_size_reserved += reserved_memory; + (void) toku_sync_fetch_and_add(&m_size_current, reserved_memory); + this->signal_eviction_thread(); + toku_mutex_unlock(&m_ev_thread_lock); + + if (this->should_client_thread_sleep()) { + this->wait_for_cache_pressure_to_subside(); + } + return reserved_memory; +} + +// +// TODO: (Zardosht) comment this function +// +void evictor::release_reserved_memory(uint64_t reserved_memory){ + (void) toku_sync_fetch_and_sub(&m_size_current, reserved_memory); + toku_mutex_lock(&m_ev_thread_lock); + m_size_reserved -= reserved_memory; + // signal the eviction thread in order to possibly wake up sleeping clients + if (m_num_sleepers > 0) { + this->signal_eviction_thread(); + } + toku_mutex_unlock(&m_ev_thread_lock); +} + +// +// This function is the eviction thread. It runs for the lifetime of +// the evictor. Goes to sleep for period_in_seconds +// by waiting on m_ev_thread_cond. 
+// +void evictor::run_eviction_thread(){ + toku_mutex_lock(&m_ev_thread_lock); + while (m_run_thread) { + m_num_eviction_thread_runs++; // for test purposes only + m_ev_thread_is_running = true; + // responsibility of run_eviction to release and + // regrab ev_thread_lock as it sees fit + this->run_eviction(); + m_ev_thread_is_running = false; + + if (m_run_thread) { + // + // sleep until either we are signaled + // via signal_eviction_thread or + // m_period_in_seconds amount of time has passed + // + if (m_period_in_seconds) { + toku_timespec_t wakeup_time; + struct timeval tv; + gettimeofday(&tv, 0); + wakeup_time.tv_sec = tv.tv_sec; + wakeup_time.tv_nsec = tv.tv_usec * 1000LL; + wakeup_time.tv_sec += m_period_in_seconds; + toku_cond_timedwait( + &m_ev_thread_cond, + &m_ev_thread_lock, + &wakeup_time + ); + } + // for test purposes, we have an option of + // not waiting on a period, but rather sleeping indefinitely + else { + toku_cond_wait(&m_ev_thread_cond, &m_ev_thread_lock); + } + } + } + toku_mutex_unlock(&m_ev_thread_lock); +} + +// +// runs eviction. +// on entry, ev_thread_lock is grabbed, on exit, ev_thread_lock must still be grabbed +// it is the responsibility of this function to release and reacquire ev_thread_lock as it sees fit. +// +void evictor::run_eviction(){ + // + // These variables will help us detect if everything in the clock is currently being accessed. + // We must detect this case otherwise we will end up in an infinite loop below. 
+ // + bool exited_early = false; + uint32_t num_pairs_examined_without_evicting = 0; + + while (this->eviction_needed()) { + if (m_num_sleepers > 0 && this->should_sleeping_clients_wakeup()) { + toku_cond_broadcast(&m_flow_control_cond); + } + // release ev_thread_lock so that eviction may run without holding mutex + toku_mutex_unlock(&m_ev_thread_lock); + + // first try to do an eviction from stale cachefiles + bool some_eviction_ran = m_cf_list->evict_some_stale_pair(this); + if (!some_eviction_ran) { + m_pl->read_list_lock(); + PAIR curr_in_clock = m_pl->m_clock_head; + // if nothing to evict, we need to exit + if (!curr_in_clock) { + m_pl->read_list_unlock(); + toku_mutex_lock(&m_ev_thread_lock); + exited_early = true; + goto exit; + } + if (num_pairs_examined_without_evicting > m_pl->m_n_in_table) { + // we have a cycle where everything in the clock is in use + // do not return an error + // just let memory be overfull + m_pl->read_list_unlock(); + toku_mutex_lock(&m_ev_thread_lock); + exited_early = true; + goto exit; + } + bool eviction_run = run_eviction_on_pair(curr_in_clock); + if (eviction_run) { + // reset the count + num_pairs_examined_without_evicting = 0; + } + else { + num_pairs_examined_without_evicting++; + } + // at this point, either curr_in_clock is still in the list because it has not been fully evicted, + // and we need to move ct->m_clock_head over. Otherwise, curr_in_clock has been fully evicted + // and we do NOT need to move ct->m_clock_head, as the removal of curr_in_clock + // modified ct->m_clock_head + if (m_pl->m_clock_head && (m_pl->m_clock_head == curr_in_clock)) { + m_pl->m_clock_head = m_pl->m_clock_head->clock_next; + } + m_pl->read_list_unlock(); + } + toku_mutex_lock(&m_ev_thread_lock); + } + +exit: + if (m_num_sleepers > 0 && (exited_early || this->should_sleeping_clients_wakeup())) { + toku_cond_broadcast(&m_flow_control_cond); + } + return; +} + +// +// NOTE: Cachetable lock held on entry. 
+// Runs eviction on the given PAIR. This may be a +// partial eviction or full eviction. +// +// on entry, pair mutex is NOT held, but pair list's read list lock +// IS held +// on exit, the same conditions must apply +// +bool evictor::run_eviction_on_pair(PAIR curr_in_clock) { + uint32_t n_in_table; + int64_t size_current; + bool ret_val = false; + // function meant to be called on PAIR that is not being accessed right now + CACHEFILE cf = curr_in_clock->cachefile; + int r = bjm_add_background_job(cf->bjm); + if (r) { + goto exit; + } + pair_lock(curr_in_clock); + // these are the circumstances under which we don't run eviction on a pair: + // - if other users are waiting on the lock + // - if the PAIR is referenced by users + // - if the PAIR's disk_nb_mutex is in use, implying that it is + // undergoing a checkpoint + if (curr_in_clock->value_rwlock.users() || + curr_in_clock->refcount > 0 || + nb_mutex_users(&curr_in_clock->disk_nb_mutex)) + { + pair_unlock(curr_in_clock); + bjm_remove_background_job(cf->bjm); + goto exit; + } + + // extract and use these values so that we don't risk them changing + // out from underneath us in calculations below. 
+ n_in_table = m_pl->m_n_in_table; + size_current = m_size_current; + + // now that we have the pair mutex we care about, we can + // release the read list lock and reacquire it at the end of the function + m_pl->read_list_unlock(); + ret_val = true; + if (curr_in_clock->count > 0) { + toku::context pe_ctx(CTX_PARTIAL_EVICTION); + + uint32_t curr_size = curr_in_clock->attr.size; + // if the size of this PAIR is greater than the average size of PAIRs + // in the cachetable, then decrement it, otherwise, decrement + // probabilistically + if (curr_size*n_in_table >= size_current) { + curr_in_clock->count--; + } else { + // generate a random number between 0 and 2^16 + assert(size_current <= (INT64_MAX / ((1<<16)-1))); // to protect against possible overflows + int32_t rnd = myrandom_r(&m_random_data) % (1<<16); + // The if-statement below will be true with probability of + // curr_size/(average size of PAIR in cachetable) + // Here is how the math is done: + // average_size = size_current/n_in_table + // curr_size/average_size = curr_size*n_in_table/size_current + // we evaluate if a random number from 0 to 2^16 is less than + // than curr_size/average_size * 2^16. 
So, our if-clause should be + // if (2^16*curr_size/average_size > rnd) + // this evaluates to: + // if (2^16*curr_size*n_in_table/size_current > rnd) + // by multiplying each side of the equation by size_current, we get + // if (2^16*curr_size*n_in_table > rnd*size_current) + // and dividing each side by 2^16, + // we get the if-clause below + // + if ((((int64_t)curr_size) * n_in_table) >= (((int64_t)rnd) * size_current)>>16) { + curr_in_clock->count--; + } + } + // call the partial eviction callback + curr_in_clock->value_rwlock.write_lock(true); + + void *value = curr_in_clock->value_data; + void* disk_data = curr_in_clock->disk_data; + void *write_extraargs = curr_in_clock->write_extraargs; + enum partial_eviction_cost cost; + long bytes_freed_estimate = 0; + curr_in_clock->pe_est_callback( + value, + disk_data, + &bytes_freed_estimate, + &cost, + write_extraargs + ); + if (cost == PE_CHEAP) { + pair_unlock(curr_in_clock); + curr_in_clock->size_evicting_estimate = 0; + this->do_partial_eviction(curr_in_clock); + bjm_remove_background_job(cf->bjm); + } + else if (cost == PE_EXPENSIVE) { + // only bother running an expensive partial eviction + // if it is expected to free space + if (bytes_freed_estimate > 0) { + pair_unlock(curr_in_clock); + curr_in_clock->size_evicting_estimate = bytes_freed_estimate; + toku_mutex_lock(&m_ev_thread_lock); + m_size_evicting += bytes_freed_estimate; + toku_mutex_unlock(&m_ev_thread_lock); + toku_kibbutz_enq( + m_kibbutz, + cachetable_partial_eviction, + curr_in_clock + ); + } + else { + curr_in_clock->value_rwlock.write_unlock(); + pair_unlock(curr_in_clock); + bjm_remove_background_job(cf->bjm); + } + } + else { + assert(false); + } + } + else { + toku::context pe_ctx(CTX_FULL_EVICTION); + + // responsibility of try_evict_pair to eventually remove background job + // pair's mutex is still grabbed here + this->try_evict_pair(curr_in_clock); + } + // regrab the read list lock, because the caller assumes + // that it is held. 
The contract requires this. + m_pl->read_list_lock(); +exit: + return ret_val; +} + +struct pair_unpin_with_new_attr_extra { + pair_unpin_with_new_attr_extra(evictor *e, PAIR p) : + ev(e), pair(p) { + } + evictor *ev; + PAIR pair; +}; + +static void pair_unpin_with_new_attr(PAIR_ATTR new_attr, void *extra) { + struct pair_unpin_with_new_attr_extra *info = + reinterpret_cast(extra); + PAIR p = info->pair; + evictor *ev = info->ev; + + // change the attr in the evictor, then update the value in the pair + ev->change_pair_attr(p->attr, new_attr); + p->attr = new_attr; + + // unpin + pair_lock(p); + p->value_rwlock.write_unlock(); + pair_unlock(p); +} + +// +// on entry and exit, pair's mutex is not held +// on exit, PAIR is unpinned +// +void evictor::do_partial_eviction(PAIR p) { + // Copy the old attr + PAIR_ATTR old_attr = p->attr; + long long size_evicting_estimate = p->size_evicting_estimate; + + struct pair_unpin_with_new_attr_extra extra(this, p); + p->pe_callback(p->value_data, old_attr, p->write_extraargs, + // passed as the finalize continuation, which allows the + // pe_callback to unpin the node before doing expensive cleanup + pair_unpin_with_new_attr, &extra); + + // now that the pe_callback (and its pair_unpin_with_new_attr continuation) + // have finished, we can safely decrease size_evicting + this->decrease_size_evicting(size_evicting_estimate); +} + +// +// CT lock held on entry +// background job has been added for p->cachefile on entry +// responsibility of this function to make sure that background job is removed +// +// on entry, pair's mutex is held, on exit, the pair's mutex is NOT held +// +void evictor::try_evict_pair(PAIR p) { + CACHEFILE cf = p->cachefile; + // evictions without a write or unpinned pair's that are clean + // can be run in the current thread + + // the only caller, run_eviction_on_pair, should call this function + // only if no one else is trying to use it + assert(!p->value_rwlock.users()); + 
p->value_rwlock.write_lock(true); + // if the PAIR is dirty, the running eviction requires writing the + // PAIR out. if the disk_nb_mutex is grabbed, then running + // eviction requires waiting for the disk_nb_mutex to become available, + // which may be expensive. Hence, if either is true, we + // do the eviction on a writer thread + if (!p->dirty && (nb_mutex_writers(&p->disk_nb_mutex) == 0)) { + p->size_evicting_estimate = 0; + // + // This method will unpin PAIR and release PAIR mutex + // + // because the PAIR is not dirty, we can safely pass + // false for the for_checkpoint parameter + this->evict_pair(p, false); + bjm_remove_background_job(cf->bjm); + } + else { + pair_unlock(p); + toku_mutex_lock(&m_ev_thread_lock); + assert(m_size_evicting >= 0); + p->size_evicting_estimate = p->attr.size; + m_size_evicting += p->size_evicting_estimate; + assert(m_size_evicting >= 0); + toku_mutex_unlock(&m_ev_thread_lock); + toku_kibbutz_enq(m_kibbutz, cachetable_evicter, p); + } +} + +// +// Requires: This thread must hold the write lock (nb_mutex) for the pair. +// The pair's mutex (p->mutex) is also held. +// on exit, neither is held +// +void evictor::evict_pair(PAIR p, bool for_checkpoint) { + if (p->dirty) { + pair_unlock(p); + cachetable_write_locked_pair(this, p, for_checkpoint); + pair_lock(p); + } + // one thing we can do here is extract the size_evicting estimate, + // have decrease_size_evicting take the estimate and not the pair, + // and do this work after we have called + // cachetable_maybe_remove_and_free_pair + this->decrease_size_evicting(p->size_evicting_estimate); + // if we are to remove this pair, we need the write list lock, + // to get it in a way that avoids deadlocks, we must first release + // the pair's mutex, then grab the write list lock, then regrab the + // pair's mutex. 
The pair cannot go anywhere because + // the pair is still pinned + nb_mutex_lock(&p->disk_nb_mutex, p->mutex); + pair_unlock(p); + m_pl->write_list_lock(); + pair_lock(p); + p->value_rwlock.write_unlock(); + nb_mutex_unlock(&p->disk_nb_mutex); + // at this point, we have the pair list's write list lock + // and we have the pair's mutex (p->mutex) held + + // this ensures that a clone running in the background first completes + bool removed = false; + if (p->value_rwlock.users() == 0 && p->refcount == 0) { + // assumption is that if we are about to remove the pair + // that no one has grabbed the disk_nb_mutex, + // and that there is no cloned_value_data, because + // no one is writing a cloned value out. + assert(nb_mutex_users(&p->disk_nb_mutex) == 0); + assert(p->cloned_value_data == NULL); + cachetable_remove_pair(m_pl, this, p); + removed = true; + } + pair_unlock(p); + m_pl->write_list_unlock(); + // do not want to hold the write list lock while freeing a pair + if (removed) { + cachetable_free_pair(p); + } +} + +// +// this function handles the responsibilities for writer threads when they +// decrease size_evicting. The responsibilities are: +// - decrease m_size_evicting in a thread safe manner +// - in some circumstances, signal the eviction thread +// +void evictor::decrease_size_evicting(long size_evicting_estimate) { + if (size_evicting_estimate > 0) { + toku_mutex_lock(&m_ev_thread_lock); + int64_t buffer = m_high_size_hysteresis - m_low_size_watermark; + // if size_evicting is transitioning from greater than buffer to below buffer, and + // some client threads are sleeping, we need to wake up the eviction thread. + // Here is why. In this scenario, we are in one of two cases: + // - size_current - size_evicting < low_size_watermark + // If this is true, then size_current < high_size_hysteresis, which + // means we need to wake up sleeping clients + // - size_current - size_evicting > low_size_watermark, + // which means more evictions must be run. 
+ // The consequences of both cases are the responsibility + // of the eviction thread. + // + bool need_to_signal_ev_thread = + (m_num_sleepers > 0) && + !m_ev_thread_is_running && + (m_size_evicting > buffer) && + ((m_size_evicting - size_evicting_estimate) <= buffer); + m_size_evicting -= size_evicting_estimate; + assert(m_size_evicting >= 0); + if (need_to_signal_ev_thread) { + this->signal_eviction_thread(); + } + toku_mutex_unlock(&m_ev_thread_lock); + } +} + +// +// Wait for cache table space to become available +// size_current is number of bytes currently occupied by data (referred to by pairs) +// size_evicting is number of bytes queued up to be evicted +// +void evictor::wait_for_cache_pressure_to_subside() { + uint64_t t0 = toku_current_time_microsec(); + toku_mutex_lock(&m_ev_thread_lock); + m_num_sleepers++; + this->signal_eviction_thread(); + toku_cond_wait(&m_flow_control_cond, &m_ev_thread_lock); + m_num_sleepers--; + toku_mutex_unlock(&m_ev_thread_lock); + uint64_t t1 = toku_current_time_microsec(); + increment_partitioned_counter(m_wait_pressure_count, 1); + uint64_t tdelta = t1 - t0; + increment_partitioned_counter(m_wait_pressure_time, tdelta); + if (tdelta > 1000000) { + increment_partitioned_counter(m_long_wait_pressure_count, 1); + increment_partitioned_counter(m_long_wait_pressure_time, tdelta); + } +} + +// +// Get the status of the current estimated size of the cachetable, +// and the evictor's set limit. +// +void evictor::get_state(long *size_current_ptr, long *size_limit_ptr) { + if (size_current_ptr) { + *size_current_ptr = m_size_current; + } + if (size_limit_ptr) { + *size_limit_ptr = m_low_size_watermark; + } +} + +// +// Force the eviction thread to do some work. +// +// This function does not require any mutex to be held. +// As a result, scheduling is not guaranteed, but that is tolerable. 
+// +void evictor::signal_eviction_thread() { + toku_cond_signal(&m_ev_thread_cond); +} + +// +// Returns true if the cachetable is so over subscribed, that a client thread should sleep +// +// This function may be called in a thread-unsafe manner. Locks are not +// required to read size_current. The result is that +// the values may be a little off, but we think that is tolerable. +// +bool evictor::should_client_thread_sleep(){ + return unsafe_read_size_current() > m_high_size_watermark; +} + +// +// Returns true if a sleeping client should be woken up because +// the cachetable is not overly subscribed +// +// This function may be called in a thread-unsafe manner. Locks are not +// required to read size_current. The result is that +// the values may be a little off, but we think that is tolerable. +// +bool evictor::should_sleeping_clients_wakeup() { + return unsafe_read_size_current() <= m_high_size_hysteresis; +} + +// +// Returns true if a client thread should try to wake up the eviction +// thread because the client thread has noticed too much data taken +// up in the cachetable. +// +// This function may be called in a thread-unsafe manner. Locks are not +// required to read size_current or size_evicting. The result is that +// the values may be a little off, but we think that is tolerable. +// If the caller wants to ensure that ev_thread_is_running and size_evicting +// are accurate, then the caller must hold ev_thread_lock before +// calling this function. +// +bool evictor::should_client_wake_eviction_thread() { + return + !m_ev_thread_is_running && + ((unsafe_read_size_current() - m_size_evicting) > m_low_size_hysteresis); +} + +// +// Determines if eviction is needed. 
If the current size of +// the cachetable exceeds the sum of our fixed size limit and +// the amount of data currently being evicted, then eviction is needed +// +bool evictor::eviction_needed() { + return (m_size_current - m_size_evicting) > m_low_size_watermark; +} + +inline int64_t evictor::unsafe_read_size_current(void) const { + return m_size_current; +} + +void evictor::fill_engine_status() { + STATUS_VALUE(CT_SIZE_CURRENT) = m_size_current; + STATUS_VALUE(CT_SIZE_LIMIT) = m_low_size_hysteresis; + STATUS_VALUE(CT_SIZE_WRITING) = m_size_evicting; + STATUS_VALUE(CT_SIZE_NONLEAF) = read_partitioned_counter(m_size_nonleaf); + STATUS_VALUE(CT_SIZE_LEAF) = read_partitioned_counter(m_size_leaf); + STATUS_VALUE(CT_SIZE_ROLLBACK) = read_partitioned_counter(m_size_rollback); + STATUS_VALUE(CT_SIZE_CACHEPRESSURE) = read_partitioned_counter(m_size_cachepressure); + STATUS_VALUE(CT_SIZE_CLONED) = m_size_cloned_data; + STATUS_VALUE(CT_WAIT_PRESSURE_COUNT) = read_partitioned_counter(m_wait_pressure_count); + STATUS_VALUE(CT_WAIT_PRESSURE_TIME) = read_partitioned_counter(m_wait_pressure_time); + STATUS_VALUE(CT_LONG_WAIT_PRESSURE_COUNT) = read_partitioned_counter(m_long_wait_pressure_count); + STATUS_VALUE(CT_LONG_WAIT_PRESSURE_TIME) = read_partitioned_counter(m_long_wait_pressure_time); +} + +//////////////////////////////////////////////////////////////////////////////// + +ENSURE_POD(checkpointer); + +// +// Sets the cachetable reference in this checkpointer class, this is temporary. +// +int checkpointer::init(pair_list *_pl, + TOKULOGGER _logger, + evictor *_ev, + cachefile_list *files) { + m_list = _pl; + m_logger = _logger; + m_ev = _ev; + m_cf_list = files; + bjm_init(&m_checkpoint_clones_bjm); + + // Default is no checkpointing. 
+ m_checkpointer_cron_init = false; + int r = toku_minicron_setup(&m_checkpointer_cron, 0, checkpoint_thread, this); + if (r == 0) { + m_checkpointer_cron_init = true; + } + m_checkpointer_init = true; + return r; +} + +void checkpointer::destroy() { + if (!m_checkpointer_init) { + return; + } + if (m_checkpointer_cron_init && !this->has_been_shutdown()) { + // for test code only, production code uses toku_cachetable_minicron_shutdown() + int r = this->shutdown(); + assert(r == 0); + } + bjm_destroy(m_checkpoint_clones_bjm); +} + +// +// Sets how often the checkpoint thread will run, in seconds +// +void checkpointer::set_checkpoint_period(uint32_t new_period) { + toku_minicron_change_period(&m_checkpointer_cron, new_period*1000); +} + +// +// Sets how often the checkpoint thread will run. +// +uint32_t checkpointer::get_checkpoint_period() { + return toku_minicron_get_period_in_seconds_unlocked(&m_checkpointer_cron); +} + +// +// Stops the checkpoint thread. +// +int checkpointer::shutdown() { + return toku_minicron_shutdown(&m_checkpointer_cron); +} + +// +// If checkpointing is running, this returns false. +// +bool checkpointer::has_been_shutdown() { + return toku_minicron_has_been_shutdown(&m_checkpointer_cron); +} + +TOKULOGGER checkpointer::get_logger() { + return m_logger; +} + +void checkpointer::increment_num_txns() { + m_checkpoint_num_txns++; +} + +struct iterate_begin_checkpoint { + LSN lsn_of_checkpoint_in_progress; + iterate_begin_checkpoint(LSN lsn) : lsn_of_checkpoint_in_progress(lsn) { } + static int fn(const CACHEFILE &cf, const uint32_t UU(idx), struct iterate_begin_checkpoint *info) { + assert(cf->begin_checkpoint_userdata); + if (cf->for_checkpoint) { + cf->begin_checkpoint_userdata(info->lsn_of_checkpoint_in_progress, cf->userdata); + } + return 0; + } +}; + +// +// Update the user data in any cachefiles in our checkpoint list. 
+// +void checkpointer::update_cachefiles() { + struct iterate_begin_checkpoint iterate(m_lsn_of_checkpoint_in_progress); + int r = m_cf_list->m_active_fileid.iterate(&iterate); + assert_zero(r); +} + +struct iterate_note_pin { + static int fn(const CACHEFILE &cf, uint32_t UU(idx), void **UU(extra)) { + assert(cf->note_pin_by_checkpoint); + cf->note_pin_by_checkpoint(cf, cf->userdata); + cf->for_checkpoint = true; + return 0; + } +}; + +// +// Sets up and kicks off a checkpoint. +// +void checkpointer::begin_checkpoint() { + // 1. Initialize the accountability counters. + m_checkpoint_num_txns = 0; + + // 2. Make list of cachefiles to be included in the checkpoint. + m_cf_list->read_lock(); + m_cf_list->m_active_fileid.iterate(nullptr); + m_checkpoint_num_files = m_cf_list->m_active_fileid.size(); + m_cf_list->read_unlock(); + + // 3. Create log entries for this checkpoint. + if (m_logger) { + this->log_begin_checkpoint(); + } + + bjm_reset(m_checkpoint_clones_bjm); + + m_list->write_pending_exp_lock(); + m_list->read_list_lock(); + m_cf_list->read_lock(); // needed for update_cachefiles + m_list->write_pending_cheap_lock(); + // 4. Turn on all the relevant checkpoint pending bits. + this->turn_on_pending_bits(); + + // 5. + this->update_cachefiles(); + m_list->write_pending_cheap_unlock(); + m_cf_list->read_unlock(); + m_list->read_list_unlock(); + m_list->write_pending_exp_unlock(); +} + +struct iterate_log_fassociate { + static int fn(const CACHEFILE &cf, uint32_t UU(idx), void **UU(extra)) { + assert(cf->log_fassociate_during_checkpoint); + cf->log_fassociate_during_checkpoint(cf, cf->userdata); + return 0; + } +}; + +// +// Assuming the logger exists, this will write out the folloing +// information to the log. +// +// 1. Writes the BEGIN_CHECKPOINT to the log. +// 2. Writes the list of open dictionaries to the log. +// 3. Writes the list of open transactions to the log. +// 4. Writes the list of dicionaries that have had rollback logs suppresed. 
+// +// NOTE: This also has the side effecto of setting the LSN +// of checkpoint in progress. +// +void checkpointer::log_begin_checkpoint() { + int r = 0; + + // Write the BEGIN_CHECKPOINT to the log. + LSN begin_lsn={ .lsn = (uint64_t) -1 }; // we'll need to store the lsn of the checkpoint begin in all the trees that are checkpointed. + TXN_MANAGER mgr = toku_logger_get_txn_manager(m_logger); + TXNID last_xid = toku_txn_manager_get_last_xid(mgr); + toku_log_begin_checkpoint(m_logger, &begin_lsn, 0, 0, last_xid); + m_lsn_of_checkpoint_in_progress = begin_lsn; + + // Log the list of open dictionaries. + m_cf_list->m_active_fileid.iterate(nullptr); + + // Write open transactions to the log. + r = toku_txn_manager_iter_over_live_txns( + m_logger->txn_manager, + log_open_txn, + this + ); + assert(r == 0); +} + +// +// Sets the pending bits of EVERY PAIR in the cachetable, regardless of +// whether the PAIR is clean or not. It will be the responsibility of +// end_checkpoint or client threads to simply clear the pending bit +// if the PAIR is clean. +// +// On entry and exit , the pair list's read list lock is grabbed, and +// both pending locks are grabbed +// +void checkpointer::turn_on_pending_bits() { + PAIR p = NULL; + uint32_t i; + for (i = 0, p = m_list->m_checkpoint_head; i < m_list->m_n_in_table; i++, p = p->clock_next) { + assert(!p->checkpoint_pending); + //Only include pairs belonging to cachefiles in the checkpoint + if (!p->cachefile->for_checkpoint) { + continue; + } + // Mark everything as pending a checkpoint + // + // The rule for the checkpoint_pending bit is as follows: + // - begin_checkpoint may set checkpoint_pending to true + // even though the pair lock on the node is not held. + // - any thread that wants to clear the pending bit must own + // the PAIR lock. Otherwise, + // we may end up clearing the pending bit before the + // current lock is ever released. 
+ p->checkpoint_pending = true; + if (m_list->m_pending_head) { + m_list->m_pending_head->pending_prev = p; + } + p->pending_next = m_list->m_pending_head; + p->pending_prev = NULL; + m_list->m_pending_head = p; + } + invariant(p == m_list->m_checkpoint_head); +} + +void checkpointer::add_background_job() { + int r = bjm_add_background_job(m_checkpoint_clones_bjm); + assert_zero(r); +} +void checkpointer::remove_background_job() { + bjm_remove_background_job(m_checkpoint_clones_bjm); +} + +void checkpointer::end_checkpoint(void (*testcallback_f)(void*), void* testextra) { + toku::scoped_malloc checkpoint_cfs_buf(m_checkpoint_num_files * sizeof(CACHEFILE)); + CACHEFILE *checkpoint_cfs = reinterpret_cast(checkpoint_cfs_buf.get()); + + this->fill_checkpoint_cfs(checkpoint_cfs); + this->checkpoint_pending_pairs(); + this->checkpoint_userdata(checkpoint_cfs); + // For testing purposes only. Dictionary has been fsync-ed to disk but log has not yet been written. + if (testcallback_f) { + testcallback_f(testextra); + } + this->log_end_checkpoint(); + this->end_checkpoint_userdata(checkpoint_cfs); + + // Delete list of cachefiles in the checkpoint, + this->remove_cachefiles(checkpoint_cfs); +} + +struct iterate_checkpoint_cfs { + CACHEFILE *checkpoint_cfs; + uint32_t checkpoint_num_files; + uint32_t curr_index; + iterate_checkpoint_cfs(CACHEFILE *cfs, uint32_t num_files) : + checkpoint_cfs(cfs), checkpoint_num_files(num_files), curr_index(0) { + } + static int fn(const CACHEFILE &cf, uint32_t UU(idx), struct iterate_checkpoint_cfs *info) { + if (cf->for_checkpoint) { + assert(info->curr_index < info->checkpoint_num_files); + info->checkpoint_cfs[info->curr_index] = cf; + info->curr_index++; + } + return 0; + } +}; + +void checkpointer::fill_checkpoint_cfs(CACHEFILE* checkpoint_cfs) { + struct iterate_checkpoint_cfs iterate(checkpoint_cfs, m_checkpoint_num_files); + + m_cf_list->read_lock(); + m_cf_list->m_active_fileid.iterate(&iterate); + assert(iterate.curr_index == 
m_checkpoint_num_files); + m_cf_list->read_unlock(); +} + +void checkpointer::checkpoint_pending_pairs() { + PAIR p; + m_list->read_list_lock(); + while ((p = m_list->m_pending_head)!=0) { + // TODO: Investigate why we move pending head outisde of the pending_pairs_remove() call. + m_list->m_pending_head = m_list->m_pending_head->pending_next; + m_list->pending_pairs_remove(p); + // if still pending, clear the pending bit and write out the node + pair_lock(p); + m_list->read_list_unlock(); + write_pair_for_checkpoint_thread(m_ev, p); + pair_unlock(p); + m_list->read_list_lock(); + } + assert(!m_list->m_pending_head); + m_list->read_list_unlock(); + bjm_wait_for_jobs_to_finish(m_checkpoint_clones_bjm); +} + +void checkpointer::checkpoint_userdata(CACHEFILE* checkpoint_cfs) { + // have just written data blocks, so next write the translation and header for each open dictionary + for (uint32_t i = 0; i < m_checkpoint_num_files; i++) { + CACHEFILE cf = checkpoint_cfs[i]; + assert(cf->for_checkpoint); + assert(cf->checkpoint_userdata); + toku_cachetable_set_checkpointing_user_data_status(1); + cf->checkpoint_userdata(cf, cf->fd, cf->userdata); + toku_cachetable_set_checkpointing_user_data_status(0); + } +} + +void checkpointer::log_end_checkpoint() { + if (m_logger) { + toku_log_end_checkpoint(m_logger, NULL, + 1, // want the end_checkpoint to be fsync'd + m_lsn_of_checkpoint_in_progress, + 0, + m_checkpoint_num_files, + m_checkpoint_num_txns); + toku_logger_note_checkpoint(m_logger, m_lsn_of_checkpoint_in_progress); + } +} + +void checkpointer::end_checkpoint_userdata(CACHEFILE* checkpoint_cfs) { + // everything has been written to file and fsynced + // ... 
call checkpoint-end function in block translator + // to free obsolete blocks on disk used by previous checkpoint + //cachefiles_in_checkpoint is protected by the checkpoint_safe_lock + for (uint32_t i = 0; i < m_checkpoint_num_files; i++) { + CACHEFILE cf = checkpoint_cfs[i]; + assert(cf->for_checkpoint); + assert(cf->end_checkpoint_userdata); + cf->end_checkpoint_userdata(cf, cf->fd, cf->userdata); + } +} + +// +// Deletes all the cachefiles in this checkpointers cachefile list. +// +void checkpointer::remove_cachefiles(CACHEFILE* checkpoint_cfs) { + // making this a while loop because note_unpin_by_checkpoint may destroy the cachefile + for (uint32_t i = 0; i < m_checkpoint_num_files; i++) { + CACHEFILE cf = checkpoint_cfs[i]; + // Checking for function existing so that this function + // can be called from cachetable tests. + assert(cf->for_checkpoint); + cf->for_checkpoint = false; + assert(cf->note_unpin_by_checkpoint); + // Clear the bit saying theis file is in the checkpoint. + cf->note_unpin_by_checkpoint(cf, cf->userdata); + } +} + + +//////////////////////////////////////////////////////// +// +// cachefiles list +// +static_assert(std::is_pod::value, "cachefile_list isn't POD"); + +void cachefile_list::init() { + m_next_filenum_to_use.fileid = 0; + m_next_hash_id_to_use = 0; + toku_pthread_rwlock_init(&m_lock, NULL); + m_active_filenum.create(); + m_active_fileid.create(); + m_stale_fileid.create(); +} + +void cachefile_list::destroy() { + m_active_filenum.destroy(); + m_active_fileid.destroy(); + m_stale_fileid.destroy(); + toku_pthread_rwlock_destroy(&m_lock); +} + +void cachefile_list::read_lock() { + toku_pthread_rwlock_rdlock(&m_lock); +} + +void cachefile_list::read_unlock() { + toku_pthread_rwlock_rdunlock(&m_lock); +} + +void cachefile_list::write_lock() { + toku_pthread_rwlock_wrlock(&m_lock); +} + +void cachefile_list::write_unlock() { + toku_pthread_rwlock_wrunlock(&m_lock); +} + +struct iterate_find_iname { + const char *iname_in_env; + 
CACHEFILE found_cf; + iterate_find_iname(const char *iname) : iname_in_env(iname), found_cf(nullptr) { } + static int fn(const CACHEFILE &cf, uint32_t UU(idx), struct iterate_find_iname *info) { + if (cf->fname_in_env && strcmp(cf->fname_in_env, info->iname_in_env) == 0) { + info->found_cf = cf; + return -1; + } + return 0; + } +}; + +int cachefile_list::cachefile_of_iname_in_env(const char *iname_in_env, CACHEFILE *cf) { + struct iterate_find_iname iterate(iname_in_env); + + read_lock(); + int r = m_active_fileid.iterate(&iterate); + if (iterate.found_cf != nullptr) { + assert(strcmp(iterate.found_cf->fname_in_env, iname_in_env) == 0); + *cf = iterate.found_cf; + r = 0; + } else { + r = ENOENT; + } + read_unlock(); + return r; +} + +static int cachefile_find_by_filenum(const CACHEFILE &a_cf, const FILENUM &b) { + const FILENUM a = a_cf->filenum; + if (a.fileid < b.fileid) { + return -1; + } else if (a.fileid == b.fileid) { + return 0; + } else { + return 1; + } +} + +int cachefile_list::cachefile_of_filenum(FILENUM filenum, CACHEFILE *cf) { + read_lock(); + int r = m_active_filenum.find_zero(filenum, cf, nullptr); + if (r == DB_NOTFOUND) { + r = ENOENT; + } else { + invariant_zero(r); + } + read_unlock(); + return r; +} + +static int cachefile_find_by_fileid(const CACHEFILE &a_cf, const struct fileid &b) { + return toku_fileid_cmp(a_cf->fileid, b); +} + +void cachefile_list::add_cf_unlocked(CACHEFILE cf) { + int r; + r = m_active_filenum.insert(cf, cf->filenum, nullptr); + assert_zero(r); + r = m_active_fileid.insert(cf, cf->fileid, nullptr); + assert_zero(r); +} + +void cachefile_list::add_stale_cf(CACHEFILE cf) { + write_lock(); + int r = m_stale_fileid.insert(cf, cf->fileid, nullptr); + assert_zero(r); + write_unlock(); +} + +void cachefile_list::remove_cf(CACHEFILE cf) { + write_lock(); + + uint32_t idx; + int r; + r = m_active_filenum.find_zero(cf->filenum, nullptr, &idx); + assert_zero(r); + r = m_active_filenum.delete_at(idx); + assert_zero(r); + + r = 
m_active_fileid.find_zero(cf->fileid, nullptr, &idx); + assert_zero(r); + r = m_active_fileid.delete_at(idx); + assert_zero(r); + + write_unlock(); +} + +void cachefile_list::remove_stale_cf_unlocked(CACHEFILE cf) { + uint32_t idx; + int r; + r = m_stale_fileid.find_zero(cf->fileid, nullptr, &idx); + assert_zero(r); + r = m_stale_fileid.delete_at(idx); + assert_zero(r); +} + +FILENUM cachefile_list::reserve_filenum() { + // taking a write lock because we are modifying next_filenum_to_use + write_lock(); + while (1) { + int r = m_active_filenum.find_zero(m_next_filenum_to_use, nullptr, nullptr); + if (r == 0) { + m_next_filenum_to_use.fileid++; + continue; + } + assert(r == DB_NOTFOUND); + break; + } + FILENUM filenum = m_next_filenum_to_use; + m_next_filenum_to_use.fileid++; + write_unlock(); + return filenum; +} + +uint32_t cachefile_list::get_new_hash_id_unlocked() { + uint32_t retval = m_next_hash_id_to_use; + m_next_hash_id_to_use++; + return retval; +} + +CACHEFILE cachefile_list::find_cachefile_unlocked(struct fileid* fileid) { + CACHEFILE cf = nullptr; + int r = m_active_fileid.find_zero(*fileid, &cf, nullptr); + if (r == 0) { + assert(!cf->unlink_on_close); + } + return cf; +} + +CACHEFILE cachefile_list::find_stale_cachefile_unlocked(struct fileid* fileid) { + CACHEFILE cf = nullptr; + int r = m_stale_fileid.find_zero(*fileid, &cf, nullptr); + if (r == 0) { + assert(!cf->unlink_on_close); + } + return cf; +} + +void cachefile_list::verify_unused_filenum(FILENUM filenum) { + int r = m_active_filenum.find_zero(filenum, nullptr, nullptr); + assert(r == DB_NOTFOUND); +} + +// returns true if some eviction ran, false otherwise +bool cachefile_list::evict_some_stale_pair(evictor* ev) { + write_lock(); + if (m_stale_fileid.size() == 0) { + write_unlock(); + return false; + } + + CACHEFILE stale_cf = nullptr; + int r = m_stale_fileid.fetch(0, &stale_cf); + assert_zero(r); + + // we should not have a cf in the stale list + // that does not have any pairs + PAIR p = 
stale_cf->cf_head; + paranoid_invariant(p != NULL); + evict_pair_from_cachefile(p); + + // now that we have evicted something, + // let's check if the cachefile is needed anymore + // + // it is not needed if the latest eviction caused + // the cf_head for that cf to become null + bool destroy_cf = stale_cf->cf_head == nullptr; + if (destroy_cf) { + remove_stale_cf_unlocked(stale_cf); + } + + write_unlock(); + + ev->remove_pair_attr(p->attr); + cachetable_free_pair(p); + if (destroy_cf) { + cachefile_destroy(stale_cf); + } + return true; +} + +void cachefile_list::free_stale_data(evictor* ev) { + write_lock(); + while (m_stale_fileid.size() != 0) { + CACHEFILE stale_cf = nullptr; + int r = m_stale_fileid.fetch(0, &stale_cf); + assert_zero(r); + + // we should not have a cf in the stale list + // that does not have any pairs + PAIR p = stale_cf->cf_head; + paranoid_invariant(p != NULL); + + evict_pair_from_cachefile(p); + ev->remove_pair_attr(p->attr); + cachetable_free_pair(p); + + // now that we have evicted something, + // let's check if the cachefile is needed anymore + if (stale_cf->cf_head == NULL) { + remove_stale_cf_unlocked(stale_cf); + cachefile_destroy(stale_cf); + } + } + write_unlock(); +} + +void __attribute__((__constructor__)) toku_cachetable_helgrind_ignore(void); +void +toku_cachetable_helgrind_ignore(void) { + TOKU_VALGRIND_HG_DISABLE_CHECKING(&cachetable_miss, sizeof cachetable_miss); + TOKU_VALGRIND_HG_DISABLE_CHECKING(&cachetable_misstime, sizeof cachetable_misstime); + TOKU_VALGRIND_HG_DISABLE_CHECKING(&cachetable_prefetches, sizeof cachetable_prefetches); + TOKU_VALGRIND_HG_DISABLE_CHECKING(&cachetable_evictions, sizeof cachetable_evictions); + TOKU_VALGRIND_HG_DISABLE_CHECKING(&cleaner_executions, sizeof cleaner_executions); + TOKU_VALGRIND_HG_DISABLE_CHECKING(&ct_status, sizeof ct_status); +} + +#undef STATUS_VALUE diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/cachetable/cachetable.h 
mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/cachetable/cachetable.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/cachetable/cachetable.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/cachetable/cachetable.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,649 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include + +#include "ft/logger/logger.h" +#include "ft/serialize/block_table.h" +#include "ft/txn/txn.h" +#include "util/minicron.h" + +// Maintain a cache mapping from cachekeys to values (void*) +// Some of the keys can be pinned. Don't pin too many or for too long. +// If the cachetable is too full, it will call the flush_callback() function with the key, the value, and the otherargs +// and then remove the key-value pair from the cache. +// The callback won't be any of the currently pinned keys. +// Also when flushing an object, the cachetable drops all references to it, +// so you may need to free() it. +// Note: The cachetable should use a common pool of memory, flushing things across cachetables. +// (The first implementation doesn't) +// If you pin something twice, you must unpin it twice. 
+// table_size is the initial size of the cache table hash table (in number of entries) +// size limit is the upper bound of the sum of size of the entries in the cache table (total number of bytes) + +typedef BLOCKNUM CACHEKEY; + +class checkpointer; +typedef class checkpointer *CHECKPOINTER; +typedef struct cachetable *CACHETABLE; +typedef struct cachefile *CACHEFILE; +typedef struct ctpair *PAIR; + +// This struct hold information about values stored in the cachetable. +// As one can tell from the names, we are probably violating an +// abstraction layer by placing names. +// +// The purpose of having this struct is to have a way for the +// cachetable to accumulate the some totals we are interested in. +// Breaking this abstraction layer by having these names was the +// easiest way. +// +typedef struct pair_attr_s { + long size; // size PAIR's value takes in memory + long nonleaf_size; // size if PAIR is a nonleaf node, 0 otherwise, used only for engine status + long leaf_size; // size if PAIR is a leaf node, 0 otherwise, used only for engine status + long rollback_size; // size of PAIR is a rollback node, 0 otherwise, used only for engine status + long cache_pressure_size; // amount PAIR contributes to cache pressure, is sum of buffer sizes and workdone counts + bool is_valid; +} PAIR_ATTR; + +static inline PAIR_ATTR make_pair_attr(long size) { + PAIR_ATTR result={ + .size = size, + .nonleaf_size = 0, + .leaf_size = 0, + .rollback_size = 0, + .cache_pressure_size = 0, + .is_valid = true + }; + return result; +} + +void toku_set_cleaner_period (CACHETABLE ct, uint32_t new_period); +uint32_t toku_get_cleaner_period_unlocked (CACHETABLE ct); +void toku_set_cleaner_iterations (CACHETABLE ct, uint32_t new_iterations); +uint32_t toku_get_cleaner_iterations (CACHETABLE ct); +uint32_t toku_get_cleaner_iterations_unlocked (CACHETABLE ct); + +// cachetable operations + +// create and initialize a cache table +// size_limit is the upper limit on the size of the size of 
the values in the table +// pass 0 if you want the default +int toku_cachetable_create(CACHETABLE *result, long size_limit, LSN initial_lsn, struct tokulogger *logger); + +// Create a new cachetable. +// Effects: a new cachetable is created and initialized. +// The cachetable pointer is stored into result. +// The sum of the sizes of the memory objects is set to size_limit, in whatever +// units make sense to the user of the cachetable. +// Returns: If success, returns 0 and result points to the new cachetable. Otherwise, +// returns an error number. + +// Returns a pointer to the checkpointer within the given cachetable. +CHECKPOINTER toku_cachetable_get_checkpointer(CACHETABLE ct); + +// What is the cachefile that goes with a particular filenum? +// During a transaction, we cannot reuse a filenum. +int toku_cachefile_of_filenum (CACHETABLE t, FILENUM filenum, CACHEFILE *cf); + +// What is the cachefile that goes with a particular iname (relative to env)? +// During a transaction, we cannot reuse an iname. +int toku_cachefile_of_iname_in_env (CACHETABLE ct, const char *iname_in_env, CACHEFILE *cf); + +// Get the iname (within the cwd) associated with the cachefile +// Return the filename +char *toku_cachefile_fname_in_cwd (CACHEFILE cf); + +void toku_cachetable_begin_checkpoint (CHECKPOINTER cp, struct tokulogger *logger); + +void toku_cachetable_end_checkpoint(CHECKPOINTER cp, struct tokulogger *logger, + void (*testcallback_f)(void*), void * testextra); + + +// Shuts down checkpoint thread +// Requires no locks be held that are taken by the checkpoint function +void toku_cachetable_minicron_shutdown(CACHETABLE ct); + +// Prepare to close the cachetable. This informs the cachetable that it is about to be closed +// so that it can tune its checkpoint resource use. +void toku_cachetable_prepare_close(CACHETABLE ct); + +// Close the cachetable. +// Effects: All of the memory objects are flushed to disk, and the cachetable is destroyed. 
+void toku_cachetable_close(CACHETABLE *ct); + +// Open a file and bind the file to a new cachefile object. (For use by test programs only.) +int toku_cachetable_openf(CACHEFILE *,CACHETABLE, const char *fname_in_env, int flags, mode_t mode); + +// Bind a file to a new cachefile object. +int toku_cachetable_openfd(CACHEFILE *,CACHETABLE, int fd, + const char *fname_relative_to_env); +int toku_cachetable_openfd_with_filenum (CACHEFILE *,CACHETABLE, int fd, + const char *fname_in_env, + FILENUM filenum, bool* was_open); + +// reserve a unique filenum +FILENUM toku_cachetable_reserve_filenum(CACHETABLE ct); + +// Effect: Reserve a fraction of the cachetable memory. +// Returns the amount reserved. +// To return the memory to the cachetable, call toku_cachetable_release_reserved_memory +// Requires 0 +#include +#include +#include +#include + +////////////////////////////////////////////////////////////////////////////// +// +// This file contains the classes and structs that make up the cachetable. +// The structs are: +// - cachefile +// - ctpair +// - pair_list +// - cachefile_list +// - checkpointer +// - evictor +// - cleaner +// +// The rest of this comment assumes familiarity with the locks used in these +// classes/structs and what the locks protect. 
Nevertheless, here is +// a list of the locks that we have: +// - pair_list->list_lock +// - pair_list->pending_lock_expensive +// - pair_list->pending_lock_cheap +// - cachefile_list->lock +// - PAIR->mutex +// - PAIR->value_rwlock +// - PAIR->disk_nb_mutex +// +// Here are rules for how the locks interact: +// - To grab any of the pair_list's locks, or the cachefile_list's lock, +// the cachetable must be in existence +// - To grab the PAIR mutex, we must know the PAIR will not dissappear: +// - the PAIR must be pinned (value_rwlock or disk_nb_mutex is held) +// - OR, the pair_list's list lock is held +// - As a result, to get rid of a PAIR from the pair_list, we must hold +// both the pair_list's list_lock and the PAIR's mutex +// - To grab PAIR->value_rwlock, we must hold the PAIR's mutex +// - To grab PAIR->disk_nb_mutex, we must hold the PAIR's mutex +// and hold PAIR->value_rwlock +// +// Now let's talk about ordering. Here is an order from outer to inner (top locks must be grabbed first) +// - pair_list->pending_lock_expensive +// - pair_list->list_lock +// - cachefile_list->lock +// - PAIR->mutex +// - pair_list->pending_lock_cheap <-- after grabbing this lock, +// NO other locks +// should be grabbed. +// - when grabbing PAIR->value_rwlock or PAIR->disk_nb_mutex, +// if the acquisition will not block, then it does not matter if any other locks held, +// BUT if the acquisition will block, then NO other locks may be held besides +// PAIR->mutex. +// +// HERE ARE TWO EXAMPLES: +// To pin a PAIR on a client thread, the following must be done: +// - first grab the list lock and find the PAIR +// - with the list lock grabbed, grab PAIR->mutex +// - with PAIR->mutex held: +// - release list lock +// - pin PAIR +// - with PAIR pinned, grab pending_lock_cheap, +// - copy and clear PAIR->checkpoint_pending, +// - resolve checkpointing if necessary +// - return to user. +// The list lock may be held while pinning the PAIR if +// the PAIR has no contention. 
Otherwise, we may have +// get a deadlock with another thread that has the PAIR pinned, +// tries to pin some other PAIR, and in doing so, grabs the list lock. +// +// To unpin a PAIR on a client thread: +// - because the PAIR is pinned, we don't need the pair_list's list_lock +// - so, simply acquire PAIR->mutex +// - unpin the PAIR +// - return +// +////////////////////////////////////////////////////////////////////////////// +class evictor; +class pair_list; + +/////////////////////////////////////////////////////////////////////////////// +// +// Maps to a file on disk. +// +struct cachefile { + // these next two fields are protected by cachetable's list lock + // they are managed whenever we add or remove a pair from + // the cachetable. As of Riddler, this linked list is only used to + // make cachetable_flush_cachefile more efficient + PAIR cf_head; // doubly linked list that is NOT circular + uint32_t num_pairs; // count on number of pairs in the cachetable belong to this cachefile + + bool for_checkpoint; //True if part of the in-progress checkpoint + + // If set and the cachefile closes, the file will be removed. + // Clients must not operate on the cachefile after setting this, + // nor attempt to open any cachefile with the same fname (dname) + // until this cachefile has been fully closed and unlinked. + bool unlink_on_close; + int fd; /* Bug: If a file is opened read-only, then it is stuck in read-only. If it is opened read-write, then subsequent writers can write to it too. 
*/ + CACHETABLE cachetable; + struct fileid fileid; + // the filenum is used as an identifer of the cachefile + // for logging and recovery + FILENUM filenum; + // number used to generate hashes for blocks in the cachefile + // used in toku_cachetable_hash + // this used to be the filenum.fileid, but now it is separate + uint32_t hash_id; + char *fname_in_env; /* Used for logging */ + + void *userdata; + void (*log_fassociate_during_checkpoint)(CACHEFILE cf, void *userdata); // When starting a checkpoint we must log all open files. + void (*close_userdata)(CACHEFILE cf, int fd, void *userdata, bool lsnvalid, LSN); // when closing the last reference to a cachefile, first call this function. + void (*free_userdata)(CACHEFILE cf, void *userdata); // when closing the last reference to a cachefile, first call this function. + void (*begin_checkpoint_userdata)(LSN lsn_of_checkpoint, void *userdata); // before checkpointing cachefiles call this function. + void (*checkpoint_userdata)(CACHEFILE cf, int fd, void *userdata); // when checkpointing a cachefile, call this function. + void (*end_checkpoint_userdata)(CACHEFILE cf, int fd, void *userdata); // after checkpointing cachefiles call this function. + void (*note_pin_by_checkpoint)(CACHEFILE cf, void *userdata); // add a reference to the userdata to prevent it from being removed from memory + void (*note_unpin_by_checkpoint)(CACHEFILE cf, void *userdata); // add a reference to the userdata to prevent it from being removed from memory + BACKGROUND_JOB_MANAGER bjm; +}; + + +/////////////////////////////////////////////////////////////////////////////// +// +// The pair represents the data stored in the cachetable. +// +struct ctpair { + // these fields are essentially constants. They do not change. 
+ CACHEFILE cachefile; + CACHEKEY key; + uint32_t fullhash; + CACHETABLE_FLUSH_CALLBACK flush_callback; + CACHETABLE_PARTIAL_EVICTION_EST_CALLBACK pe_est_callback; + CACHETABLE_PARTIAL_EVICTION_CALLBACK pe_callback; + CACHETABLE_CLEANER_CALLBACK cleaner_callback; + CACHETABLE_CLONE_CALLBACK clone_callback; + CACHETABLE_CHECKPOINT_COMPLETE_CALLBACK checkpoint_complete_callback; + void *write_extraargs; + + // access to these fields are protected by disk_nb_mutex + void* cloned_value_data; // cloned copy of value_data used for checkpointing + long cloned_value_size; // size of cloned_value_data, used for accounting of size_current + void* disk_data; // data used to fetch/flush value_data to and from disk. + + // access to these fields are protected by value_rwlock + void* value_data; // data used by client threads, FTNODEs and ROLLBACK_LOG_NODEs + PAIR_ATTR attr; + enum cachetable_dirty dirty; + + // protected by PAIR->mutex + uint32_t count; // clock count + uint32_t refcount; // if > 0, then this PAIR is referenced by + // callers to the cachetable, and therefore cannot + // be evicted + uint32_t num_waiting_on_refs; // number of threads waiting on refcount to go to zero + toku_cond_t refcount_wait; // cond used to wait for refcount to go to zero + + // locks + toku::frwlock value_rwlock; + struct nb_mutex disk_nb_mutex; // single writer, protects disk_data, is used for writing cloned nodes for checkpoint + toku_mutex_t* mutex; // gotten from the pair list + + // Access to checkpoint_pending is protected by two mechanisms, + // the value_rwlock and the pair_list's pending locks (expensive and cheap). + // checkpoint_pending may be true of false. + // Here are the rules for reading/modifying this bit. + // - To transition this field from false to true during begin_checkpoint, + // we must be holding both of the pair_list's pending locks. + // - To transition this field from true to false during end_checkpoint, + // we must be holding the value_rwlock. 
+ // - For a non-checkpoint thread to read the value, we must hold both the + // value_rwlock and one of the pair_list's pending locks + // - For the checkpoint thread to read the value, we must + // hold the value_rwlock + // + bool checkpoint_pending; // If this is on, then we have got to resolve checkpointing modifying it. + + // these are variables that are only used to transfer information to background threads + // we cache them here to avoid a malloc. In the future, we should investigate if this + // is necessary, as having these fields here is not technically necessary + long size_evicting_estimate; + evictor* ev; + pair_list* list; + + // A PAIR is stored in a pair_list (which happens to be PAIR->list). + // These variables are protected by the list lock in the pair_list + // + // clock_next,clock_prev represent a circular doubly-linked list. + PAIR clock_next,clock_prev; // In clock. + PAIR hash_chain; + + // pending_next,pending_next represent a non-circular doubly-linked list. + PAIR pending_next; + PAIR pending_prev; + + // cf_next, cf_prev represent a non-circular doubly-linked list. + // entries in linked list for PAIRs in a cachefile, these are protected + // by the list lock of the PAIR's pair_list. They are used to make + // cachetable_flush_cachefile cheaper so that we don't need + // to search the entire cachetable to find a particular cachefile's + // PAIRs + PAIR cf_next; + PAIR cf_prev; +}; + +// +// This initializes the fields and members of the pair. +// +void pair_init(PAIR p, + CACHEFILE cachefile, + CACHEKEY key, + void *value, + PAIR_ATTR attr, + enum cachetable_dirty dirty, + uint32_t fullhash, + CACHETABLE_WRITE_CALLBACK write_callback, + evictor *ev, + pair_list *list); + + +/////////////////////////////////////////////////////////////////////////////// +// +// The pair list maintains the set of PAIR's that make up +// the cachetable. 
+// +class pair_list { +public: + // + // the following fields are protected by the list lock + // + uint32_t m_n_in_table; // number of pairs in the hash table + uint32_t m_table_size; // number of buckets in the hash table + uint32_t m_num_locks; + PAIR *m_table; // hash table + toku_mutex_aligned_t *m_mutexes; + // + // The following fields are the heads of various linked lists. + // They also protected by the list lock, but their + // usage is not as straightforward. For each of them, + // only ONE thread is allowed iterate over them with + // a read lock on the list lock. All other threads + // that want to modify elements in the lists or iterate over + // the lists must hold the write list lock. Here is the + // association between what threads may hold a read lock + // on the list lock while iterating: + // - clock_head -> eviction thread (evictor) + // - cleaner_head -> cleaner thread (cleaner) + // - pending_head -> checkpoint thread (checkpointer) + // + PAIR m_clock_head; // of clock . head is the next thing to be up for decrement. + PAIR m_cleaner_head; // for cleaner thread. head is the next thing to look at for possible cleaning. + PAIR m_checkpoint_head; // for begin checkpoint to iterate over PAIRs and mark as pending_checkpoint + PAIR m_pending_head; // list of pairs marked with checkpoint_pending + + // this field is public so we are still POD + + // usage of this lock is described above + toku_pthread_rwlock_t m_list_lock; + // + // these locks are the "pending locks" referenced + // in comments about PAIR->checkpoint_pending. There + // are two of them, but both serve the same purpose, which + // is to protect the transition of a PAIR's checkpoint pending + // value from false to true during begin_checkpoint. + // We use two locks, because threads that want to read the + // checkpoint_pending value may hold a lock for varying periods of time. 
+ // Threads running eviction may need to protect checkpoint_pending + // while writing a node to disk, which is an expensive operation, + // so it uses pending_lock_expensive. Client threads that + // want to pin PAIRs will want to protect checkpoint_pending + // just long enough to read the value and wipe it out. This is + // a cheap operation, and as a result, uses pending_lock_cheap. + // + // By having two locks, and making begin_checkpoint first + // grab pending_lock_expensive and then pending_lock_cheap, + // we ensure that threads that want to pin nodes can grab + // only pending_lock_cheap, and never block behind threads + // holding pending_lock_expensive and writing a node out to disk + // + toku_pthread_rwlock_t m_pending_lock_expensive; + toku_pthread_rwlock_t m_pending_lock_cheap; + void init(); + void destroy(); + void evict_completely(PAIR pair); + void evict_from_cachetable(PAIR pair); + void evict_from_cachefile(PAIR pair); + void add_to_cachetable_only(PAIR p); + void put(PAIR pair); + PAIR find_pair(CACHEFILE file, CACHEKEY key, uint32_t hash); + void pending_pairs_remove (PAIR p); + void verify(); + void get_state(int *num_entries, int *hash_size); + void read_list_lock(); + void read_list_unlock(); + void write_list_lock(); + void write_list_unlock(); + void read_pending_exp_lock(); + void read_pending_exp_unlock(); + void write_pending_exp_lock(); + void write_pending_exp_unlock(); + void read_pending_cheap_lock(); + void read_pending_cheap_unlock(); + void write_pending_cheap_lock(); + void write_pending_cheap_unlock(); + toku_mutex_t* get_mutex_for_pair(uint32_t fullhash); + void pair_lock_by_fullhash(uint32_t fullhash); + void pair_unlock_by_fullhash(uint32_t fullhash); + +private: + void pair_remove (PAIR p); + void remove_from_hash_chain(PAIR p); + void add_to_cf_list (PAIR p); + void add_to_clock (PAIR p); + void add_to_hash_chain(PAIR p); +}; + +/////////////////////////////////////////////////////////////////////////////// +// +// 
Wrapper for the head of our cachefile list. +// +class cachefile_list { +public: + void init(); + void destroy(); + void read_lock(); + void read_unlock(); + void write_lock(); + void write_unlock(); + int cachefile_of_iname_in_env(const char *iname_in_env, CACHEFILE *cf); + int cachefile_of_filenum(FILENUM filenum, CACHEFILE *cf); + void add_cf_unlocked(CACHEFILE newcf); + void add_stale_cf(CACHEFILE newcf); + void remove_cf(CACHEFILE cf); + void remove_stale_cf_unlocked(CACHEFILE cf); + FILENUM reserve_filenum(); + uint32_t get_new_hash_id_unlocked(); + CACHEFILE find_cachefile_unlocked(struct fileid* fileid); + CACHEFILE find_stale_cachefile_unlocked(struct fileid* fileid); + void verify_unused_filenum(FILENUM filenum); + bool evict_some_stale_pair(evictor* ev); + void free_stale_data(evictor* ev); + // access to these fields are protected by the lock + FILENUM m_next_filenum_to_use; + uint32_t m_next_hash_id_to_use; + toku_pthread_rwlock_t m_lock; // this field is publoc so we are still POD + toku::omt m_active_filenum; + toku::omt m_active_fileid; + toku::omt m_stale_fileid; +private: + CACHEFILE find_cachefile_in_list_unlocked(CACHEFILE start, struct fileid* fileid); +}; + + +/////////////////////////////////////////////////////////////////////////////// +// +// The checkpointer handles starting and finishing checkpoints of the +// cachetable's data. 
+// +class checkpointer { +public: + int init(pair_list *_pl, TOKULOGGER _logger, evictor *_ev, cachefile_list *files); + void destroy(); + void set_checkpoint_period(uint32_t new_period); + uint32_t get_checkpoint_period(); + int shutdown(); + bool has_been_shutdown(); + void begin_checkpoint(); + void add_background_job(); + void remove_background_job(); + void end_checkpoint(void (*testcallback_f)(void*), void* testextra); + TOKULOGGER get_logger(); + // used during begin_checkpoint + void increment_num_txns(); +private: + uint32_t m_checkpoint_num_txns; // how many transactions are in the checkpoint + TOKULOGGER m_logger; + LSN m_lsn_of_checkpoint_in_progress; + uint32_t m_checkpoint_num_files; // how many cachefiles are in the checkpoint + struct minicron m_checkpointer_cron; // the periodic checkpointing thread + cachefile_list *m_cf_list; + pair_list *m_list; + evictor *m_ev; + bool m_checkpointer_cron_init; + bool m_checkpointer_init; + + // variable used by the checkpoint thread to know + // when all work induced by cloning on client threads is done + BACKGROUND_JOB_MANAGER m_checkpoint_clones_bjm; + // private methods for begin_checkpoint + void update_cachefiles(); + void log_begin_checkpoint(); + void turn_on_pending_bits(); + // private methods for end_checkpoint + void fill_checkpoint_cfs(CACHEFILE* checkpoint_cfs); + void checkpoint_pending_pairs(); + void checkpoint_userdata(CACHEFILE* checkpoint_cfs); + void log_end_checkpoint(); + void end_checkpoint_userdata(CACHEFILE* checkpoint_cfs); + void remove_cachefiles(CACHEFILE* checkpoint_cfs); + + // Unit test struct needs access to private members. + friend struct checkpointer_test; +}; + +// +// This is how often we want the eviction thread +// to run, in seconds. +// +const int EVICTION_PERIOD = 1; + +/////////////////////////////////////////////////////////////////////////////// +// +// The evictor handles the removal of pairs from the pair list/cachetable. 
+// +class evictor { +public: + int init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, KIBBUTZ _kibbutz, uint32_t eviction_period); + void destroy(); + void add_pair_attr(PAIR_ATTR attr); + void remove_pair_attr(PAIR_ATTR attr); + void change_pair_attr(PAIR_ATTR old_attr, PAIR_ATTR new_attr); + void add_cloned_data_size(long size); + void remove_cloned_data_size(long size); + uint64_t reserve_memory(double fraction, uint64_t upper_bound); + void release_reserved_memory(uint64_t reserved_memory); + void run_eviction_thread(); + void do_partial_eviction(PAIR p); + void evict_pair(PAIR p, bool checkpoint_pending); + void wait_for_cache_pressure_to_subside(); + void signal_eviction_thread(); + bool should_client_thread_sleep(); + bool should_client_wake_eviction_thread(); + // function needed for testing + void get_state(long *size_current_ptr, long *size_limit_ptr); + void fill_engine_status(); +private: + void add_to_size_current(long size); + void remove_from_size_current(long size); + void run_eviction(); + bool run_eviction_on_pair(PAIR p); + void try_evict_pair(PAIR p); + void decrease_size_evicting(long size_evicting_estimate); + bool should_sleeping_clients_wakeup(); + bool eviction_needed(); + + // We have some intentional races with these variables because we're ok with reading something a little bit old. + // Provide some hooks for reading variables in an unsafe way so that there are function names we can stick in a valgrind suppression. 
+ int64_t unsafe_read_size_current(void) const; + int64_t unsafe_read_size_evicting(void) const; + + pair_list* m_pl; + cachefile_list* m_cf_list; + int64_t m_size_current; // the sum of the sizes of the pairs in the cachetable + int64_t m_size_cloned_data; // stores amount of cloned data we have, only used for engine status + // changes to these two values are protected + // by ev_thread_lock + int64_t m_size_reserved; // How much memory is reserved (e.g., by the loader) + int64_t m_size_evicting; // the sum of the sizes of the pairs being written + + // these are constants + int64_t m_low_size_watermark; // target max size of cachetable that eviction thread aims for + int64_t m_low_size_hysteresis; // if cachetable grows to this size, client threads wake up eviction thread upon adding data + int64_t m_high_size_watermark; // if cachetable grows to this size, client threads sleep upon adding data + int64_t m_high_size_hysteresis; // if > cachetable size, then sleeping client threads may wake up + + // used to calculate random numbers + struct random_data m_random_data; + char m_random_statebuf[64]; + + // mutex that protects fields listed immedietly below + toku_mutex_t m_ev_thread_lock; + // the eviction thread + toku_pthread_t m_ev_thread; + // condition variable that controls the sleeping period + // of the eviction thread + toku_cond_t m_ev_thread_cond; + // number of client threads that are currently sleeping + // due to an over-subscribed cachetable + uint32_t m_num_sleepers; + // states if the eviction thread should run. 
set to true + // in init, set to false during destroy + bool m_run_thread; + // bool that states if the eviction thread is currently running + bool m_ev_thread_is_running; + // period which the eviction thread sleeps + uint32_t m_period_in_seconds; + // condition variable on which client threads wait on when sleeping + // due to an over-subscribed cachetable + toku_cond_t m_flow_control_cond; + + // variables for engine status + PARTITIONED_COUNTER m_size_nonleaf; + PARTITIONED_COUNTER m_size_leaf; + PARTITIONED_COUNTER m_size_rollback; + PARTITIONED_COUNTER m_size_cachepressure; + PARTITIONED_COUNTER m_wait_pressure_count; + PARTITIONED_COUNTER m_wait_pressure_time; + PARTITIONED_COUNTER m_long_wait_pressure_count; + PARTITIONED_COUNTER m_long_wait_pressure_time; + + KIBBUTZ m_kibbutz; + + // this variable is ONLY used for testing purposes + uint64_t m_num_eviction_thread_runs; + + bool m_ev_thread_init; + bool m_evictor_init; + + friend class evictor_test_helpers; + friend class evictor_unit_test; +}; + +/////////////////////////////////////////////////////////////////////////////// +// +// Iterates over the clean head in the pair list, calling the cleaner +// callback on each node in that list. 
+// +class cleaner { +public: + int init(uint32_t cleaner_iterations, pair_list* _pl, CACHETABLE _ct); + void destroy(void); + uint32_t get_iterations(void); + void set_iterations(uint32_t new_iterations); + uint32_t get_period_unlocked(void); + void set_period(uint32_t new_period); + int run_cleaner(void); + +private: + pair_list* m_pl; + CACHETABLE m_ct; + struct minicron m_cleaner_cron; // the periodic cleaner thread + uint32_t m_cleaner_iterations; // how many times to run the cleaner per + // cleaner period (minicron has a + // minimum period of 1s so if you want + // more frequent cleaner runs you must + // use this) + bool m_cleaner_cron_init; + bool m_cleaner_init; +}; + +/////////////////////////////////////////////////////////////////////////////// +// +// The cachetable is as close to an ENV as we get. +// +struct cachetable { + pair_list list; + cleaner cl; + evictor ev; + checkpointer cp; + cachefile_list cf_list; + + KIBBUTZ client_kibbutz; // pool of worker threads and jobs to do asynchronously for the client. 
+ KIBBUTZ ct_kibbutz; // pool of worker threads and jobs to do asynchronously for the cachetable + KIBBUTZ checkpointing_kibbutz; // small pool for checkpointing cloned pairs + + char *env_dir; +}; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/cachetable/checkpoint.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/cachetable/checkpoint.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/cachetable/checkpoint.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/cachetable/checkpoint.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,405 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. 
+ +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +#ident "$Id$" + +/*********** + * The purpose of this file is to implement the high-level logic for + * taking a checkpoint. + * + * There are three locks used for taking a checkpoint. They are listed below. + * + * NOTE: The reader-writer locks may be held by either multiple clients + * or the checkpoint function. (The checkpoint function has the role + * of the writer, the clients have the reader roles.) + * + * - multi_operation_lock + * This is a new reader-writer lock. + * This lock is held by the checkpoint function only for as long as is required to + * to set all the "pending" bits and to create the checkpoint-in-progress versions + * of the header and translation table (btt). + * The following operations must take the multi_operation_lock: + * - any set of operations that must be atomic with respect to begin checkpoint + * + * - checkpoint_safe_lock + * This is a new reader-writer lock. + * This lock is held for the entire duration of the checkpoint. 
+ * It is used to prevent more than one checkpoint from happening at a time + * (the checkpoint function is non-re-entrant), and to prevent certain operations + * that should not happen during a checkpoint. + * The following operations must take the checkpoint_safe lock: + * - delete a dictionary + * - rename a dictionary + * The application can use this lock to disable checkpointing during other sensitive + * operations, such as making a backup copy of the database. + * + * Once the "pending" bits are set and the snapshots are taken of the header and btt, + * most normal database operations are permitted to resume. + * + * + * + *****/ + +#include + +#include + +#include "portability/toku_portability.h" +#include "portability/toku_atomic.h" + +#include "ft/cachetable/cachetable.h" +#include "ft/cachetable/checkpoint.h" +#include "ft/ft.h" +#include "ft/logger/log-internal.h" +#include "ft/logger/recover.h" +#include "util/frwlock.h" +#include "util/status.h" + +/////////////////////////////////////////////////////////////////////////////////// +// Engine status +// +// Status is intended for display to humans to help understand system behavior. +// It does not need to be perfectly thread-safe. + +static CHECKPOINT_STATUS_S cp_status; + +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(cp_status, k, c, t, "checkpoint: " l, inc) + +static void +status_init(void) { + // Note, this function initializes the keyname, type, and legend fields. + // Value fields are initialized to zero by compiler. 
+ + STATUS_INIT(CP_PERIOD, CHECKPOINT_PERIOD, UINT64, "period", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CP_FOOTPRINT, nullptr, UINT64, "footprint", TOKU_ENGINE_STATUS); + STATUS_INIT(CP_TIME_LAST_CHECKPOINT_BEGIN, CHECKPOINT_LAST_BEGAN, UNIXTIME, "last checkpoint began ", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE, CHECKPOINT_LAST_COMPLETE_BEGAN, UNIXTIME, "last complete checkpoint began ", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CP_TIME_LAST_CHECKPOINT_END, CHECKPOINT_LAST_COMPLETE_ENDED, UNIXTIME, "last complete checkpoint ended", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CP_TIME_CHECKPOINT_DURATION, CHECKPOINT_DURATION, UINT64, "time spent during checkpoint (begin and end phases)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CP_TIME_CHECKPOINT_DURATION_LAST, CHECKPOINT_DURATION_LAST, UINT64, "time spent during last checkpoint (begin and end phases)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CP_LAST_LSN, nullptr, UINT64, "last complete checkpoint LSN", TOKU_ENGINE_STATUS); + STATUS_INIT(CP_CHECKPOINT_COUNT, CHECKPOINT_TAKEN, UINT64, "checkpoints taken ", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CP_CHECKPOINT_COUNT_FAIL, CHECKPOINT_FAILED, UINT64, "checkpoints failed", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CP_WAITERS_NOW, nullptr, UINT64, "waiters now", TOKU_ENGINE_STATUS); + STATUS_INIT(CP_WAITERS_MAX, nullptr, UINT64, "waiters max", TOKU_ENGINE_STATUS); + STATUS_INIT(CP_CLIENT_WAIT_ON_MO, nullptr, UINT64, "non-checkpoint client wait on mo lock", TOKU_ENGINE_STATUS); + STATUS_INIT(CP_CLIENT_WAIT_ON_CS, nullptr, UINT64, "non-checkpoint client wait on cs lock", TOKU_ENGINE_STATUS); + + STATUS_INIT(CP_BEGIN_TIME, CHECKPOINT_BEGIN_TIME, UINT64, "checkpoint begin time", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CP_LONG_BEGIN_COUNT, CHECKPOINT_LONG_BEGIN_COUNT, UINT64, "long checkpoint begin count", 
TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CP_LONG_BEGIN_TIME, CHECKPOINT_LONG_BEGIN_TIME, UINT64, "long checkpoint begin time", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + + cp_status.initialized = true; +} +#undef STATUS_INIT + +#define STATUS_VALUE(x) cp_status.status[x].value.num + +void +toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS statp) { + if (!cp_status.initialized) + status_init(); + STATUS_VALUE(CP_PERIOD) = toku_get_checkpoint_period_unlocked(ct); + *statp = cp_status; +} + + + +static LSN last_completed_checkpoint_lsn; + +static toku_mutex_t checkpoint_safe_mutex; +static toku::frwlock checkpoint_safe_lock; +static toku_pthread_rwlock_t multi_operation_lock; +static toku_pthread_rwlock_t low_priority_multi_operation_lock; + +static bool initialized = false; // sanity check +static volatile bool locked_mo = false; // true when the multi_operation write lock is held (by checkpoint) +static volatile bool locked_cs = false; // true when the checkpoint_safe write lock is held (by checkpoint) +static volatile uint64_t toku_checkpoint_long_threshold = 1000000; + +// Note following static functions are called from checkpoint internal logic only, +// and use the "writer" calls for locking and unlocking. 
+ +static void +multi_operation_lock_init(void) { + pthread_rwlockattr_t attr; + pthread_rwlockattr_init(&attr); +#if defined(HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP) + pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP); +#else + // TODO: need to figure out how to make writer-preferential rwlocks + // happen on osx +#endif + toku_pthread_rwlock_init(&multi_operation_lock, &attr); + toku_pthread_rwlock_init(&low_priority_multi_operation_lock, &attr); + pthread_rwlockattr_destroy(&attr); + locked_mo = false; +} + +static void +multi_operation_lock_destroy(void) { + toku_pthread_rwlock_destroy(&multi_operation_lock); + toku_pthread_rwlock_destroy(&low_priority_multi_operation_lock); +} + +static void +multi_operation_checkpoint_lock(void) { + toku_pthread_rwlock_wrlock(&low_priority_multi_operation_lock); + toku_pthread_rwlock_wrlock(&multi_operation_lock); + locked_mo = true; +} + +static void +multi_operation_checkpoint_unlock(void) { + locked_mo = false; + toku_pthread_rwlock_wrunlock(&multi_operation_lock); + toku_pthread_rwlock_wrunlock(&low_priority_multi_operation_lock); +} + +static void +checkpoint_safe_lock_init(void) { + toku_mutex_init(&checkpoint_safe_mutex, NULL); + checkpoint_safe_lock.init(&checkpoint_safe_mutex); + locked_cs = false; +} + +static void +checkpoint_safe_lock_destroy(void) { + checkpoint_safe_lock.deinit(); + toku_mutex_destroy(&checkpoint_safe_mutex); +} + +static void +checkpoint_safe_checkpoint_lock(void) { + toku_mutex_lock(&checkpoint_safe_mutex); + checkpoint_safe_lock.write_lock(false); + toku_mutex_unlock(&checkpoint_safe_mutex); + locked_cs = true; +} + +static void +checkpoint_safe_checkpoint_unlock(void) { + locked_cs = false; + toku_mutex_lock(&checkpoint_safe_mutex); + checkpoint_safe_lock.write_unlock(); + toku_mutex_unlock(&checkpoint_safe_mutex); +} + +// toku_xxx_client_(un)lock() functions are only called from client code, +// never from checkpoint code, and use the "reader" interface to the 
lock functions. + +void +toku_multi_operation_client_lock(void) { + if (locked_mo) + (void) toku_sync_fetch_and_add(&STATUS_VALUE(CP_CLIENT_WAIT_ON_MO), 1); + toku_pthread_rwlock_rdlock(&multi_operation_lock); +} + +void +toku_multi_operation_client_unlock(void) { + toku_pthread_rwlock_rdunlock(&multi_operation_lock); +} + +void toku_low_priority_multi_operation_client_lock(void) { + toku_pthread_rwlock_rdlock(&low_priority_multi_operation_lock); +} + +void toku_low_priority_multi_operation_client_unlock(void) { + toku_pthread_rwlock_rdunlock(&low_priority_multi_operation_lock); +} + +void +toku_checkpoint_safe_client_lock(void) { + if (locked_cs) + (void) toku_sync_fetch_and_add(&STATUS_VALUE(CP_CLIENT_WAIT_ON_CS), 1); + toku_mutex_lock(&checkpoint_safe_mutex); + checkpoint_safe_lock.read_lock(); + toku_mutex_unlock(&checkpoint_safe_mutex); + toku_multi_operation_client_lock(); +} + +void +toku_checkpoint_safe_client_unlock(void) { + toku_mutex_lock(&checkpoint_safe_mutex); + checkpoint_safe_lock.read_unlock(); + toku_mutex_unlock(&checkpoint_safe_mutex); + toku_multi_operation_client_unlock(); +} + +// Initialize the checkpoint mechanism, must be called before any client operations. 
+void +toku_checkpoint_init(void) { + multi_operation_lock_init(); + checkpoint_safe_lock_init(); + initialized = true; +} + +void +toku_checkpoint_destroy(void) { + multi_operation_lock_destroy(); + checkpoint_safe_lock_destroy(); + initialized = false; +} + +#define SET_CHECKPOINT_FOOTPRINT(x) STATUS_VALUE(CP_FOOTPRINT) = footprint_offset + x + + +// Take a checkpoint of all currently open dictionaries +int +toku_checkpoint(CHECKPOINTER cp, TOKULOGGER logger, + void (*callback_f)(void*), void * extra, + void (*callback2_f)(void*), void * extra2, + checkpoint_caller_t caller_id) { + int footprint_offset = (int) caller_id * 1000; + + assert(initialized); + + (void) toku_sync_fetch_and_add(&STATUS_VALUE(CP_WAITERS_NOW), 1); + checkpoint_safe_checkpoint_lock(); + (void) toku_sync_fetch_and_sub(&STATUS_VALUE(CP_WAITERS_NOW), 1); + + if (STATUS_VALUE(CP_WAITERS_NOW) > STATUS_VALUE(CP_WAITERS_MAX)) + STATUS_VALUE(CP_WAITERS_MAX) = STATUS_VALUE(CP_WAITERS_NOW); // threadsafe, within checkpoint_safe lock + + SET_CHECKPOINT_FOOTPRINT(10); + multi_operation_checkpoint_lock(); + SET_CHECKPOINT_FOOTPRINT(20); + toku_ft_open_close_lock(); + + SET_CHECKPOINT_FOOTPRINT(30); + STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN) = time(NULL); + uint64_t t_checkpoint_begin_start = toku_current_time_microsec(); + toku_cachetable_begin_checkpoint(cp, logger); + uint64_t t_checkpoint_begin_end = toku_current_time_microsec(); + + toku_ft_open_close_unlock(); + multi_operation_checkpoint_unlock(); + + SET_CHECKPOINT_FOOTPRINT(40); + if (callback_f) { + callback_f(extra); // callback is called with checkpoint_safe_lock still held + } + toku_cachetable_end_checkpoint(cp, logger, callback2_f, extra2); + + SET_CHECKPOINT_FOOTPRINT(50); + if (logger) { + last_completed_checkpoint_lsn = logger->last_completed_checkpoint_lsn; + toku_logger_maybe_trim_log(logger, last_completed_checkpoint_lsn); + STATUS_VALUE(CP_LAST_LSN) = last_completed_checkpoint_lsn.lsn; + } + + SET_CHECKPOINT_FOOTPRINT(60); + 
STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_END) = time(NULL); + STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE) = STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN); + STATUS_VALUE(CP_CHECKPOINT_COUNT)++; + uint64_t duration = t_checkpoint_begin_end - t_checkpoint_begin_start; + STATUS_VALUE(CP_BEGIN_TIME) += duration; + if (duration >= toku_checkpoint_long_threshold) { + STATUS_VALUE(CP_LONG_BEGIN_TIME) += duration; + STATUS_VALUE(CP_LONG_BEGIN_COUNT) += 1; + } + STATUS_VALUE(CP_TIME_CHECKPOINT_DURATION) += (uint64_t) ((time_t) STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_END)) - ((time_t) STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN)); + STATUS_VALUE(CP_TIME_CHECKPOINT_DURATION_LAST) = (uint64_t) ((time_t) STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_END)) - ((time_t) STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN)); + STATUS_VALUE(CP_FOOTPRINT) = 0; + + checkpoint_safe_checkpoint_unlock(); + return 0; +} + +#include +void __attribute__((__constructor__)) toku_checkpoint_helgrind_ignore(void); +void +toku_checkpoint_helgrind_ignore(void) { + TOKU_VALGRIND_HG_DISABLE_CHECKING(&cp_status, sizeof cp_status); + TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_mo, sizeof locked_mo); + TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_cs, sizeof locked_cs); +} + +#undef SET_CHECKPOINT_FOOTPRINT +#undef STATUS_VALUE diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/cachetable/checkpoint.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/cachetable/checkpoint.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/cachetable/checkpoint.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/cachetable/checkpoint.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,200 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free 
Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. 
+ + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." +#ident "$Id$" + +#include + +#include "ft/cachetable/cachetable.h" + +//Effect: Change [end checkpoint (n) - begin checkpoint (n+1)] delay to +// new_period seconds. 0 means disable. +void toku_set_checkpoint_period(CACHETABLE ct, uint32_t new_period); + +uint32_t toku_get_checkpoint_period_unlocked(CACHETABLE ct); + + +/****** + * + * NOTE: checkpoint_safe_lock is highest level lock + * multi_operation_lock is next level lock + * ydb_big_lock is next level lock + * + * Locks must always be taken in this sequence (highest level first). + * + */ + + +/****** + * Client code must hold the checkpoint_safe lock during the following operations: + * - delete a dictionary via DB->remove + * - delete a dictionary via DB_TXN->abort(txn) (where txn created a dictionary) + * - rename a dictionary //TODO: Handlerton rename needs to take this + * //TODO: Handlerton rename needs to be recoded for transaction recovery + *****/ + +void toku_checkpoint_safe_client_lock(void); + +void toku_checkpoint_safe_client_unlock(void); + + + +/****** + * These functions are called from the ydb level. + * Client code must hold the multi_operation lock during the following operations: + * - insertion into multiple indexes + * - replace into (simultaneous delete/insert on a single key) + *****/ + +void toku_multi_operation_client_lock(void); +void toku_low_priority_multi_operation_client_lock(void); + +void toku_multi_operation_client_unlock(void); +void toku_low_priority_multi_operation_client_unlock(void); + + +// Initialize the checkpoint mechanism, must be called before any client operations. +// Must pass in function pointers to take/release ydb lock. 
+void toku_checkpoint_init(void); + +void toku_checkpoint_destroy(void); + +typedef enum {SCHEDULED_CHECKPOINT = 0, // "normal" checkpoint taken on checkpoint thread + CLIENT_CHECKPOINT = 1, // induced by client, such as FLUSH LOGS or SAVEPOINT + INDEXER_CHECKPOINT = 2, + STARTUP_CHECKPOINT = 3, + UPGRADE_CHECKPOINT = 4, + RECOVERY_CHECKPOINT = 5, + SHUTDOWN_CHECKPOINT = 6} checkpoint_caller_t; + +// Take a checkpoint of all currently open dictionaries +// Callbacks are called during checkpoint procedure while checkpoint_safe lock is still held. +// Callbacks are primarily intended for use in testing. +// caller_id identifies why the checkpoint is being taken. +int toku_checkpoint(CHECKPOINTER cp, struct tokulogger *logger, + void (*callback_f)(void *extra), void *extra, + void (*callback2_f)(void *extra2), void *extra2, + checkpoint_caller_t caller_id); + +/****** + * These functions are called from the ydb level. + * They return status information and have no side effects. + * Some status information may be incorrect because no locks are taken to collect status. + * (If checkpoint is in progress, it may overwrite status info while it is being read.) 
+ *****/ +typedef enum { + CP_PERIOD, + CP_FOOTPRINT, + CP_TIME_LAST_CHECKPOINT_BEGIN, + CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE, + CP_TIME_LAST_CHECKPOINT_END, + CP_TIME_CHECKPOINT_DURATION, + CP_TIME_CHECKPOINT_DURATION_LAST, + CP_LAST_LSN, + CP_CHECKPOINT_COUNT, + CP_CHECKPOINT_COUNT_FAIL, + CP_WAITERS_NOW, // how many threads are currently waiting for the checkpoint_safe lock to perform a checkpoint + CP_WAITERS_MAX, // max threads ever simultaneously waiting for the checkpoint_safe lock to perform a checkpoint + CP_CLIENT_WAIT_ON_MO, // how many times a client thread waited to take the multi_operation lock, not for checkpoint + CP_CLIENT_WAIT_ON_CS, // how many times a client thread waited for the checkpoint_safe lock, not for checkpoint + CP_BEGIN_TIME, + CP_LONG_BEGIN_TIME, + CP_LONG_BEGIN_COUNT, + CP_STATUS_NUM_ROWS // number of rows in this status array. must be last. +} cp_status_entry; + +typedef struct { + bool initialized; + TOKU_ENGINE_STATUS_ROW_S status[CP_STATUS_NUM_ROWS]; +} CHECKPOINT_STATUS_S, *CHECKPOINT_STATUS; + +void toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS stat); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/cachetable.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/cachetable.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/cachetable.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/cachetable.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,4981 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - 
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. 
- - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include -#include -#include -#include -#include -#include "cachetable.h" -#include -#include "checkpoint.h" -#include "log-internal.h" -#include "cachetable-internal.h" -#include -#include -#include -#include -#include -#include -#include -#include - -/////////////////////////////////////////////////////////////////////////////////// -// Engine status -// -// Status is intended for display to humans to help understand system behavior. -// It does not need to be perfectly thread-safe. - -// These should be in the cachetable object, but we make them file-wide so that gdb can get them easily. -// They were left here after engine status cleanup (#2949, rather than moved into the status struct) -// so they are still easily available to the debugger and to save lots of typing. -static uint64_t cachetable_miss; -static uint64_t cachetable_misstime; // time spent waiting for disk read -static uint64_t cachetable_prefetches; // how many times has a block been prefetched into the cachetable? -static uint64_t cachetable_evictions; -static uint64_t cleaner_executions; // number of times the cleaner thread's loop has executed - -static CACHETABLE_STATUS_S ct_status; - -// Note, toku_cachetable_get_status() is below, after declaration of cachetable. - -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ct_status, k, c, t, "cachetable: " l, inc) - -static void -status_init(void) { - // Note, this function initializes the keyname, type, and legend fields. - // Value fields are initialized to zero by compiler. 
- - STATUS_INIT(CT_MISS, CACHETABLE_MISS, UINT64, "miss", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CT_MISSTIME, CACHETABLE_MISS_TIME, UINT64, "miss time", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CT_PREFETCHES, CACHETABLE_PREFETCHES, UINT64, "prefetches", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CT_SIZE_CURRENT, CACHETABLE_SIZE_CURRENT, UINT64, "size current", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CT_SIZE_LIMIT, CACHETABLE_SIZE_LIMIT, UINT64, "size limit", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CT_SIZE_WRITING, CACHETABLE_SIZE_WRITING, UINT64, "size writing", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CT_SIZE_NONLEAF, CACHETABLE_SIZE_NONLEAF, UINT64, "size nonleaf", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CT_SIZE_LEAF, CACHETABLE_SIZE_LEAF, UINT64, "size leaf", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CT_SIZE_ROLLBACK, CACHETABLE_SIZE_ROLLBACK, UINT64, "size rollback", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CT_SIZE_CACHEPRESSURE, CACHETABLE_SIZE_CACHEPRESSURE, UINT64, "size cachepressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CT_EVICTIONS, CACHETABLE_EVICTIONS, UINT64, "evictions", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CT_CLEANER_EXECUTIONS, CACHETABLE_CLEANER_EXECUTIONS, UINT64, "cleaner executions", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CT_CLEANER_PERIOD, CACHETABLE_CLEANER_PERIOD, UINT64, "cleaner period", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CT_CLEANER_ITERATIONS, CACHETABLE_CLEANER_ITERATIONS, UINT64, "cleaner iterations", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - - STATUS_INIT(CT_WAIT_PRESSURE_COUNT, CACHETABLE_WAIT_PRESSURE_COUNT, UINT64, "number of waits on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CT_WAIT_PRESSURE_TIME, CACHETABLE_WAIT_PRESSURE_TIME, UINT64, "time waiting on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - 
STATUS_INIT(CT_LONG_WAIT_PRESSURE_COUNT, CACHETABLE_LONG_WAIT_PRESSURE_COUNT, UINT64, "number of long waits on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CT_LONG_WAIT_PRESSURE_TIME, CACHETABLE_LONG_WAIT_PRESSURE_TIME, UINT64, "long time waiting on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - ct_status.initialized = true; -} -#undef STATUS_INIT - -#define STATUS_VALUE(x) ct_status.status[x].value.num - -static void * const zero_value = nullptr; -static PAIR_ATTR const zero_attr = { - .size = 0, - .nonleaf_size = 0, - .leaf_size = 0, - .rollback_size = 0, - .cache_pressure_size = 0, - .is_valid = true -}; - - -static inline void ctpair_destroy(PAIR p) { - p->value_rwlock.deinit(); - paranoid_invariant(p->refcount == 0); - nb_mutex_destroy(&p->disk_nb_mutex); - toku_cond_destroy(&p->refcount_wait); - toku_free(p); -} - -static inline void pair_lock(PAIR p) { - toku_mutex_lock(p->mutex); -} - -static inline void pair_unlock(PAIR p) { - toku_mutex_unlock(p->mutex); -} - -// adds a reference to the PAIR -// on input and output, PAIR mutex is held -static void pair_add_ref_unlocked(PAIR p) { - p->refcount++; -} - -// releases a reference to the PAIR -// on input and output, PAIR mutex is held -static void pair_release_ref_unlocked(PAIR p) { - paranoid_invariant(p->refcount > 0); - p->refcount--; - if (p->refcount == 0 && p->num_waiting_on_refs > 0) { - toku_cond_broadcast(&p->refcount_wait); - } -} - -static void pair_wait_for_ref_release_unlocked(PAIR p) { - p->num_waiting_on_refs++; - while (p->refcount > 0) { - toku_cond_wait(&p->refcount_wait, p->mutex); - } - p->num_waiting_on_refs--; -} - -bool toku_ctpair_is_write_locked(PAIR pair) { - return pair->value_rwlock.writers() == 1; -} - -void -toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS statp) { - if (!ct_status.initialized) { - status_init(); - } - STATUS_VALUE(CT_MISS) = cachetable_miss; - STATUS_VALUE(CT_MISSTIME) = cachetable_misstime; - 
STATUS_VALUE(CT_PREFETCHES) = cachetable_prefetches; - STATUS_VALUE(CT_EVICTIONS) = cachetable_evictions; - STATUS_VALUE(CT_CLEANER_EXECUTIONS) = cleaner_executions; - STATUS_VALUE(CT_CLEANER_PERIOD) = toku_get_cleaner_period_unlocked(ct); - STATUS_VALUE(CT_CLEANER_ITERATIONS) = toku_get_cleaner_iterations_unlocked(ct); - ct->ev.fill_engine_status(); - *statp = ct_status; -} - -// FIXME global with no toku prefix -void remove_background_job_from_cf(CACHEFILE cf) -{ - bjm_remove_background_job(cf->bjm); -} - -// FIXME global with no toku prefix -void cachefile_kibbutz_enq (CACHEFILE cf, void (*f)(void*), void *extra) -// The function f must call remove_background_job_from_cf when it completes -{ - int r = bjm_add_background_job(cf->bjm); - // if client is adding a background job, then it must be done - // at a time when the manager is accepting background jobs, otherwise - // the client is screwing up - assert_zero(r); - toku_kibbutz_enq(cf->cachetable->client_kibbutz, f, extra); -} - -static int -checkpoint_thread (void *checkpointer_v) -// Effect: If checkpoint_period>0 thn periodically run a checkpoint. -// If someone changes the checkpoint_period (calling toku_set_checkpoint_period), then the checkpoint will run sooner or later. -// If someone sets the checkpoint_shutdown boolean , then this thread exits. -// This thread notices those changes by waiting on a condition variable. 
-{ - CHECKPOINTER CAST_FROM_VOIDP(cp, checkpointer_v); - int r = toku_checkpoint(cp, cp->get_logger(), NULL, NULL, NULL, NULL, SCHEDULED_CHECKPOINT); - invariant_zero(r); - return r; -} - -void toku_set_checkpoint_period (CACHETABLE ct, uint32_t new_period) { - ct->cp.set_checkpoint_period(new_period); -} - -uint32_t toku_get_checkpoint_period_unlocked (CACHETABLE ct) { - return ct->cp.get_checkpoint_period(); -} - -void toku_set_cleaner_period (CACHETABLE ct, uint32_t new_period) { - ct->cl.set_period(new_period); -} - -uint32_t toku_get_cleaner_period_unlocked (CACHETABLE ct) { - return ct->cl.get_period_unlocked(); -} - -void toku_set_cleaner_iterations (CACHETABLE ct, uint32_t new_iterations) { - ct->cl.set_iterations(new_iterations); -} - -uint32_t toku_get_cleaner_iterations (CACHETABLE ct) { - return ct->cl.get_iterations(); -} - -uint32_t toku_get_cleaner_iterations_unlocked (CACHETABLE ct) { - return ct->cl.get_iterations(); -} - -// reserve 25% as "unreservable". The loader cannot have it. -#define unreservable_memory(size) ((size)/4) - -int toku_cachetable_create(CACHETABLE *ct_result, long size_limit, LSN UU(initial_lsn), TOKULOGGER logger) { - int result = 0; - int r; - - if (size_limit == 0) { - size_limit = 128*1024*1024; - } - - CACHETABLE XCALLOC(ct); - ct->list.init(); - ct->cf_list.init(); - - int num_processors = toku_os_get_number_active_processors(); - int checkpointing_nworkers = (num_processors/4) ? 
num_processors/4 : 1; - r = toku_kibbutz_create(num_processors, &ct->client_kibbutz); - if (r != 0) { - result = r; - goto cleanup; - } - r = toku_kibbutz_create(2*num_processors, &ct->ct_kibbutz); - if (r != 0) { - result = r; - goto cleanup; - } - r = toku_kibbutz_create(checkpointing_nworkers, &ct->checkpointing_kibbutz); - if (r != 0) { - result = r; - goto cleanup; - } - // must be done after creating ct_kibbutz - r = ct->ev.init(size_limit, &ct->list, &ct->cf_list, ct->ct_kibbutz, EVICTION_PERIOD); - if (r != 0) { - result = r; - goto cleanup; - } - r = ct->cp.init(&ct->list, logger, &ct->ev, &ct->cf_list); - if (r != 0) { - result = r; - goto cleanup; - } - r = ct->cl.init(1, &ct->list, ct); // by default, start with one iteration - if (r != 0) { - result = r; - goto cleanup; - } - ct->env_dir = toku_xstrdup("."); -cleanup: - if (result == 0) { - *ct_result = ct; - } else { - toku_cachetable_close(&ct); - } - return result; -} - -// Returns a pointer to the checkpoint contained within -// the given cachetable. -CHECKPOINTER toku_cachetable_get_checkpointer(CACHETABLE ct) { - return &ct->cp; -} - -uint64_t toku_cachetable_reserve_memory(CACHETABLE ct, double fraction, uint64_t upper_bound) { - uint64_t reserved_memory = ct->ev.reserve_memory(fraction, upper_bound); - return reserved_memory; -} - -void toku_cachetable_release_reserved_memory(CACHETABLE ct, uint64_t reserved_memory) { - ct->ev.release_reserved_memory(reserved_memory); -} - -void -toku_cachetable_set_env_dir(CACHETABLE ct, const char *env_dir) { - toku_free(ct->env_dir); - ct->env_dir = toku_xstrdup(env_dir); -} - -// What cachefile goes with particular iname (iname relative to env)? -// The transaction that is adding the reference might not have a reference -// to the ft, therefore the cachefile might be closing. -// If closing, we want to return that it is not there, but must wait till after -// the close has finished. 
-// Once the close has finished, there must not be a cachefile with that name -// in the cachetable. -int toku_cachefile_of_iname_in_env (CACHETABLE ct, const char *iname_in_env, CACHEFILE *cf) { - return ct->cf_list.cachefile_of_iname_in_env(iname_in_env, cf); -} - -// What cachefile goes with particular fd? -// This function can only be called if the ft is still open, so file must -// still be open -int toku_cachefile_of_filenum (CACHETABLE ct, FILENUM filenum, CACHEFILE *cf) { - return ct->cf_list.cachefile_of_filenum(filenum, cf); -} - -// TEST-ONLY function -// If something goes wrong, close the fd. After this, the caller shouldn't close the fd, but instead should close the cachefile. -int toku_cachetable_openfd (CACHEFILE *cfptr, CACHETABLE ct, int fd, const char *fname_in_env) { - FILENUM filenum = toku_cachetable_reserve_filenum(ct); - bool was_open; - return toku_cachetable_openfd_with_filenum(cfptr, ct, fd, fname_in_env, filenum, &was_open); -} - -// Get a unique filenum from the cachetable -FILENUM -toku_cachetable_reserve_filenum(CACHETABLE ct) { - return ct->cf_list.reserve_filenum(); -} - -static void create_new_cachefile( - CACHETABLE ct, - FILENUM filenum, - uint32_t hash_id, - int fd, - const char *fname_in_env, - struct fileid fileid, - CACHEFILE *cfptr - ) { - // File is not open. Make a new cachefile. 
- CACHEFILE newcf = NULL; - XCALLOC(newcf); - newcf->cachetable = ct; - newcf->hash_id = hash_id; - newcf->fileid = fileid; - - newcf->filenum = filenum; - newcf->fd = fd; - newcf->fname_in_env = toku_xstrdup(fname_in_env); - bjm_init(&newcf->bjm); - *cfptr = newcf; -} - -int toku_cachetable_openfd_with_filenum (CACHEFILE *cfptr, CACHETABLE ct, int fd, - const char *fname_in_env, - FILENUM filenum, bool* was_open) { - int r; - CACHEFILE newcf; - struct fileid fileid; - - assert(filenum.fileid != FILENUM_NONE.fileid); - r = toku_os_get_unique_file_id(fd, &fileid); - if (r != 0) { - r = get_error_errno(); - close(fd); - return r; - } - ct->cf_list.write_lock(); - CACHEFILE existing_cf = ct->cf_list.find_cachefile_unlocked(&fileid); - if (existing_cf) { - *was_open = true; - // Reuse an existing cachefile and close the caller's fd, whose - // responsibility has been passed to us. - r = close(fd); - assert(r == 0); - *cfptr = existing_cf; - r = 0; - goto exit; - } - *was_open = false; - ct->cf_list.verify_unused_filenum(filenum); - // now let's try to find it in the stale cachefiles - existing_cf = ct->cf_list.find_stale_cachefile_unlocked(&fileid); - // found the stale file, - if (existing_cf) { - // fix up the fields in the cachefile - existing_cf->filenum = filenum; - existing_cf->fd = fd; - existing_cf->fname_in_env = toku_xstrdup(fname_in_env); - bjm_init(&existing_cf->bjm); - - // now we need to move all the PAIRs in it back into the cachetable - ct->list.write_list_lock(); - for (PAIR curr_pair = existing_cf->cf_head; curr_pair; curr_pair = curr_pair->cf_next) { - pair_lock(curr_pair); - ct->list.add_to_cachetable_only(curr_pair); - pair_unlock(curr_pair); - } - ct->list.write_list_unlock(); - // move the cachefile back to the list of active cachefiles - ct->cf_list.remove_stale_cf_unlocked(existing_cf); - ct->cf_list.add_cf_unlocked(existing_cf); - *cfptr = existing_cf; - r = 0; - goto exit; - } - - create_new_cachefile( - ct, - filenum, - 
ct->cf_list.get_new_hash_id_unlocked(), - fd, - fname_in_env, - fileid, - &newcf - ); - - ct->cf_list.add_cf_unlocked(newcf); - - *cfptr = newcf; - r = 0; - exit: - ct->cf_list.write_unlock(); - return r; -} - -static void cachetable_flush_cachefile (CACHETABLE, CACHEFILE cf, bool evict_completely); - -//TEST_ONLY_FUNCTION -int toku_cachetable_openf (CACHEFILE *cfptr, CACHETABLE ct, const char *fname_in_env, int flags, mode_t mode) { - char *fname_in_cwd = toku_construct_full_name(2, ct->env_dir, fname_in_env); - int fd = open(fname_in_cwd, flags+O_BINARY, mode); - int r; - if (fd < 0) { - r = get_error_errno(); - } else { - r = toku_cachetable_openfd (cfptr, ct, fd, fname_in_env); - } - toku_free(fname_in_cwd); - return r; -} - -char * -toku_cachefile_fname_in_env (CACHEFILE cf) { - return cf->fname_in_env; -} - -int -toku_cachefile_get_fd (CACHEFILE cf) { - return cf->fd; -} - -static void cachefile_destroy(CACHEFILE cf) { - if (cf->free_userdata) { - cf->free_userdata(cf, cf->userdata); - } - toku_free(cf); -} - -void toku_cachefile_close(CACHEFILE *cfp, bool oplsn_valid, LSN oplsn) { - CACHEFILE cf = *cfp; - CACHETABLE ct = cf->cachetable; - - bjm_wait_for_jobs_to_finish(cf->bjm); - - // Clients should never attempt to close a cachefile that is being - // checkpointed. We notify clients this is happening in the - // note_pin_by_checkpoint callback. - assert(!cf->for_checkpoint); - - // Flush the cachefile and remove all of its pairs from the cachetable, - // but keep the PAIRs linked in the cachefile. 
We will store the cachefile - // away in case it gets opened immedietely - // - // if we are unlinking on close, then we want to evict completely, - // otherwise, we will keep the PAIRs and cachefile around in case - // a subsequent open comes soon - cachetable_flush_cachefile(ct, cf, cf->unlink_on_close); - - // Call the close userdata callback to notify the client this cachefile - // and its underlying file are going to be closed - if (cf->close_userdata) { - cf->close_userdata(cf, cf->fd, cf->userdata, oplsn_valid, oplsn); - } - // fsync and close the fd. - toku_file_fsync_without_accounting(cf->fd); - int r = close(cf->fd); - assert(r == 0); - cf->fd = -1; - - // destroy the parts of the cachefile - // that do not persist across opens/closes - bjm_destroy(cf->bjm); - cf->bjm = NULL; - - // remove the cf from the list of active cachefiles - ct->cf_list.remove_cf(cf); - cf->filenum = FILENUM_NONE; - - // Unlink the file if the bit was set - if (cf->unlink_on_close) { - char *fname_in_cwd = toku_cachetable_get_fname_in_cwd(cf->cachetable, cf->fname_in_env); - r = unlink(fname_in_cwd); - assert_zero(r); - toku_free(fname_in_cwd); - } - toku_free(cf->fname_in_env); - cf->fname_in_env = NULL; - - // we destroy the cf if the unlink bit was set or if no PAIRs exist - // if no PAIRs exist, there is no sense in keeping the cachefile around - bool destroy_cf = cf->unlink_on_close || (cf->cf_head == NULL); - if (destroy_cf) { - cachefile_destroy(cf); - } - else { - ct->cf_list.add_stale_cf(cf); - } -} - -// This hash function comes from Jenkins: http://burtleburtle.net/bob/c/lookup3.c -// The idea here is to mix the bits thoroughly so that we don't have to do modulo by a prime number. -// Instead we can use a bitmask on a table of size power of two. 
-// This hash function does yield improved performance on ./db-benchmark-test-tokudb and ./scanscan -static inline uint32_t rot(uint32_t x, uint32_t k) { - return (x<>(32-k)); -} -static inline uint32_t final (uint32_t a, uint32_t b, uint32_t c) { - c ^= b; c -= rot(b,14); - a ^= c; a -= rot(c,11); - b ^= a; b -= rot(a,25); - c ^= b; c -= rot(b,16); - a ^= c; a -= rot(c,4); - b ^= a; b -= rot(a,14); - c ^= b; c -= rot(b,24); - return c; -} - -uint32_t toku_cachetable_hash (CACHEFILE cachefile, BLOCKNUM key) -// Effect: Return a 32-bit hash key. The hash key shall be suitable for using with bitmasking for a table of size power-of-two. -{ - return final(cachefile->hash_id, (uint32_t)(key.b>>32), (uint32_t)key.b); -} - -#define CLOCK_SATURATION 15 -#define CLOCK_INITIAL_COUNT 3 - -// Requires pair's mutex to be held -static void pair_touch (PAIR p) { - p->count = (p->count < CLOCK_SATURATION) ? p->count+1 : CLOCK_SATURATION; -} - -// Remove a pair from the cachetable, requires write list lock to be held and p->mutex to be held -// Effects: the pair is removed from the LRU list and from the cachetable's hash table. -// The size of the objects in the cachetable is adjusted by the size of the pair being -// removed. -static void cachetable_remove_pair (pair_list* list, evictor* ev, PAIR p) { - list->evict_completely(p); - ev->remove_pair_attr(p->attr); -} - -static void cachetable_free_pair(PAIR p) { - CACHETABLE_FLUSH_CALLBACK flush_callback = p->flush_callback; - CACHEKEY key = p->key; - void *value = p->value_data; - void* disk_data = p->disk_data; - void *write_extraargs = p->write_extraargs; - PAIR_ATTR old_attr = p->attr; - - cachetable_evictions++; - PAIR_ATTR new_attr = p->attr; - // Note that flush_callback is called with write_me false, so the only purpose of this - // call is to tell the ft layer to evict the node (keep_me is false). 
- // Also, because we have already removed the PAIR from the cachetable in - // cachetable_remove_pair, we cannot pass in p->cachefile and p->cachefile->fd - // for the first two parameters, as these may be invalid (#5171), so, we - // pass in NULL and -1, dummy values - flush_callback(NULL, -1, key, value, &disk_data, write_extraargs, old_attr, &new_attr, false, false, true, false); - - ctpair_destroy(p); -} - -// assumes value_rwlock and disk_nb_mutex held on entry -// responsibility of this function is to only write a locked PAIR to disk -// and NOTHING else. We do not manipulate the state of the PAIR -// of the cachetable here (with the exception of ct->size_current for clones) -// -// No pair_list lock should be held, and the PAIR mutex should not be held -// -static void cachetable_only_write_locked_data( - evictor* ev, - PAIR p, - bool for_checkpoint, - PAIR_ATTR* new_attr, - bool is_clone - ) -{ - CACHETABLE_FLUSH_CALLBACK flush_callback = p->flush_callback; - CACHEFILE cachefile = p->cachefile; - CACHEKEY key = p->key; - void *value = is_clone ? p->cloned_value_data : p->value_data; - void *disk_data = p->disk_data; - void *write_extraargs = p->write_extraargs; - PAIR_ATTR old_attr; - // we do this for drd. If we are a cloned pair and only - // have the disk_nb_mutex, it is a race to access p->attr. - // Luckily, old_attr here is only used for some test applications, - // so inaccurate non-size fields are ok. - if (is_clone) { - old_attr = make_pair_attr(p->cloned_value_size); - } - else { - old_attr = p->attr; - } - bool dowrite = true; - - // write callback - flush_callback( - cachefile, - cachefile->fd, - key, - value, - &disk_data, - write_extraargs, - old_attr, - new_attr, - dowrite, - is_clone ? 
false : true, // keep_me (only keep if this is not cloned pointer) - for_checkpoint, - is_clone //is_clone - ); - p->disk_data = disk_data; - if (is_clone) { - p->cloned_value_data = NULL; - ev->remove_from_size_current(p->cloned_value_size); - p->cloned_value_size = 0; - } -} - - -// -// This function writes a PAIR's value out to disk. Currently, it is called -// by get_and_pin functions that write a PAIR out for checkpoint, by -// evictor threads that evict dirty PAIRS, and by the checkpoint thread -// that needs to write out a dirty node for checkpoint. -// -// Requires on entry for p->mutex to NOT be held, otherwise -// calling cachetable_only_write_locked_data will be very expensive -// -static void cachetable_write_locked_pair( - evictor* ev, - PAIR p, - bool for_checkpoint - ) -{ - PAIR_ATTR old_attr = p->attr; - PAIR_ATTR new_attr = p->attr; - // grabbing the disk_nb_mutex here ensures that - // after this point, no one is writing out a cloned value - // if we grab the disk_nb_mutex inside the if clause, - // then we may try to evict a PAIR that is in the process - // of having its clone be written out - pair_lock(p); - nb_mutex_lock(&p->disk_nb_mutex, p->mutex); - pair_unlock(p); - // make sure that assumption about cloned_value_data is true - // if we have grabbed the disk_nb_mutex, then that means that - // there should be no cloned value data - assert(p->cloned_value_data == NULL); - if (p->dirty) { - cachetable_only_write_locked_data(ev, p, for_checkpoint, &new_attr, false); - // - // now let's update variables - // - if (new_attr.is_valid) { - p->attr = new_attr; - ev->change_pair_attr(old_attr, new_attr); - } - } - // the pair is no longer dirty once written - p->dirty = CACHETABLE_CLEAN; - pair_lock(p); - nb_mutex_unlock(&p->disk_nb_mutex); - pair_unlock(p); -} - -// Worker thread function to writes and evicts a pair from memory to its cachefile -static void cachetable_evicter(void* extra) { - PAIR p = (PAIR)extra; - pair_list* pl = p->list; - 
CACHEFILE cf = p->cachefile; - pl->read_pending_exp_lock(); - bool for_checkpoint = p->checkpoint_pending; - p->checkpoint_pending = false; - // per the contract of evictor::evict_pair, - // the pair's mutex, p->mutex, must be held on entry - pair_lock(p); - p->ev->evict_pair(p, for_checkpoint); - pl->read_pending_exp_unlock(); - bjm_remove_background_job(cf->bjm); -} - -static void cachetable_partial_eviction(void* extra) { - PAIR p = (PAIR)extra; - CACHEFILE cf = p->cachefile; - p->ev->do_partial_eviction(p); - bjm_remove_background_job(cf->bjm); -} - -void toku_cachetable_swap_pair_values(PAIR old_pair, PAIR new_pair) { - void* old_value = old_pair->value_data; - void* new_value = new_pair->value_data; - old_pair->value_data = new_value; - new_pair->value_data = old_value; -} - -void toku_cachetable_maybe_flush_some(CACHETABLE ct) { - // TODO: Maybe move this... - ct->ev.signal_eviction_thread(); -} - -// Initializes a pair's members. -// -void pair_init(PAIR p, - CACHEFILE cachefile, - CACHEKEY key, - void *value, - PAIR_ATTR attr, - enum cachetable_dirty dirty, - uint32_t fullhash, - CACHETABLE_WRITE_CALLBACK write_callback, - evictor *ev, - pair_list *list) -{ - p->cachefile = cachefile; - p->key = key; - p->value_data = value; - p->cloned_value_data = NULL; - p->cloned_value_size = 0; - p->disk_data = NULL; - p->attr = attr; - p->dirty = dirty; - p->fullhash = fullhash; - - p->flush_callback = write_callback.flush_callback; - p->pe_callback = write_callback.pe_callback; - p->pe_est_callback = write_callback.pe_est_callback; - p->cleaner_callback = write_callback.cleaner_callback; - p->clone_callback = write_callback.clone_callback; - p->checkpoint_complete_callback = write_callback.checkpoint_complete_callback; - p->write_extraargs = write_callback.write_extraargs; - - p->count = 0; // Is zero the correct init value? 
- p->refcount = 0; - p->num_waiting_on_refs = 0; - toku_cond_init(&p->refcount_wait, NULL); - p->checkpoint_pending = false; - - p->mutex = list->get_mutex_for_pair(fullhash); - assert(p->mutex); - p->value_rwlock.init(p->mutex); - nb_mutex_init(&p->disk_nb_mutex); - - p->size_evicting_estimate = 0; // Is zero the correct init value? - - p->ev = ev; - p->list = list; - - p->clock_next = p->clock_prev = NULL; - p->pending_next = p->pending_prev = NULL; - p->cf_next = p->cf_prev = NULL; - p->hash_chain = NULL; -} - -// has ct locked on entry -// This function MUST NOT release and reacquire the cachetable lock -// Its callers (toku_cachetable_put_with_dep_pairs) depend on this behavior. -// -// Requires pair list's write lock to be held on entry. -// the pair's mutex must be held as wel -// -// -static PAIR cachetable_insert_at(CACHETABLE ct, - CACHEFILE cachefile, CACHEKEY key, void *value, - uint32_t fullhash, - PAIR_ATTR attr, - CACHETABLE_WRITE_CALLBACK write_callback, - enum cachetable_dirty dirty) { - PAIR MALLOC(p); - assert(p); - memset(p, 0, sizeof *p); - pair_init(p, - cachefile, - key, - value, - attr, - dirty, - fullhash, - write_callback, - &ct->ev, - &ct->list - ); - - ct->list.put(p); - ct->ev.add_pair_attr(attr); - return p; -} - -// on input, the write list lock must be held AND -// the pair's mutex must be held as wel -static void cachetable_insert_pair_at(CACHETABLE ct, PAIR p, PAIR_ATTR attr) { - ct->list.put(p); - ct->ev.add_pair_attr(attr); -} - - -// has ct locked on entry -// This function MUST NOT release and reacquire the cachetable lock -// Its callers (toku_cachetable_put_with_dep_pairs) depend on this behavior. 
-// -// Requires pair list's write lock to be held on entry -// -static void cachetable_put_internal( - CACHEFILE cachefile, - PAIR p, - void *value, - PAIR_ATTR attr, - CACHETABLE_PUT_CALLBACK put_callback - ) -{ - CACHETABLE ct = cachefile->cachetable; - // - // - // TODO: (Zardosht), make code run in debug only - // - // - //PAIR dummy_p = ct->list.find_pair(cachefile, key, fullhash); - //invariant_null(dummy_p); - cachetable_insert_pair_at(ct, p, attr); - invariant_notnull(put_callback); - put_callback(p->key, value, p); -} - -// Pair mutex (p->mutex) is may or may not be held on entry, -// Holding the pair mutex on entry is not important -// for performance or corrrectness -// Pair is pinned on entry -static void -clone_pair(evictor* ev, PAIR p) { - PAIR_ATTR old_attr = p->attr; - PAIR_ATTR new_attr; - long clone_size = 0; - - // act of cloning should be fast, - // not sure if we have to release - // and regrab the cachetable lock, - // but doing it for now - p->clone_callback( - p->value_data, - &p->cloned_value_data, - &clone_size, - &new_attr, - true, - p->write_extraargs - ); - - // now we need to do the same actions we would do - // if the PAIR had been written to disk - // - // because we hold the value_rwlock, - // it doesn't matter whether we clear - // the pending bit before the clone - // or after the clone - p->dirty = CACHETABLE_CLEAN; - if (new_attr.is_valid) { - p->attr = new_attr; - ev->change_pair_attr(old_attr, new_attr); - } - p->cloned_value_size = clone_size; - ev->add_to_size_current(p->cloned_value_size); -} - -static void checkpoint_cloned_pair(void* extra) { - PAIR p = (PAIR)extra; - CACHETABLE ct = p->cachefile->cachetable; - PAIR_ATTR new_attr; - // note that pending lock is not needed here because - // we KNOW we are in the middle of a checkpoint - // and that a begin_checkpoint cannot happen - cachetable_only_write_locked_data( - p->ev, - p, - true, //for_checkpoint - &new_attr, - true //is_clone - ); - pair_lock(p); - 
nb_mutex_unlock(&p->disk_nb_mutex); - pair_unlock(p); - ct->cp.remove_background_job(); -} - -static void -checkpoint_cloned_pair_on_writer_thread(CACHETABLE ct, PAIR p) { - toku_kibbutz_enq(ct->checkpointing_kibbutz, checkpoint_cloned_pair, p); -} - - -// -// Given a PAIR p with the value_rwlock altready held, do the following: -// - If the PAIR needs to be written out to disk for checkpoint: -// - If the PAIR is cloneable, clone the PAIR and place the work -// of writing the PAIR on a background thread. -// - If the PAIR is not cloneable, write the PAIR to disk for checkpoint -// on the current thread -// -// On entry, pair's mutex is NOT held -// -static void -write_locked_pair_for_checkpoint(CACHETABLE ct, PAIR p, bool checkpoint_pending) -{ - if (checkpoint_pending && p->checkpoint_complete_callback) { - p->checkpoint_complete_callback(p->value_data); - } - if (p->dirty && checkpoint_pending) { - if (p->clone_callback) { - pair_lock(p); - nb_mutex_lock(&p->disk_nb_mutex, p->mutex); - pair_unlock(p); - assert(!p->cloned_value_data); - clone_pair(&ct->ev, p); - assert(p->cloned_value_data); - // place it on the background thread and continue - // responsibility of writer thread to release disk_nb_mutex - ct->cp.add_background_job(); - checkpoint_cloned_pair_on_writer_thread(ct, p); - } - else { - // The pair is not cloneable, just write the pair to disk - // we already have p->value_rwlock and we just do the write in our own thread. - cachetable_write_locked_pair(&ct->ev, p, true); // keeps the PAIR's write lock - } - } -} - -// On entry and exit: hold the pair's mutex (p->mutex) -// Method: take write lock -// maybe write out the node -// Else release write lock -// -static void -write_pair_for_checkpoint_thread (evictor* ev, PAIR p) -{ - // Grab an exclusive lock on the pair. - // If we grab an expensive lock, then other threads will return - // TRY_AGAIN rather than waiting. 
In production, the only time - // another thread will check if grabbing a lock is expensive is when - // we have a clone_callback (FTNODEs), so the act of checkpointing - // will be cheap. Also, much of the time we'll just be clearing - // pending bits and that's definitely cheap. (see #5427) - p->value_rwlock.write_lock(false); - if (p->checkpoint_pending && p->checkpoint_complete_callback) { - p->checkpoint_complete_callback(p->value_data); - } - if (p->dirty && p->checkpoint_pending) { - if (p->clone_callback) { - nb_mutex_lock(&p->disk_nb_mutex, p->mutex); - assert(!p->cloned_value_data); - clone_pair(ev, p); - assert(p->cloned_value_data); - } - else { - // The pair is not cloneable, just write the pair to disk - // we already have p->value_rwlock and we just do the write in our own thread. - // this will grab and release disk_nb_mutex - pair_unlock(p); - cachetable_write_locked_pair(ev, p, true); // keeps the PAIR's write lock - pair_lock(p); - } - p->checkpoint_pending = false; - - // now release value_rwlock, before we write the PAIR out - // so that the PAIR is available to client threads - p->value_rwlock.write_unlock(); // didn't call cachetable_evict_pair so we have to unlock it ourselves. - if (p->clone_callback) { - // note that pending lock is not needed here because - // we KNOW we are in the middle of a checkpoint - // and that a begin_checkpoint cannot happen - PAIR_ATTR attr; - pair_unlock(p); - cachetable_only_write_locked_data( - ev, - p, - true, //for_checkpoint - &attr, - true //is_clone - ); - pair_lock(p); - nb_mutex_unlock(&p->disk_nb_mutex); - } - } - else { - // - // we may clear the pending bit here because we have - // both the cachetable lock and the PAIR lock. 
- // The rule, as mentioned in toku_cachetable_begin_checkpoint, - // is that to clear the bit, we must have both the PAIR lock - // and the pending lock - // - p->checkpoint_pending = false; - p->value_rwlock.write_unlock(); - } -} - -// -// For each PAIR associated with these CACHEFILEs and CACHEKEYs -// if the checkpoint_pending bit is set and the PAIR is dirty, write the PAIR -// to disk. -// We assume the PAIRs passed in have been locked by the client that made calls -// into the cachetable that eventually make it here. -// -static void checkpoint_dependent_pairs( - CACHETABLE ct, - uint32_t num_dependent_pairs, // number of dependent pairs that we may need to checkpoint - PAIR* dependent_pairs, - bool* checkpoint_pending, - enum cachetable_dirty* dependent_dirty // array stating dirty/cleanness of dependent pairs - ) -{ - for (uint32_t i =0; i < num_dependent_pairs; i++) { - PAIR curr_dep_pair = dependent_pairs[i]; - // we need to update the dirtyness of the dependent pair, - // because the client may have dirtied it while holding its lock, - // and if the pair is pending a checkpoint, it needs to be written out - if (dependent_dirty[i]) curr_dep_pair->dirty = CACHETABLE_DIRTY; - if (checkpoint_pending[i]) { - write_locked_pair_for_checkpoint(ct, curr_dep_pair, checkpoint_pending[i]); - } - } -} - -void toku_cachetable_put_with_dep_pairs( - CACHEFILE cachefile, - CACHETABLE_GET_KEY_AND_FULLHASH get_key_and_fullhash, - void *value, - PAIR_ATTR attr, - CACHETABLE_WRITE_CALLBACK write_callback, - void *get_key_and_fullhash_extra, - uint32_t num_dependent_pairs, // number of dependent pairs that we may need to checkpoint - PAIR* dependent_pairs, - enum cachetable_dirty* dependent_dirty, // array stating dirty/cleanness of dependent pairs - CACHEKEY* key, - uint32_t* fullhash, - CACHETABLE_PUT_CALLBACK put_callback - ) -{ - // - // need to get the key and filehash - // - CACHETABLE ct = cachefile->cachetable; - if (ct->ev.should_client_thread_sleep()) { - 
ct->ev.wait_for_cache_pressure_to_subside(); - } - if (ct->ev.should_client_wake_eviction_thread()) { - ct->ev.signal_eviction_thread(); - } - - PAIR p = NULL; - XMALLOC(p); - memset(p, 0, sizeof *p); - - ct->list.write_list_lock(); - get_key_and_fullhash(key, fullhash, get_key_and_fullhash_extra); - pair_init( - p, - cachefile, - *key, - value, - attr, - CACHETABLE_DIRTY, - *fullhash, - write_callback, - &ct->ev, - &ct->list - ); - pair_lock(p); - p->value_rwlock.write_lock(true); - cachetable_put_internal( - cachefile, - p, - value, - attr, - put_callback - ); - pair_unlock(p); - bool checkpoint_pending[num_dependent_pairs]; - ct->list.write_pending_cheap_lock(); - for (uint32_t i = 0; i < num_dependent_pairs; i++) { - checkpoint_pending[i] = dependent_pairs[i]->checkpoint_pending; - dependent_pairs[i]->checkpoint_pending = false; - } - ct->list.write_pending_cheap_unlock(); - ct->list.write_list_unlock(); - - // - // now that we have inserted the row, let's checkpoint the - // dependent nodes, if they need checkpointing - // - checkpoint_dependent_pairs( - ct, - num_dependent_pairs, - dependent_pairs, - checkpoint_pending, - dependent_dirty - ); -} - -void toku_cachetable_put(CACHEFILE cachefile, CACHEKEY key, uint32_t fullhash, void*value, PAIR_ATTR attr, - CACHETABLE_WRITE_CALLBACK write_callback, - CACHETABLE_PUT_CALLBACK put_callback - ) { - CACHETABLE ct = cachefile->cachetable; - if (ct->ev.should_client_thread_sleep()) { - ct->ev.wait_for_cache_pressure_to_subside(); - } - if (ct->ev.should_client_wake_eviction_thread()) { - ct->ev.signal_eviction_thread(); - } - - PAIR p = NULL; - XMALLOC(p); - memset(p, 0, sizeof *p); - - ct->list.write_list_lock(); - pair_init( - p, - cachefile, - key, - value, - attr, - CACHETABLE_DIRTY, - fullhash, - write_callback, - &ct->ev, - &ct->list - ); - pair_lock(p); - p->value_rwlock.write_lock(true); - cachetable_put_internal( - cachefile, - p, - value, - attr, - put_callback - ); - pair_unlock(p); - 
ct->list.write_list_unlock(); -} - -static uint64_t get_tnow(void) { - struct timeval tv; - int r = gettimeofday(&tv, NULL); assert(r == 0); - return tv.tv_sec * 1000000ULL + tv.tv_usec; -} - -// -// cachetable lock and PAIR lock are held on entry -// On exit, cachetable lock is still held, but PAIR lock -// is either released. -// -// No locks are held on entry (besides the rwlock write lock of the PAIR) -// -static void -do_partial_fetch( - CACHETABLE ct, - CACHEFILE cachefile, - PAIR p, - CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, - void *read_extraargs, - bool keep_pair_locked - ) -{ - PAIR_ATTR old_attr = p->attr; - PAIR_ATTR new_attr = zero_attr; - // As of Dr. No, only clean PAIRs may have pieces missing, - // so we do a sanity check here. - assert(!p->dirty); - - pair_lock(p); - invariant(p->value_rwlock.writers()); - nb_mutex_lock(&p->disk_nb_mutex, p->mutex); - pair_unlock(p); - int r = pf_callback(p->value_data, p->disk_data, read_extraargs, cachefile->fd, &new_attr); - lazy_assert_zero(r); - p->attr = new_attr; - ct->ev.change_pair_attr(old_attr, new_attr); - pair_lock(p); - nb_mutex_unlock(&p->disk_nb_mutex); - if (!keep_pair_locked) { - p->value_rwlock.write_unlock(); - } - pair_unlock(p); -} - -void toku_cachetable_pf_pinned_pair( - void* value, - CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, - void* read_extraargs, - CACHEFILE cf, - CACHEKEY key, - uint32_t fullhash - ) -{ - PAIR_ATTR attr; - PAIR p = NULL; - CACHETABLE ct = cf->cachetable; - ct->list.pair_lock_by_fullhash(fullhash); - p = ct->list.find_pair(cf, key, fullhash); - assert(p != NULL); - assert(p->value_data == value); - assert(p->value_rwlock.writers()); - nb_mutex_lock(&p->disk_nb_mutex, p->mutex); - pair_unlock(p); - - int fd = cf->fd; - pf_callback(value, p->disk_data, read_extraargs, fd, &attr); - - pair_lock(p); - nb_mutex_unlock(&p->disk_nb_mutex); - pair_unlock(p); -} - -int toku_cachetable_get_and_pin ( - CACHEFILE cachefile, - CACHEKEY key, - uint32_t fullhash, - 
void**value, - long *sizep, - CACHETABLE_WRITE_CALLBACK write_callback, - CACHETABLE_FETCH_CALLBACK fetch_callback, - CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, - CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, - bool may_modify_value, - void* read_extraargs // parameter for fetch_callback, pf_req_callback, and pf_callback - ) -{ - pair_lock_type lock_type = may_modify_value ? PL_WRITE_EXPENSIVE : PL_READ; - // We have separate parameters of read_extraargs and write_extraargs because - // the lifetime of the two parameters are different. write_extraargs may be used - // long after this function call (e.g. after a flush to disk), whereas read_extraargs - // will not be used after this function returns. As a result, the caller may allocate - // read_extraargs on the stack, whereas write_extraargs must be allocated - // on the heap. - return toku_cachetable_get_and_pin_with_dep_pairs ( - cachefile, - key, - fullhash, - value, - sizep, - write_callback, - fetch_callback, - pf_req_callback, - pf_callback, - lock_type, - read_extraargs, - 0, // number of dependent pairs that we may need to checkpoint - NULL, // array of dependent pairs - NULL // array stating dirty/cleanness of dependent pairs - ); -} - -// Read a pair from a cachefile into memory using the pair's fetch callback -// on entry, pair mutex (p->mutex) is NOT held, but pair is pinned -static void cachetable_fetch_pair( - CACHETABLE ct, - CACHEFILE cf, - PAIR p, - CACHETABLE_FETCH_CALLBACK fetch_callback, - void* read_extraargs, - bool keep_pair_locked - ) -{ - // helgrind - CACHEKEY key = p->key; - uint32_t fullhash = p->fullhash; - - void *toku_value = NULL; - void *disk_data = NULL; - PAIR_ATTR attr; - - // FIXME this should be enum cachetable_dirty, right? 
- int dirty = 0; - - pair_lock(p); - nb_mutex_lock(&p->disk_nb_mutex, p->mutex); - pair_unlock(p); - - int r; - r = fetch_callback(cf, p, cf->fd, key, fullhash, &toku_value, &disk_data, &attr, &dirty, read_extraargs); - if (dirty) { - p->dirty = CACHETABLE_DIRTY; - } - assert(r == 0); - - p->value_data = toku_value; - p->disk_data = disk_data; - p->attr = attr; - ct->ev.add_pair_attr(attr); - pair_lock(p); - nb_mutex_unlock(&p->disk_nb_mutex); - if (!keep_pair_locked) { - p->value_rwlock.write_unlock(); - } - pair_unlock(p); -} - -static bool get_checkpoint_pending(PAIR p, pair_list* pl) { - bool checkpoint_pending = false; - pl->read_pending_cheap_lock(); - checkpoint_pending = p->checkpoint_pending; - p->checkpoint_pending = false; - pl->read_pending_cheap_unlock(); - return checkpoint_pending; -} - -static void checkpoint_pair_and_dependent_pairs( - CACHETABLE ct, - PAIR p, - bool p_is_pending_checkpoint, - uint32_t num_dependent_pairs, // number of dependent pairs that we may need to checkpoint - PAIR* dependent_pairs, - bool* dependent_pairs_pending_checkpoint, - enum cachetable_dirty* dependent_dirty // array stating dirty/cleanness of dependent pairs - ) -{ - - // - // A checkpoint must not begin while we are checking dependent pairs or pending bits. - // Here is why. - // - // Now that we have all of the locks on the pairs we - // care about, we can take care of the necessary checkpointing. - // For each pair, we simply need to write the pair if it is - // pending a checkpoint. If no pair is pending a checkpoint, - // then all of this work will be done with the cachetable lock held, - // so we don't need to worry about a checkpoint beginning - // in the middle of any operation below. If some pair - // is pending a checkpoint, then the checkpoint thread - // will not complete its current checkpoint until it can - // successfully grab a lock on the pending pair and - // remove it from its list of pairs pending a checkpoint. 
- // This cannot be done until we release the lock - // that we have, which is not done in this function. - // So, the point is, it is impossible for a checkpoint - // to begin while we write any of these locked pairs - // for checkpoint, even though writing a pair releases - // the cachetable lock. - // - write_locked_pair_for_checkpoint(ct, p, p_is_pending_checkpoint); - - checkpoint_dependent_pairs( - ct, - num_dependent_pairs, - dependent_pairs, - dependent_pairs_pending_checkpoint, - dependent_dirty - ); -} - -static void unpin_pair(PAIR p, bool read_lock_grabbed) { - if (read_lock_grabbed) { - p->value_rwlock.read_unlock(); - } - else { - p->value_rwlock.write_unlock(); - } -} - - -// on input, the pair's mutex is held, -// on output, the pair's mutex is not held. -// if true, we must try again, and pair is not pinned -// if false, we succeeded, the pair is pinned -static bool try_pin_pair( - PAIR p, - CACHETABLE ct, - CACHEFILE cachefile, - pair_lock_type lock_type, - uint32_t num_dependent_pairs, - PAIR* dependent_pairs, - enum cachetable_dirty* dependent_dirty, - CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, - CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, - void* read_extraargs, - bool already_slept - ) -{ - bool dep_checkpoint_pending[num_dependent_pairs]; - bool try_again = true; - bool expensive = (lock_type == PL_WRITE_EXPENSIVE); - if (lock_type != PL_READ) { - p->value_rwlock.write_lock(expensive); - } - else { - p->value_rwlock.read_lock(); - } - pair_touch(p); - pair_unlock(p); - - bool partial_fetch_required = pf_req_callback(p->value_data,read_extraargs); - - if (partial_fetch_required) { - toku::context pf_ctx(CTX_PARTIAL_FETCH); - - if (ct->ev.should_client_thread_sleep() && !already_slept) { - pair_lock(p); - unpin_pair(p, (lock_type == PL_READ)); - pair_unlock(p); - try_again = true; - goto exit; - } - if (ct->ev.should_client_wake_eviction_thread()) { - ct->ev.signal_eviction_thread(); - } - // - // Just because the PAIR 
exists does necessarily mean the all the data the caller requires - // is in memory. A partial fetch may be required, which is evaluated above - // if the variable is true, a partial fetch is required so we must grab the PAIR's write lock - // and then call a callback to retrieve what we need - // - assert(partial_fetch_required); - // As of Dr. No, only clean PAIRs may have pieces missing, - // so we do a sanity check here. - assert(!p->dirty); - - if (lock_type == PL_READ) { - pair_lock(p); - p->value_rwlock.read_unlock(); - p->value_rwlock.write_lock(true); - pair_unlock(p); - } - else if (lock_type == PL_WRITE_CHEAP) { - pair_lock(p); - p->value_rwlock.write_unlock(); - p->value_rwlock.write_lock(true); - pair_unlock(p); - } - - partial_fetch_required = pf_req_callback(p->value_data,read_extraargs); - if (partial_fetch_required) { - do_partial_fetch(ct, cachefile, p, pf_callback, read_extraargs, true); - } - if (lock_type == PL_READ) { - // - // TODO: Zardosht, somehow ensure that a partial eviction cannot happen - // between these two calls - // - pair_lock(p); - p->value_rwlock.write_unlock(); - p->value_rwlock.read_lock(); - pair_unlock(p); - } - else if (lock_type == PL_WRITE_CHEAP) { - pair_lock(p); - p->value_rwlock.write_unlock(); - p->value_rwlock.write_lock(false); - pair_unlock(p); - } - // small hack here for #5439, - // for queries, pf_req_callback does some work for the caller, - // that information may be out of date after a write_unlock - // followed by a relock, so we do it again. 
- bool pf_required = pf_req_callback(p->value_data,read_extraargs); - assert(!pf_required); - } - - if (lock_type != PL_READ) { - ct->list.read_pending_cheap_lock(); - bool p_checkpoint_pending = p->checkpoint_pending; - p->checkpoint_pending = false; - for (uint32_t i = 0; i < num_dependent_pairs; i++) { - dep_checkpoint_pending[i] = dependent_pairs[i]->checkpoint_pending; - dependent_pairs[i]->checkpoint_pending = false; - } - ct->list.read_pending_cheap_unlock(); - checkpoint_pair_and_dependent_pairs( - ct, - p, - p_checkpoint_pending, - num_dependent_pairs, - dependent_pairs, - dep_checkpoint_pending, - dependent_dirty - ); - } - - try_again = false; -exit: - return try_again; -} - -int toku_cachetable_get_and_pin_with_dep_pairs ( - CACHEFILE cachefile, - CACHEKEY key, - uint32_t fullhash, - void**value, - long *sizep, - CACHETABLE_WRITE_CALLBACK write_callback, - CACHETABLE_FETCH_CALLBACK fetch_callback, - CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, - CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, - pair_lock_type lock_type, - void* read_extraargs, // parameter for fetch_callback, pf_req_callback, and pf_callback - uint32_t num_dependent_pairs, // number of dependent pairs that we may need to checkpoint - PAIR* dependent_pairs, - enum cachetable_dirty* dependent_dirty // array stating dirty/cleanness of dependent pairs - ) -// See cachetable.h -{ - CACHETABLE ct = cachefile->cachetable; - bool wait = false; - bool already_slept = false; - bool dep_checkpoint_pending[num_dependent_pairs]; - - // - // If in the process of pinning the node we add data to the cachetable via a partial fetch - // or a full fetch, we may need to first sleep because there is too much data in the - // cachetable. In those cases, we set the bool wait to true and goto try_again, so that - // we can do our sleep and then restart the function. - // -beginning: - if (wait) { - // We shouldn't be holding the read list lock while - // waiting for the evictor to remove pairs. 
- already_slept = true; - ct->ev.wait_for_cache_pressure_to_subside(); - } - - ct->list.pair_lock_by_fullhash(fullhash); - PAIR p = ct->list.find_pair(cachefile, key, fullhash); - if (p) { - // on entry, holds p->mutex (which is locked via pair_lock_by_fullhash) - // on exit, does not hold p->mutex - bool try_again = try_pin_pair( - p, - ct, - cachefile, - lock_type, - num_dependent_pairs, - dependent_pairs, - dependent_dirty, - pf_req_callback, - pf_callback, - read_extraargs, - already_slept - ); - if (try_again) { - wait = true; - goto beginning; - } - else { - goto got_value; - } - } - else { - toku::context fetch_ctx(CTX_FULL_FETCH); - - ct->list.pair_unlock_by_fullhash(fullhash); - // we only want to sleep once per call to get_and_pin. If we have already - // slept and there is still cache pressure, then we might as - // well just complete the call, because the sleep did not help - // By sleeping only once per get_and_pin, we prevent starvation and ensure - // that we make progress (however slow) on each thread, which allows - // assumptions of the form 'x will eventually happen'. - // This happens in extreme scenarios. - if (ct->ev.should_client_thread_sleep() && !already_slept) { - wait = true; - goto beginning; - } - if (ct->ev.should_client_wake_eviction_thread()) { - ct->ev.signal_eviction_thread(); - } - // Since the pair was not found, we need the write list - // lock to add it. So, we have to release the read list lock - // first. 
- ct->list.write_list_lock(); - ct->list.pair_lock_by_fullhash(fullhash); - p = ct->list.find_pair(cachefile, key, fullhash); - if (p != NULL) { - ct->list.write_list_unlock(); - // on entry, holds p->mutex, - // on exit, does not hold p->mutex - bool try_again = try_pin_pair( - p, - ct, - cachefile, - lock_type, - num_dependent_pairs, - dependent_pairs, - dependent_dirty, - pf_req_callback, - pf_callback, - read_extraargs, - already_slept - ); - if (try_again) { - wait = true; - goto beginning; - } - else { - goto got_value; - } - } - assert(p == NULL); - - // Insert a PAIR into the cachetable - // NOTE: At this point we still have the write list lock held. - p = cachetable_insert_at( - ct, - cachefile, - key, - zero_value, - fullhash, - zero_attr, - write_callback, - CACHETABLE_CLEAN - ); - invariant_notnull(p); - - // Pin the pair. - p->value_rwlock.write_lock(true); - pair_unlock(p); - - - if (lock_type != PL_READ) { - ct->list.read_pending_cheap_lock(); - invariant(!p->checkpoint_pending); - for (uint32_t i = 0; i < num_dependent_pairs; i++) { - dep_checkpoint_pending[i] = dependent_pairs[i]->checkpoint_pending; - dependent_pairs[i]->checkpoint_pending = false; - } - ct->list.read_pending_cheap_unlock(); - } - // We should release the lock before we perform - // these expensive operations. - ct->list.write_list_unlock(); - - if (lock_type != PL_READ) { - checkpoint_dependent_pairs( - ct, - num_dependent_pairs, - dependent_pairs, - dep_checkpoint_pending, - dependent_dirty - ); - } - uint64_t t0 = get_tnow(); - - // Retrieve the value of the PAIR from disk. - // The pair being fetched will be marked as pending if a checkpoint happens during the - // fetch because begin_checkpoint will mark as pending any pair that is locked even if it is clean. 
- cachetable_fetch_pair(ct, cachefile, p, fetch_callback, read_extraargs, true); - cachetable_miss++; - cachetable_misstime += get_tnow() - t0; - - // If the lock_type requested was a PL_READ, we downgrade to PL_READ, - // but if the request was for a PL_WRITE_CHEAP, we don't bother - // downgrading, because we would have to possibly resolve the - // checkpointing again, and that would just make this function even - // messier. - // - // TODO(yoni): in case of PL_WRITE_CHEAP, write and use - // p->value_rwlock.write_change_status_to_not_expensive(); (Also name it better) - // to downgrade from an expensive write lock to a cheap one - if (lock_type == PL_READ) { - pair_lock(p); - p->value_rwlock.write_unlock(); - p->value_rwlock.read_lock(); - pair_unlock(p); - // small hack here for #5439, - // for queries, pf_req_callback does some work for the caller, - // that information may be out of date after a write_unlock - // followed by a read_lock, so we do it again. - bool pf_required = pf_req_callback(p->value_data,read_extraargs); - assert(!pf_required); - } - goto got_value; - } -got_value: - *value = p->value_data; - if (sizep) *sizep = p->attr.size; - return 0; -} - -// Lookup a key in the cachetable. If it is found and it is not being written, then -// acquire a read lock on the pair, update the LRU list, and return sucess. -// -// However, if the page is clean or has checkpoint pending, don't return success. -// This will minimize the number of dirty nodes. -// Rationale: maybe_get_and_pin is used when the system has an alternative to modifying a node. -// In the context of checkpointing, we don't want to gratuituously dirty a page, because it causes an I/O. -// For example, imagine that we can modify a bit in a dirty parent, or modify a bit in a clean child, then we should modify -// the dirty parent (which will have to do I/O eventually anyway) rather than incur a full block write to modify one bit. 
-// Similarly, if the checkpoint is actually pending, we don't want to block on it. -int toku_cachetable_maybe_get_and_pin (CACHEFILE cachefile, CACHEKEY key, uint32_t fullhash, pair_lock_type lock_type, void**value) { - CACHETABLE ct = cachefile->cachetable; - int r = -1; - ct->list.pair_lock_by_fullhash(fullhash); - PAIR p = ct->list.find_pair(cachefile, key, fullhash); - if (p) { - const bool lock_is_expensive = (lock_type == PL_WRITE_EXPENSIVE); - bool got_lock = false; - switch (lock_type) { - case PL_READ: - if (p->value_rwlock.try_read_lock()) { - got_lock = p->dirty; - - if (!got_lock) { - p->value_rwlock.read_unlock(); - } - } - break; - case PL_WRITE_CHEAP: - case PL_WRITE_EXPENSIVE: - if (p->value_rwlock.try_write_lock(lock_is_expensive)) { - // we got the lock fast, so continue - ct->list.read_pending_cheap_lock(); - - // if pending a checkpoint, then we don't want to return - // the value to the user, because we are responsible for - // handling the checkpointing, which we do not want to do, - // because it is expensive - got_lock = p->dirty && !p->checkpoint_pending; - - ct->list.read_pending_cheap_unlock(); - if (!got_lock) { - p->value_rwlock.write_unlock(); - } - } - break; - } - if (got_lock) { - pair_touch(p); - *value = p->value_data; - r = 0; - } - } - ct->list.pair_unlock_by_fullhash(fullhash); - return r; -} - -//Used by flusher threads to possibly pin child on client thread if pinning is cheap -//Same as toku_cachetable_maybe_get_and_pin except that we don't care if the node is clean or dirty (return the node regardless). -//All other conditions remain the same. 
-int toku_cachetable_maybe_get_and_pin_clean (CACHEFILE cachefile, CACHEKEY key, uint32_t fullhash, pair_lock_type lock_type, void**value) { - CACHETABLE ct = cachefile->cachetable; - int r = -1; - ct->list.pair_lock_by_fullhash(fullhash); - PAIR p = ct->list.find_pair(cachefile, key, fullhash); - if (p) { - const bool lock_is_expensive = (lock_type == PL_WRITE_EXPENSIVE); - bool got_lock = false; - switch (lock_type) { - case PL_READ: - if (p->value_rwlock.try_read_lock()) { - got_lock = true; - } else if (!p->value_rwlock.read_lock_is_expensive()) { - p->value_rwlock.write_lock(lock_is_expensive); - got_lock = true; - } - if (got_lock) { - pair_touch(p); - } - pair_unlock(p); - break; - case PL_WRITE_CHEAP: - case PL_WRITE_EXPENSIVE: - if (p->value_rwlock.try_write_lock(lock_is_expensive)) { - got_lock = true; - } else if (!p->value_rwlock.write_lock_is_expensive()) { - p->value_rwlock.write_lock(lock_is_expensive); - got_lock = true; - } - if (got_lock) { - pair_touch(p); - } - pair_unlock(p); - if (got_lock) { - bool checkpoint_pending = get_checkpoint_pending(p, &ct->list); - write_locked_pair_for_checkpoint(ct, p, checkpoint_pending); - } - break; - } - if (got_lock) { - *value = p->value_data; - r = 0; - } - } else { - ct->list.pair_unlock_by_fullhash(fullhash); - } - return r; -} - -// -// internal function to unpin a PAIR. -// As of Clayface, this is may be called in two ways: -// - with flush false -// - with flush true -// The first is for when this is run during run_unlockers in -// toku_cachetable_get_and_pin_nonblocking, the second is during -// normal operations. Only during normal operations do we want to possibly -// induce evictions or sleep. 
-// -static int -cachetable_unpin_internal( - CACHEFILE cachefile, - PAIR p, - enum cachetable_dirty dirty, - PAIR_ATTR attr, - bool flush - ) -{ - invariant_notnull(p); - - CACHETABLE ct = cachefile->cachetable; - bool added_data_to_cachetable = false; - - // hack for #3969, only exists in case where we run unlockers - pair_lock(p); - PAIR_ATTR old_attr = p->attr; - PAIR_ATTR new_attr = attr; - if (dirty) { - p->dirty = CACHETABLE_DIRTY; - } - if (attr.is_valid) { - p->attr = attr; - } - bool read_lock_grabbed = p->value_rwlock.readers() != 0; - unpin_pair(p, read_lock_grabbed); - pair_unlock(p); - - if (attr.is_valid) { - if (new_attr.size > old_attr.size) { - added_data_to_cachetable = true; - } - ct->ev.change_pair_attr(old_attr, new_attr); - } - - // see comments above this function to understand this code - if (flush && added_data_to_cachetable) { - if (ct->ev.should_client_thread_sleep()) { - ct->ev.wait_for_cache_pressure_to_subside(); - } - if (ct->ev.should_client_wake_eviction_thread()) { - ct->ev.signal_eviction_thread(); - } - } - return 0; -} - -int toku_cachetable_unpin(CACHEFILE cachefile, PAIR p, enum cachetable_dirty dirty, PAIR_ATTR attr) { - return cachetable_unpin_internal(cachefile, p, dirty, attr, true); -} -int toku_cachetable_unpin_ct_prelocked_no_flush(CACHEFILE cachefile, PAIR p, enum cachetable_dirty dirty, PAIR_ATTR attr) { - return cachetable_unpin_internal(cachefile, p, dirty, attr, false); -} - -static void -run_unlockers (UNLOCKERS unlockers) { - while (unlockers) { - assert(unlockers->locked); - unlockers->locked = false; - unlockers->f(unlockers->extra); - unlockers=unlockers->next; - } -} - -// -// This function tries to pin the pair without running the unlockers. -// If it can pin the pair cheaply, it does so, and returns 0. 
-// If the pin will be expensive, it runs unlockers, -// pins the pair, then releases the pin, -// and then returns TOKUDB_TRY_AGAIN -// -// on entry, pair mutex is held, -// on exit, pair mutex is NOT held -static int -maybe_pin_pair( - PAIR p, - pair_lock_type lock_type, - UNLOCKERS unlockers - ) -{ - int retval = 0; - bool expensive = (lock_type == PL_WRITE_EXPENSIVE); - - // we can pin the PAIR. In each case, we check to see - // if acquiring the pin is expensive. If so, we run the unlockers, set the - // retval to TOKUDB_TRY_AGAIN, pin AND release the PAIR. - // If not, then we pin the PAIR, keep retval at 0, and do not - // run the unlockers, as we intend to return the value to the user - if (lock_type == PL_READ) { - if (p->value_rwlock.read_lock_is_expensive()) { - pair_add_ref_unlocked(p); - pair_unlock(p); - run_unlockers(unlockers); - retval = TOKUDB_TRY_AGAIN; - pair_lock(p); - pair_release_ref_unlocked(p); - } - p->value_rwlock.read_lock(); - } - else if (lock_type == PL_WRITE_EXPENSIVE || lock_type == PL_WRITE_CHEAP){ - if (p->value_rwlock.write_lock_is_expensive()) { - pair_add_ref_unlocked(p); - pair_unlock(p); - run_unlockers(unlockers); - // change expensive to false because - // we will unpin the pair immedietely - // after pinning it - expensive = false; - retval = TOKUDB_TRY_AGAIN; - pair_lock(p); - pair_release_ref_unlocked(p); - } - p->value_rwlock.write_lock(expensive); - } - else { - abort(); - } - - if (retval == TOKUDB_TRY_AGAIN) { - unpin_pair(p, (lock_type == PL_READ)); - } - pair_touch(p); - pair_unlock(p); - return retval; -} - -int toku_cachetable_get_and_pin_nonblocking( - CACHEFILE cf, - CACHEKEY key, - uint32_t fullhash, - void**value, - long* UU(sizep), - CACHETABLE_WRITE_CALLBACK write_callback, - CACHETABLE_FETCH_CALLBACK fetch_callback, - CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, - CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, - pair_lock_type lock_type, - void *read_extraargs, - UNLOCKERS unlockers - ) -// 
See cachetable.h. -{ - CACHETABLE ct = cf->cachetable; - assert(lock_type == PL_READ || - lock_type == PL_WRITE_CHEAP || - lock_type == PL_WRITE_EXPENSIVE - ); -try_again: - ct->list.pair_lock_by_fullhash(fullhash); - PAIR p = ct->list.find_pair(cf, key, fullhash); - if (p == NULL) { - toku::context fetch_ctx(CTX_FULL_FETCH); - - // Not found - ct->list.pair_unlock_by_fullhash(fullhash); - ct->list.write_list_lock(); - ct->list.pair_lock_by_fullhash(fullhash); - p = ct->list.find_pair(cf, key, fullhash); - if (p != NULL) { - // we just did another search with the write list lock and - // found the pair this means that in between our - // releasing the read list lock and grabbing the write list lock, - // another thread snuck in and inserted the PAIR into - // the cachetable. For simplicity, we just return - // to the top and restart the function - ct->list.write_list_unlock(); - ct->list.pair_unlock_by_fullhash(fullhash); - goto try_again; - } - - p = cachetable_insert_at( - ct, - cf, - key, - zero_value, - fullhash, - zero_attr, - write_callback, - CACHETABLE_CLEAN - ); - assert(p); - // grab expensive write lock, because we are about to do a fetch - // off disk - // No one can access this pair because - // we hold the write list lock and we just injected - // the pair into the cachetable. Therefore, this lock acquisition - // will not block. - p->value_rwlock.write_lock(true); - pair_unlock(p); - run_unlockers(unlockers); // we hold the write list_lock. 
- ct->list.write_list_unlock(); - - // at this point, only the pair is pinned, - // and no pair mutex held, and - // no list lock is held - uint64_t t0 = get_tnow(); - cachetable_fetch_pair(ct, cf, p, fetch_callback, read_extraargs, false); - cachetable_miss++; - cachetable_misstime += get_tnow() - t0; - - if (ct->ev.should_client_thread_sleep()) { - ct->ev.wait_for_cache_pressure_to_subside(); - } - if (ct->ev.should_client_wake_eviction_thread()) { - ct->ev.signal_eviction_thread(); - } - - return TOKUDB_TRY_AGAIN; - } - else { - int r = maybe_pin_pair(p, lock_type, unlockers); - if (r == TOKUDB_TRY_AGAIN) { - return TOKUDB_TRY_AGAIN; - } - assert_zero(r); - - if (lock_type != PL_READ) { - bool checkpoint_pending = get_checkpoint_pending(p, &ct->list); - write_locked_pair_for_checkpoint(ct, p, checkpoint_pending); - } - - // At this point, we have pinned the PAIR - // and resolved its checkpointing. The pair's - // mutex is not held. The read list lock IS held. Before - // returning the PAIR to the user, we must - // still check for partial fetch - bool partial_fetch_required = pf_req_callback(p->value_data,read_extraargs); - if (partial_fetch_required) { - toku::context fetch_ctx(CTX_PARTIAL_FETCH); - - run_unlockers(unlockers); - - // we are now getting an expensive write lock, because we - // are doing a partial fetch. So, if we previously have - // either a read lock or a cheap write lock, we need to - // release and reacquire the correct lock type - if (lock_type == PL_READ) { - pair_lock(p); - p->value_rwlock.read_unlock(); - p->value_rwlock.write_lock(true); - pair_unlock(p); - } - else if (lock_type == PL_WRITE_CHEAP) { - pair_lock(p); - p->value_rwlock.write_unlock(); - p->value_rwlock.write_lock(true); - pair_unlock(p); - } - - // Now wait for the I/O to occur. 
- partial_fetch_required = pf_req_callback(p->value_data,read_extraargs); - if (partial_fetch_required) { - do_partial_fetch(ct, cf, p, pf_callback, read_extraargs, false); - } - else { - pair_lock(p); - p->value_rwlock.write_unlock(); - pair_unlock(p); - } - - if (ct->ev.should_client_thread_sleep()) { - ct->ev.wait_for_cache_pressure_to_subside(); - } - if (ct->ev.should_client_wake_eviction_thread()) { - ct->ev.signal_eviction_thread(); - } - - return TOKUDB_TRY_AGAIN; - } - else { - *value = p->value_data; - return 0; - } - } - // We should not get here. Above code should hit a return in all cases. - abort(); -} - -struct cachefile_prefetch_args { - PAIR p; - CACHETABLE_FETCH_CALLBACK fetch_callback; - void* read_extraargs; -}; - -struct cachefile_partial_prefetch_args { - PAIR p; - CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback; - void *read_extraargs; -}; - -// Worker thread function to read a pair from a cachefile to memory -static void cachetable_reader(void* extra) { - struct cachefile_prefetch_args* cpargs = (struct cachefile_prefetch_args*)extra; - CACHEFILE cf = cpargs->p->cachefile; - CACHETABLE ct = cf->cachetable; - cachetable_fetch_pair( - ct, - cpargs->p->cachefile, - cpargs->p, - cpargs->fetch_callback, - cpargs->read_extraargs, - false - ); - bjm_remove_background_job(cf->bjm); - toku_free(cpargs); -} - -static void cachetable_partial_reader(void* extra) { - struct cachefile_partial_prefetch_args *cpargs = (struct cachefile_partial_prefetch_args*)extra; - CACHEFILE cf = cpargs->p->cachefile; - CACHETABLE ct = cf->cachetable; - do_partial_fetch(ct, cpargs->p->cachefile, cpargs->p, cpargs->pf_callback, cpargs->read_extraargs, false); - bjm_remove_background_job(cf->bjm); - toku_free(cpargs); -} - -int toku_cachefile_prefetch(CACHEFILE cf, CACHEKEY key, uint32_t fullhash, - CACHETABLE_WRITE_CALLBACK write_callback, - CACHETABLE_FETCH_CALLBACK fetch_callback, - CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, - 
CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, - void *read_extraargs, - bool *doing_prefetch) -// Effect: See the documentation for this function in cachetable.h -{ - int r = 0; - PAIR p = NULL; - if (doing_prefetch) { - *doing_prefetch = false; - } - CACHETABLE ct = cf->cachetable; - // if cachetable has too much data, don't bother prefetching - if (ct->ev.should_client_thread_sleep()) { - goto exit; - } - ct->list.pair_lock_by_fullhash(fullhash); - // lookup - p = ct->list.find_pair(cf, key, fullhash); - // if not found then create a pair and fetch it - if (p == NULL) { - cachetable_prefetches++; - ct->list.pair_unlock_by_fullhash(fullhash); - ct->list.write_list_lock(); - ct->list.pair_lock_by_fullhash(fullhash); - p = ct->list.find_pair(cf, key, fullhash); - if (p != NULL) { - ct->list.write_list_unlock(); - goto found_pair; - } - - r = bjm_add_background_job(cf->bjm); - assert_zero(r); - p = cachetable_insert_at( - ct, - cf, - key, - zero_value, - fullhash, - zero_attr, - write_callback, - CACHETABLE_CLEAN - ); - assert(p); - p->value_rwlock.write_lock(true); - pair_unlock(p); - ct->list.write_list_unlock(); - - struct cachefile_prefetch_args *MALLOC(cpargs); - cpargs->p = p; - cpargs->fetch_callback = fetch_callback; - cpargs->read_extraargs = read_extraargs; - toku_kibbutz_enq(ct->ct_kibbutz, cachetable_reader, cpargs); - if (doing_prefetch) { - *doing_prefetch = true; - } - goto exit; - } - -found_pair: - // at this point, p is found, pair's mutex is grabbed, and - // no list lock is held - // TODO(leif): should this also just go ahead and wait if all there - // are to wait for are readers? 
- if (p->value_rwlock.try_write_lock(true)) { - // nobody else is using the node, so we should go ahead and prefetch - pair_touch(p); - pair_unlock(p); - bool partial_fetch_required = pf_req_callback(p->value_data, read_extraargs); - - if (partial_fetch_required) { - r = bjm_add_background_job(cf->bjm); - assert_zero(r); - struct cachefile_partial_prefetch_args *MALLOC(cpargs); - cpargs->p = p; - cpargs->pf_callback = pf_callback; - cpargs->read_extraargs = read_extraargs; - toku_kibbutz_enq(ct->ct_kibbutz, cachetable_partial_reader, cpargs); - if (doing_prefetch) { - *doing_prefetch = true; - } - } - else { - pair_lock(p); - p->value_rwlock.write_unlock(); - pair_unlock(p); - } - } - else { - // Couldn't get the write lock cheaply - pair_unlock(p); - } -exit: - return 0; -} - -void toku_cachefile_verify (CACHEFILE cf) { - toku_cachetable_verify(cf->cachetable); -} - -void toku_cachetable_verify (CACHETABLE ct) { - ct->list.verify(); -} - - - -struct pair_flush_for_close{ - PAIR p; - BACKGROUND_JOB_MANAGER bjm; -}; - -static void cachetable_flush_pair_for_close(void* extra) { - struct pair_flush_for_close *CAST_FROM_VOIDP(args, extra); - PAIR p = args->p; - CACHEFILE cf = p->cachefile; - CACHETABLE ct = cf->cachetable; - PAIR_ATTR attr; - cachetable_only_write_locked_data( - &ct->ev, - p, - false, // not for a checkpoint, as we assert above - &attr, - false // not a clone - ); - p->dirty = CACHETABLE_CLEAN; - bjm_remove_background_job(args->bjm); - toku_free(args); -} - - -static void flush_pair_for_close_on_background_thread( - PAIR p, - BACKGROUND_JOB_MANAGER bjm, - CACHETABLE ct - ) -{ - pair_lock(p); - assert(p->value_rwlock.users() == 0); - assert(nb_mutex_users(&p->disk_nb_mutex) == 0); - assert(!p->cloned_value_data); - if (p->dirty == CACHETABLE_DIRTY) { - int r = bjm_add_background_job(bjm); - assert_zero(r); - struct pair_flush_for_close *XMALLOC(args); - args->p = p; - args->bjm = bjm; - toku_kibbutz_enq(ct->ct_kibbutz, cachetable_flush_pair_for_close, 
args); - } - pair_unlock(p); -} - -static void remove_pair_for_close(PAIR p, CACHETABLE ct, bool completely) { - pair_lock(p); - assert(p->value_rwlock.users() == 0); - assert(nb_mutex_users(&p->disk_nb_mutex) == 0); - assert(!p->cloned_value_data); - assert(p->dirty == CACHETABLE_CLEAN); - assert(p->refcount == 0); - if (completely) { - cachetable_remove_pair(&ct->list, &ct->ev, p); - pair_unlock(p); - // TODO: Eventually, we should not hold the write list lock during free - cachetable_free_pair(p); - } - else { - // if we are not evicting completely, - // we only want to remove the PAIR from the cachetable, - // that is, remove from the hashtable and various linked - // list, but we will keep the PAIRS and the linked list - // in the cachefile intact, as they will be cached away - // in case an open comes soon. - ct->list.evict_from_cachetable(p); - pair_unlock(p); - } -} - -// helper function for cachetable_flush_cachefile, which happens on a close -// writes out the dirty pairs on background threads and returns when -// the writing is done -static void write_dirty_pairs_for_close(CACHETABLE ct, CACHEFILE cf) { - BACKGROUND_JOB_MANAGER bjm = NULL; - bjm_init(&bjm); - ct->list.write_list_lock(); // TODO: (Zardosht), verify that this lock is unnecessary to take here - PAIR p = NULL; - // write out dirty PAIRs - uint32_t i; - if (cf) { - for (i = 0, p = cf->cf_head; - i < cf->num_pairs; - i++, p = p->cf_next) - { - flush_pair_for_close_on_background_thread(p, bjm, ct); - } - } - else { - for (i = 0, p = ct->list.m_checkpoint_head; - i < ct->list.m_n_in_table; - i++, p = p->clock_next) - { - flush_pair_for_close_on_background_thread(p, bjm, ct); - } - } - ct->list.write_list_unlock(); - bjm_wait_for_jobs_to_finish(bjm); - bjm_destroy(bjm); -} - -static void remove_all_pairs_for_close(CACHETABLE ct, CACHEFILE cf, bool evict_completely) { - ct->list.write_list_lock(); - if (cf) { - if (evict_completely) { - // if we are evicting completely, then the PAIRs will - // be 
removed from the linked list managed by the - // cachefile, so this while loop works - while (cf->num_pairs > 0) { - PAIR p = cf->cf_head; - remove_pair_for_close(p, ct, evict_completely); - } - } - else { - // on the other hand, if we are not evicting completely, - // then the cachefile's linked list stays intact, and we must - // iterate like this. - for (PAIR p = cf->cf_head; p; p = p->cf_next) { - remove_pair_for_close(p, ct, evict_completely); - } - } - } - else { - while (ct->list.m_n_in_table > 0) { - PAIR p = ct->list.m_checkpoint_head; - // if there is no cachefile, then we better - // be evicting completely because we have no - // cachefile to save the PAIRs to. At least, - // we have no guarantees that the cachefile - // will remain good - invariant(evict_completely); - remove_pair_for_close(p, ct, true); - } - } - ct->list.write_list_unlock(); -} - -static void verify_cachefile_flushed(CACHETABLE ct UU(), CACHEFILE cf UU()) { -#ifdef TOKU_DEBUG_PARANOID - // assert here that cachefile is flushed by checking - // pair_list and finding no pairs belonging to this cachefile - // Make a list of pairs that belong to this cachefile. - if (cf) { - ct->list.write_list_lock(); - // assert here that cachefile is flushed by checking - // pair_list and finding no pairs belonging to this cachefile - // Make a list of pairs that belong to this cachefile. - uint32_t i; - PAIR p = NULL; - for (i = 0, p = ct->list.m_checkpoint_head; - i < ct->list.m_n_in_table; - i++, p = p->clock_next) - { - assert(p->cachefile != cf); - } - ct->list.write_list_unlock(); - } -#endif -} - -// Flush (write to disk) all of the pairs that belong to a cachefile (or all pairs if -// the cachefile is NULL. -// Must be holding cachetable lock on entry. -// -// This function assumes that no client thread is accessing or -// trying to access the cachefile while this function is executing. -// This implies no client thread will be trying to lock any nodes -// belonging to the cachefile. 
-// -// This function also assumes that the cachefile is not in the process -// of being used by a checkpoint. If a checkpoint is currently happening, -// it does NOT include this cachefile. -// -static void cachetable_flush_cachefile(CACHETABLE ct, CACHEFILE cf, bool evict_completely) { - // - // Because work on a kibbutz is always done by the client thread, - // and this function assumes that no client thread is doing any work - // on the cachefile, we assume that no client thread will be adding jobs - // to this cachefile's kibbutz. - // - // The caller of this function must ensure that there are - // no jobs added to the kibbutz. This implies that the only work other - // threads may be doing is work by the writer threads. - // - // first write out dirty PAIRs - write_dirty_pairs_for_close(ct, cf); - - // now that everything is clean, get rid of everything - remove_all_pairs_for_close(ct, cf, evict_completely); - - verify_cachefile_flushed(ct, cf); -} - -/* Requires that no locks be held that are used by the checkpoint logic */ -void -toku_cachetable_minicron_shutdown(CACHETABLE ct) { - int r = ct->cp.shutdown(); - assert(r==0); - ct->cl.destroy(); -} - -/* Requires that it all be flushed. 
*/ -void toku_cachetable_close (CACHETABLE *ctp) { - CACHETABLE ct = *ctp; - ct->cp.destroy(); - ct->cl.destroy(); - ct->cf_list.free_stale_data(&ct->ev); - cachetable_flush_cachefile(ct, NULL, true); - ct->ev.destroy(); - ct->list.destroy(); - ct->cf_list.destroy(); - - if (ct->client_kibbutz) - toku_kibbutz_destroy(ct->client_kibbutz); - if (ct->ct_kibbutz) - toku_kibbutz_destroy(ct->ct_kibbutz); - if (ct->checkpointing_kibbutz) - toku_kibbutz_destroy(ct->checkpointing_kibbutz); - toku_free(ct->env_dir); - toku_free(ct); - *ctp = 0; -} - -static PAIR test_get_pair(CACHEFILE cachefile, CACHEKEY key, uint32_t fullhash, bool have_ct_lock) { - CACHETABLE ct = cachefile->cachetable; - - if (!have_ct_lock) { - ct->list.read_list_lock(); - } - - PAIR p = ct->list.find_pair(cachefile, key, fullhash); - assert(p != NULL); - if (!have_ct_lock) { - ct->list.read_list_unlock(); - } - return p; -} - -//test-only wrapper -int toku_test_cachetable_unpin(CACHEFILE cachefile, CACHEKEY key, uint32_t fullhash, enum cachetable_dirty dirty, PAIR_ATTR attr) { - // By default we don't have the lock - PAIR p = test_get_pair(cachefile, key, fullhash, false); - return toku_cachetable_unpin(cachefile, p, dirty, attr); // assume read lock is not grabbed, and that it is a write lock -} - -//test-only wrapper -int toku_test_cachetable_unpin_ct_prelocked_no_flush(CACHEFILE cachefile, CACHEKEY key, uint32_t fullhash, enum cachetable_dirty dirty, PAIR_ATTR attr) { - // We hold the cachetable mutex. 
- PAIR p = test_get_pair(cachefile, key, fullhash, true); - return toku_cachetable_unpin_ct_prelocked_no_flush(cachefile, p, dirty, attr); -} - -//test-only wrapper -int toku_test_cachetable_unpin_and_remove ( - CACHEFILE cachefile, - CACHEKEY key, - CACHETABLE_REMOVE_KEY remove_key, - void* remove_key_extra) -{ - uint32_t fullhash = toku_cachetable_hash(cachefile, key); - PAIR p = test_get_pair(cachefile, key, fullhash, false); - return toku_cachetable_unpin_and_remove(cachefile, p, remove_key, remove_key_extra); -} - -int toku_cachetable_unpin_and_remove ( - CACHEFILE cachefile, - PAIR p, - CACHETABLE_REMOVE_KEY remove_key, - void* remove_key_extra - ) -{ - invariant_notnull(p); - int r = ENOENT; - CACHETABLE ct = cachefile->cachetable; - - p->dirty = CACHETABLE_CLEAN; // clear the dirty bit. We're just supposed to remove it. - // grab disk_nb_mutex to ensure any background thread writing - // out a cloned value completes - pair_lock(p); - assert(p->value_rwlock.writers()); - nb_mutex_lock(&p->disk_nb_mutex, p->mutex); - pair_unlock(p); - assert(p->cloned_value_data == NULL); - - // - // take care of key removal - // - ct->list.write_list_lock(); - ct->list.read_pending_cheap_lock(); - bool for_checkpoint = p->checkpoint_pending; - // now let's wipe out the pending bit, because we are - // removing the PAIR - p->checkpoint_pending = false; - - // For the PAIR to not be picked by the - // cleaner thread, we mark the cachepressure_size to be 0 - // (This is redundant since we have the write_list_lock) - // This should not be an issue because we call - // cachetable_remove_pair before - // releasing the cachetable lock. 
- // - CACHEKEY key_to_remove = p->key; - p->attr.cache_pressure_size = 0; - // - // callback for removing the key - // for FTNODEs, this leads to calling - // toku_free_blocknum - // - if (remove_key) { - remove_key( - &key_to_remove, - for_checkpoint, - remove_key_extra - ); - } - ct->list.read_pending_cheap_unlock(); - - pair_lock(p); - p->value_rwlock.write_unlock(); - nb_mutex_unlock(&p->disk_nb_mutex); - // - // As of Clayface (6.5), only these threads may be - // blocked waiting to lock this PAIR: - // - the checkpoint thread (because a checkpoint is in progress - // and the PAIR was in the list of pending pairs) - // - a client thread running get_and_pin_nonblocking, who - // ran unlockers, then waited on the PAIR lock. - // While waiting on a PAIR lock, another thread comes in, - // locks the PAIR, and ends up calling unpin_and_remove, - // all while get_and_pin_nonblocking is waiting on the PAIR lock. - // We did not realize this at first, which caused bug #4357 - // The following threads CANNOT be blocked waiting on - // the PAIR lock: - // - a thread trying to run eviction via run_eviction. - // That cannot happen because run_eviction only - // attempts to lock PAIRS that are not locked, and this PAIR - // is locked. - // - cleaner thread, for the same reason as a thread running - // eviction - // - client thread doing a normal get_and_pin. The client is smart - // enough to not try to lock a PAIR that another client thread - // is trying to unpin and remove. Note that this includes work - // done on kibbutzes. - // - writer thread. Writer threads do not grab PAIR locks. They - // get PAIR locks transferred to them by client threads. - // - - // first thing we do is remove the PAIR from the various - // cachetable data structures, so no other thread can possibly - // access it. We do not want to risk some other thread - // trying to lock this PAIR if we release the write list lock - // below. 
If some thread is already waiting on the lock, - // then we let that thread grab the lock and finish, but - // we don't want any NEW threads to try to grab the PAIR - // lock. - // - // Because we call cachetable_remove_pair and wait, - // the threads that may be waiting - // on this PAIR lock must be careful to do NOTHING with the PAIR - // As per our analysis above, we only need - // to make sure the checkpoint thread and get_and_pin_nonblocking do - // nothing, and looking at those functions, it is clear they do nothing. - // - cachetable_remove_pair(&ct->list, &ct->ev, p); - ct->list.write_list_unlock(); - if (p->refcount > 0) { - pair_wait_for_ref_release_unlocked(p); - } - if (p->value_rwlock.users() > 0) { - // Need to wait for everyone else to leave - // This write lock will be granted only after all waiting - // threads are done. - p->value_rwlock.write_lock(true); - assert(p->refcount == 0); - assert(p->value_rwlock.users() == 1); // us - assert(!p->checkpoint_pending); - assert(p->attr.cache_pressure_size == 0); - p->value_rwlock.write_unlock(); - } - // just a sanity check - assert(nb_mutex_users(&p->disk_nb_mutex) == 0); - assert(p->cloned_value_data == NULL); - //Remove pair. 
- pair_unlock(p); - cachetable_free_pair(p); - r = 0; - return r; -} - -int set_filenum_in_array(const FT &ft, const uint32_t index, FILENUM *const array); -int set_filenum_in_array(const FT &ft, const uint32_t index, FILENUM *const array) { - array[index] = toku_cachefile_filenum(ft->cf); - return 0; -} - -static int log_open_txn (TOKUTXN txn, void* extra) { - int r; - checkpointer* cp = (checkpointer *)extra; - TOKULOGGER logger = txn->logger; - FILENUMS open_filenums; - uint32_t num_filenums = txn->open_fts.size(); - FILENUM array[num_filenums]; - if (toku_txn_is_read_only(txn)) { - goto cleanup; - } - else { - cp->increment_num_txns(); - } - - open_filenums.num = num_filenums; - open_filenums.filenums = array; - //Fill in open_filenums - r = txn->open_fts.iterate(array); - invariant(r==0); - switch (toku_txn_get_state(txn)) { - case TOKUTXN_LIVE:{ - toku_log_xstillopen(logger, NULL, 0, txn, - toku_txn_get_txnid(txn), - toku_txn_get_txnid(toku_logger_txn_parent(txn)), - txn->roll_info.rollentry_raw_count, - open_filenums, - txn->force_fsync_on_commit, - txn->roll_info.num_rollback_nodes, - txn->roll_info.num_rollentries, - txn->roll_info.spilled_rollback_head, - txn->roll_info.spilled_rollback_tail, - txn->roll_info.current_rollback); - goto cleanup; - } - case TOKUTXN_PREPARING: { - TOKU_XA_XID xa_xid; - toku_txn_get_prepared_xa_xid(txn, &xa_xid); - toku_log_xstillopenprepared(logger, NULL, 0, txn, - toku_txn_get_txnid(txn), - &xa_xid, - txn->roll_info.rollentry_raw_count, - open_filenums, - txn->force_fsync_on_commit, - txn->roll_info.num_rollback_nodes, - txn->roll_info.num_rollentries, - txn->roll_info.spilled_rollback_head, - txn->roll_info.spilled_rollback_tail, - txn->roll_info.current_rollback); - goto cleanup; - } - case TOKUTXN_RETIRED: - case TOKUTXN_COMMITTING: - case TOKUTXN_ABORTING: { - assert(0); - } - } - // default is an error - assert(0); -cleanup: - return 0; -} - -// Requires: All three checkpoint-relevant locks must be held (see 
checkpoint.c). -// Algorithm: Write a checkpoint record to the log, noting the LSN of that record. -// Use the begin_checkpoint callback to take necessary snapshots (header, btt) -// Mark every dirty node as "pending." ("Pending" means that the node must be -// written to disk before it can be modified.) -void toku_cachetable_begin_checkpoint (CHECKPOINTER cp, TOKULOGGER UU(logger)) { - cp->begin_checkpoint(); -} - - -// This is used by the cachetable_race test. -static volatile int toku_checkpointing_user_data_status = 0; -static void toku_cachetable_set_checkpointing_user_data_status (int v) { - toku_checkpointing_user_data_status = v; -} -int toku_cachetable_get_checkpointing_user_data_status (void) { - return toku_checkpointing_user_data_status; -} - -// Requires: The big checkpoint lock must be held (see checkpoint.c). -// Algorithm: Write all pending nodes to disk -// Use checkpoint callback to write snapshot information to disk (header, btt) -// Use end_checkpoint callback to fsync dictionary and log, and to free unused blocks -// Note: If testcallback is null (for testing purposes only), call it after writing dictionary but before writing log -void toku_cachetable_end_checkpoint(CHECKPOINTER cp, TOKULOGGER UU(logger), - void (*testcallback_f)(void*), void* testextra) { - cp->end_checkpoint(testcallback_f, testextra); -} - -TOKULOGGER toku_cachefile_logger (CACHEFILE cf) { - return cf->cachetable->cp.get_logger(); -} - -FILENUM toku_cachefile_filenum (CACHEFILE cf) { - return cf->filenum; -} - -// debug functions - -int toku_cachetable_assert_all_unpinned (CACHETABLE ct) { - uint32_t i; - int some_pinned=0; - ct->list.read_list_lock(); - for (i=0; ilist.m_table_size; i++) { - PAIR p; - for (p=ct->list.m_table[i]; p; p=p->hash_chain) { - pair_lock(p); - if (p->value_rwlock.users()) { - //printf("%s:%d pinned: %" PRId64 " (%p)\n", __FILE__, __LINE__, p->key.b, p->value_data); - some_pinned=1; - } - pair_unlock(p); - } - } - ct->list.read_list_unlock(); - 
return some_pinned; -} - -int toku_cachefile_count_pinned (CACHEFILE cf, int print_them) { - assert(cf != NULL); - int n_pinned=0; - CACHETABLE ct = cf->cachetable; - ct->list.read_list_lock(); - - // Iterate over all the pairs to find pairs specific to the - // given cachefile. - for (uint32_t i = 0; i < ct->list.m_table_size; i++) { - for (PAIR p = ct->list.m_table[i]; p; p = p->hash_chain) { - if (p->cachefile == cf) { - pair_lock(p); - if (p->value_rwlock.users()) { - if (print_them) { - printf("%s:%d pinned: %" PRId64 " (%p)\n", - __FILE__, - __LINE__, - p->key.b, - p->value_data); - } - n_pinned++; - } - pair_unlock(p); - } - } - } - - ct->list.read_list_unlock(); - return n_pinned; -} - -void toku_cachetable_print_state (CACHETABLE ct) { - uint32_t i; - ct->list.read_list_lock(); - for (i=0; ilist.m_table_size; i++) { - PAIR p = ct->list.m_table[i]; - if (p != 0) { - pair_lock(p); - printf("t[%u]=", i); - for (p=ct->list.m_table[i]; p; p=p->hash_chain) { - printf(" {%" PRId64 ", %p, dirty=%d, pin=%d, size=%ld}", p->key.b, p->cachefile, (int) p->dirty, p->value_rwlock.users(), p->attr.size); - } - printf("\n"); - pair_unlock(p); - } - } - ct->list.read_list_unlock(); -} - -void toku_cachetable_get_state (CACHETABLE ct, int *num_entries_ptr, int *hash_size_ptr, long *size_current_ptr, long *size_limit_ptr) { - ct->list.get_state(num_entries_ptr, hash_size_ptr); - ct->ev.get_state(size_current_ptr, size_limit_ptr); -} - -int toku_cachetable_get_key_state (CACHETABLE ct, CACHEKEY key, CACHEFILE cf, void **value_ptr, - int *dirty_ptr, long long *pin_ptr, long *size_ptr) { - int r = -1; - uint32_t fullhash = toku_cachetable_hash(cf, key); - ct->list.read_list_lock(); - PAIR p = ct->list.find_pair(cf, key, fullhash); - if (p) { - pair_lock(p); - if (value_ptr) - *value_ptr = p->value_data; - if (dirty_ptr) - *dirty_ptr = p->dirty; - if (pin_ptr) - *pin_ptr = p->value_rwlock.users(); - if (size_ptr) - *size_ptr = p->attr.size; - r = 0; - pair_unlock(p); - } - 
ct->list.read_list_unlock(); - return r; -} - -void -toku_cachefile_set_userdata (CACHEFILE cf, - void *userdata, - void (*log_fassociate_during_checkpoint)(CACHEFILE, void*), - void (*close_userdata)(CACHEFILE, int, void*, bool, LSN), - void (*free_userdata)(CACHEFILE, void*), - void (*checkpoint_userdata)(CACHEFILE, int, void*), - void (*begin_checkpoint_userdata)(LSN, void*), - void (*end_checkpoint_userdata)(CACHEFILE, int, void*), - void (*note_pin_by_checkpoint)(CACHEFILE, void*), - void (*note_unpin_by_checkpoint)(CACHEFILE, void*)) { - cf->userdata = userdata; - cf->log_fassociate_during_checkpoint = log_fassociate_during_checkpoint; - cf->close_userdata = close_userdata; - cf->free_userdata = free_userdata; - cf->checkpoint_userdata = checkpoint_userdata; - cf->begin_checkpoint_userdata = begin_checkpoint_userdata; - cf->end_checkpoint_userdata = end_checkpoint_userdata; - cf->note_pin_by_checkpoint = note_pin_by_checkpoint; - cf->note_unpin_by_checkpoint = note_unpin_by_checkpoint; -} - -void *toku_cachefile_get_userdata(CACHEFILE cf) { - return cf->userdata; -} - -CACHETABLE -toku_cachefile_get_cachetable(CACHEFILE cf) { - return cf->cachetable; -} - -//Only called by ft_end_checkpoint -//Must have access to cf->fd (must be protected) -void toku_cachefile_fsync(CACHEFILE cf) { - toku_file_fsync(cf->fd); -} - -// Make it so when the cachefile closes, the underlying file is unlinked -void toku_cachefile_unlink_on_close(CACHEFILE cf) { - assert(!cf->unlink_on_close); - cf->unlink_on_close = true; -} - -// is this cachefile marked as unlink on close? -bool toku_cachefile_is_unlink_on_close(CACHEFILE cf) { - return cf->unlink_on_close; -} - -uint64_t toku_cachefile_size(CACHEFILE cf) { - int64_t file_size; - int fd = toku_cachefile_get_fd(cf); - int r = toku_os_get_file_size(fd, &file_size); - assert_zero(r); - return file_size; -} - -char * -toku_construct_full_name(int count, ...) 
{ - va_list ap; - char *name = NULL; - size_t n = 0; - int i; - va_start(ap, count); - for (i=0; ienv_dir, fname_in_env); -} - -static long -cleaner_thread_rate_pair(PAIR p) -{ - return p->attr.cache_pressure_size; -} - -static int const CLEANER_N_TO_CHECK = 8; - -int toku_cleaner_thread_for_test (CACHETABLE ct) { - return ct->cl.run_cleaner(); -} - -int toku_cleaner_thread (void *cleaner_v) { - cleaner* cl = (cleaner *) cleaner_v; - assert(cl); - return cl->run_cleaner(); -} - -///////////////////////////////////////////////////////////////////////// -// -// cleaner methods -// -ENSURE_POD(cleaner); - -int cleaner::init(uint32_t _cleaner_iterations, pair_list* _pl, CACHETABLE _ct) { - // default is no cleaner, for now - m_cleaner_cron_init = false; - int r = toku_minicron_setup(&m_cleaner_cron, 0, toku_cleaner_thread, this); - if (r == 0) { - m_cleaner_cron_init = true; - } - TOKU_VALGRIND_HG_DISABLE_CHECKING(&m_cleaner_iterations, sizeof m_cleaner_iterations); - m_cleaner_iterations = _cleaner_iterations; - m_pl = _pl; - m_ct = _ct; - m_cleaner_init = true; - return r; -} - -// this function is allowed to be called multiple times -void cleaner::destroy(void) { - if (!m_cleaner_init) { - return; - } - if (m_cleaner_cron_init && !toku_minicron_has_been_shutdown(&m_cleaner_cron)) { - // for test code only, production code uses toku_cachetable_minicron_shutdown() - int r = toku_minicron_shutdown(&m_cleaner_cron); - assert(r==0); - } -} - -uint32_t cleaner::get_iterations(void) { - return m_cleaner_iterations; -} - -void cleaner::set_iterations(uint32_t new_iterations) { - m_cleaner_iterations = new_iterations; -} - -uint32_t cleaner::get_period_unlocked(void) { - return toku_minicron_get_period_in_seconds_unlocked(&m_cleaner_cron); -} - -// -// Sets how often the cleaner thread will run, in seconds -// -void cleaner::set_period(uint32_t new_period) { - toku_minicron_change_period(&m_cleaner_cron, new_period*1000); -} - -// Effect: runs a cleaner. 
-// -// We look through some number of nodes, the first N that we see which are -// unlocked and are not involved in a cachefile flush, pick one, and call -// the cleaner callback. While we're picking a node, we have the -// cachetable lock the whole time, so we don't need any extra -// synchronization. Once we have one we want, we lock it and notify the -// cachefile that we're doing some background work (so a flush won't -// start). At this point, we can safely unlock the cachetable, do the -// work (callback), and unlock/release our claim to the cachefile. -int cleaner::run_cleaner(void) { - toku::context cleaner_ctx(CTX_CLEANER); - - int r; - uint32_t num_iterations = this->get_iterations(); - for (uint32_t i = 0; i < num_iterations; ++i) { - cleaner_executions++; - m_pl->read_list_lock(); - PAIR best_pair = NULL; - int n_seen = 0; - long best_score = 0; - const PAIR first_pair = m_pl->m_cleaner_head; - if (first_pair == NULL) { - // nothing in the cachetable, just get out now - m_pl->read_list_unlock(); - break; - } - // here we select a PAIR for cleaning - // look at some number of PAIRS, and - // pick what we think is the best one for cleaning - //***** IMPORTANT ****** - // we MUST not pick a PAIR whose rating is 0. We have - // numerous assumptions in other parts of the code that - // this is the case: - // - this is how rollback nodes and leaf nodes are not selected for cleaning - // - this is how a thread that is calling unpin_and_remove will prevent - // the cleaner thread from picking its PAIR (see comments in that function) - do { - // - // We are already holding onto best_pair, if we run across a pair that - // has the same mutex due to a collision in the hashtable, we need - // to be careful. - // - if (best_pair && m_pl->m_cleaner_head->mutex == best_pair->mutex) { - // Advance the cleaner head. 
- long score = 0; - // only bother with this pair if it has no current users - if (m_pl->m_cleaner_head->value_rwlock.users() == 0) { - score = cleaner_thread_rate_pair(m_pl->m_cleaner_head); - if (score > best_score) { - best_score = score; - best_pair = m_pl->m_cleaner_head; - } - } - m_pl->m_cleaner_head = m_pl->m_cleaner_head->clock_next; - continue; - } - pair_lock(m_pl->m_cleaner_head); - if (m_pl->m_cleaner_head->value_rwlock.users() > 0) { - pair_unlock(m_pl->m_cleaner_head); - } - else { - n_seen++; - long score = 0; - score = cleaner_thread_rate_pair(m_pl->m_cleaner_head); - if (score > best_score) { - best_score = score; - // Since we found a new best pair, we need to - // free the old best pair. - if (best_pair) { - pair_unlock(best_pair); - } - best_pair = m_pl->m_cleaner_head; - } - else { - pair_unlock(m_pl->m_cleaner_head); - } - } - // Advance the cleaner head. - m_pl->m_cleaner_head = m_pl->m_cleaner_head->clock_next; - } while (m_pl->m_cleaner_head != first_pair && n_seen < CLEANER_N_TO_CHECK); - m_pl->read_list_unlock(); - - // - // at this point, if we have found a PAIR for cleaning, - // that is, best_pair != NULL, we do the clean - // - // if best_pair !=NULL, then best_pair->mutex is held - // no list lock is held - // - if (best_pair) { - CACHEFILE cf = best_pair->cachefile; - // try to add a background job to the manager - // if we can't, that means the cachefile is flushing, so - // we simply continue the for loop and this iteration - // becomes a no-op - r = bjm_add_background_job(cf->bjm); - if (r) { - pair_unlock(best_pair); - continue; - } - best_pair->value_rwlock.write_lock(true); - pair_unlock(best_pair); - // verify a key assumption. 
- assert(cleaner_thread_rate_pair(best_pair) > 0); - // check the checkpoint_pending bit - m_pl->read_pending_cheap_lock(); - bool checkpoint_pending = best_pair->checkpoint_pending; - best_pair->checkpoint_pending = false; - m_pl->read_pending_cheap_unlock(); - if (checkpoint_pending) { - write_locked_pair_for_checkpoint(m_ct, best_pair, true); - } - - bool cleaner_callback_called = false; - - // it's theoretically possible that after writing a PAIR for checkpoint, the - // PAIR's heuristic tells us nothing needs to be done. It is not possible - // in Dr. Noga, but unit tests verify this behavior works properly. - if (cleaner_thread_rate_pair(best_pair) > 0) { - r = best_pair->cleaner_callback(best_pair->value_data, - best_pair->key, - best_pair->fullhash, - best_pair->write_extraargs); - assert_zero(r); - cleaner_callback_called = true; - } - - // The cleaner callback must have unlocked the pair, so we - // don't need to unlock it if the cleaner callback is called. - if (!cleaner_callback_called) { - pair_lock(best_pair); - best_pair->value_rwlock.write_unlock(); - pair_unlock(best_pair); - } - // We need to make sure the cachefile sticks around so a close - // can't come destroy it. That's the purpose of this - // "add/remove_background_job" business, which means the - // cachefile is still valid here, even though the cleaner - // callback unlocks the pair. - bjm_remove_background_job(cf->bjm); - } - else { - // If we didn't find anything this time around the cachetable, - // we probably won't find anything if we run around again, so - // just break out from the for-loop now and - // we'll try again when the cleaner thread runs again. 
- break; - } - } - return 0; -} - -static_assert(std::is_pod::value, "pair_list isn't POD"); - -const uint32_t INITIAL_PAIR_LIST_SIZE = 1<<20; -uint32_t PAIR_LOCK_SIZE = 1<<20; - -void toku_pair_list_set_lock_size(uint32_t num_locks) { - PAIR_LOCK_SIZE = num_locks; -} - -static void evict_pair_from_cachefile(PAIR p) { - CACHEFILE cf = p->cachefile; - if (p->cf_next) { - p->cf_next->cf_prev = p->cf_prev; - } - if (p->cf_prev) { - p->cf_prev->cf_next = p->cf_next; - } - else if (p->cachefile->cf_head == p) { - cf->cf_head = p->cf_next; - } - p->cf_prev = p->cf_next = NULL; - cf->num_pairs--; -} - -// Allocates the hash table of pairs inside this pair list. -// -void pair_list::init() { - m_table_size = INITIAL_PAIR_LIST_SIZE; - m_num_locks = PAIR_LOCK_SIZE; - m_n_in_table = 0; - m_clock_head = NULL; - m_cleaner_head = NULL; - m_checkpoint_head = NULL; - m_pending_head = NULL; - m_table = NULL; - - - pthread_rwlockattr_t attr; - pthread_rwlockattr_init(&attr); -#if defined(HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP) - pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP); -#else - // TODO: need to figure out how to make writer-preferential rwlocks - // happen on osx -#endif - toku_pthread_rwlock_init(&m_list_lock, &attr); - toku_pthread_rwlock_init(&m_pending_lock_expensive, &attr); - toku_pthread_rwlock_init(&m_pending_lock_cheap, &attr); - XCALLOC_N(m_table_size, m_table); - XCALLOC_N(m_num_locks, m_mutexes); - for (uint64_t i = 0; i < m_num_locks; i++) { - toku_mutex_init(&m_mutexes[i].aligned_mutex, NULL); - } -} - -// Frees the pair_list hash table. It is expected to be empty by -// the time this is called. Returns an error if there are any -// pairs in any of the hash table slots. -void pair_list::destroy() { - // Check if any entries exist in the hash table. 
- for (uint32_t i = 0; i < m_table_size; ++i) { - invariant_null(m_table[i]); - } - for (uint64_t i = 0; i < m_num_locks; i++) { - toku_mutex_destroy(&m_mutexes[i].aligned_mutex); - } - toku_pthread_rwlock_destroy(&m_list_lock); - toku_pthread_rwlock_destroy(&m_pending_lock_expensive); - toku_pthread_rwlock_destroy(&m_pending_lock_cheap); - toku_free(m_table); - toku_free(m_mutexes); -} - -// adds a PAIR to the cachetable's structures, -// but does NOT add it to the list maintained by -// the cachefile -void pair_list::add_to_cachetable_only(PAIR p) { - // sanity check to make sure that the PAIR does not already exist - PAIR pp = this->find_pair(p->cachefile, p->key, p->fullhash); - assert(pp == NULL); - - this->add_to_clock(p); - this->add_to_hash_chain(p); - m_n_in_table++; -} - -// This places the given pair inside of the pair list. -// -// requires caller to have grabbed write lock on list. -// requires caller to have p->mutex held as well -// -void pair_list::put(PAIR p) { - this->add_to_cachetable_only(p); - this->add_to_cf_list(p); -} - -// This removes the given pair from completely from the pair list. -// -// requires caller to have grabbed write lock on list, and p->mutex held -// -void pair_list::evict_completely(PAIR p) { - this->evict_from_cachetable(p); - this->evict_from_cachefile(p); -} - -// Removes the PAIR from the cachetable's lists, -// but does NOT impact the list maintained by the cachefile -void pair_list::evict_from_cachetable(PAIR p) { - this->pair_remove(p); - this->pending_pairs_remove(p); - this->remove_from_hash_chain(p); - - assert(m_n_in_table > 0); - m_n_in_table--; -} - -// Removes the PAIR from the cachefile's list of PAIRs -void pair_list::evict_from_cachefile(PAIR p) { - evict_pair_from_cachefile(p); -} - -// -// Remove pair from linked list for cleaner/clock -// -// -// requires caller to have grabbed write lock on list. 
-// -void pair_list::pair_remove (PAIR p) { - if (p->clock_prev == p) { - invariant(m_clock_head == p); - invariant(p->clock_next == p); - invariant(m_cleaner_head == p); - invariant(m_checkpoint_head == p); - m_clock_head = NULL; - m_cleaner_head = NULL; - m_checkpoint_head = NULL; - } - else { - if (p == m_clock_head) { - m_clock_head = m_clock_head->clock_next; - } - if (p == m_cleaner_head) { - m_cleaner_head = m_cleaner_head->clock_next; - } - if (p == m_checkpoint_head) { - m_checkpoint_head = m_checkpoint_head->clock_next; - } - p->clock_prev->clock_next = p->clock_next; - p->clock_next->clock_prev = p->clock_prev; - } - p->clock_prev = p->clock_next = NULL; -} - -//Remove a pair from the list of pairs that were marked with the -//pending bit for the in-progress checkpoint. -// -// requires that if the caller is the checkpoint thread, then a read lock -// is grabbed on the list. Otherwise, must have write lock on list. -// -void pair_list::pending_pairs_remove (PAIR p) { - if (p->pending_next) { - p->pending_next->pending_prev = p->pending_prev; - } - if (p->pending_prev) { - p->pending_prev->pending_next = p->pending_next; - } - else if (m_pending_head==p) { - m_pending_head = p->pending_next; - } - p->pending_prev = p->pending_next = NULL; -} - -void pair_list::remove_from_hash_chain(PAIR p) { - // Remove it from the hash chain. - unsigned int h = p->fullhash&(m_table_size - 1); - paranoid_invariant(m_table[h] != NULL); - if (m_table[h] == p) { - m_table[h] = p->hash_chain; - } - else { - PAIR curr = m_table[h]; - while (curr->hash_chain != p) { - curr = curr->hash_chain; - } - // remove p from the singular linked list - curr->hash_chain = p->hash_chain; - } - p->hash_chain = NULL; -} - -// Returns a pair from the pair list, using the given -// pair. If the pair cannot be found, null is returned. -// -// requires caller to have grabbed either a read lock on the list or -// bucket's mutex. 
-// -PAIR pair_list::find_pair(CACHEFILE file, CACHEKEY key, uint32_t fullhash) { - PAIR found_pair = nullptr; - for (PAIR p = m_table[fullhash&(m_table_size - 1)]; p; p = p->hash_chain) { - if (p->key.b == key.b && p->cachefile == file) { - found_pair = p; - break; - } - } - return found_pair; -} - -// Add PAIR to linked list shared by cleaner thread and clock -// -// requires caller to have grabbed write lock on list. -// -void pair_list::add_to_clock (PAIR p) { - // requires that p is not currently in the table. - // inserts p into the clock list at the tail. - - p->count = CLOCK_INITIAL_COUNT; - //assert either both head and tail are set or they are both NULL - // tail and head exist - if (m_clock_head) { - assert(m_cleaner_head); - assert(m_checkpoint_head); - // insert right before the head - p->clock_next = m_clock_head; - p->clock_prev = m_clock_head->clock_prev; - - p->clock_prev->clock_next = p; - p->clock_next->clock_prev = p; - - } - // this is the first element in the list - else { - m_clock_head = p; - p->clock_next = p->clock_prev = m_clock_head; - m_cleaner_head = p; - m_checkpoint_head = p; - } -} - -// add the pair to the linked list that of PAIRs belonging -// to the same cachefile. This linked list is used -// in cachetable_flush_cachefile. -void pair_list::add_to_cf_list(PAIR p) { - CACHEFILE cf = p->cachefile; - if (cf->cf_head) { - cf->cf_head->cf_prev = p; - } - p->cf_next = cf->cf_head; - p->cf_prev = NULL; - cf->cf_head = p; - cf->num_pairs++; -} - -// Add PAIR to the hashtable -// -// requires caller to have grabbed write lock on list -// and to have grabbed the p->mutex. 
-void pair_list::add_to_hash_chain(PAIR p) { - uint32_t h = p->fullhash & (m_table_size - 1); - p->hash_chain = m_table[h]; - m_table[h] = p; -} - -// test function -// -// grabs and releases write list lock -// -void pair_list::verify() { - this->write_list_lock(); - uint32_t num_found = 0; - - // First clear all the verify flags by going through the hash chains - { - uint32_t i; - for (i = 0; i < m_table_size; i++) { - PAIR p; - for (p = m_table[i]; p; p = p->hash_chain) { - num_found++; - } - } - } - assert(num_found == m_n_in_table); - num_found = 0; - // Now go through the clock chain, make sure everything in the LRU chain is hashed. - { - PAIR p; - bool is_first = true; - for (p = m_clock_head; m_clock_head != NULL && (p != m_clock_head || is_first); p=p->clock_next) { - is_first=false; - PAIR p2; - uint32_t fullhash = p->fullhash; - //assert(fullhash==toku_cachetable_hash(p->cachefile, p->key)); - for (p2 = m_table[fullhash&(m_table_size-1)]; p2; p2=p2->hash_chain) { - if (p2==p) { - /* found it */ - num_found++; - goto next; - } - } - fprintf(stderr, "Something in the clock chain is not hashed\n"); - assert(0); - next:; - } - assert (num_found == m_n_in_table); - } - this->write_list_unlock(); -} - -// If given pointers are not null, assign the hash table size of -// this pair list and the number of pairs in this pair list. 
-// -// -// grabs and releases read list lock -// -void pair_list::get_state(int *num_entries, int *hash_size) { - this->read_list_lock(); - if (num_entries) { - *num_entries = m_n_in_table; - } - if (hash_size) { - *hash_size = m_table_size; - } - this->read_list_unlock(); -} - -void pair_list::read_list_lock() { - toku_pthread_rwlock_rdlock(&m_list_lock); -} - -void pair_list::read_list_unlock() { - toku_pthread_rwlock_rdunlock(&m_list_lock); -} - -void pair_list::write_list_lock() { - toku_pthread_rwlock_wrlock(&m_list_lock); -} - -void pair_list::write_list_unlock() { - toku_pthread_rwlock_wrunlock(&m_list_lock); -} - -void pair_list::read_pending_exp_lock() { - toku_pthread_rwlock_rdlock(&m_pending_lock_expensive); -} - -void pair_list::read_pending_exp_unlock() { - toku_pthread_rwlock_rdunlock(&m_pending_lock_expensive); -} - -void pair_list::write_pending_exp_lock() { - toku_pthread_rwlock_wrlock(&m_pending_lock_expensive); -} - -void pair_list::write_pending_exp_unlock() { - toku_pthread_rwlock_wrunlock(&m_pending_lock_expensive); -} - -void pair_list::read_pending_cheap_lock() { - toku_pthread_rwlock_rdlock(&m_pending_lock_cheap); -} - -void pair_list::read_pending_cheap_unlock() { - toku_pthread_rwlock_rdunlock(&m_pending_lock_cheap); -} - -void pair_list::write_pending_cheap_lock() { - toku_pthread_rwlock_wrlock(&m_pending_lock_cheap); -} - -void pair_list::write_pending_cheap_unlock() { - toku_pthread_rwlock_wrunlock(&m_pending_lock_cheap); -} - -toku_mutex_t* pair_list::get_mutex_for_pair(uint32_t fullhash) { - return &m_mutexes[fullhash&(m_num_locks - 1)].aligned_mutex; -} - -void pair_list::pair_lock_by_fullhash(uint32_t fullhash) { - toku_mutex_lock(&m_mutexes[fullhash&(m_num_locks - 1)].aligned_mutex); -} - -void pair_list::pair_unlock_by_fullhash(uint32_t fullhash) { - toku_mutex_unlock(&m_mutexes[fullhash&(m_num_locks - 1)].aligned_mutex); -} - - -ENSURE_POD(evictor); - -// -// This is the function that runs eviction on its own thread. 
-// -static void *eviction_thread(void *evictor_v) { - evictor* CAST_FROM_VOIDP(evictor, evictor_v); - evictor->run_eviction_thread(); - return evictor_v; -} - -// -// Starts the eviction thread, assigns external object references, -// and initializes all counters and condition variables. -// -int evictor::init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, KIBBUTZ _kibbutz, uint32_t eviction_period) { - TOKU_VALGRIND_HG_DISABLE_CHECKING(&m_ev_thread_is_running, sizeof m_ev_thread_is_running); - TOKU_VALGRIND_HG_DISABLE_CHECKING(&m_size_evicting, sizeof m_size_evicting); - - // set max difference to around 500MB - int64_t max_diff = (1 << 29); - - m_low_size_watermark = _size_limit; - // these values are selected kind of arbitrarily right now as - // being a percentage more than low_size_watermark, which is provided - // by the caller. - m_low_size_hysteresis = (11 * _size_limit)/10; //10% more - if ((m_low_size_hysteresis - m_low_size_watermark) > max_diff) { - m_low_size_hysteresis = m_low_size_watermark + max_diff; - } - m_high_size_hysteresis = (5 * _size_limit)/4; // 20% more - if ((m_high_size_hysteresis - m_low_size_hysteresis) > max_diff) { - m_high_size_hysteresis = m_low_size_hysteresis + max_diff; - } - m_high_size_watermark = (3 * _size_limit)/2; // 50% more - if ((m_high_size_watermark - m_high_size_hysteresis) > max_diff) { - m_high_size_watermark = m_high_size_hysteresis + max_diff; - } - - m_size_reserved = unreservable_memory(_size_limit); - m_size_current = 0; - m_size_evicting = 0; - - m_size_nonleaf = create_partitioned_counter(); - m_size_leaf = create_partitioned_counter(); - m_size_rollback = create_partitioned_counter(); - m_size_cachepressure = create_partitioned_counter(); - m_wait_pressure_count = create_partitioned_counter(); - m_wait_pressure_time = create_partitioned_counter(); - m_long_wait_pressure_count = create_partitioned_counter(); - m_long_wait_pressure_time = create_partitioned_counter(); - - m_pl = _pl; - 
m_cf_list = _cf_list; - m_kibbutz = _kibbutz; - toku_mutex_init(&m_ev_thread_lock, NULL); - toku_cond_init(&m_flow_control_cond, NULL); - toku_cond_init(&m_ev_thread_cond, NULL); - m_num_sleepers = 0; - m_ev_thread_is_running = false; - m_period_in_seconds = eviction_period; - - unsigned int seed = (unsigned int) time(NULL); - int r = myinitstate_r(seed, m_random_statebuf, sizeof m_random_statebuf, &m_random_data); - assert_zero(r); - - // start the background thread - m_run_thread = true; - m_num_eviction_thread_runs = 0; - m_ev_thread_init = false; - r = toku_pthread_create(&m_ev_thread, NULL, eviction_thread, this); - if (r == 0) { - m_ev_thread_init = true; - } - m_evictor_init = true; - return r; -} - -// -// This stops the eviction thread and clears the condition variable. -// -// NOTE: This should only be called if there are no evictions in progress. -// -void evictor::destroy() { - if (!m_evictor_init) { - return; - } - assert(m_size_evicting == 0); - // - // commented out of Ming, because we could not finish - // #5672. Once #5672 is solved, we should restore this - // - //assert(m_size_current == 0); - - // Stop the eviction thread. 
- if (m_ev_thread_init) { - toku_mutex_lock(&m_ev_thread_lock); - m_run_thread = false; - this->signal_eviction_thread(); - toku_mutex_unlock(&m_ev_thread_lock); - void *ret; - int r = toku_pthread_join(m_ev_thread, &ret); - assert_zero(r); - assert(!m_ev_thread_is_running); - } - destroy_partitioned_counter(m_size_nonleaf); - m_size_nonleaf = NULL; - destroy_partitioned_counter(m_size_leaf); - m_size_leaf = NULL; - destroy_partitioned_counter(m_size_rollback); - m_size_rollback = NULL; - destroy_partitioned_counter(m_size_cachepressure); - m_size_cachepressure = NULL; - - destroy_partitioned_counter(m_wait_pressure_count); m_wait_pressure_count = NULL; - destroy_partitioned_counter(m_wait_pressure_time); m_wait_pressure_time = NULL; - destroy_partitioned_counter(m_long_wait_pressure_count); m_long_wait_pressure_count = NULL; - destroy_partitioned_counter(m_long_wait_pressure_time); m_long_wait_pressure_time = NULL; - - toku_cond_destroy(&m_flow_control_cond); - toku_cond_destroy(&m_ev_thread_cond); - toku_mutex_destroy(&m_ev_thread_lock); -} - -// -// Increases status variables and the current size variable -// of the evictor based on the given pair attribute. -// -void evictor::add_pair_attr(PAIR_ATTR attr) { - assert(attr.is_valid); - add_to_size_current(attr.size); - increment_partitioned_counter(m_size_nonleaf, attr.nonleaf_size); - increment_partitioned_counter(m_size_leaf, attr.leaf_size); - increment_partitioned_counter(m_size_rollback, attr.rollback_size); - increment_partitioned_counter(m_size_cachepressure, attr.cache_pressure_size); -} - -// -// Decreases status variables and the current size variable -// of the evictor based on the given pair attribute. 
-// -void evictor::remove_pair_attr(PAIR_ATTR attr) { - assert(attr.is_valid); - remove_from_size_current(attr.size); - increment_partitioned_counter(m_size_nonleaf, 0 - attr.nonleaf_size); - increment_partitioned_counter(m_size_leaf, 0 - attr.leaf_size); - increment_partitioned_counter(m_size_rollback, 0 - attr.rollback_size); - increment_partitioned_counter(m_size_cachepressure, 0 - attr.cache_pressure_size); -} - -// -// Updates this evictor's stats to match the "new" pair attribute given -// while also removing the given "old" pair attribute. -// -void evictor::change_pair_attr(PAIR_ATTR old_attr, PAIR_ATTR new_attr) { - this->add_pair_attr(new_attr); - this->remove_pair_attr(old_attr); -} - -// -// Adds the given size to the evictor's estimation of -// the size of the cachetable. -// -void evictor::add_to_size_current(long size) { - (void) toku_sync_fetch_and_add(&m_size_current, size); -} - -// -// Subtracts the given size from the evictor's current -// approximation of the cachetable size. 
-// -void evictor::remove_from_size_current(long size) { - (void) toku_sync_fetch_and_sub(&m_size_current, size); -} - -// -// TODO: (Zardosht) comment this function -// -uint64_t evictor::reserve_memory(double fraction, uint64_t upper_bound) { - toku_mutex_lock(&m_ev_thread_lock); - uint64_t reserved_memory = fraction * (m_low_size_watermark - m_size_reserved); - if (0) { // debug - fprintf(stderr, "%s %" PRIu64 " %" PRIu64 "\n", __PRETTY_FUNCTION__, reserved_memory, upper_bound); - } - if (upper_bound > 0 && reserved_memory > upper_bound) { - reserved_memory = upper_bound; - } - m_size_reserved += reserved_memory; - (void) toku_sync_fetch_and_add(&m_size_current, reserved_memory); - this->signal_eviction_thread(); - toku_mutex_unlock(&m_ev_thread_lock); - - if (this->should_client_thread_sleep()) { - this->wait_for_cache_pressure_to_subside(); - } - return reserved_memory; -} - -// -// TODO: (Zardosht) comment this function -// -void evictor::release_reserved_memory(uint64_t reserved_memory){ - (void) toku_sync_fetch_and_sub(&m_size_current, reserved_memory); - toku_mutex_lock(&m_ev_thread_lock); - m_size_reserved -= reserved_memory; - // signal the eviction thread in order to possibly wake up sleeping clients - if (m_num_sleepers > 0) { - this->signal_eviction_thread(); - } - toku_mutex_unlock(&m_ev_thread_lock); -} - -// -// This function is the eviction thread. It runs for the lifetime of -// the evictor. Goes to sleep for period_in_seconds -// by waiting on m_ev_thread_cond. 
-// -void evictor::run_eviction_thread(){ - toku_mutex_lock(&m_ev_thread_lock); - while (m_run_thread) { - m_num_eviction_thread_runs++; // for test purposes only - m_ev_thread_is_running = true; - // responsibility of run_eviction to release and - // regrab ev_thread_lock as it sees fit - this->run_eviction(); - m_ev_thread_is_running = false; - - if (m_run_thread) { - // - // sleep until either we are signaled - // via signal_eviction_thread or - // m_period_in_seconds amount of time has passed - // - if (m_period_in_seconds) { - toku_timespec_t wakeup_time; - struct timeval tv; - gettimeofday(&tv, 0); - wakeup_time.tv_sec = tv.tv_sec; - wakeup_time.tv_nsec = tv.tv_usec * 1000LL; - wakeup_time.tv_sec += m_period_in_seconds; - toku_cond_timedwait( - &m_ev_thread_cond, - &m_ev_thread_lock, - &wakeup_time - ); - } - // for test purposes, we have an option of - // not waiting on a period, but rather sleeping indefinitely - else { - toku_cond_wait(&m_ev_thread_cond, &m_ev_thread_lock); - } - } - } - toku_mutex_unlock(&m_ev_thread_lock); -} - -// -// runs eviction. -// on entry, ev_thread_lock is grabbed, on exit, ev_thread_lock must still be grabbed -// it is the responsibility of this function to release and reacquire ev_thread_lock as it sees fit. -// -void evictor::run_eviction(){ - // - // These variables will help us detect if everything in the clock is currently being accessed. - // We must detect this case otherwise we will end up in an infinite loop below. 
- // - bool exited_early = false; - uint32_t num_pairs_examined_without_evicting = 0; - - while (this->eviction_needed()) { - if (m_num_sleepers > 0 && this->should_sleeping_clients_wakeup()) { - toku_cond_broadcast(&m_flow_control_cond); - } - // release ev_thread_lock so that eviction may run without holding mutex - toku_mutex_unlock(&m_ev_thread_lock); - - // first try to do an eviction from stale cachefiles - bool some_eviction_ran = m_cf_list->evict_some_stale_pair(this); - if (!some_eviction_ran) { - m_pl->read_list_lock(); - PAIR curr_in_clock = m_pl->m_clock_head; - // if nothing to evict, we need to exit - if (!curr_in_clock) { - m_pl->read_list_unlock(); - toku_mutex_lock(&m_ev_thread_lock); - exited_early = true; - goto exit; - } - if (num_pairs_examined_without_evicting > m_pl->m_n_in_table) { - // we have a cycle where everything in the clock is in use - // do not return an error - // just let memory be overfull - m_pl->read_list_unlock(); - toku_mutex_lock(&m_ev_thread_lock); - exited_early = true; - goto exit; - } - bool eviction_run = run_eviction_on_pair(curr_in_clock); - if (eviction_run) { - // reset the count - num_pairs_examined_without_evicting = 0; - } - else { - num_pairs_examined_without_evicting++; - } - // at this point, either curr_in_clock is still in the list because it has not been fully evicted, - // and we need to move ct->m_clock_head over. Otherwise, curr_in_clock has been fully evicted - // and we do NOT need to move ct->m_clock_head, as the removal of curr_in_clock - // modified ct->m_clock_head - if (m_pl->m_clock_head && (m_pl->m_clock_head == curr_in_clock)) { - m_pl->m_clock_head = m_pl->m_clock_head->clock_next; - } - m_pl->read_list_unlock(); - } - toku_mutex_lock(&m_ev_thread_lock); - } - -exit: - if (m_num_sleepers > 0 && (exited_early || this->should_sleeping_clients_wakeup())) { - toku_cond_broadcast(&m_flow_control_cond); - } - return; -} - -// -// NOTE: Cachetable lock held on entry. 
-// Runs eviction on the given PAIR. This may be a -// partial eviction or full eviction. -// -// on entry, pair mutex is NOT held, but pair list's read list lock -// IS held -// on exit, the same conditions must apply -// -bool evictor::run_eviction_on_pair(PAIR curr_in_clock) { - uint32_t n_in_table; - int64_t size_current; - bool ret_val = false; - // function meant to be called on PAIR that is not being accessed right now - CACHEFILE cf = curr_in_clock->cachefile; - int r = bjm_add_background_job(cf->bjm); - if (r) { - goto exit; - } - pair_lock(curr_in_clock); - // these are the circumstances under which we don't run eviction on a pair: - // - if other users are waiting on the lock - // - if the PAIR is referenced by users - // - if the PAIR's disk_nb_mutex is in use, implying that it is - // undergoing a checkpoint - if (curr_in_clock->value_rwlock.users() || - curr_in_clock->refcount > 0 || - nb_mutex_users(&curr_in_clock->disk_nb_mutex)) - { - pair_unlock(curr_in_clock); - bjm_remove_background_job(cf->bjm); - goto exit; - } - - // extract and use these values so that we don't risk them changing - // out from underneath us in calculations below. 
- n_in_table = m_pl->m_n_in_table; - size_current = m_size_current; - - // now that we have the pair mutex we care about, we can - // release the read list lock and reacquire it at the end of the function - m_pl->read_list_unlock(); - ret_val = true; - if (curr_in_clock->count > 0) { - toku::context pe_ctx(CTX_PARTIAL_EVICTION); - - uint32_t curr_size = curr_in_clock->attr.size; - // if the size of this PAIR is greater than the average size of PAIRs - // in the cachetable, then decrement it, otherwise, decrement - // probabilistically - if (curr_size*n_in_table >= size_current) { - curr_in_clock->count--; - } else { - // generate a random number between 0 and 2^16 - assert(size_current <= (INT64_MAX / ((1<<16)-1))); // to protect against possible overflows - int32_t rnd = myrandom_r(&m_random_data) % (1<<16); - // The if-statement below will be true with probability of - // curr_size/(average size of PAIR in cachetable) - // Here is how the math is done: - // average_size = size_current/n_in_table - // curr_size/average_size = curr_size*n_in_table/size_current - // we evaluate if a random number from 0 to 2^16 is less than - // than curr_size/average_size * 2^16. 
So, our if-clause should be - // if (2^16*curr_size/average_size > rnd) - // this evaluates to: - // if (2^16*curr_size*n_in_table/size_current > rnd) - // by multiplying each side of the equation by size_current, we get - // if (2^16*curr_size*n_in_table > rnd*size_current) - // and dividing each side by 2^16, - // we get the if-clause below - // - if ((((int64_t)curr_size) * n_in_table) >= (((int64_t)rnd) * size_current)>>16) { - curr_in_clock->count--; - } - } - // call the partial eviction callback - curr_in_clock->value_rwlock.write_lock(true); - - void *value = curr_in_clock->value_data; - void* disk_data = curr_in_clock->disk_data; - void *write_extraargs = curr_in_clock->write_extraargs; - enum partial_eviction_cost cost; - long bytes_freed_estimate = 0; - curr_in_clock->pe_est_callback( - value, - disk_data, - &bytes_freed_estimate, - &cost, - write_extraargs - ); - if (cost == PE_CHEAP) { - pair_unlock(curr_in_clock); - curr_in_clock->size_evicting_estimate = 0; - this->do_partial_eviction(curr_in_clock); - bjm_remove_background_job(cf->bjm); - } - else if (cost == PE_EXPENSIVE) { - // only bother running an expensive partial eviction - // if it is expected to free space - if (bytes_freed_estimate > 0) { - pair_unlock(curr_in_clock); - curr_in_clock->size_evicting_estimate = bytes_freed_estimate; - toku_mutex_lock(&m_ev_thread_lock); - m_size_evicting += bytes_freed_estimate; - toku_mutex_unlock(&m_ev_thread_lock); - toku_kibbutz_enq( - m_kibbutz, - cachetable_partial_eviction, - curr_in_clock - ); - } - else { - curr_in_clock->value_rwlock.write_unlock(); - pair_unlock(curr_in_clock); - bjm_remove_background_job(cf->bjm); - } - } - else { - assert(false); - } - } - else { - toku::context pe_ctx(CTX_FULL_EVICTION); - - // responsibility of try_evict_pair to eventually remove background job - // pair's mutex is still grabbed here - this->try_evict_pair(curr_in_clock); - } - // regrab the read list lock, because the caller assumes - // that it is held. 
The contract requires this. - m_pl->read_list_lock(); -exit: - return ret_val; -} - -struct pair_unpin_with_new_attr_extra { - pair_unpin_with_new_attr_extra(evictor *e, PAIR p) : - ev(e), pair(p) { - } - evictor *ev; - PAIR pair; -}; - -static void pair_unpin_with_new_attr(PAIR_ATTR new_attr, void *extra) { - struct pair_unpin_with_new_attr_extra *info = - reinterpret_cast(extra); - PAIR p = info->pair; - evictor *ev = info->ev; - - // change the attr in the evictor, then update the value in the pair - ev->change_pair_attr(p->attr, new_attr); - p->attr = new_attr; - - // unpin - pair_lock(p); - p->value_rwlock.write_unlock(); - pair_unlock(p); -} - -// -// on entry and exit, pair's mutex is not held -// on exit, PAIR is unpinned -// -void evictor::do_partial_eviction(PAIR p) { - // Copy the old attr - PAIR_ATTR old_attr = p->attr; - long long size_evicting_estimate = p->size_evicting_estimate; - - struct pair_unpin_with_new_attr_extra extra(this, p); - p->pe_callback(p->value_data, old_attr, p->write_extraargs, - // passed as the finalize continuation, which allows the - // pe_callback to unpin the node before doing expensive cleanup - pair_unpin_with_new_attr, &extra); - - // now that the pe_callback (and its pair_unpin_with_new_attr continuation) - // have finished, we can safely decrease size_evicting - this->decrease_size_evicting(size_evicting_estimate); -} - -// -// CT lock held on entry -// background job has been added for p->cachefile on entry -// responsibility of this function to make sure that background job is removed -// -// on entry, pair's mutex is held, on exit, the pair's mutex is NOT held -// -void evictor::try_evict_pair(PAIR p) { - CACHEFILE cf = p->cachefile; - // evictions without a write or unpinned pair's that are clean - // can be run in the current thread - - // the only caller, run_eviction_on_pair, should call this function - // only if no one else is trying to use it - assert(!p->value_rwlock.users()); - 
p->value_rwlock.write_lock(true); - // if the PAIR is dirty, the running eviction requires writing the - // PAIR out. if the disk_nb_mutex is grabbed, then running - // eviction requires waiting for the disk_nb_mutex to become available, - // which may be expensive. Hence, if either is true, we - // do the eviction on a writer thread - if (!p->dirty && (nb_mutex_writers(&p->disk_nb_mutex) == 0)) { - p->size_evicting_estimate = 0; - // - // This method will unpin PAIR and release PAIR mutex - // - // because the PAIR is not dirty, we can safely pass - // false for the for_checkpoint parameter - this->evict_pair(p, false); - bjm_remove_background_job(cf->bjm); - } - else { - pair_unlock(p); - toku_mutex_lock(&m_ev_thread_lock); - assert(m_size_evicting >= 0); - p->size_evicting_estimate = p->attr.size; - m_size_evicting += p->size_evicting_estimate; - assert(m_size_evicting >= 0); - toku_mutex_unlock(&m_ev_thread_lock); - toku_kibbutz_enq(m_kibbutz, cachetable_evicter, p); - } -} - -// -// Requires: This thread must hold the write lock (nb_mutex) for the pair. -// The pair's mutex (p->mutex) is also held. -// on exit, neither is held -// -void evictor::evict_pair(PAIR p, bool for_checkpoint) { - if (p->dirty) { - pair_unlock(p); - cachetable_write_locked_pair(this, p, for_checkpoint); - pair_lock(p); - } - // one thing we can do here is extract the size_evicting estimate, - // have decrease_size_evicting take the estimate and not the pair, - // and do this work after we have called - // cachetable_maybe_remove_and_free_pair - this->decrease_size_evicting(p->size_evicting_estimate); - // if we are to remove this pair, we need the write list lock, - // to get it in a way that avoids deadlocks, we must first release - // the pair's mutex, then grab the write list lock, then regrab the - // pair's mutex. 
The pair cannot go anywhere because - // the pair is still pinned - nb_mutex_lock(&p->disk_nb_mutex, p->mutex); - pair_unlock(p); - m_pl->write_list_lock(); - pair_lock(p); - p->value_rwlock.write_unlock(); - nb_mutex_unlock(&p->disk_nb_mutex); - // at this point, we have the pair list's write list lock - // and we have the pair's mutex (p->mutex) held - - // this ensures that a clone running in the background first completes - bool removed = false; - if (p->value_rwlock.users() == 0 && p->refcount == 0) { - // assumption is that if we are about to remove the pair - // that no one has grabbed the disk_nb_mutex, - // and that there is no cloned_value_data, because - // no one is writing a cloned value out. - assert(nb_mutex_users(&p->disk_nb_mutex) == 0); - assert(p->cloned_value_data == NULL); - cachetable_remove_pair(m_pl, this, p); - removed = true; - } - pair_unlock(p); - m_pl->write_list_unlock(); - // do not want to hold the write list lock while freeing a pair - if (removed) { - cachetable_free_pair(p); - } -} - -// -// this function handles the responsibilities for writer threads when they -// decrease size_evicting. The responsibilities are: -// - decrease m_size_evicting in a thread safe manner -// - in some circumstances, signal the eviction thread -// -void evictor::decrease_size_evicting(long size_evicting_estimate) { - if (size_evicting_estimate > 0) { - toku_mutex_lock(&m_ev_thread_lock); - int64_t buffer = m_high_size_hysteresis - m_low_size_watermark; - // if size_evicting is transitioning from greater than buffer to below buffer, and - // some client threads are sleeping, we need to wake up the eviction thread. - // Here is why. In this scenario, we are in one of two cases: - // - size_current - size_evicting < low_size_watermark - // If this is true, then size_current < high_size_hysteresis, which - // means we need to wake up sleeping clients - // - size_current - size_evicting > low_size_watermark, - // which means more evictions must be run. 
- // The consequences of both cases are the responsibility - // of the eviction thread. - // - bool need_to_signal_ev_thread = - (m_num_sleepers > 0) && - !m_ev_thread_is_running && - (m_size_evicting > buffer) && - ((m_size_evicting - size_evicting_estimate) <= buffer); - m_size_evicting -= size_evicting_estimate; - assert(m_size_evicting >= 0); - if (need_to_signal_ev_thread) { - this->signal_eviction_thread(); - } - toku_mutex_unlock(&m_ev_thread_lock); - } -} - -// -// Wait for cache table space to become available -// size_current is number of bytes currently occupied by data (referred to by pairs) -// size_evicting is number of bytes queued up to be evicted -// -void evictor::wait_for_cache_pressure_to_subside() { - uint64_t t0 = toku_current_time_microsec(); - toku_mutex_lock(&m_ev_thread_lock); - m_num_sleepers++; - this->signal_eviction_thread(); - toku_cond_wait(&m_flow_control_cond, &m_ev_thread_lock); - m_num_sleepers--; - toku_mutex_unlock(&m_ev_thread_lock); - uint64_t t1 = toku_current_time_microsec(); - increment_partitioned_counter(m_wait_pressure_count, 1); - uint64_t tdelta = t1 - t0; - increment_partitioned_counter(m_wait_pressure_time, tdelta); - if (tdelta > 1000000) { - increment_partitioned_counter(m_long_wait_pressure_count, 1); - increment_partitioned_counter(m_long_wait_pressure_time, tdelta); - } -} - -// -// Get the status of the current estimated size of the cachetable, -// and the evictor's set limit. -// -void evictor::get_state(long *size_current_ptr, long *size_limit_ptr) { - if (size_current_ptr) { - *size_current_ptr = m_size_current; - } - if (size_limit_ptr) { - *size_limit_ptr = m_low_size_watermark; - } -} - -// -// Force the eviction thread to do some work. -// -// This function does not require any mutex to be held. -// As a result, scheduling is not guaranteed, but that is tolerable. 
-// -void evictor::signal_eviction_thread() { - toku_cond_signal(&m_ev_thread_cond); -} - -// -// Returns true if the cachetable is so over subscribed, that a client thread should sleep -// -// This function may be called in a thread-unsafe manner. Locks are not -// required to read size_current. The result is that -// the values may be a little off, but we think that is tolerable. -// -bool evictor::should_client_thread_sleep(){ - return unsafe_read_size_current() > m_high_size_watermark; -} - -// -// Returns true if a sleeping client should be woken up because -// the cachetable is not overly subscribed -// -// This function may be called in a thread-unsafe manner. Locks are not -// required to read size_current. The result is that -// the values may be a little off, but we think that is tolerable. -// -bool evictor::should_sleeping_clients_wakeup() { - return unsafe_read_size_current() <= m_high_size_hysteresis; -} - -// -// Returns true if a client thread should try to wake up the eviction -// thread because the client thread has noticed too much data taken -// up in the cachetable. -// -// This function may be called in a thread-unsafe manner. Locks are not -// required to read size_current or size_evicting. The result is that -// the values may be a little off, but we think that is tolerable. -// If the caller wants to ensure that ev_thread_is_running and size_evicting -// are accurate, then the caller must hold ev_thread_lock before -// calling this function. -// -bool evictor::should_client_wake_eviction_thread() { - return - !m_ev_thread_is_running && - ((unsafe_read_size_current() - m_size_evicting) > m_low_size_hysteresis); -} - -// -// Determines if eviction is needed. 
If the current size of -// the cachetable exceeds the sum of our fixed size limit and -// the amount of data currently being evicted, then eviction is needed -// -bool evictor::eviction_needed() { - return (m_size_current - m_size_evicting) > m_low_size_watermark; -} - -inline int64_t evictor::unsafe_read_size_current(void) const { - return m_size_current; -} - -void evictor::fill_engine_status() { - STATUS_VALUE(CT_SIZE_CURRENT) = m_size_current; - STATUS_VALUE(CT_SIZE_LIMIT) = m_low_size_hysteresis; - STATUS_VALUE(CT_SIZE_WRITING) = m_size_evicting; - STATUS_VALUE(CT_SIZE_NONLEAF) = read_partitioned_counter(m_size_nonleaf); - STATUS_VALUE(CT_SIZE_LEAF) = read_partitioned_counter(m_size_leaf); - STATUS_VALUE(CT_SIZE_ROLLBACK) = read_partitioned_counter(m_size_rollback); - STATUS_VALUE(CT_SIZE_CACHEPRESSURE) = read_partitioned_counter(m_size_cachepressure); - STATUS_VALUE(CT_WAIT_PRESSURE_COUNT) = read_partitioned_counter(m_wait_pressure_count); - STATUS_VALUE(CT_WAIT_PRESSURE_TIME) = read_partitioned_counter(m_wait_pressure_time); - STATUS_VALUE(CT_LONG_WAIT_PRESSURE_COUNT) = read_partitioned_counter(m_long_wait_pressure_count); - STATUS_VALUE(CT_LONG_WAIT_PRESSURE_TIME) = read_partitioned_counter(m_long_wait_pressure_time); -} - -//////////////////////////////////////////////////////////////////////////////// - -ENSURE_POD(checkpointer); - -// -// Sets the cachetable reference in this checkpointer class, this is temporary. -// -int checkpointer::init(pair_list *_pl, - TOKULOGGER _logger, - evictor *_ev, - cachefile_list *files) { - m_list = _pl; - m_logger = _logger; - m_ev = _ev; - m_cf_list = files; - bjm_init(&m_checkpoint_clones_bjm); - - // Default is no checkpointing. 
- m_checkpointer_cron_init = false; - int r = toku_minicron_setup(&m_checkpointer_cron, 0, checkpoint_thread, this); - if (r == 0) { - m_checkpointer_cron_init = true; - } - m_checkpointer_init = true; - return r; -} - -void checkpointer::destroy() { - if (!m_checkpointer_init) { - return; - } - if (m_checkpointer_cron_init && !this->has_been_shutdown()) { - // for test code only, production code uses toku_cachetable_minicron_shutdown() - int r = this->shutdown(); - assert(r == 0); - } - bjm_destroy(m_checkpoint_clones_bjm); -} - -// -// Sets how often the checkpoint thread will run, in seconds -// -void checkpointer::set_checkpoint_period(uint32_t new_period) { - toku_minicron_change_period(&m_checkpointer_cron, new_period*1000); -} - -// -// Sets how often the checkpoint thread will run. -// -uint32_t checkpointer::get_checkpoint_period() { - return toku_minicron_get_period_in_seconds_unlocked(&m_checkpointer_cron); -} - -// -// Stops the checkpoint thread. -// -int checkpointer::shutdown() { - return toku_minicron_shutdown(&m_checkpointer_cron); -} - -// -// If checkpointing is running, this returns false. -// -bool checkpointer::has_been_shutdown() { - return toku_minicron_has_been_shutdown(&m_checkpointer_cron); -} - -TOKULOGGER checkpointer::get_logger() { - return m_logger; -} - -void checkpointer::increment_num_txns() { - m_checkpoint_num_txns++; -} - -// -// Update the user data in any cachefiles in our checkpoint list. -// -void checkpointer::update_cachefiles() { - CACHEFILE cf; - for(cf = m_cf_list->m_active_head; cf; cf=cf->next) { - assert(cf->begin_checkpoint_userdata); - if (cf->for_checkpoint) { - cf->begin_checkpoint_userdata(m_lsn_of_checkpoint_in_progress, - cf->userdata); - } - } -} - -// -// Sets up and kicks off a checkpoint. -// -void checkpointer::begin_checkpoint() { - // 1. Initialize the accountability counters. - m_checkpoint_num_files = 0; - m_checkpoint_num_txns = 0; - - // 2. Make list of cachefiles to be included in the checkpoint. 
- // TODO: How do we remove the non-lock cachetable reference here? - m_cf_list->read_lock(); - for (CACHEFILE cf = m_cf_list->m_active_head; cf; cf = cf->next) { - // The caller must serialize open, close, and begin checkpoint. - // So we should never see a closing cachefile here. - // Is there an assert we can add here? - - // Putting this check here so that this method may be called - // by cachetable tests. - assert(cf->note_pin_by_checkpoint); - cf->note_pin_by_checkpoint(cf, cf->userdata); - cf->for_checkpoint = true; - m_checkpoint_num_files++; - } - m_cf_list->read_unlock(); - - // 3. Create log entries for this checkpoint. - if (m_logger) { - this->log_begin_checkpoint(); - } - - bjm_reset(m_checkpoint_clones_bjm); - - m_list->write_pending_exp_lock(); - m_list->read_list_lock(); - m_cf_list->read_lock(); // needed for update_cachefiles - m_list->write_pending_cheap_lock(); - // 4. Turn on all the relevant checkpoint pending bits. - this->turn_on_pending_bits(); - - // 5. - this->update_cachefiles(); - m_list->write_pending_cheap_unlock(); - m_cf_list->read_unlock(); - m_list->read_list_unlock(); - m_list->write_pending_exp_unlock(); -} - -// -// Assuming the logger exists, this will write out the folloing -// information to the log. -// -// 1. Writes the BEGIN_CHECKPOINT to the log. -// 2. Writes the list of open dictionaries to the log. -// 3. Writes the list of open transactions to the log. -// 4. Writes the list of dicionaries that have had rollback logs suppresed. -// -// NOTE: This also has the side effecto of setting the LSN -// of checkpoint in progress. -// -void checkpointer::log_begin_checkpoint() { - int r = 0; - - // Write the BEGIN_CHECKPOINT to the log. - LSN begin_lsn={ .lsn = (uint64_t) -1 }; // we'll need to store the lsn of the checkpoint begin in all the trees that are checkpointed. 
- TXN_MANAGER mgr = toku_logger_get_txn_manager(m_logger); - TXNID last_xid = toku_txn_manager_get_last_xid(mgr); - toku_log_begin_checkpoint(m_logger, &begin_lsn, 0, 0, last_xid); - m_lsn_of_checkpoint_in_progress = begin_lsn; - - // Log the list of open dictionaries. - for (CACHEFILE cf = m_cf_list->m_active_head; cf; cf = cf->next) { - assert(cf->log_fassociate_during_checkpoint); - cf->log_fassociate_during_checkpoint(cf, cf->userdata); - } - - // Write open transactions to the log. - r = toku_txn_manager_iter_over_live_txns( - m_logger->txn_manager, - log_open_txn, - this - ); - assert(r == 0); -} - -// -// Sets the pending bits of EVERY PAIR in the cachetable, regardless of -// whether the PAIR is clean or not. It will be the responsibility of -// end_checkpoint or client threads to simply clear the pending bit -// if the PAIR is clean. -// -// On entry and exit , the pair list's read list lock is grabbed, and -// both pending locks are grabbed -// -void checkpointer::turn_on_pending_bits() { - PAIR p = NULL; - uint32_t i; - for (i = 0, p = m_list->m_checkpoint_head; i < m_list->m_n_in_table; i++, p = p->clock_next) { - assert(!p->checkpoint_pending); - //Only include pairs belonging to cachefiles in the checkpoint - if (!p->cachefile->for_checkpoint) { - continue; - } - // Mark everything as pending a checkpoint - // - // The rule for the checkpoint_pending bit is as follows: - // - begin_checkpoint may set checkpoint_pending to true - // even though the pair lock on the node is not held. - // - any thread that wants to clear the pending bit must own - // the PAIR lock. Otherwise, - // we may end up clearing the pending bit before the - // current lock is ever released. 
- p->checkpoint_pending = true; - if (m_list->m_pending_head) { - m_list->m_pending_head->pending_prev = p; - } - p->pending_next = m_list->m_pending_head; - p->pending_prev = NULL; - m_list->m_pending_head = p; - } - invariant(p == m_list->m_checkpoint_head); -} - -void checkpointer::add_background_job() { - int r = bjm_add_background_job(m_checkpoint_clones_bjm); - assert_zero(r); -} -void checkpointer::remove_background_job() { - bjm_remove_background_job(m_checkpoint_clones_bjm); -} - -void checkpointer::end_checkpoint(void (*testcallback_f)(void*), void* testextra) { - CACHEFILE *XMALLOC_N(m_checkpoint_num_files, checkpoint_cfs); - - this->fill_checkpoint_cfs(checkpoint_cfs); - this->checkpoint_pending_pairs(); - this->checkpoint_userdata(checkpoint_cfs); - // For testing purposes only. Dictionary has been fsync-ed to disk but log has not yet been written. - if (testcallback_f) { - testcallback_f(testextra); - } - this->log_end_checkpoint(); - this->end_checkpoint_userdata(checkpoint_cfs); - - //Delete list of cachefiles in the checkpoint, - this->remove_cachefiles(checkpoint_cfs); - toku_free(checkpoint_cfs); -} - -void checkpointer::fill_checkpoint_cfs(CACHEFILE* checkpoint_cfs) { - m_cf_list->read_lock(); - uint32_t curr_index = 0; - for (CACHEFILE cf = m_cf_list->m_active_head; cf; cf = cf->next) { - if (cf->for_checkpoint) { - assert(curr_index < m_checkpoint_num_files); - checkpoint_cfs[curr_index] = cf; - curr_index++; - } - } - assert(curr_index == m_checkpoint_num_files); - m_cf_list->read_unlock(); -} - -void checkpointer::checkpoint_pending_pairs() { - PAIR p; - m_list->read_list_lock(); - while ((p = m_list->m_pending_head)!=0) { - // TODO: Investigate why we move pending head outisde of the pending_pairs_remove() call. 
- m_list->m_pending_head = m_list->m_pending_head->pending_next; - m_list->pending_pairs_remove(p); - // if still pending, clear the pending bit and write out the node - pair_lock(p); - m_list->read_list_unlock(); - write_pair_for_checkpoint_thread(m_ev, p); - pair_unlock(p); - m_list->read_list_lock(); - } - assert(!m_list->m_pending_head); - m_list->read_list_unlock(); - bjm_wait_for_jobs_to_finish(m_checkpoint_clones_bjm); -} - -void checkpointer::checkpoint_userdata(CACHEFILE* checkpoint_cfs) { - // have just written data blocks, so next write the translation and header for each open dictionary - for (uint32_t i = 0; i < m_checkpoint_num_files; i++) { - CACHEFILE cf = checkpoint_cfs[i]; - assert(cf->for_checkpoint); - assert(cf->checkpoint_userdata); - toku_cachetable_set_checkpointing_user_data_status(1); - cf->checkpoint_userdata(cf, cf->fd, cf->userdata); - toku_cachetable_set_checkpointing_user_data_status(0); - } -} - -void checkpointer::log_end_checkpoint() { - if (m_logger) { - toku_log_end_checkpoint(m_logger, NULL, - 1, // want the end_checkpoint to be fsync'd - m_lsn_of_checkpoint_in_progress, - 0, - m_checkpoint_num_files, - m_checkpoint_num_txns); - toku_logger_note_checkpoint(m_logger, m_lsn_of_checkpoint_in_progress); - } -} - -void checkpointer::end_checkpoint_userdata(CACHEFILE* checkpoint_cfs) { - // everything has been written to file and fsynced - // ... call checkpoint-end function in block translator - // to free obsolete blocks on disk used by previous checkpoint - //cachefiles_in_checkpoint is protected by the checkpoint_safe_lock - for (uint32_t i = 0; i < m_checkpoint_num_files; i++) { - CACHEFILE cf = checkpoint_cfs[i]; - assert(cf->for_checkpoint); - assert(cf->end_checkpoint_userdata); - cf->end_checkpoint_userdata(cf, cf->fd, cf->userdata); - } -} - -// -// Deletes all the cachefiles in this checkpointers cachefile list. 
-// -void checkpointer::remove_cachefiles(CACHEFILE* checkpoint_cfs) { - // making this a while loop because note_unpin_by_checkpoint may destroy the cachefile - for (uint32_t i = 0; i < m_checkpoint_num_files; i++) { - CACHEFILE cf = checkpoint_cfs[i]; - // Checking for function existing so that this function - // can be called from cachetable tests. - assert(cf->for_checkpoint); - cf->for_checkpoint = false; - assert(cf->note_unpin_by_checkpoint); - // Clear the bit saying theis file is in the checkpoint. - cf->note_unpin_by_checkpoint(cf, cf->userdata); - } -} - - -//////////////////////////////////////////////////////// -// -// cachefiles list -// -static_assert(std::is_pod::value, "cachefile_list isn't POD"); - -void cachefile_list::init() { - m_active_head = NULL; - m_stale_head = NULL; - m_stale_tail = NULL; - m_next_filenum_to_use.fileid = 0; - m_next_hash_id_to_use = 0; - toku_pthread_rwlock_init(&m_lock, NULL); - m_active_filenum.create(); - m_active_fileid.create(); -} - -void cachefile_list::destroy() { - m_active_filenum.destroy(); - m_active_fileid.destroy(); - toku_pthread_rwlock_destroy(&m_lock); -} - -void cachefile_list::read_lock() { - toku_pthread_rwlock_rdlock(&m_lock); -} - -void cachefile_list::read_unlock() { - toku_pthread_rwlock_rdunlock(&m_lock); -} - -void cachefile_list::write_lock() { - toku_pthread_rwlock_wrlock(&m_lock); -} - -void cachefile_list::write_unlock() { - toku_pthread_rwlock_wrunlock(&m_lock); -} -int cachefile_list::cachefile_of_iname_in_env(const char *iname_in_env, CACHEFILE *cf) { - read_lock(); - CACHEFILE extant; - int r; - r = ENOENT; - for (extant = m_active_head; extant; extant = extant->next) { - if (extant->fname_in_env && - !strcmp(extant->fname_in_env, iname_in_env)) { - *cf = extant; - r = 0; - break; - } - } - read_unlock(); - return r; -} - -int cachefile_list::cachefile_of_filenum(FILENUM filenum, CACHEFILE *cf) { - read_lock(); - CACHEFILE extant; - int r = ENOENT; - *cf = NULL; - for (extant = 
m_active_head; extant; extant = extant->next) { - if (extant->filenum.fileid==filenum.fileid) { - *cf = extant; - r = 0; - break; - } - } - read_unlock(); - return r; -} - -static int cachefile_find_by_filenum(const CACHEFILE &a_cf, const FILENUM &b) { - const FILENUM a = a_cf->filenum; - if (a.fileid < b.fileid) { - return -1; - } else if (a.fileid == b.fileid) { - return 0; - } else { - return 1; - } -} - -static int cachefile_find_by_fileid(const CACHEFILE &a_cf, const struct fileid &b) { - return toku_fileid_cmp(a_cf->fileid, b); -} - -void cachefile_list::add_cf_unlocked(CACHEFILE cf) { - invariant(cf->next == NULL); - invariant(cf->prev == NULL); - cf->next = m_active_head; - cf->prev = NULL; - if (m_active_head) { - m_active_head->prev = cf; - } - m_active_head = cf; - - int r; - r = m_active_filenum.insert(cf, cf->filenum, nullptr); - assert_zero(r); - r = m_active_fileid.insert(cf, cf->fileid, nullptr); - assert_zero(r); -} - -void cachefile_list::add_stale_cf(CACHEFILE cf) { - write_lock(); - invariant(cf->next == NULL); - invariant(cf->prev == NULL); - - cf->next = m_stale_head; - cf->prev = NULL; - if (m_stale_head) { - m_stale_head->prev = cf; - } - m_stale_head = cf; - if (m_stale_tail == NULL) { - m_stale_tail = cf; - } - write_unlock(); -} - -void cachefile_list::remove_cf(CACHEFILE cf) { - write_lock(); - invariant(m_active_head != NULL); - if (cf->next) { - cf->next->prev = cf->prev; - } - if (cf->prev) { - cf->prev->next = cf->next; - } - if (cf == m_active_head) { - invariant(cf->prev == NULL); - m_active_head = cf->next; - } - cf->prev = NULL; - cf->next = NULL; - - uint32_t idx; - int r; - r = m_active_filenum.find_zero(cf->filenum, nullptr, &idx); - assert_zero(r); - r = m_active_filenum.delete_at(idx); - assert_zero(r); - - r = m_active_fileid.find_zero(cf->fileid, nullptr, &idx); - assert_zero(r); - r = m_active_fileid.delete_at(idx); - assert_zero(r); - - write_unlock(); -} - -void cachefile_list::remove_stale_cf_unlocked(CACHEFILE cf) { - 
invariant(m_stale_head != NULL); - invariant(m_stale_tail != NULL); - if (cf->next) { - cf->next->prev = cf->prev; - } - if (cf->prev) { - cf->prev->next = cf->next; - } - if (cf == m_stale_head) { - invariant(cf->prev == NULL); - m_stale_head = cf->next; - } - if (cf == m_stale_tail) { - invariant(cf->next == NULL); - m_stale_tail = cf->prev; - } - cf->prev = NULL; - cf->next = NULL; -} - -FILENUM cachefile_list::reserve_filenum() { - // taking a write lock because we are modifying next_filenum_to_use - write_lock(); - while (1) { - int r = m_active_filenum.find_zero(m_next_filenum_to_use, nullptr, nullptr); - if (r == 0) { - m_next_filenum_to_use.fileid++; - continue; - } - assert(r == DB_NOTFOUND); - break; - } - FILENUM filenum = m_next_filenum_to_use; -#if TOKU_DEBUG_PARANOID - for (CACHEFILE extant = m_active_head; extant; extant = extant->next) { - assert(filenum.fileid != extant->filenum.fileid); - } -#endif - m_next_filenum_to_use.fileid++; - write_unlock(); - return filenum; -} - -uint32_t cachefile_list::get_new_hash_id_unlocked() { - uint32_t retval = m_next_hash_id_to_use; - m_next_hash_id_to_use++; - return retval; -} - -CACHEFILE cachefile_list::find_cachefile_in_list_unlocked( - CACHEFILE start, - struct fileid* fileid - ) -{ - CACHEFILE retval = NULL; - for (CACHEFILE extant = start; extant; extant = extant->next) { - if (toku_fileids_are_equal(&extant->fileid, fileid)) { - // Clients must serialize cachefile open, close, and unlink - // So, during open, we should never see a closing cachefile - // or one that has been marked as unlink on close. 
- assert(!extant->unlink_on_close); - retval = extant; - goto exit; - } - } -exit: - return retval; -} - -CACHEFILE cachefile_list::find_cachefile_unlocked(struct fileid* fileid) { - CACHEFILE cf = nullptr; - int r = m_active_fileid.find_zero(*fileid, &cf, nullptr); - if (r == 0) { - assert(!cf->unlink_on_close); - } -#if TOKU_DEBUG_PARANOID - assert(cf == find_cachefile_in_list_unlocked(m_active_head, fileid)); -#endif - return cf; -} - -CACHEFILE cachefile_list::find_stale_cachefile_unlocked(struct fileid* fileid) { - return find_cachefile_in_list_unlocked(m_stale_head, fileid); -} - -void cachefile_list::verify_unused_filenum(FILENUM filenum) { - int r = m_active_filenum.find_zero(filenum, nullptr, nullptr); - assert(r == DB_NOTFOUND); -#if TOKU_DEBUG_PARANOID - for (CACHEFILE extant = m_active_head; extant; extant = extant->next) { - invariant(extant->filenum.fileid != filenum.fileid); - } -#endif -} - -// returns true if some eviction ran, false otherwise -bool cachefile_list::evict_some_stale_pair(evictor* ev) { - PAIR p = NULL; - CACHEFILE cf_to_destroy = NULL; - write_lock(); - if (m_stale_tail == NULL) { - write_unlock(); - return false; - } - p = m_stale_tail->cf_head; - // we should not have a cf in the stale list - // that does not have any pairs - paranoid_invariant(p != NULL); - - evict_pair_from_cachefile(p); - - // now that we have evicted something, - // let's check if the cachefile is needed anymore - if (m_stale_tail->cf_head == NULL) { - cf_to_destroy = m_stale_tail; - remove_stale_cf_unlocked(m_stale_tail); - } - - write_unlock(); - - ev->remove_pair_attr(p->attr); - cachetable_free_pair(p); - if (cf_to_destroy) { - cachefile_destroy(cf_to_destroy); - } - return true; -} - -void cachefile_list::free_stale_data(evictor* ev) { - write_lock(); - while (m_stale_tail != NULL) { - PAIR p = m_stale_tail->cf_head; - // we should not have a cf in the stale list - // that does not have any pairs - paranoid_invariant(p != NULL); - - 
evict_pair_from_cachefile(p); - ev->remove_pair_attr(p->attr); - cachetable_free_pair(p); - - // now that we have evicted something, - // let's check if the cachefile is needed anymore - if (m_stale_tail->cf_head == NULL) { - CACHEFILE cf_to_destroy = m_stale_tail; - remove_stale_cf_unlocked(m_stale_tail); - cachefile_destroy(cf_to_destroy); - } - } - write_unlock(); -} - -void __attribute__((__constructor__)) toku_cachetable_helgrind_ignore(void); -void -toku_cachetable_helgrind_ignore(void) { - TOKU_VALGRIND_HG_DISABLE_CHECKING(&cachetable_miss, sizeof cachetable_miss); - TOKU_VALGRIND_HG_DISABLE_CHECKING(&cachetable_misstime, sizeof cachetable_misstime); - TOKU_VALGRIND_HG_DISABLE_CHECKING(&cachetable_prefetches, sizeof cachetable_prefetches); - TOKU_VALGRIND_HG_DISABLE_CHECKING(&cachetable_evictions, sizeof cachetable_evictions); - TOKU_VALGRIND_HG_DISABLE_CHECKING(&cleaner_executions, sizeof cleaner_executions); - TOKU_VALGRIND_HG_DISABLE_CHECKING(&ct_status, sizeof ct_status); -} - -#undef STATUS_VALUE diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/cachetable.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/cachetable.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/cachetable.h 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/cachetable.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,605 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef CACHETABLE_H -#define CACHETABLE_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), 
the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. 
- - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include -#include "fttypes.h" -#include "minicron.h" - -// Maintain a cache mapping from cachekeys to values (void*) -// Some of the keys can be pinned. Don't pin too many or for too long. 
-// If the cachetable is too full, it will call the flush_callback() function with the key, the value, and the otherargs -// and then remove the key-value pair from the cache. -// The callback won't be any of the currently pinned keys. -// Also when flushing an object, the cachetable drops all references to it, -// so you may need to free() it. -// Note: The cachetable should use a common pool of memory, flushing things across cachetables. -// (The first implementation doesn't) -// If you pin something twice, you must unpin it twice. -// table_size is the initial size of the cache table hash table (in number of entries) -// size limit is the upper bound of the sum of size of the entries in the cache table (total number of bytes) - -typedef BLOCKNUM CACHEKEY; - -void toku_set_cleaner_period (CACHETABLE ct, uint32_t new_period); -uint32_t toku_get_cleaner_period_unlocked (CACHETABLE ct); -void toku_set_cleaner_iterations (CACHETABLE ct, uint32_t new_iterations); -uint32_t toku_get_cleaner_iterations (CACHETABLE ct); -uint32_t toku_get_cleaner_iterations_unlocked (CACHETABLE ct); - -// cachetable operations - -// create and initialize a cache table -// size_limit is the upper limit on the size of the size of the values in the table -// pass 0 if you want the default -int toku_cachetable_create(CACHETABLE *result, long size_limit, LSN initial_lsn, TOKULOGGER); - -// Create a new cachetable. -// Effects: a new cachetable is created and initialized. -// The cachetable pointer is stored into result. -// The sum of the sizes of the memory objects is set to size_limit, in whatever -// units make sense to the user of the cachetable. -// Returns: If success, returns 0 and result points to the new cachetable. Otherwise, -// returns an error number. - -// Returns a pointer to the checkpointer within the given cachetable. -CHECKPOINTER toku_cachetable_get_checkpointer(CACHETABLE ct); - -// What is the cachefile that goes with a particular filenum? 
-// During a transaction, we cannot reuse a filenum. -int toku_cachefile_of_filenum (CACHETABLE t, FILENUM filenum, CACHEFILE *cf); - -// What is the cachefile that goes with a particular iname (relative to env)? -// During a transaction, we cannot reuse an iname. -int toku_cachefile_of_iname_in_env (CACHETABLE ct, const char *iname_in_env, CACHEFILE *cf); - -// Get the iname (within the cwd) associated with the cachefile -// Return the filename -char *toku_cachefile_fname_in_cwd (CACHEFILE cf); - -void toku_cachetable_begin_checkpoint (CHECKPOINTER cp, TOKULOGGER); - -void toku_cachetable_end_checkpoint(CHECKPOINTER cp, TOKULOGGER logger, - void (*testcallback_f)(void*), void * testextra); - -// Shuts down checkpoint thread -// Requires no locks be held that are taken by the checkpoint function -void toku_cachetable_minicron_shutdown(CACHETABLE ct); - -// Close the cachetable. -// Effects: All of the memory objects are flushed to disk, and the cachetable is destroyed. -void toku_cachetable_close(CACHETABLE *ct); - -// Open a file and bind the file to a new cachefile object. (For use by test programs only.) -int toku_cachetable_openf(CACHEFILE *,CACHETABLE, const char *fname_in_env, int flags, mode_t mode); - -// Bind a file to a new cachefile object. -int toku_cachetable_openfd(CACHEFILE *,CACHETABLE, int fd, - const char *fname_relative_to_env); -int toku_cachetable_openfd_with_filenum (CACHEFILE *,CACHETABLE, int fd, - const char *fname_in_env, - FILENUM filenum, bool* was_open); - -// reserve a unique filenum -FILENUM toku_cachetable_reserve_filenum(CACHETABLE ct); - -// Effect: Reserve a fraction of the cachetable memory. -// Returns the amount reserved. -// To return the memory to the cachetable, call toku_cachetable_release_reserved_memory -// Requires 0 -#include -#include -#include -#include - -////////////////////////////////////////////////////////////////////////////// -// -// This file contains the classes and structs that make up the cachetable. 
-// The structs are: -// - cachefile -// - ctpair -// - pair_list -// - cachefile_list -// - checkpointer -// - evictor -// - cleaner -// -// The rest of this comment assumes familiarity with the locks used in these -// classes/structs and what the locks protect. Nevertheless, here is -// a list of the locks that we have: -// - pair_list->list_lock -// - pair_list->pending_lock_expensive -// - pair_list->pending_lock_cheap -// - cachefile_list->lock -// - PAIR->mutex -// - PAIR->value_rwlock -// - PAIR->disk_nb_mutex -// -// Here are rules for how the locks interact: -// - To grab any of the pair_list's locks, or the cachefile_list's lock, -// the cachetable must be in existence -// - To grab the PAIR mutex, we must know the PAIR will not dissappear: -// - the PAIR must be pinned (value_rwlock or disk_nb_mutex is held) -// - OR, the pair_list's list lock is held -// - As a result, to get rid of a PAIR from the pair_list, we must hold -// both the pair_list's list_lock and the PAIR's mutex -// - To grab PAIR->value_rwlock, we must hold the PAIR's mutex -// - To grab PAIR->disk_nb_mutex, we must hold the PAIR's mutex -// and hold PAIR->value_rwlock -// -// Now let's talk about ordering. Here is an order from outer to inner (top locks must be grabbed first) -// - pair_list->pending_lock_expensive -// - pair_list->list_lock -// - cachefile_list->lock -// - PAIR->mutex -// - pair_list->pending_lock_cheap <-- after grabbing this lock, -// NO other locks -// should be grabbed. -// - when grabbing PAIR->value_rwlock or PAIR->disk_nb_mutex, -// if the acquisition will not block, then it does not matter if any other locks held, -// BUT if the acquisition will block, then NO other locks may be held besides -// PAIR->mutex. 
-// -// HERE ARE TWO EXAMPLES: -// To pin a PAIR on a client thread, the following must be done: -// - first grab the list lock and find the PAIR -// - with the list lock grabbed, grab PAIR->mutex -// - with PAIR->mutex held: -// - release list lock -// - pin PAIR -// - with PAIR pinned, grab pending_lock_cheap, -// - copy and clear PAIR->checkpoint_pending, -// - resolve checkpointing if necessary -// - return to user. -// The list lock may be held while pinning the PAIR if -// the PAIR has no contention. Otherwise, we may have -// get a deadlock with another thread that has the PAIR pinned, -// tries to pin some other PAIR, and in doing so, grabs the list lock. -// -// To unpin a PAIR on a client thread: -// - because the PAIR is pinned, we don't need the pair_list's list_lock -// - so, simply acquire PAIR->mutex -// - unpin the PAIR -// - return -// -////////////////////////////////////////////////////////////////////////////// -class evictor; -class pair_list; - -/////////////////////////////////////////////////////////////////////////////// -// -// Maps to a file on disk. -// -struct cachefile { - CACHEFILE next; - CACHEFILE prev; - // these next two fields are protected by cachetable's list lock - // they are managed whenever we add or remove a pair from - // the cachetable. As of Riddler, this linked list is only used to - // make cachetable_flush_cachefile more efficient - PAIR cf_head; // doubly linked list that is NOT circular - uint32_t num_pairs; // count on number of pairs in the cachetable belong to this cachefile - - bool for_checkpoint; //True if part of the in-progress checkpoint - - // If set and the cachefile closes, the file will be removed. - // Clients must not operate on the cachefile after setting this, - // nor attempt to open any cachefile with the same fname (dname) - // until this cachefile has been fully closed and unlinked. - bool unlink_on_close; - int fd; /* Bug: If a file is opened read-only, then it is stuck in read-only. 
If it is opened read-write, then subsequent writers can write to it too. */ - CACHETABLE cachetable; - struct fileid fileid; - // the filenum is used as an identifer of the cachefile - // for logging and recovery - FILENUM filenum; - // number used to generate hashes for blocks in the cachefile - // used in toku_cachetable_hash - // this used to be the filenum.fileid, but now it is separate - uint32_t hash_id; - char *fname_in_env; /* Used for logging */ - - void *userdata; - void (*log_fassociate_during_checkpoint)(CACHEFILE cf, void *userdata); // When starting a checkpoint we must log all open files. - void (*close_userdata)(CACHEFILE cf, int fd, void *userdata, bool lsnvalid, LSN); // when closing the last reference to a cachefile, first call this function. - void (*free_userdata)(CACHEFILE cf, void *userdata); // when closing the last reference to a cachefile, first call this function. - void (*begin_checkpoint_userdata)(LSN lsn_of_checkpoint, void *userdata); // before checkpointing cachefiles call this function. - void (*checkpoint_userdata)(CACHEFILE cf, int fd, void *userdata); // when checkpointing a cachefile, call this function. - void (*end_checkpoint_userdata)(CACHEFILE cf, int fd, void *userdata); // after checkpointing cachefiles call this function. - void (*note_pin_by_checkpoint)(CACHEFILE cf, void *userdata); // add a reference to the userdata to prevent it from being removed from memory - void (*note_unpin_by_checkpoint)(CACHEFILE cf, void *userdata); // add a reference to the userdata to prevent it from being removed from memory - BACKGROUND_JOB_MANAGER bjm; -}; - - -/////////////////////////////////////////////////////////////////////////////// -// -// The pair represents the data stored in the cachetable. -// -struct ctpair { - // these fields are essentially constants. They do not change. 
- CACHEFILE cachefile; - CACHEKEY key; - uint32_t fullhash; - CACHETABLE_FLUSH_CALLBACK flush_callback; - CACHETABLE_PARTIAL_EVICTION_EST_CALLBACK pe_est_callback; - CACHETABLE_PARTIAL_EVICTION_CALLBACK pe_callback; - CACHETABLE_CLEANER_CALLBACK cleaner_callback; - CACHETABLE_CLONE_CALLBACK clone_callback; - CACHETABLE_CHECKPOINT_COMPLETE_CALLBACK checkpoint_complete_callback; - void *write_extraargs; - - // access to these fields are protected by disk_nb_mutex - void* cloned_value_data; // cloned copy of value_data used for checkpointing - long cloned_value_size; // size of cloned_value_data, used for accounting of size_current - void* disk_data; // data used to fetch/flush value_data to and from disk. - - // access to these fields are protected by value_rwlock - void* value_data; // data used by client threads, FTNODEs and ROLLBACK_LOG_NODEs - PAIR_ATTR attr; - enum cachetable_dirty dirty; - - // protected by PAIR->mutex - uint32_t count; // clock count - uint32_t refcount; // if > 0, then this PAIR is referenced by - // callers to the cachetable, and therefore cannot - // be evicted - uint32_t num_waiting_on_refs; // number of threads waiting on refcount to go to zero - toku_cond_t refcount_wait; // cond used to wait for refcount to go to zero - - // locks - toku::frwlock value_rwlock; - struct nb_mutex disk_nb_mutex; // single writer, protects disk_data, is used for writing cloned nodes for checkpoint - toku_mutex_t* mutex; // gotten from the pair list - - // Access to checkpoint_pending is protected by two mechanisms, - // the value_rwlock and the pair_list's pending locks (expensive and cheap). - // checkpoint_pending may be true of false. - // Here are the rules for reading/modifying this bit. - // - To transition this field from false to true during begin_checkpoint, - // we must be holding both of the pair_list's pending locks. - // - To transition this field from true to false during end_checkpoint, - // we must be holding the value_rwlock. 
- // - For a non-checkpoint thread to read the value, we must hold both the - // value_rwlock and one of the pair_list's pending locks - // - For the checkpoint thread to read the value, we must - // hold the value_rwlock - // - bool checkpoint_pending; // If this is on, then we have got to resolve checkpointing modifying it. - - // these are variables that are only used to transfer information to background threads - // we cache them here to avoid a malloc. In the future, we should investigate if this - // is necessary, as having these fields here is not technically necessary - long size_evicting_estimate; - evictor* ev; - pair_list* list; - - // A PAIR is stored in a pair_list (which happens to be PAIR->list). - // These variables are protected by the list lock in the pair_list - // - // clock_next,clock_prev represent a circular doubly-linked list. - PAIR clock_next,clock_prev; // In clock. - PAIR hash_chain; - - // pending_next,pending_next represent a non-circular doubly-linked list. - PAIR pending_next; - PAIR pending_prev; - - // cf_next, cf_prev represent a non-circular doubly-linked list. - // entries in linked list for PAIRs in a cachefile, these are protected - // by the list lock of the PAIR's pair_list. They are used to make - // cachetable_flush_cachefile cheaper so that we don't need - // to search the entire cachetable to find a particular cachefile's - // PAIRs - PAIR cf_next; - PAIR cf_prev; -}; - -// -// This initializes the fields and members of the pair. -// -void pair_init(PAIR p, - CACHEFILE cachefile, - CACHEKEY key, - void *value, - PAIR_ATTR attr, - enum cachetable_dirty dirty, - uint32_t fullhash, - CACHETABLE_WRITE_CALLBACK write_callback, - evictor *ev, - pair_list *list); - - -/////////////////////////////////////////////////////////////////////////////// -// -// The pair list maintains the set of PAIR's that make up -// the cachetable. 
-// -class pair_list { -public: - // - // the following fields are protected by the list lock - // - uint32_t m_n_in_table; // number of pairs in the hash table - uint32_t m_table_size; // number of buckets in the hash table - uint32_t m_num_locks; - PAIR *m_table; // hash table - toku_mutex_aligned_t *m_mutexes; - // - // The following fields are the heads of various linked lists. - // They also protected by the list lock, but their - // usage is not as straightforward. For each of them, - // only ONE thread is allowed iterate over them with - // a read lock on the list lock. All other threads - // that want to modify elements in the lists or iterate over - // the lists must hold the write list lock. Here is the - // association between what threads may hold a read lock - // on the list lock while iterating: - // - clock_head -> eviction thread (evictor) - // - cleaner_head -> cleaner thread (cleaner) - // - pending_head -> checkpoint thread (checkpointer) - // - PAIR m_clock_head; // of clock . head is the next thing to be up for decrement. - PAIR m_cleaner_head; // for cleaner thread. head is the next thing to look at for possible cleaning. - PAIR m_checkpoint_head; // for begin checkpoint to iterate over PAIRs and mark as pending_checkpoint - PAIR m_pending_head; // list of pairs marked with checkpoint_pending - - // this field is public so we are still POD - - // usage of this lock is described above - toku_pthread_rwlock_t m_list_lock; - // - // these locks are the "pending locks" referenced - // in comments about PAIR->checkpoint_pending. There - // are two of them, but both serve the same purpose, which - // is to protect the transition of a PAIR's checkpoint pending - // value from false to true during begin_checkpoint. - // We use two locks, because threads that want to read the - // checkpoint_pending value may hold a lock for varying periods of time. 
- // Threads running eviction may need to protect checkpoint_pending - // while writing a node to disk, which is an expensive operation, - // so it uses pending_lock_expensive. Client threads that - // want to pin PAIRs will want to protect checkpoint_pending - // just long enough to read the value and wipe it out. This is - // a cheap operation, and as a result, uses pending_lock_cheap. - // - // By having two locks, and making begin_checkpoint first - // grab pending_lock_expensive and then pending_lock_cheap, - // we ensure that threads that want to pin nodes can grab - // only pending_lock_cheap, and never block behind threads - // holding pending_lock_expensive and writing a node out to disk - // - toku_pthread_rwlock_t m_pending_lock_expensive; - toku_pthread_rwlock_t m_pending_lock_cheap; - void init(); - void destroy(); - void evict_completely(PAIR pair); - void evict_from_cachetable(PAIR pair); - void evict_from_cachefile(PAIR pair); - void add_to_cachetable_only(PAIR p); - void put(PAIR pair); - PAIR find_pair(CACHEFILE file, CACHEKEY key, uint32_t hash); - void pending_pairs_remove (PAIR p); - void verify(); - void get_state(int *num_entries, int *hash_size); - void read_list_lock(); - void read_list_unlock(); - void write_list_lock(); - void write_list_unlock(); - void read_pending_exp_lock(); - void read_pending_exp_unlock(); - void write_pending_exp_lock(); - void write_pending_exp_unlock(); - void read_pending_cheap_lock(); - void read_pending_cheap_unlock(); - void write_pending_cheap_lock(); - void write_pending_cheap_unlock(); - toku_mutex_t* get_mutex_for_pair(uint32_t fullhash); - void pair_lock_by_fullhash(uint32_t fullhash); - void pair_unlock_by_fullhash(uint32_t fullhash); - -private: - void pair_remove (PAIR p); - void remove_from_hash_chain(PAIR p); - void add_to_cf_list (PAIR p); - void add_to_clock (PAIR p); - void add_to_hash_chain(PAIR p); -}; - -/////////////////////////////////////////////////////////////////////////////// -// -// 
Wrapper for the head of our cachefile list. -// -class cachefile_list { -public: - void init(); - void destroy(); - void read_lock(); - void read_unlock(); - void write_lock(); - void write_unlock(); - int cachefile_of_iname_in_env(const char *iname_in_env, CACHEFILE *cf); - int cachefile_of_filenum(FILENUM filenum, CACHEFILE *cf); - void add_cf_unlocked(CACHEFILE newcf); - void add_stale_cf(CACHEFILE newcf); - void remove_cf(CACHEFILE cf); - void remove_stale_cf_unlocked(CACHEFILE cf); - FILENUM reserve_filenum(); - uint32_t get_new_hash_id_unlocked(); - CACHEFILE find_cachefile_unlocked(struct fileid* fileid); - CACHEFILE find_stale_cachefile_unlocked(struct fileid* fileid); - void verify_unused_filenum(FILENUM filenum); - bool evict_some_stale_pair(evictor* ev); - void free_stale_data(evictor* ev); - // access to these fields are protected by the lock - CACHEFILE m_active_head; // head of CACHEFILEs that are active - CACHEFILE m_stale_head; // head of CACHEFILEs that are stale - CACHEFILE m_stale_tail; // tail of CACHEFILEs that are stale - FILENUM m_next_filenum_to_use; - uint32_t m_next_hash_id_to_use; - toku_pthread_rwlock_t m_lock; // this field is publoc so we are still POD - toku::omt m_active_filenum; - toku::omt m_active_fileid; -private: - CACHEFILE find_cachefile_in_list_unlocked(CACHEFILE start, struct fileid* fileid); -}; - - -/////////////////////////////////////////////////////////////////////////////// -// -// The checkpointer handles starting and finishing checkpoints of the -// cachetable's data. 
-// -class checkpointer { -public: - int init(pair_list *_pl, TOKULOGGER _logger, evictor *_ev, cachefile_list *files); - void destroy(); - void set_checkpoint_period(uint32_t new_period); - uint32_t get_checkpoint_period(); - int shutdown(); - bool has_been_shutdown(); - void begin_checkpoint(); - void add_background_job(); - void remove_background_job(); - void end_checkpoint(void (*testcallback_f)(void*), void* testextra); - TOKULOGGER get_logger(); - // used during begin_checkpoint - void increment_num_txns(); -private: - uint32_t m_checkpoint_num_txns; // how many transactions are in the checkpoint - TOKULOGGER m_logger; - LSN m_lsn_of_checkpoint_in_progress; - uint32_t m_checkpoint_num_files; // how many cachefiles are in the checkpoint - struct minicron m_checkpointer_cron; // the periodic checkpointing thread - cachefile_list *m_cf_list; - pair_list *m_list; - evictor *m_ev; - bool m_checkpointer_cron_init; - bool m_checkpointer_init; - - // variable used by the checkpoint thread to know - // when all work induced by cloning on client threads is done - BACKGROUND_JOB_MANAGER m_checkpoint_clones_bjm; - // private methods for begin_checkpoint - void update_cachefiles(); - void log_begin_checkpoint(); - void turn_on_pending_bits(); - // private methods for end_checkpoint - void fill_checkpoint_cfs(CACHEFILE* checkpoint_cfs); - void checkpoint_pending_pairs(); - void checkpoint_userdata(CACHEFILE* checkpoint_cfs); - void log_end_checkpoint(); - void end_checkpoint_userdata(CACHEFILE* checkpoint_cfs); - void remove_cachefiles(CACHEFILE* checkpoint_cfs); - - // Unit test struct needs access to private members. - friend struct checkpointer_test; -}; - -// -// This is how often we want the eviction thread -// to run, in seconds. -// -const int EVICTION_PERIOD = 1; - -/////////////////////////////////////////////////////////////////////////////// -// -// The evictor handles the removal of pairs from the pair list/cachetable. 
-// -class evictor { -public: - int init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, KIBBUTZ _kibbutz, uint32_t eviction_period); - void destroy(); - void add_pair_attr(PAIR_ATTR attr); - void remove_pair_attr(PAIR_ATTR attr); - void change_pair_attr(PAIR_ATTR old_attr, PAIR_ATTR new_attr); - void add_to_size_current(long size); - void remove_from_size_current(long size); - uint64_t reserve_memory(double fraction, uint64_t upper_bound); - void release_reserved_memory(uint64_t reserved_memory); - void run_eviction_thread(); - void do_partial_eviction(PAIR p); - void evict_pair(PAIR p, bool checkpoint_pending); - void wait_for_cache_pressure_to_subside(); - void signal_eviction_thread(); - bool should_client_thread_sleep(); - bool should_client_wake_eviction_thread(); - // function needed for testing - void get_state(long *size_current_ptr, long *size_limit_ptr); - void fill_engine_status(); -private: - void run_eviction(); - bool run_eviction_on_pair(PAIR p); - void try_evict_pair(PAIR p); - void decrease_size_evicting(long size_evicting_estimate); - bool should_sleeping_clients_wakeup(); - bool eviction_needed(); - - // We have some intentional races with these variables because we're ok with reading something a little bit old. - // Provide some hooks for reading variables in an unsafe way so that there are function names we can stick in a valgrind suppression. 
- int64_t unsafe_read_size_current(void) const; - int64_t unsafe_read_size_evicting(void) const; - - pair_list* m_pl; - cachefile_list* m_cf_list; - int64_t m_size_current; // the sum of the sizes of the pairs in the cachetable - // changes to these two values are protected - // by ev_thread_lock - int64_t m_size_reserved; // How much memory is reserved (e.g., by the loader) - int64_t m_size_evicting; // the sum of the sizes of the pairs being written - - // these are constants - int64_t m_low_size_watermark; // target max size of cachetable that eviction thread aims for - int64_t m_low_size_hysteresis; // if cachetable grows to this size, client threads wake up eviction thread upon adding data - int64_t m_high_size_watermark; // if cachetable grows to this size, client threads sleep upon adding data - int64_t m_high_size_hysteresis; // if > cachetable size, then sleeping client threads may wake up - - // used to calculate random numbers - struct random_data m_random_data; - char m_random_statebuf[64]; - - // mutex that protects fields listed immedietly below - toku_mutex_t m_ev_thread_lock; - // the eviction thread - toku_pthread_t m_ev_thread; - // condition variable that controls the sleeping period - // of the eviction thread - toku_cond_t m_ev_thread_cond; - // number of client threads that are currently sleeping - // due to an over-subscribed cachetable - uint32_t m_num_sleepers; - // states if the eviction thread should run. 
set to true - // in init, set to false during destroy - bool m_run_thread; - // bool that states if the eviction thread is currently running - bool m_ev_thread_is_running; - // period which the eviction thread sleeps - uint32_t m_period_in_seconds; - // condition variable on which client threads wait on when sleeping - // due to an over-subscribed cachetable - toku_cond_t m_flow_control_cond; - - // variables for engine status - PARTITIONED_COUNTER m_size_nonleaf; - PARTITIONED_COUNTER m_size_leaf; - PARTITIONED_COUNTER m_size_rollback; - PARTITIONED_COUNTER m_size_cachepressure; - PARTITIONED_COUNTER m_wait_pressure_count; - PARTITIONED_COUNTER m_wait_pressure_time; - PARTITIONED_COUNTER m_long_wait_pressure_count; - PARTITIONED_COUNTER m_long_wait_pressure_time; - - KIBBUTZ m_kibbutz; - - // this variable is ONLY used for testing purposes - uint64_t m_num_eviction_thread_runs; - - bool m_ev_thread_init; - bool m_evictor_init; - - friend class evictor_test_helpers; - friend class evictor_unit_test; -}; - -/////////////////////////////////////////////////////////////////////////////// -// -// Iterates over the clean head in the pair list, calling the cleaner -// callback on each node in that list. 
-// -class cleaner { -public: - int init(uint32_t cleaner_iterations, pair_list* _pl, CACHETABLE _ct); - void destroy(void); - uint32_t get_iterations(void); - void set_iterations(uint32_t new_iterations); - uint32_t get_period_unlocked(void); - void set_period(uint32_t new_period); - int run_cleaner(void); - -private: - pair_list* m_pl; - CACHETABLE m_ct; - struct minicron m_cleaner_cron; // the periodic cleaner thread - uint32_t m_cleaner_iterations; // how many times to run the cleaner per - // cleaner period (minicron has a - // minimum period of 1s so if you want - // more frequent cleaner runs you must - // use this) - bool m_cleaner_cron_init; - bool m_cleaner_init; -}; - -/////////////////////////////////////////////////////////////////////////////// -// -// The cachetable is as close to an ENV as we get. -// -struct cachetable { - pair_list list; - cleaner cl; - evictor ev; - checkpointer cp; - cachefile_list cf_list; - - KIBBUTZ client_kibbutz; // pool of worker threads and jobs to do asynchronously for the client. - KIBBUTZ ct_kibbutz; // pool of worker threads and jobs to do asynchronously for the cachetable - KIBBUTZ checkpointing_kibbutz; // small pool for checkpointing cloned pairs - - char *env_dir; -}; - -#endif // End of header guardian. 
diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/checkpoint.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/checkpoint.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/checkpoint.cc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/checkpoint.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,402 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ident "$Id$" - -/*********** - * The purpose of this file is to implement the high-level logic for - * taking a checkpoint. - * - * There are three locks used for taking a checkpoint. They are listed below. - * - * NOTE: The reader-writer locks may be held by either multiple clients - * or the checkpoint function. (The checkpoint function has the role - * of the writer, the clients have the reader roles.) - * - * - multi_operation_lock - * This is a new reader-writer lock. - * This lock is held by the checkpoint function only for as long as is required to - * to set all the "pending" bits and to create the checkpoint-in-progress versions - * of the header and translation table (btt). - * The following operations must take the multi_operation_lock: - * - any set of operations that must be atomic with respect to begin checkpoint - * - * - checkpoint_safe_lock - * This is a new reader-writer lock. - * This lock is held for the entire duration of the checkpoint. 
- * It is used to prevent more than one checkpoint from happening at a time - * (the checkpoint function is non-re-entrant), and to prevent certain operations - * that should not happen during a checkpoint. - * The following operations must take the checkpoint_safe lock: - * - delete a dictionary - * - rename a dictionary - * The application can use this lock to disable checkpointing during other sensitive - * operations, such as making a backup copy of the database. - * - * Once the "pending" bits are set and the snapshots are taken of the header and btt, - * most normal database operations are permitted to resume. - * - * - * - *****/ - -#include -#include - -#include "fttypes.h" -#include "cachetable.h" -#include "log-internal.h" -#include "logger.h" -#include "checkpoint.h" -#include -#include -#include - -/////////////////////////////////////////////////////////////////////////////////// -// Engine status -// -// Status is intended for display to humans to help understand system behavior. -// It does not need to be perfectly thread-safe. - -static CHECKPOINT_STATUS_S cp_status; - -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(cp_status, k, c, t, "checkpoint: " l, inc) - -static void -status_init(void) { - // Note, this function initializes the keyname, type, and legend fields. - // Value fields are initialized to zero by compiler. 
- - STATUS_INIT(CP_PERIOD, CHECKPOINT_PERIOD, UINT64, "period", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CP_FOOTPRINT, nullptr, UINT64, "footprint", TOKU_ENGINE_STATUS); - STATUS_INIT(CP_TIME_LAST_CHECKPOINT_BEGIN, CHECKPOINT_LAST_BEGAN, UNIXTIME, "last checkpoint began ", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE, CHECKPOINT_LAST_COMPLETE_BEGAN, UNIXTIME, "last complete checkpoint began ", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CP_TIME_LAST_CHECKPOINT_END, CHECKPOINT_LAST_COMPLETE_ENDED, UNIXTIME, "last complete checkpoint ended", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CP_TIME_CHECKPOINT_DURATION, CHECKPOINT_DURATION, UINT64, "time spent during checkpoint (begin and end phases)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CP_TIME_CHECKPOINT_DURATION_LAST, CHECKPOINT_DURATION_LAST, UINT64, "time spent during last checkpoint (begin and end phases)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CP_LAST_LSN, nullptr, UINT64, "last complete checkpoint LSN", TOKU_ENGINE_STATUS); - STATUS_INIT(CP_CHECKPOINT_COUNT, CHECKPOINT_TAKEN, UINT64, "checkpoints taken ", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CP_CHECKPOINT_COUNT_FAIL, CHECKPOINT_FAILED, UINT64, "checkpoints failed", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CP_WAITERS_NOW, nullptr, UINT64, "waiters now", TOKU_ENGINE_STATUS); - STATUS_INIT(CP_WAITERS_MAX, nullptr, UINT64, "waiters max", TOKU_ENGINE_STATUS); - STATUS_INIT(CP_CLIENT_WAIT_ON_MO, nullptr, UINT64, "non-checkpoint client wait on mo lock", TOKU_ENGINE_STATUS); - STATUS_INIT(CP_CLIENT_WAIT_ON_CS, nullptr, UINT64, "non-checkpoint client wait on cs lock", TOKU_ENGINE_STATUS); - - STATUS_INIT(CP_BEGIN_TIME, CHECKPOINT_BEGIN_TIME, UINT64, "checkpoint begin time", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CP_LONG_BEGIN_COUNT, CHECKPOINT_LONG_BEGIN_COUNT, UINT64, "long checkpoint begin count", 
TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(CP_LONG_BEGIN_TIME, CHECKPOINT_LONG_BEGIN_TIME, UINT64, "long checkpoint begin time", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - - cp_status.initialized = true; -} -#undef STATUS_INIT - -#define STATUS_VALUE(x) cp_status.status[x].value.num - -void -toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS statp) { - if (!cp_status.initialized) - status_init(); - STATUS_VALUE(CP_PERIOD) = toku_get_checkpoint_period_unlocked(ct); - *statp = cp_status; -} - - - -static LSN last_completed_checkpoint_lsn; - -static toku_mutex_t checkpoint_safe_mutex; -static toku::frwlock checkpoint_safe_lock; -static toku_pthread_rwlock_t multi_operation_lock; -static toku_pthread_rwlock_t low_priority_multi_operation_lock; - -static bool initialized = false; // sanity check -static volatile bool locked_mo = false; // true when the multi_operation write lock is held (by checkpoint) -static volatile bool locked_cs = false; // true when the checkpoint_safe write lock is held (by checkpoint) -static volatile uint64_t toku_checkpoint_long_threshold = 1000000; - -// Note following static functions are called from checkpoint internal logic only, -// and use the "writer" calls for locking and unlocking. 
- -static void -multi_operation_lock_init(void) { - pthread_rwlockattr_t attr; - pthread_rwlockattr_init(&attr); -#if defined(HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP) - pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP); -#else - // TODO: need to figure out how to make writer-preferential rwlocks - // happen on osx -#endif - toku_pthread_rwlock_init(&multi_operation_lock, &attr); - toku_pthread_rwlock_init(&low_priority_multi_operation_lock, &attr); - pthread_rwlockattr_destroy(&attr); - locked_mo = false; -} - -static void -multi_operation_lock_destroy(void) { - toku_pthread_rwlock_destroy(&multi_operation_lock); - toku_pthread_rwlock_destroy(&low_priority_multi_operation_lock); -} - -static void -multi_operation_checkpoint_lock(void) { - toku_pthread_rwlock_wrlock(&low_priority_multi_operation_lock); - toku_pthread_rwlock_wrlock(&multi_operation_lock); - locked_mo = true; -} - -static void -multi_operation_checkpoint_unlock(void) { - locked_mo = false; - toku_pthread_rwlock_wrunlock(&multi_operation_lock); - toku_pthread_rwlock_wrunlock(&low_priority_multi_operation_lock); -} - -static void -checkpoint_safe_lock_init(void) { - toku_mutex_init(&checkpoint_safe_mutex, NULL); - checkpoint_safe_lock.init(&checkpoint_safe_mutex); - locked_cs = false; -} - -static void -checkpoint_safe_lock_destroy(void) { - checkpoint_safe_lock.deinit(); - toku_mutex_destroy(&checkpoint_safe_mutex); -} - -static void -checkpoint_safe_checkpoint_lock(void) { - toku_mutex_lock(&checkpoint_safe_mutex); - checkpoint_safe_lock.write_lock(false); - toku_mutex_unlock(&checkpoint_safe_mutex); - locked_cs = true; -} - -static void -checkpoint_safe_checkpoint_unlock(void) { - locked_cs = false; - toku_mutex_lock(&checkpoint_safe_mutex); - checkpoint_safe_lock.write_unlock(); - toku_mutex_unlock(&checkpoint_safe_mutex); -} - -// toku_xxx_client_(un)lock() functions are only called from client code, -// never from checkpoint code, and use the "reader" interface to the 
lock functions. - -void -toku_multi_operation_client_lock(void) { - if (locked_mo) - (void) toku_sync_fetch_and_add(&STATUS_VALUE(CP_CLIENT_WAIT_ON_MO), 1); - toku_pthread_rwlock_rdlock(&multi_operation_lock); -} - -void -toku_multi_operation_client_unlock(void) { - toku_pthread_rwlock_rdunlock(&multi_operation_lock); -} - -void toku_low_priority_multi_operation_client_lock(void) { - toku_pthread_rwlock_rdlock(&low_priority_multi_operation_lock); -} - -void toku_low_priority_multi_operation_client_unlock(void) { - toku_pthread_rwlock_rdunlock(&low_priority_multi_operation_lock); -} - -void -toku_checkpoint_safe_client_lock(void) { - if (locked_cs) - (void) toku_sync_fetch_and_add(&STATUS_VALUE(CP_CLIENT_WAIT_ON_CS), 1); - toku_mutex_lock(&checkpoint_safe_mutex); - checkpoint_safe_lock.read_lock(); - toku_mutex_unlock(&checkpoint_safe_mutex); - toku_multi_operation_client_lock(); -} - -void -toku_checkpoint_safe_client_unlock(void) { - toku_mutex_lock(&checkpoint_safe_mutex); - checkpoint_safe_lock.read_unlock(); - toku_mutex_unlock(&checkpoint_safe_mutex); - toku_multi_operation_client_unlock(); -} - -// Initialize the checkpoint mechanism, must be called before any client operations. 
-void -toku_checkpoint_init(void) { - multi_operation_lock_init(); - checkpoint_safe_lock_init(); - initialized = true; -} - -void -toku_checkpoint_destroy(void) { - multi_operation_lock_destroy(); - checkpoint_safe_lock_destroy(); - initialized = false; -} - -#define SET_CHECKPOINT_FOOTPRINT(x) STATUS_VALUE(CP_FOOTPRINT) = footprint_offset + x - - -// Take a checkpoint of all currently open dictionaries -int -toku_checkpoint(CHECKPOINTER cp, TOKULOGGER logger, - void (*callback_f)(void*), void * extra, - void (*callback2_f)(void*), void * extra2, - checkpoint_caller_t caller_id) { - int footprint_offset = (int) caller_id * 1000; - - assert(initialized); - - (void) toku_sync_fetch_and_add(&STATUS_VALUE(CP_WAITERS_NOW), 1); - checkpoint_safe_checkpoint_lock(); - (void) toku_sync_fetch_and_sub(&STATUS_VALUE(CP_WAITERS_NOW), 1); - - if (STATUS_VALUE(CP_WAITERS_NOW) > STATUS_VALUE(CP_WAITERS_MAX)) - STATUS_VALUE(CP_WAITERS_MAX) = STATUS_VALUE(CP_WAITERS_NOW); // threadsafe, within checkpoint_safe lock - - SET_CHECKPOINT_FOOTPRINT(10); - multi_operation_checkpoint_lock(); - SET_CHECKPOINT_FOOTPRINT(20); - toku_ft_open_close_lock(); - - SET_CHECKPOINT_FOOTPRINT(30); - STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN) = time(NULL); - uint64_t t_checkpoint_begin_start = toku_current_time_microsec(); - toku_cachetable_begin_checkpoint(cp, logger); - uint64_t t_checkpoint_begin_end = toku_current_time_microsec(); - - toku_ft_open_close_unlock(); - multi_operation_checkpoint_unlock(); - - SET_CHECKPOINT_FOOTPRINT(40); - if (callback_f) { - callback_f(extra); // callback is called with checkpoint_safe_lock still held - } - toku_cachetable_end_checkpoint(cp, logger, callback2_f, extra2); - - SET_CHECKPOINT_FOOTPRINT(50); - if (logger) { - last_completed_checkpoint_lsn = logger->last_completed_checkpoint_lsn; - toku_logger_maybe_trim_log(logger, last_completed_checkpoint_lsn); - STATUS_VALUE(CP_LAST_LSN) = last_completed_checkpoint_lsn.lsn; - } - - SET_CHECKPOINT_FOOTPRINT(60); - 
STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_END) = time(NULL); - STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE) = STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN); - STATUS_VALUE(CP_CHECKPOINT_COUNT)++; - uint64_t duration = t_checkpoint_begin_end - t_checkpoint_begin_start; - STATUS_VALUE(CP_BEGIN_TIME) += duration; - if (duration >= toku_checkpoint_long_threshold) { - STATUS_VALUE(CP_LONG_BEGIN_TIME) += duration; - STATUS_VALUE(CP_LONG_BEGIN_COUNT) += 1; - } - STATUS_VALUE(CP_TIME_CHECKPOINT_DURATION) += (uint64_t) ((time_t) STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_END)) - ((time_t) STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN)); - STATUS_VALUE(CP_TIME_CHECKPOINT_DURATION_LAST) = (uint64_t) ((time_t) STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_END)) - ((time_t) STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN)); - STATUS_VALUE(CP_FOOTPRINT) = 0; - - checkpoint_safe_checkpoint_unlock(); - return 0; -} - -#include -void __attribute__((__constructor__)) toku_checkpoint_helgrind_ignore(void); -void -toku_checkpoint_helgrind_ignore(void) { - TOKU_VALGRIND_HG_DISABLE_CHECKING(&cp_status, sizeof cp_status); - TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_mo, sizeof locked_mo); - TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_cs, sizeof locked_cs); -} - -#undef SET_CHECKPOINT_FOOTPRINT -#undef STATUS_VALUE diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/checkpoint.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/checkpoint.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/checkpoint.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/checkpoint.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,205 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_CHECKPOINT_H -#define TOKU_CHECKPOINT_H - -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by 
the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. 
- - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#ident "$Id$" - -#include "cachetable.h" - -#include - -void toku_set_checkpoint_period(CACHETABLE ct, uint32_t new_period); -//Effect: Change [end checkpoint (n) - begin checkpoint (n+1)] delay to -// new_period seconds. 0 means disable. - -uint32_t toku_get_checkpoint_period_unlocked(CACHETABLE ct); - - -/****** - * - * NOTE: checkpoint_safe_lock is highest level lock - * multi_operation_lock is next level lock - * ydb_big_lock is next level lock - * - * Locks must always be taken in this sequence (highest level first). - * - */ - - -/****** - * Client code must hold the checkpoint_safe lock during the following operations: - * - delete a dictionary via DB->remove - * - delete a dictionary via DB_TXN->abort(txn) (where txn created a dictionary) - * - rename a dictionary //TODO: Handlerton rename needs to take this - * //TODO: Handlerton rename needs to be recoded for transaction recovery - *****/ - -void toku_checkpoint_safe_client_lock(void); - -void toku_checkpoint_safe_client_unlock(void); - - - -/****** - * These functions are called from the ydb level. - * Client code must hold the multi_operation lock during the following operations: - * - insertion into multiple indexes - * - replace into (simultaneous delete/insert on a single key) - *****/ - -void toku_multi_operation_client_lock(void); -void toku_low_priority_multi_operation_client_lock(void); - -void toku_multi_operation_client_unlock(void); -void toku_low_priority_multi_operation_client_unlock(void); - - -// Initialize the checkpoint mechanism, must be called before any client operations. -// Must pass in function pointers to take/release ydb lock. 
-void toku_checkpoint_init(void); - -void toku_checkpoint_destroy(void); - -typedef enum {SCHEDULED_CHECKPOINT = 0, // "normal" checkpoint taken on checkpoint thread - CLIENT_CHECKPOINT = 1, // induced by client, such as FLUSH LOGS or SAVEPOINT - INDEXER_CHECKPOINT = 2, - STARTUP_CHECKPOINT = 3, - UPGRADE_CHECKPOINT = 4, - RECOVERY_CHECKPOINT = 5, - SHUTDOWN_CHECKPOINT = 6} checkpoint_caller_t; - -// Take a checkpoint of all currently open dictionaries -// Callbacks are called during checkpoint procedure while checkpoint_safe lock is still held. -// Callbacks are primarily intended for use in testing. -// caller_id identifies why the checkpoint is being taken. -int toku_checkpoint(CHECKPOINTER cp, TOKULOGGER logger, - void (*callback_f)(void*), void * extra, - void (*callback2_f)(void*), void * extra2, - checkpoint_caller_t caller_id); - - - -/****** - * These functions are called from the ydb level. - * They return status information and have no side effects. - * Some status information may be incorrect because no locks are taken to collect status. - * (If checkpoint is in progress, it may overwrite status info while it is being read.) 
- *****/ -typedef enum { - CP_PERIOD, - CP_FOOTPRINT, - CP_TIME_LAST_CHECKPOINT_BEGIN, - CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE, - CP_TIME_LAST_CHECKPOINT_END, - CP_TIME_CHECKPOINT_DURATION, - CP_TIME_CHECKPOINT_DURATION_LAST, - CP_LAST_LSN, - CP_CHECKPOINT_COUNT, - CP_CHECKPOINT_COUNT_FAIL, - CP_WAITERS_NOW, // how many threads are currently waiting for the checkpoint_safe lock to perform a checkpoint - CP_WAITERS_MAX, // max threads ever simultaneously waiting for the checkpoint_safe lock to perform a checkpoint - CP_CLIENT_WAIT_ON_MO, // how many times a client thread waited to take the multi_operation lock, not for checkpoint - CP_CLIENT_WAIT_ON_CS, // how many times a client thread waited for the checkpoint_safe lock, not for checkpoint - CP_BEGIN_TIME, - CP_LONG_BEGIN_TIME, - CP_LONG_BEGIN_COUNT, - CP_STATUS_NUM_ROWS // number of rows in this status array. must be last. -} cp_status_entry; - -typedef struct { - bool initialized; - TOKU_ENGINE_STATUS_ROW_S status[CP_STATUS_NUM_ROWS]; -} CHECKPOINT_STATUS_S, *CHECKPOINT_STATUS; - -void toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS stat); - - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/CMakeLists.txt mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/CMakeLists.txt --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/CMakeLists.txt 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/CMakeLists.txt 2014-10-08 13:19:51.000000000 +0000 @@ -7,15 +7,17 @@ "${CMAKE_CURRENT_BINARY_DIR}/log_header.h" PROPERTIES GENERATED TRUE) -add_executable(logformat logformat.cc) +add_executable(logformat logger/logformat.cc) target_link_libraries(logformat ${LIBTOKUPORTABILITY}_static) +add_space_separated_property(TARGET logformat LINK_FLAGS --coverage) + add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/log_code.cc" OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/log_print.cc" OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/log_header.h" COMMAND $ . 
- DEPENDS logformat + DEPENDS logger/logformat ) add_custom_target( generate_log_code @@ -23,52 +25,52 @@ ) set(FT_SOURCES - background_job_manager - block_allocator - block_table bndata - cachetable - checkpoint - compress - dbufio - fifo + cachetable/background_job_manager + cachetable/cachetable + cachetable/checkpoint + cursor ft ft-cachetable-wrappers ft-flusher ft-hot-flusher - ftloader - ftloader-callback - ft_msg - ft_node-serialize - ft-node-deserialize ft-ops - ft-serialize ft-test-helpers ft-verify - key + loader/callbacks + loader/dbufio + loader/loader + loader/pqueue leafentry le-cursor - logcursor - logfilemgr - logger - log_upgrade - minicron - pqueue - queue - quicklz - recover - rollback - rollback-apply - rollback-ct-callbacks - rollback_log_node_cache - roll - sub_block - txn - txn_child_manager - txn_manager + logger/logcursor + logger/logfilemgr + logger/logger + logger/log_upgrade + logger/recover + msg + msg_buffer + node + pivotkeys + serialize/block_allocator + serialize/block_allocator_strategy + serialize/block_table + serialize/compress + serialize/ft_node-serialize + serialize/ft-node-deserialize + serialize/ft-serialize + serialize/quicklz + serialize/sub_block + txn/rollback + txn/rollback-apply + txn/rollback-ct-callbacks + txn/rollback_log_node_cache + txn/roll + txn/txn + txn/txn_child_manager + txn/txn_manager + txn/xids ule - xids - ybt "${CMAKE_CURRENT_BINARY_DIR}/log_code" "${CMAKE_CURRENT_BINARY_DIR}/log_print" ) @@ -85,24 +87,7 @@ ## link with lzma (which should be static) and link dependers with zlib target_link_libraries(ft LINK_PRIVATE util_static lzma ${LIBTOKUPORTABILITY}) -target_link_libraries(ft LINK_PUBLIC ${ZLIB_LIBRARY} ) +target_link_libraries(ft LINK_PUBLIC z) target_link_libraries(ft_static LINK_PRIVATE lzma) -## build the bins in this directory -foreach(tool tokuftdump tdb_logprint tdb-recover ftverify) - add_executable(${tool} ${tool}.cc) - add_dependencies(${tool} install_tdb_h) - 
target_link_libraries(${tool} ft_static util_static ${ZLIB_LIBRARY} lzma ${LIBTOKUPORTABILITY}_static ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) - add_space_separated_property(TARGET ${tool} COMPILE_FLAGS -fvisibility=hidden) -endforeach(tool) - -# link in math.h library just for this tool. -target_link_libraries(ftverify m) - -install( - TARGETS tokuftdump - COMPONENT Server - DESTINATION ${INSTALL_BINDIR} - ) - add_subdirectory(tests) diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/comparator.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/comparator.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/comparator.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/comparator.h 2014-10-08 13:19:51.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -85,47 +85,105 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#pragma once - #include #include -#include -#include +#include "portability/memory.h" + +#include "util/dbt.h" + +typedef int (*ft_compare_func)(DB *db, const DBT *a, const DBT *b); + +int toku_keycompare(const void *key1, uint32_t key1len, const void *key2, uint32_t key2len); + +int toku_builtin_compare_fun (DB *, const DBT *, const DBT*) __attribute__((__visibility__("default"))); namespace toku { -// a comparator object encapsulates the data necessary for -// comparing two keys in a fractal tree. it further understands -// that points may be positive or negative infinity. 
- -class comparator { -public: - void set_descriptor(DESCRIPTOR desc) { - m_fake_db.cmp_descriptor = desc; - } - - void create(ft_compare_func cmp, DESCRIPTOR desc) { - m_cmp = cmp; - memset(&m_fake_db, 0, sizeof(m_fake_db)); - m_fake_db.cmp_descriptor = desc; - } - - int compare(const DBT *a, const DBT *b) { - if (toku_dbt_is_infinite(a) || toku_dbt_is_infinite(b)) { - return toku_dbt_infinite_compare(a, b); - } else { - return m_cmp(&m_fake_db, a, b); - } - } - -private: - struct __toku_db m_fake_db; - ft_compare_func m_cmp; -}; + // a comparator object encapsulates the data necessary for + // comparing two keys in a fractal tree. it further understands + // that points may be positive or negative infinity. + + class comparator { + void init(ft_compare_func cmp, DESCRIPTOR desc, uint8_t memcmp_magic) { + _cmp = cmp; + _fake_db->cmp_descriptor = desc; + _memcmp_magic = memcmp_magic; + } + + public: + // This magic value is reserved to mean that the magic has not been set. + static const uint8_t MEMCMP_MAGIC_NONE = 0; + + void create(ft_compare_func cmp, DESCRIPTOR desc, uint8_t memcmp_magic = MEMCMP_MAGIC_NONE) { + XCALLOC(_fake_db); + init(cmp, desc, memcmp_magic); + } + + // inherit the attributes of another comparator, but keep our own + // copy of fake_db that is owned separately from the one given. 
+ void inherit(const comparator &cmp) { + invariant_notnull(_fake_db); + invariant_notnull(cmp._cmp); + invariant_notnull(cmp._fake_db); + init(cmp._cmp, cmp._fake_db->cmp_descriptor, cmp._memcmp_magic); + } + + // like inherit, but doesn't require that the this comparator + // was already created + void create_from(const comparator &cmp) { + XCALLOC(_fake_db); + inherit(cmp); + } + + void destroy() { + toku_free(_fake_db); + } + + const DESCRIPTOR_S *get_descriptor() const { + return _fake_db->cmp_descriptor; + } + + ft_compare_func get_compare_func() const { + return _cmp; + } + + uint8_t get_memcmp_magic() const { + return _memcmp_magic; + } + + bool valid() const { + return _cmp != nullptr; + } + + inline bool dbt_has_memcmp_magic(const DBT *dbt) const { + return *reinterpret_cast(dbt->data) == _memcmp_magic; + } + + int operator()(const DBT *a, const DBT *b) const { + if (__builtin_expect(toku_dbt_is_infinite(a) || toku_dbt_is_infinite(b), 0)) { + return toku_dbt_infinite_compare(a, b); + } else if (_memcmp_magic != MEMCMP_MAGIC_NONE + // If `a' has the memcmp magic.. 
+ && dbt_has_memcmp_magic(a) + // ..then we expect `b' to also have the memcmp magic + && __builtin_expect(dbt_has_memcmp_magic(b), 1)) { + return toku_builtin_compare_fun(nullptr, a, b); + } else { + // yikes, const sadness here + return _cmp(const_cast(_fake_db), a, b); + } + } + + private: + DB *_fake_db; + ft_compare_func _cmp; + uint8_t _memcmp_magic; + }; } /* namespace toku */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/compress.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/compress.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/compress.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/compress.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,294 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. 
- Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2011-2013 Tokutek Inc. All rights reserved." -#ident "$Id$" - -#include -#include - -#include -#include - -#include "compress.h" -#include "memory.h" -#include "quicklz.h" -#include "toku_assert.h" - -static inline enum toku_compression_method -normalize_compression_method(enum toku_compression_method method) -// Effect: resolve "friendly" names like "fast" and "small" into their real values. -{ - switch (method) { - case TOKU_DEFAULT_COMPRESSION_METHOD: - case TOKU_FAST_COMPRESSION_METHOD: - return TOKU_QUICKLZ_METHOD; - case TOKU_SMALL_COMPRESSION_METHOD: - return TOKU_LZMA_METHOD; - default: - return method; // everything else is fine - } -} - -size_t toku_compress_bound (enum toku_compression_method a, size_t size) -// See compress.h for the specification of this function. -{ - a = normalize_compression_method(a); - switch (a) { - case TOKU_NO_COMPRESSION: - return size + 1; - case TOKU_LZMA_METHOD: - return 1+lzma_stream_buffer_bound(size); // We need one extra for the rfc1950-style header byte (bits -03 are TOKU_LZMA_METHOD (1), bits 4-7 are the compression level) - case TOKU_QUICKLZ_METHOD: - return size+400 + 1; // quicklz manual says 400 bytes is enough. We need one more byte for the rfc1950-style header byte. bits 0-3 are 9, bits 4-7 are the QLZ_COMPRESSION_LEVEL. 
- case TOKU_ZLIB_METHOD: - return compressBound (size); - case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD: - return 2+deflateBound(nullptr, size); // We need one extra for the rfc1950-style header byte, and one extra to store windowBits (a bit over cautious about future upgrades maybe). - default: - break; - } - // fall through for bad enum (thus compiler can warn us if we didn't use all the enums - assert(0); return 0; -} - -void toku_compress (enum toku_compression_method a, - // the following types and naming conventions come from zlib.h - Bytef *dest, uLongf *destLen, - const Bytef *source, uLong sourceLen) -// See compress.h for the specification of this function. -{ - static const int zlib_compression_level = 5; - static const int zlib_without_checksum_windowbits = -15; - - a = normalize_compression_method(a); - assert(sourceLen < (1LL << 32)); - switch (a) { - case TOKU_NO_COMPRESSION: - dest[0] = TOKU_NO_COMPRESSION; - memcpy(dest + 1, source, sourceLen); - *destLen = sourceLen + 1; - return; - case TOKU_ZLIB_METHOD: { - int r = compress2(dest, destLen, source, sourceLen, zlib_compression_level); - assert(r == Z_OK); - assert((dest[0]&0xF) == TOKU_ZLIB_METHOD); - return; - } - case TOKU_QUICKLZ_METHOD: { - if (sourceLen==0) { - // quicklz requires at least one byte, so we handle this ourselves - assert(1 <= *destLen); - *destLen = 1; - } else { - qlz_state_compress *XCALLOC(qsc); - size_t actual_destlen = qlz_compress(source, (char*)(dest+1), sourceLen, qsc); - assert(actual_destlen +1 <= *destLen); - *destLen = actual_destlen+1; // add one for the rfc1950-style header byte. - toku_free(qsc); - } - // Fill in that first byte - dest[0] = TOKU_QUICKLZ_METHOD + (QLZ_COMPRESSION_LEVEL << 4); - return; - } - case TOKU_LZMA_METHOD: { - const int lzma_compression_level = 2; - if (sourceLen==0) { - // lzma version 4.999 requires at least one byte, so we'll do it ourselves. 
- assert(1<=*destLen); - *destLen = 1; - } else { - size_t out_pos = 1; - lzma_ret r = lzma_easy_buffer_encode(lzma_compression_level, LZMA_CHECK_NONE, NULL, - source, sourceLen, - dest, &out_pos, *destLen); - assert(out_pos < *destLen); - if (r != LZMA_OK) { - fprintf(stderr, "lzma_easy_buffer_encode() returned %d\n", (int) r); - } - assert(r==LZMA_OK); - *destLen = out_pos; - } - dest[0] = TOKU_LZMA_METHOD + (lzma_compression_level << 4); - - return; - } - case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD: { - z_stream strm; - strm.zalloc = Z_NULL; - strm.zfree = Z_NULL; - strm.opaque = Z_NULL; - strm.next_in = const_cast(source); - strm.avail_in = sourceLen; - int r = deflateInit2(&strm, zlib_compression_level, Z_DEFLATED, - zlib_without_checksum_windowbits, 8, Z_DEFAULT_STRATEGY); - lazy_assert(r == Z_OK); - strm.next_out = dest + 2; - strm.avail_out = *destLen - 2; - r = deflate(&strm, Z_FINISH); - lazy_assert(r == Z_STREAM_END); - r = deflateEnd(&strm); - lazy_assert(r == Z_OK); - *destLen = strm.total_out + 2; - dest[0] = TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD + (zlib_compression_level << 4); - dest[1] = zlib_without_checksum_windowbits; - return; - } - default: - break; - } - // default fall through to error. - assert(0); -} - -void toku_decompress (Bytef *dest, uLongf destLen, - const Bytef *source, uLongf sourceLen) -// See compress.h for the specification of this function. -{ - assert(sourceLen>=1); // need at least one byte for the RFC header. 
- switch (source[0] & 0xF) { - case TOKU_NO_COMPRESSION: - memcpy(dest, source + 1, sourceLen - 1); - return; - case TOKU_ZLIB_METHOD: { - uLongf actual_destlen = destLen; - int r = uncompress(dest, &actual_destlen, source, sourceLen); - assert(r == Z_OK); - assert(actual_destlen == destLen); - return; - } - case TOKU_QUICKLZ_METHOD: - if (sourceLen>1) { - toku::scoped_calloc state_buf(sizeof(qlz_state_decompress)); - qlz_state_decompress *qsd = reinterpret_cast(state_buf.get()); - uLongf actual_destlen = qlz_decompress((char*)source+1, dest, qsd); - assert(actual_destlen == destLen); - } else { - // length 1 means there is no data, so do nothing. - assert(destLen==0); - } - return; - case TOKU_LZMA_METHOD: { - if (sourceLen>1) { - uint64_t memlimit = UINT64_MAX; - size_t out_pos = 0; - size_t in_pos = 1; - lzma_ret r = lzma_stream_buffer_decode(&memlimit, // memlimit, use UINT64_MAX to disable this check - 0, // flags - NULL, // allocator - source, &in_pos, sourceLen, - dest, &out_pos, destLen); - assert(r==LZMA_OK); - assert(out_pos == destLen); - } else { - // length 1 means there is no data, so do nothing. - assert(destLen==0); - } - return; - } - case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD: { - z_stream strm; - strm.next_in = const_cast(source + 2); - strm.avail_in = sourceLen - 2; - strm.zalloc = Z_NULL; - strm.zfree = Z_NULL; - strm.opaque = Z_NULL; - char windowBits = source[1]; - int r = inflateInit2(&strm, windowBits); - lazy_assert(r == Z_OK); - strm.next_out = dest; - strm.avail_out = destLen; - r = inflate(&strm, Z_FINISH); - lazy_assert(r == Z_STREAM_END); - r = inflateEnd(&strm); - lazy_assert(r == Z_OK); - return; - } - } - // default fall through to error. 
- assert(0); -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/compress.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/compress.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/compress.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/compress.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,135 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#ifndef TOKU_COMPRESS_H -#define TOKU_COMPRESS_H - - -#include -#include - -// The following provides an abstraction of quicklz and zlib. -// We offer three compression methods: ZLIB, QUICKLZ, and LZMA, as well as a "no compression" option. These options are declared in make_tdb.c. -// The resulting byte string includes enough information for us to decompress it. That is, we can tell whether it's z-compressed or qz-compressed or xz-compressed. - -size_t toku_compress_bound (enum toku_compression_method a, size_t size); -// Effect: Return the number of bytes needed to compress a buffer of size SIZE using compression method A. -// Typically, the result is a little bit larger than SIZE, since some data cannot be compressed. -// Usage note: It may help to know roughly how much space is involved. -// zlib's bound is something like (size + (size>>12) + (size>>14) + (size>>25) + 13. -// quicklz's bound is something like size+400. 
- -void toku_compress (enum toku_compression_method a, - // the following types and naming conventions come from zlib.h - Bytef *dest, uLongf *destLen, - const Bytef *source, uLong sourceLen); -// Effect: Using compression method A, compress SOURCE into DEST. The number of bytes to compress is passed in SOURCELEN. -// On input: *destLen is the size of the buffer. -// On output: *destLen is the size of the actual compressed data. -// Usage note: sourceLen may be be zero (unlike for quicklz, which requires sourceLen>0). -// Requires: The buffer must be big enough to hold the compressed data. (That is *destLen >= compressBound(a, sourceLen)) -// Requires: sourceLen < 2^32. -// Usage note: Although we *try* to assert if the DESTLEN isn't big enough, it's possible that it's too late by then (in the case of quicklz which offers -// no way to avoid a buffer overrun.) So we require that that DESTLEN is big enough. -// Rationale: zlib's argument order is DEST then SOURCE with the size of the buffer passed in *destLen, and the size of the result returned in *destLen. -// quicklz's argument order is SOURCE then DEST with the size returned (and it has no way to verify that an overright didn't happen). -// We use zlib's calling conventions partly because it is safer, and partly because it is more established. -// We also use zlib's ugly camel case convention for destLen and sourceLen. -// Unlike zlib, we return no error codes. Instead, we require that the data be OK and the size of the buffers is OK, and assert if there's a problem. - -void toku_decompress (Bytef *dest, uLongf destLen, - const Bytef *source, uLongf sourceLen); -// Effect: Decompress source (length sourceLen) into dest (length destLen) -// This function can decompress data compressed with either zlib or quicklz compression methods (calling toku_compress(), which puts an appropriate header on so we know which it is.) -// Requires: destLen is equal to the actual decompressed size of the data. 
-// Requires: The source must have been properly compressed. - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/cursor.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/cursor.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/cursor.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/cursor.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,507 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#include + +#include "ft/ft-internal.h" + +#include "ft/cursor.h" +#include "ft/leafentry.h" +#include "ft/txn/txn.h" +#include "util/dbt.h" + +int toku_ft_cursor_create(FT_HANDLE ft_handle, FT_CURSOR cursor, TOKUTXN ttxn, + bool is_snapshot_read, + bool disable_prefetching, + bool is_temporary) { + if (is_snapshot_read) { + invariant(ttxn != NULL); + int accepted = toku_txn_reads_txnid(ft_handle->ft->h->root_xid_that_created, ttxn); + if (accepted != TOKUDB_ACCEPT) { + invariant(accepted == 0); + return TOKUDB_MVCC_DICTIONARY_TOO_NEW; + } + } + + memset(cursor, 0, sizeof(*cursor)); + cursor->ft_handle = ft_handle; + cursor->ttxn = ttxn; + cursor->is_snapshot_read = is_snapshot_read; + cursor->disable_prefetching = disable_prefetching; + cursor->is_temporary = is_temporary; + return 0; +} + +void toku_ft_cursor_destroy(FT_CURSOR cursor) { + toku_destroy_dbt(&cursor->key); + toku_destroy_dbt(&cursor->val); + toku_destroy_dbt(&cursor->range_lock_left_key); + toku_destroy_dbt(&cursor->range_lock_right_key); +} + +// deprecated, should only be used by tests +int toku_ft_cursor(FT_HANDLE ft_handle, FT_CURSOR *cursorptr, TOKUTXN ttxn, + bool is_snapshot_read, bool disable_prefetching) { + FT_CURSOR XCALLOC(cursor); + int r = toku_ft_cursor_create(ft_handle, cursor, ttxn, is_snapshot_read, disable_prefetching, false); + if (r == 0) { + *cursorptr = cursor; + } 
else { + toku_free(cursor); + } + return r; +} + +// deprecated, should only be used by tests +void toku_ft_cursor_close(FT_CURSOR cursor) { + toku_ft_cursor_destroy(cursor); + toku_free(cursor); +} + +void toku_ft_cursor_remove_restriction(FT_CURSOR cursor) { + cursor->out_of_range_error = 0; + cursor->direction = 0; +} + +void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR cursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *extra) { + cursor->interrupt_cb = cb; + cursor->interrupt_cb_extra = extra; +} + +void toku_ft_cursor_set_leaf_mode(FT_CURSOR cursor) { + cursor->is_leaf_mode = true; +} + +int toku_ft_cursor_is_leaf_mode(FT_CURSOR cursor) { + return cursor->is_leaf_mode; +} + +// TODO: Rename / cleanup - this has nothing to do with locking +void toku_ft_cursor_set_range_lock(FT_CURSOR cursor, + const DBT *left, const DBT *right, + bool left_is_neg_infty, bool right_is_pos_infty, + int out_of_range_error) { + // Destroy any existing keys and then clone the given left, right keys + toku_destroy_dbt(&cursor->range_lock_left_key); + if (left_is_neg_infty) { + cursor->left_is_neg_infty = true; + } else { + toku_clone_dbt(&cursor->range_lock_left_key, *left); + } + + toku_destroy_dbt(&cursor->range_lock_right_key); + if (right_is_pos_infty) { + cursor->right_is_pos_infty = true; + } else { + toku_clone_dbt(&cursor->range_lock_right_key, *right); + } + + // TOKUDB_FOUND_BUT_REJECTED is a DB_NOTFOUND with instructions to stop looking. (Faster) + cursor->out_of_range_error = out_of_range_error == DB_NOTFOUND ? TOKUDB_FOUND_BUT_REJECTED : out_of_range_error; + cursor->direction = 0; +} + +void toku_ft_cursor_set_prefetching(FT_CURSOR cursor) { + cursor->prefetching = true; +} + +bool toku_ft_cursor_prefetching(FT_CURSOR cursor) { + return cursor->prefetching; +} + +//Return true if cursor is uninitialized. false otherwise. 
+bool toku_ft_cursor_not_set(FT_CURSOR cursor) { + assert((cursor->key.data==NULL) == (cursor->val.data==NULL)); + return (bool)(cursor->key.data == NULL); +} + +struct ft_cursor_search_struct { + FT_GET_CALLBACK_FUNCTION getf; + void *getf_v; + FT_CURSOR cursor; + ft_search *search; +}; + +/* search for the first kv pair that matches the search object */ +static int ft_cursor_search(FT_CURSOR cursor, ft_search *search, + FT_GET_CALLBACK_FUNCTION getf, void *getf_v, bool can_bulk_fetch) { + int r = toku_ft_search(cursor->ft_handle, search, getf, getf_v, cursor, can_bulk_fetch); + return r; +} + +static inline int compare_k_x(FT_HANDLE ft_handle, const DBT *k, const DBT *x) { + return ft_handle->ft->cmp(k, x); +} + +int toku_ft_cursor_compare_one(const ft_search &UU(search), const DBT *UU(x)) { + return 1; +} + +static int ft_cursor_compare_set(const ft_search &search, const DBT *x) { + FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); + return compare_k_x(ft_handle, search.k, x) <= 0; /* return min xy: kv <= xy */ +} + +static int +ft_cursor_current_getf(uint32_t keylen, const void *key, + uint32_t vallen, const void *val, + void *v, bool lock_only) { + struct ft_cursor_search_struct *CAST_FROM_VOIDP(bcss, v); + int r; + if (key==NULL) { + r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); + } else { + FT_CURSOR cursor = bcss->cursor; + DBT newkey; + toku_fill_dbt(&newkey, key, keylen); + if (compare_k_x(cursor->ft_handle, &cursor->key, &newkey) != 0) { + r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); // This was once DB_KEYEMPTY + if (r==0) r = TOKUDB_FOUND_BUT_REJECTED; + } + else + r = bcss->getf(keylen, key, vallen, val, bcss->getf_v, lock_only); + } + return r; +} + +static int ft_cursor_compare_next(const ft_search &search, const DBT *x) { + FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); + return compare_k_x(ft_handle, search.k, x) < 0; /* return min xy: kv < xy */ +} + +int toku_ft_cursor_current(FT_CURSOR cursor, int op, 
FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + if (toku_ft_cursor_not_set(cursor)) { + return EINVAL; + } + cursor->direction = 0; + if (op == DB_CURRENT) { + struct ft_cursor_search_struct bcss = {getf, getf_v, cursor, 0}; + ft_search search; + ft_search_init(&search, ft_cursor_compare_set, FT_SEARCH_LEFT, &cursor->key, nullptr, cursor->ft_handle); + int r = toku_ft_search(cursor->ft_handle, &search, ft_cursor_current_getf, &bcss, cursor, false); + ft_search_finish(&search); + return r; + } + return getf(cursor->key.size, cursor->key.data, cursor->val.size, cursor->val.data, getf_v, false); // ft_cursor_copyout(cursor, outkey, outval); +} + +int toku_ft_cursor_first(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction = 0; + ft_search search; + ft_search_init(&search, toku_ft_cursor_compare_one, FT_SEARCH_LEFT, nullptr, nullptr, cursor->ft_handle); + int r = ft_cursor_search(cursor, &search, getf, getf_v, false); + ft_search_finish(&search); + return r; +} + +int toku_ft_cursor_last(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction = 0; + ft_search search; + ft_search_init(&search, toku_ft_cursor_compare_one, FT_SEARCH_RIGHT, nullptr, nullptr, cursor->ft_handle); + int r = ft_cursor_search(cursor, &search, getf, getf_v, false); + ft_search_finish(&search); + return r; +} + +int toku_ft_cursor_check_restricted_range(FT_CURSOR c, const void *key, uint32_t keylen) { + if (c->out_of_range_error) { + FT ft = c->ft_handle->ft; + DBT found_key; + toku_fill_dbt(&found_key, key, keylen); + if ((!c->left_is_neg_infty && c->direction <= 0 && ft->cmp(&found_key, &c->range_lock_left_key) < 0) || + (!c->right_is_pos_infty && c->direction >= 0 && ft->cmp(&found_key, &c->range_lock_right_key) > 0)) { + invariant(c->out_of_range_error); + return c->out_of_range_error; + } + } + // Reset cursor direction to mitigate risk if some query type doesn't set the direction. 
+ // It is always correct to check both bounds (which happens when direction==0) but it can be slower. + c->direction = 0; + return 0; +} + +int toku_ft_cursor_shortcut(FT_CURSOR cursor, int direction, uint32_t index, bn_data *bd, + FT_GET_CALLBACK_FUNCTION getf, void *getf_v, + uint32_t *keylen, void **key, uint32_t *vallen, void **val) { + int r = 0; + // if we are searching towards the end, limit is last element + // if we are searching towards the beginning, limit is the first element + uint32_t limit = (direction > 0) ? (bd->num_klpairs() - 1) : 0; + + //Starting with the prev, find the first real (non-provdel) leafentry. + while (index != limit) { + index += direction; + LEAFENTRY le; + void* foundkey = NULL; + uint32_t foundkeylen = 0; + + r = bd->fetch_klpair(index, &le, &foundkeylen, &foundkey); + invariant_zero(r); + + if (toku_ft_cursor_is_leaf_mode(cursor) || !le_val_is_del(le, cursor->is_snapshot_read, cursor->ttxn)) { + le_extract_val( + le, + toku_ft_cursor_is_leaf_mode(cursor), + cursor->is_snapshot_read, + cursor->ttxn, + vallen, + val + ); + *key = foundkey; + *keylen = foundkeylen; + + cursor->direction = direction; + r = toku_ft_cursor_check_restricted_range(cursor, *key, *keylen); + if (r!=0) { + paranoid_invariant(r == cursor->out_of_range_error); + // We already got at least one entry from the bulk fetch. + // Return 0 (instead of out of range error). 
+ r = 0; + break; + } + r = getf(*keylen, *key, *vallen, *val, getf_v, false); + if (r == TOKUDB_CURSOR_CONTINUE) { + continue; + } + else { + break; + } + } + } + + return r; +} + +int toku_ft_cursor_next(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction = +1; + ft_search search; + ft_search_init(&search, ft_cursor_compare_next, FT_SEARCH_LEFT, &cursor->key, nullptr, cursor->ft_handle); + int r = ft_cursor_search(cursor, &search, getf, getf_v, true); + ft_search_finish(&search); + if (r == 0) { + toku_ft_cursor_set_prefetching(cursor); + } + return r; +} + +static int ft_cursor_search_eq_k_x_getf(uint32_t keylen, const void *key, + uint32_t vallen, const void *val, + void *v, bool lock_only) { + struct ft_cursor_search_struct *CAST_FROM_VOIDP(bcss, v); + int r; + if (key==NULL) { + r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, false); + } else { + FT_CURSOR cursor = bcss->cursor; + DBT newkey; + toku_fill_dbt(&newkey, key, keylen); + if (compare_k_x(cursor->ft_handle, bcss->search->k, &newkey) == 0) { + r = bcss->getf(keylen, key, vallen, val, bcss->getf_v, lock_only); + } else { + r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); + if (r==0) r = TOKUDB_FOUND_BUT_REJECTED; + } + } + return r; +} + +/* search for the kv pair that matches the search object and is equal to k */ +static int ft_cursor_search_eq_k_x(FT_CURSOR cursor, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + struct ft_cursor_search_struct bcss = {getf, getf_v, cursor, search}; + int r = toku_ft_search(cursor->ft_handle, search, ft_cursor_search_eq_k_x_getf, &bcss, cursor, false); + return r; +} + +static int ft_cursor_compare_prev(const ft_search &search, const DBT *x) { + FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); + return compare_k_x(ft_handle, search.k, x) > 0; /* return max xy: kv > xy */ +} + +int toku_ft_cursor_prev(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction = -1; + ft_search 
search; + ft_search_init(&search, ft_cursor_compare_prev, FT_SEARCH_RIGHT, &cursor->key, nullptr, cursor->ft_handle); + int r = ft_cursor_search(cursor, &search, getf, getf_v, true); + ft_search_finish(&search); + return r; +} + +int toku_ft_cursor_compare_set_range(const ft_search &search, const DBT *x) { + FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); + return compare_k_x(ft_handle, search.k, x) <= 0; /* return kv <= xy */ +} + +int toku_ft_cursor_set(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction = 0; + ft_search search; + ft_search_init(&search, toku_ft_cursor_compare_set_range, FT_SEARCH_LEFT, key, nullptr, cursor->ft_handle); + int r = ft_cursor_search_eq_k_x(cursor, &search, getf, getf_v); + ft_search_finish(&search); + return r; +} + +int toku_ft_cursor_set_range(FT_CURSOR cursor, DBT *key, DBT *key_bound, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction = 0; + ft_search search; + ft_search_init(&search, toku_ft_cursor_compare_set_range, FT_SEARCH_LEFT, key, key_bound, cursor->ft_handle); + int r = ft_cursor_search(cursor, &search, getf, getf_v, false); + ft_search_finish(&search); + return r; +} + +static int ft_cursor_compare_set_range_reverse(const ft_search &search, const DBT *x) { + FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); + return compare_k_x(ft_handle, search.k, x) >= 0; /* return kv >= xy */ +} + +int toku_ft_cursor_set_range_reverse(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction = 0; + ft_search search; + ft_search_init(&search, ft_cursor_compare_set_range_reverse, FT_SEARCH_RIGHT, key, nullptr, cursor->ft_handle); + int r = ft_cursor_search(cursor, &search, getf, getf_v, false); + ft_search_finish(&search); + return r; +} + +//TODO: When tests have been rewritten, get rid of this function. +//Only used by tests. 
+int toku_ft_cursor_get (FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags) { + int op = get_flags & DB_OPFLAGS_MASK; + if (get_flags & ~DB_OPFLAGS_MASK) + return EINVAL; + + switch (op) { + case DB_CURRENT: + case DB_CURRENT_BINDING: + return toku_ft_cursor_current(cursor, op, getf, getf_v); + case DB_FIRST: + return toku_ft_cursor_first(cursor, getf, getf_v); + case DB_LAST: + return toku_ft_cursor_last(cursor, getf, getf_v); + case DB_NEXT: + if (toku_ft_cursor_not_set(cursor)) { + return toku_ft_cursor_first(cursor, getf, getf_v); + } else { + return toku_ft_cursor_next(cursor, getf, getf_v); + } + case DB_PREV: + if (toku_ft_cursor_not_set(cursor)) { + return toku_ft_cursor_last(cursor, getf, getf_v); + } else { + return toku_ft_cursor_prev(cursor, getf, getf_v); + } + case DB_SET: + return toku_ft_cursor_set(cursor, key, getf, getf_v); + case DB_SET_RANGE: + return toku_ft_cursor_set_range(cursor, key, nullptr, getf, getf_v); + default: ;// Fall through + } + return EINVAL; +} + +void toku_ft_cursor_peek(FT_CURSOR cursor, const DBT **pkey, const DBT **pval) { + *pkey = &cursor->key; + *pval = &cursor->val; +} + +bool toku_ft_cursor_uninitialized(FT_CURSOR c) { + return toku_ft_cursor_not_set(c); +} + +int toku_ft_lookup(FT_HANDLE ft_handle, DBT *k, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + FT_CURSOR cursor; + int r = toku_ft_cursor(ft_handle, &cursor, NULL, false, false); + if (r != 0) { + return r; + } + + r = toku_ft_cursor_set(cursor, k, getf, getf_v); + + toku_ft_cursor_close(cursor); + return r; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/cursor.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/cursor.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/cursor.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/cursor.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,237 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: 
ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. 
+ +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +#pragma once + +#include + +#include "ft/ft-internal.h" + +/* an ft cursor is represented as a kv pair in a tree */ +struct ft_cursor { + FT_HANDLE ft_handle; + DBT key, val; // The key-value pair that the cursor currently points to + DBT range_lock_left_key, range_lock_right_key; + bool prefetching; + bool left_is_neg_infty, right_is_pos_infty; + bool is_snapshot_read; // true if query is read_committed, false otherwise + bool is_leaf_mode; + bool disable_prefetching; + bool is_temporary; + int out_of_range_error; + int direction; + TOKUTXN ttxn; + FT_CHECK_INTERRUPT_CALLBACK interrupt_cb; + void *interrupt_cb_extra; +}; +typedef struct ft_cursor *FT_CURSOR; + +enum ft_search_direction_e { + FT_SEARCH_LEFT = 1, /* search left -> right, finds min xy as defined by the compare function */ + FT_SEARCH_RIGHT = 2, /* search right -> left, finds max xy as defined by the compare function */ +}; + +struct ft_search; + +/* the search compare function should return 0 for all xy < kv and 1 for all xy >= kv + the compare function should be a step function from 0 to 1 for a left to right search + and 1 to 0 for a right to left search */ + +typedef int (*ft_search_compare_func_t)(const struct ft_search &, const DBT *); + +/* the search object contains the compare function, search direction, and the kv pair that + is used in the compare function. the context is the user's private data */ + +struct ft_search { + ft_search_compare_func_t compare; + enum ft_search_direction_e direction; + const DBT *k; + void *context; + + // To fix #3522, we need to remember the pivots that we have searched unsuccessfully. + // For example, when searching right (left), we call search->compare() on the ith pivot key. If search->compare(0 returns + // nonzero, then we search the ith subtree. If that subsearch returns DB_NOTFOUND then maybe the key isn't present in the + // tree. But maybe we are doing a DB_NEXT (DB_PREV), and everything was deleted. 
So we remember the pivot, and later we + // will only search subtrees which contain keys that are bigger than (less than) the pivot. + // The code is a kludge (even before this fix), and interacts strangely with the TOKUDB_FOUND_BUT_REJECTED (which is there + // because a failed DB_GET we would keep searching the rest of the tree). We probably should write the various lookup + // codes (NEXT, PREV, CURRENT, etc) more directly, and we should probably use a binary search within a node to search the + // pivots so that we can support a larger fanout. + // These changes (3312+3522) also (probably) introduce an isolation error (#3529). + // We must make sure we lock the right range for proper isolation level. + // There's probably a bug in which the following could happen. + // Thread A: Searches through deleted keys A,B,D,E and finds nothing, so searches the next leaf, releasing the YDB lock. + // Thread B: Inserts key C, and acquires the write lock, then commits. + // Thread A: Resumes, searching F,G,H and return success. Thread A then read-locks the range A-H, and doesn't notice + // the value C inserted by thread B. Thus a failure of serialization. + // See #3529. + // There also remains a potential thrashing problem. When we get a TOKUDB_TRY_AGAIN, we unpin everything. There's + // no guarantee that we will get everything pinned again. We ought to keep nodes pinned when we retry, except that on the + // way out with a DB_NOTFOUND we ought to unpin those nodes. See #3528. 
+ DBT pivot_bound; + const DBT *k_bound; +}; + +/* initialize the search compare object */ +static inline ft_search *ft_search_init(ft_search *search, ft_search_compare_func_t compare, + enum ft_search_direction_e direction, + const DBT *k, const DBT *k_bound, void *context) { + search->compare = compare; + search->direction = direction; + search->k = k; + search->context = context; + toku_init_dbt(&search->pivot_bound); + search->k_bound = k_bound; + return search; +} + +static inline void ft_search_finish(ft_search *search) { + toku_destroy_dbt(&search->pivot_bound); +} + + +int toku_ft_cursor_create(FT_HANDLE ft_handle, FT_CURSOR cursor, TOKUTXN txn, + bool is_snapshot_read, + bool disable_prefetching, + bool is_temporary); + +void toku_ft_cursor_destroy(FT_CURSOR cursor); + +int toku_ft_lookup(FT_HANDLE ft_h, DBT *k, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +void toku_ft_cursor_set_prefetching(FT_CURSOR cursor); + +bool toku_ft_cursor_prefetching(FT_CURSOR cursor); + +bool toku_ft_cursor_not_set(FT_CURSOR cursor); + +void toku_ft_cursor_set_leaf_mode(FT_CURSOR cursor); + +void toku_ft_cursor_remove_restriction(FT_CURSOR cursor); + +void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR cursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *extra); + +int toku_ft_cursor_is_leaf_mode(FT_CURSOR cursor); + +void toku_ft_cursor_set_range_lock(FT_CURSOR, const DBT *, const DBT *, bool, bool, int); + +int toku_ft_cursor_first(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int toku_ft_cursor_last(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int toku_ft_cursor_next(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int toku_ft_cursor_prev(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int 
toku_ft_cursor_current(FT_CURSOR cursor, int op, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int toku_ft_cursor_set(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int toku_ft_cursor_set_range(FT_CURSOR cursor, DBT *key, DBT *key_bound, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int toku_ft_cursor_set_range_reverse(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +bool toku_ft_cursor_uninitialized(FT_CURSOR cursor) __attribute__ ((warn_unused_result)); + +void toku_ft_cursor_peek(FT_CURSOR cursor, const DBT **pkey, const DBT **pval); + +int toku_ft_cursor_check_restricted_range(FT_CURSOR cursor, const void *key, uint32_t keylen); + +int toku_ft_cursor_shortcut(FT_CURSOR cursor, int direction, uint32_t index, bn_data *bd, + FT_GET_CALLBACK_FUNCTION getf, void *getf_v, + uint32_t *keylen, void **key, uint32_t *vallen, void **val); + +// used by get_key_after_bytes +int toku_ft_cursor_compare_one(const ft_search &search, const DBT *x); +int toku_ft_cursor_compare_set_range(const ft_search &search, const DBT *x); + +// deprecated, should only be used by tests, and eventually removed +int toku_ft_cursor(FT_HANDLE ft_handle, FT_CURSOR *ftcursor_p, TOKUTXN txn, bool, bool) __attribute__ ((warn_unused_result)); +void toku_ft_cursor_close(FT_CURSOR cursor); +int toku_ft_cursor_get(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags); +int toku_ft_cursor_delete(FT_CURSOR cursor, int flags, TOKUTXN txn); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/dbufio.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/dbufio.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/dbufio.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/dbufio.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,628 +0,0 @@ 
-/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 
8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." 
-#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "dbufio.h" -#include "fttypes.h" -#include -#include -#include -#include "memory.h" -#include -#include "ftloader-internal.h" -#include "ft-internal.h" -#include "ft.h" - -struct dbufio_file { - // i/o thread owns these - int fd; - - // consumers own these - size_t offset_in_buf; - toku_off_t offset_in_uncompressed_file; - - // need the mutex to modify these - struct dbufio_file *next; - bool second_buf_ready; // if true, the i/o thread is not touching anything. - - // consumers own [0], i/o thread owns [1], they are swapped by the consumer only when the condition mutex is held and second_buf_ready is true. - char *buf[2]; - size_t n_in_buf[2]; - int error_code[2]; // includes errno or eof. [0] is the error code associated with buf[0], [1] is the code for buf[1] - - bool io_done; -}; - - -/* A dbufio_fileset */ -struct dbufio_fileset { - // The mutex/condition variables protect - // the singly-linked list of files that need I/O (head/tail in the fileset, and next in each file) - // in each file: - // the second_buf_ready boolean (which says the second buffer is full of data). - // the swapping of the buf[], n_in_buf[], and error_code[] values. - toku_mutex_t mutex; - toku_cond_t cond; - int N; // How many files. This is constant once established. - int n_not_done; // how many of the files require more I/O? Owned by the i/o thread. - struct dbufio_file *files; // an array of length N. - struct dbufio_file *head, *tail; // must have the mutex to fiddle with these. - size_t bufsize; // the bufsize is the constant (the same for all buffers). 
- - bool panic; - bool compressed; - int panic_errno; - toku_pthread_t iothread; -}; - - -static void enq (DBUFIO_FILESET bfs, struct dbufio_file *f) { - if (bfs->tail==NULL) { - bfs->head = f; - } else { - bfs->tail->next = f; - } - bfs->tail = f; - f->next = NULL; -} - -static void panic (DBUFIO_FILESET bfs, int r) { - if (bfs->panic) return; - bfs->panic_errno = r; // Don't really care about a race on this variable... Writes to it are atomic, so at least one good panic reason will be stored. - bfs->panic = true; - return; -} - -static bool paniced (DBUFIO_FILESET bfs) { - return bfs->panic; -} - -static ssize_t dbf_read_some_compressed(struct dbufio_file *dbf, char *buf, size_t bufsize) { - ssize_t ret; - invariant(bufsize >= MAX_UNCOMPRESSED_BUF); - unsigned char *raw_block = NULL; - - // deserialize the sub block header - - // total_size - // num_sub_blocks - // compressed_size,uncompressed_size,xsum (repeated num_sub_blocks times) - ssize_t readcode; - const uint32_t header_size = sizeof(uint32_t); - char header[header_size]; - - readcode = toku_os_read(dbf->fd, &header, header_size); - if (readcode < 0) { - ret = -1; - goto exit; - } - if (readcode == 0) { - ret = 0; - goto exit; - } - if (readcode < header_size) { - errno = TOKUDB_NO_DATA; - ret = -1; - goto exit; - } - uint32_t total_size; - { - uint32_t *p = (uint32_t *) &header[0]; - total_size = toku_dtoh32(p[0]); - } - if (total_size == 0 || total_size > (1<<30)) { - errno = toku_db_badformat(); - ret = -1; - goto exit; - } - - //Cannot use XMALLOC - MALLOC_N(total_size, raw_block); - if (raw_block == nullptr) { - errno = ENOMEM; - ret = -1; - goto exit; - } - readcode = toku_os_read(dbf->fd, raw_block, total_size); - if (readcode < 0) { - ret = -1; - goto exit; - } - if (readcode < total_size) { - errno = TOKUDB_NO_DATA; - ret = -1; - goto exit; - } - - struct sub_block sub_block[max_sub_blocks]; - uint32_t *sub_block_header; - sub_block_header = (uint32_t *) &raw_block[0]; - int32_t n_sub_blocks; - 
n_sub_blocks = toku_dtoh32(sub_block_header[0]); - sub_block_header++; - size_t size_subblock_header; - size_subblock_header = sub_block_header_size(n_sub_blocks); - if (n_sub_blocks == 0 || n_sub_blocks > max_sub_blocks || size_subblock_header > total_size) { - errno = toku_db_badformat(); - ret = -1; - goto exit; - } - for (int i = 0; i < n_sub_blocks; i++) { - sub_block_init(&sub_block[i]); - sub_block[i].compressed_size = toku_dtoh32(sub_block_header[0]); - sub_block[i].uncompressed_size = toku_dtoh32(sub_block_header[1]); - sub_block[i].xsum = toku_dtoh32(sub_block_header[2]); - sub_block_header += 3; - } - - // verify sub block sizes - size_t total_compressed_size; - total_compressed_size = 0; - for (int i = 0; i < n_sub_blocks; i++) { - uint32_t compressed_size = sub_block[i].compressed_size; - if (compressed_size<=0 || compressed_size>(1<<30)) { - errno = toku_db_badformat(); - ret = -1; - goto exit; - } - - uint32_t uncompressed_size = sub_block[i].uncompressed_size; - if (uncompressed_size<=0 || uncompressed_size>(1<<30)) { - errno = toku_db_badformat(); - ret = -1; - goto exit; - } - total_compressed_size += compressed_size; - } - if (total_size != total_compressed_size + size_subblock_header) { - errno = toku_db_badformat(); - ret = -1; - goto exit; - } - - // sum up the uncompressed size of the sub blocks - size_t uncompressed_size; - uncompressed_size = get_sum_uncompressed_size(n_sub_blocks, sub_block); - if (uncompressed_size > bufsize || uncompressed_size > MAX_UNCOMPRESSED_BUF) { - errno = toku_db_badformat(); - ret = -1; - goto exit; - } - - unsigned char *uncompressed_data; - uncompressed_data = (unsigned char *)buf; - - // point at the start of the compressed data (past the node header, the sub block header, and the header checksum) - unsigned char *compressed_data; - compressed_data = raw_block + size_subblock_header; - - // decompress all the compressed sub blocks into the uncompressed buffer - { - int r; - r = 
decompress_all_sub_blocks(n_sub_blocks, sub_block, compressed_data, uncompressed_data, get_num_cores(), get_ft_pool()); - if (r != 0) { - fprintf(stderr, "%s:%d loader failed %d at %p size %" PRIu32"\n", __FUNCTION__, __LINE__, r, raw_block, total_size); - dump_bad_block(raw_block, total_size); - errno = r; - ret = -1; - goto exit; - } - } - ret = uncompressed_size; -exit: - if (raw_block) { - toku_free(raw_block); - } - return ret; -} - -static ssize_t dbf_read_compressed(struct dbufio_file *dbf, char *buf, size_t bufsize) { - invariant(bufsize >= MAX_UNCOMPRESSED_BUF); - size_t count = 0; - - while (count + MAX_UNCOMPRESSED_BUF <= bufsize) { - ssize_t readcode = dbf_read_some_compressed(dbf, buf + count, bufsize - count); - if (readcode < 0) { - return readcode; - } - count += readcode; - if (readcode == 0) { - break; - } - } - return count; -} - -static void* io_thread (void *v) -// The dbuf_thread does all the asynchronous I/O. -{ - DBUFIO_FILESET bfs = (DBUFIO_FILESET)v; - toku_mutex_lock(&bfs->mutex); - //printf("%s:%d Locked\n", __FILE__, __LINE__); - while (1) { - - if (paniced(bfs)) { - toku_mutex_unlock(&bfs->mutex); // ignore any error - return 0; - } - //printf("n_not_done=%d\n", bfs->n_not_done); - if (bfs->n_not_done==0) { - // all done (meaning we stored EOF (or another error) in error_code[0] for the file. - //printf("unlocked\n"); - toku_mutex_unlock(&bfs->mutex); - return 0; - } - - struct dbufio_file *dbf = bfs->head; - if (dbf==NULL) { - // No I/O needs to be done yet. - // Wait until something happens that will wake us up. - toku_cond_wait(&bfs->cond, &bfs->mutex); - if (paniced(bfs)) { - toku_mutex_unlock(&bfs->mutex); // ignore any error - return 0; - } - // Have the lock so go around. - } else { - // Some I/O needs to be done. 
- //printf("%s:%d Need I/O\n", __FILE__, __LINE__); - assert(dbf->second_buf_ready == false); - assert(!dbf->io_done); - bfs->head = dbf->next; - if (bfs->head==NULL) bfs->tail=NULL; - - // Unlock the mutex now that we have ownership of dbf to allow consumers to get the mutex and perform swaps. They won't swap - // this buffer because second_buf_ready is false. - toku_mutex_unlock(&bfs->mutex); - //printf("%s:%d Doing read fd=%d\n", __FILE__, __LINE__, dbf->fd); - { - ssize_t readcode; - if (bfs->compressed) { - readcode = dbf_read_compressed(dbf, dbf->buf[1], bfs->bufsize); - } - else { - readcode = toku_os_read(dbf->fd, dbf->buf[1], bfs->bufsize); - } - //printf("%s:%d readcode=%ld\n", __FILE__, __LINE__, readcode); - if (readcode==-1) { - // a real error. Save the real error. - int the_errno = get_error_errno(); - fprintf(stderr, "%s:%d dbf=%p fd=%d errno=%d\n", __FILE__, __LINE__, dbf, dbf->fd, the_errno); - dbf->error_code[1] = the_errno; - dbf->n_in_buf[1] = 0; - } else if (readcode==0) { - // End of file. Save it. 
- dbf->error_code[1] = EOF; - dbf->n_in_buf[1] = 0; - dbf->io_done = true; - - } else { - dbf->error_code[1] = 0; - dbf->n_in_buf[1] = readcode; - } - - //printf("%s:%d locking mutex again=%ld\n", __FILE__, __LINE__, readcode); - { - toku_mutex_lock(&bfs->mutex); - if (paniced(bfs)) { - toku_mutex_unlock(&bfs->mutex); // ignore any error - return 0; - } - } - // Now that we have the mutex, we can decrement n_not_done (if applicable) and set second_buf_ready - if (readcode<=0) { - bfs->n_not_done--; - } - //printf("%s:%d n_not_done=%d\n", __FILE__, __LINE__, bfs->n_not_done); - dbf->second_buf_ready = true; - toku_cond_broadcast(&bfs->cond); - //printf("%s:%d did broadcast=%d\n", __FILE__, __LINE__, bfs->n_not_done); - // Still have the lock so go around the loop - } - } - } -} - -int create_dbufio_fileset (DBUFIO_FILESET *bfsp, int N, int fds[/*N*/], size_t bufsize, bool compressed) { - //printf("%s:%d here\n", __FILE__, __LINE__); - int result = 0; - DBUFIO_FILESET CALLOC(bfs); - if (bfs==0) { result = get_error_errno(); } - - bfs->compressed = compressed; - - bool mutex_inited = false, cond_inited = false; - if (result==0) { - CALLOC_N(N, bfs->files); - if (bfs->files==NULL) { result = get_error_errno(); } - else { - for (int i=0; ifiles[i].buf[0] = bfs->files[i].buf[1] = NULL; - } - } - } - //printf("%s:%d here\n", __FILE__, __LINE__); - if (result==0) { - toku_mutex_init(&bfs->mutex, NULL); - mutex_inited = true; - } - if (result==0) { - toku_cond_init(&bfs->cond, NULL); - cond_inited = true; - } - if (result==0) { - bfs->N = N; - bfs->n_not_done = N; - bfs->head = bfs->tail = NULL; - for (int i=0; ifiles[i].fd = fds[i]; - bfs->files[i].offset_in_buf = 0; - bfs->files[i].offset_in_uncompressed_file = 0; - bfs->files[i].next = NULL; - bfs->files[i].second_buf_ready = false; - for (int j=0; j<2; j++) { - if (result==0) { - MALLOC_N(bufsize, bfs->files[i].buf[j]); - if (bfs->files[i].buf[j]==NULL) { result=get_error_errno(); } - } - bfs->files[i].n_in_buf[j] = 0; 
- bfs->files[i].error_code[j] = 0; - } - bfs->files[i].io_done = false; - ssize_t r; - if (bfs->compressed) { - r = dbf_read_compressed(&bfs->files[i], bfs->files[i].buf[0], bufsize); - } else { - r = toku_os_read(bfs->files[i].fd, bfs->files[i].buf[0], bufsize); - } - { - if (r<0) { - result=get_error_errno(); - break; - } else if (r==0) { - // it's EOF - bfs->files[i].io_done = true; - bfs->n_not_done--; - bfs->files[i].error_code[0] = EOF; - } else { - bfs->files[i].n_in_buf[0] = r; - //printf("%s:%d enq [%d]\n", __FILE__, __LINE__, i); - enq(bfs, &bfs->files[i]); - } - } - } - bfs->bufsize = bufsize; - bfs->panic = false; - bfs->panic_errno = 0; - } - //printf("Creating IO thread\n"); - if (result==0) { - result = toku_pthread_create(&bfs->iothread, NULL, io_thread, (void*)bfs); - } - if (result==0) { *bfsp = bfs; return 0; } - // Now undo everything. - // If we got here, there is no thread (either result was zero before the thread was created, or else the thread creation itself failed. - if (bfs) { - if (bfs->files) { - // the files were allocated, so we have to free all the bufs. 
- for (int i=0; ifiles[i].buf[j]) - toku_free(bfs->files[i].buf[j]); - bfs->files[i].buf[j]=NULL; - } - } - toku_free(bfs->files); - bfs->files=NULL; - } - if (cond_inited) { - toku_cond_destroy(&bfs->cond); // don't check error status - } - if (mutex_inited) { - toku_mutex_destroy(&bfs->mutex); // don't check error status - } - toku_free(bfs); - } - return result; -} - -int panic_dbufio_fileset(DBUFIO_FILESET bfs, int error) { - toku_mutex_lock(&bfs->mutex); - panic(bfs, error); - toku_cond_broadcast(&bfs->cond); - toku_mutex_unlock(&bfs->mutex); - return 0; -} - -int destroy_dbufio_fileset (DBUFIO_FILESET bfs) { - int result = 0; - { - void *retval; - int r = toku_pthread_join(bfs->iothread, &retval); - assert(r==0); - assert(retval==NULL); - } - { - toku_mutex_destroy(&bfs->mutex); - } - { - toku_cond_destroy(&bfs->cond); - } - if (bfs->files) { - for (int i=0; iN; i++) { - for (int j=0; j<2; j++) { - //printf("%s:%d free([%d][%d]=%p\n", __FILE__, __LINE__, i,j, bfs->files[i].buf[j]); - toku_free(bfs->files[i].buf[j]); - } - } - toku_free(bfs->files); - } - toku_free(bfs); - return result; -} - -int dbufio_fileset_read (DBUFIO_FILESET bfs, int filenum, void *buf_v, size_t count, size_t *n_read) { - char *buf = (char*)buf_v; - struct dbufio_file *dbf = &bfs->files[filenum]; - if (dbf->error_code[0]!=0) return dbf->error_code[0]; - if (dbf->offset_in_buf + count <= dbf->n_in_buf[0]) { - // Enough data is present to do it all now - memcpy(buf, dbf->buf[0]+dbf->offset_in_buf, count); - dbf->offset_in_buf += count; - dbf->offset_in_uncompressed_file += count; - *n_read = count; - return 0; - } else if (dbf->n_in_buf[0] > dbf->offset_in_buf) { - // There is something in buf[0] - size_t this_count = dbf->n_in_buf[0]-dbf->offset_in_buf; - assert(dbf->offset_in_buf + this_count <= bfs->bufsize); - memcpy(buf, dbf->buf[0]+dbf->offset_in_buf, this_count); - dbf->offset_in_buf += this_count; - dbf->offset_in_uncompressed_file += this_count; - size_t sub_n_read; - int r = 
dbufio_fileset_read(bfs, filenum, buf+this_count, count-this_count, &sub_n_read); - if (r==0) { - *n_read = this_count + sub_n_read; - return 0; - } else { - // The error code will have been saved. We got some data so return that - *n_read = this_count; - return 0; - } - } else { - // There is nothing in buf[0]. So we need to swap buffers - toku_mutex_lock(&bfs->mutex); - while (1) { - if (dbf->second_buf_ready) { - dbf->n_in_buf[0] = dbf->n_in_buf[1]; - { - char *tmp = dbf->buf[0]; - dbf->buf[0] = dbf->buf[1]; - dbf->buf[1] = tmp; - } - dbf->error_code[0] = dbf->error_code[1]; - dbf->second_buf_ready = false; - dbf->offset_in_buf = 0; - if (!dbf->io_done) { - // Don't enqueue it if the I/O is all done. - //printf("%s:%d enq [%ld]\n", __FILE__, __LINE__, dbf-&bfs->files[0]); - enq(bfs, dbf); - } - toku_cond_broadcast(&bfs->cond); - toku_mutex_unlock(&bfs->mutex); - if (dbf->error_code[0]==0) { - assert(dbf->n_in_buf[0]>0); - return dbufio_fileset_read(bfs, filenum, buf_v, count, n_read); - } else { - *n_read = 0; - return dbf->error_code[0]; - } - } else { - toku_cond_wait(&bfs->cond, &bfs->mutex); - } - } - assert(0); // cannot get here. 
- } -} - -void -dbufio_print(DBUFIO_FILESET bfs) { - fprintf(stderr, "%s:%d bfs=%p", __FILE__, __LINE__, bfs); - if (bfs->panic) - fprintf(stderr, " panic=%d", bfs->panic_errno); - fprintf(stderr, " N=%d %d %" PRIuMAX, bfs->N, bfs->n_not_done, bfs->bufsize); - for (int i = 0; i < bfs->N; i++) { - struct dbufio_file *dbf = &bfs->files[i]; - if (dbf->error_code[0] || dbf->error_code[1]) - fprintf(stderr, " %d=[%d,%d]", i, dbf->error_code[0], dbf->error_code[1]); - } - fprintf(stderr, "\n"); - -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/dbufio.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/dbufio.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/dbufio.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/dbufio.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,112 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_DBUFIO_H -#define TOKU_DBUFIO_H -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." - -#include -#include - -/* Maintain a set of files for reading, with double buffering for the reads. */ - -/* A DBUFIO_FILESET is a set of files. The files are indexed from 0 to N-1, where N is specified when the set is created (and the files are also provided when the set is creaed). */ -/* An implementation would typically use a separate thread or asynchronous I/O to fetch ahead data for each file. The system will typically fill two buffers of size M for each file. One buffer is being read out of using dbuf_read(), and the other buffer is either empty (waiting on the asynchronous I/O to start), being filled in by the asynchronous I/O mechanism, or is waiting for the caller to read data from it. 
*/ -typedef struct dbufio_fileset *DBUFIO_FILESET; - -int create_dbufio_fileset (DBUFIO_FILESET *bfsp, int N, int fds[/*N*/], size_t bufsize, bool compressed); - -int destroy_dbufio_fileset(DBUFIO_FILESET); - -int dbufio_fileset_read (DBUFIO_FILESET bfs, int filenum, void *buf_v, size_t count, size_t *n_read); - -int panic_dbufio_fileset(DBUFIO_FILESET, int error); - -void dbufio_print(DBUFIO_FILESET); - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/fifo.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/fifo.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/fifo.cc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/fifo.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,253 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. 
- -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "fifo.h" -#include "xids.h" -#include "ybt.h" -#include -#include - -struct fifo { - int n_items_in_fifo; - char *memory; // An array of bytes into which fifo_entries are embedded. - int memory_size; // How big is fifo_memory - int memory_used; // How many bytes are in use? -}; - -static void fifo_init(struct fifo *fifo) { - fifo->n_items_in_fifo = 0; - fifo->memory = 0; - fifo->memory_size = 0; - fifo->memory_used = 0; -} - -__attribute__((const,nonnull)) -static int fifo_entry_size(struct fifo_entry *entry) { - return sizeof (struct fifo_entry) + entry->keylen + entry->vallen - + xids_get_size(&entry->xids_s) - - sizeof(XIDS_S); //Prevent double counting from fifo_entry+xids_get_size -} - -__attribute__((const,nonnull)) -size_t toku_ft_msg_memsize_in_fifo(FT_MSG msg) { - // This must stay in sync with fifo_entry_size because that's what we - // really trust. But sometimes we only have an in-memory FT_MSG, not - // a serialized fifo_entry so we have to fake it. 
- return sizeof (struct fifo_entry) + msg->u.id.key->size + msg->u.id.val->size - + xids_get_size(msg->xids) - - sizeof(XIDS_S); -} - -int toku_fifo_create(FIFO *ptr) { - struct fifo *XMALLOC(fifo); - if (fifo == 0) return ENOMEM; - fifo_init(fifo); - *ptr = fifo; - return 0; -} - -void toku_fifo_resize(FIFO fifo, size_t new_size) { - XREALLOC_N(new_size, fifo->memory); - fifo->memory_size = new_size; -} - -void toku_fifo_free(FIFO *ptr) { - FIFO fifo = *ptr; - if (fifo->memory) toku_free(fifo->memory); - fifo->memory=0; - toku_free(fifo); - *ptr = 0; -} - -int toku_fifo_n_entries(FIFO fifo) { - return fifo->n_items_in_fifo; -} - -static int next_power_of_two (int n) { - int r = 4096; - while (r < n) { - r*=2; - assert(r>0); - } - return r; -} - -int toku_fifo_enq(FIFO fifo, const void *key, unsigned int keylen, const void *data, unsigned int datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, int32_t *dest) { - int need_space_here = sizeof(struct fifo_entry) - + keylen + datalen - + xids_get_size(xids) - - sizeof(XIDS_S); //Prevent double counting - int need_space_total = fifo->memory_used+need_space_here; - if (fifo->memory == NULL || need_space_total > fifo->memory_size) { - // resize the fifo to the next power of 2 greater than the needed space - int next_2 = next_power_of_two(need_space_total); - toku_fifo_resize(fifo, next_2); - } - struct fifo_entry *entry = (struct fifo_entry *)(fifo->memory + fifo->memory_used); - fifo_entry_set_msg_type(entry, type); - entry->msn = msn; - xids_cpy(&entry->xids_s, xids); - entry->is_fresh = is_fresh; - entry->keylen = keylen; - unsigned char *e_key = xids_get_end_of_array(&entry->xids_s); - memcpy(e_key, key, keylen); - entry->vallen = datalen; - memcpy(e_key + keylen, data, datalen); - if (dest) { - *dest = fifo->memory_used; - } - fifo->n_items_in_fifo++; - fifo->memory_used += need_space_here; - return 0; -} - -int toku_fifo_iterate_internal_start(FIFO UU(fifo)) { return 0; } -int 
toku_fifo_iterate_internal_has_more(FIFO fifo, int off) { return off < fifo->memory_used; } -int toku_fifo_iterate_internal_next(FIFO fifo, int off) { - struct fifo_entry *e = (struct fifo_entry *)(fifo->memory + off); - return off + fifo_entry_size(e); -} -struct fifo_entry * toku_fifo_iterate_internal_get_entry(FIFO fifo, int off) { - return (struct fifo_entry *)(fifo->memory + off); -} -size_t toku_fifo_internal_entry_memsize(struct fifo_entry *e) { - return fifo_entry_size(e); -} - -void toku_fifo_iterate (FIFO fifo, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, void*), void *arg) { - FIFO_ITERATE(fifo, - key, keylen, data, datalen, type, msn, xids, is_fresh, - f(key,keylen,data,datalen,type,msn,xids,is_fresh, arg)); -} - -unsigned int toku_fifo_buffer_size_in_use (FIFO fifo) { - return fifo->memory_used; -} - -unsigned long toku_fifo_memory_size_in_use(FIFO fifo) { - return sizeof(*fifo)+fifo->memory_used; -} - -unsigned long toku_fifo_memory_footprint(FIFO fifo) { - size_t size_used = toku_memory_footprint(fifo->memory, fifo->memory_used); - long rval = sizeof(*fifo) + size_used; - return rval; -} - -DBT *fill_dbt_for_fifo_entry(DBT *dbt, const struct fifo_entry *entry) { - return toku_fill_dbt(dbt, xids_get_end_of_array((XIDS) &entry->xids_s), entry->keylen); -} - -struct fifo_entry *toku_fifo_get_entry(FIFO fifo, int off) { - return toku_fifo_iterate_internal_get_entry(fifo, off); -} - -void toku_fifo_clone(FIFO orig_fifo, FIFO* cloned_fifo) { - struct fifo *XMALLOC(new_fifo); - assert(new_fifo); - new_fifo->n_items_in_fifo = orig_fifo->n_items_in_fifo; - new_fifo->memory_used = orig_fifo->memory_used; - new_fifo->memory_size = new_fifo->memory_used; - XMALLOC_N(new_fifo->memory_size, new_fifo->memory); - memcpy( - new_fifo->memory, - orig_fifo->memory, - new_fifo->memory_size - ); - *cloned_fifo = new_fifo; -} - -bool toku_are_fifos_same(FIFO fifo1, FIFO fifo2) { - return ( - 
fifo1->memory_used == fifo2->memory_used && - memcmp(fifo1->memory, fifo2->memory, fifo1->memory_used) == 0 - ); -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/fifo.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/fifo.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/fifo.h 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/fifo.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,182 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FIFO_H -#define FIFO_H -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "fttypes.h" -#include "xids-internal.h" -#include "xids.h" - - -// If the fifo_entry is unpacked, the compiler aligns the xids array and we waste a lot of space -struct __attribute__((__packed__)) fifo_entry { - unsigned int keylen; - unsigned int vallen; - unsigned char type; - bool is_fresh; - MSN msn; - XIDS_S xids_s; -}; - -// get and set the ft message type for a fifo entry. -// it is internally stored as a single unsigned char. 
-static inline enum ft_msg_type -fifo_entry_get_msg_type(const struct fifo_entry * entry) -{ - enum ft_msg_type msg_type; - msg_type = (enum ft_msg_type) entry->type; - return msg_type; -} - -static inline void -fifo_entry_set_msg_type(struct fifo_entry * entry, - enum ft_msg_type msg_type) -{ - unsigned char type = (unsigned char) msg_type; - entry->type = type; -} - -typedef struct fifo *FIFO; - -int toku_fifo_create(FIFO *); - -void toku_fifo_resize(FIFO fifo, size_t new_size); - -void toku_fifo_free(FIFO *); - -int toku_fifo_n_entries(FIFO); - -int toku_fifo_enq (FIFO, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, int32_t *dest); - -unsigned int toku_fifo_buffer_size_in_use (FIFO fifo); -unsigned long toku_fifo_memory_size_in_use(FIFO fifo); // return how much memory in the fifo holds useful data - -unsigned long toku_fifo_memory_footprint(FIFO fifo); // return how much memory the fifo occupies - -void toku_fifo_iterate(FIFO, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, void*), void*); - -#define FIFO_ITERATE(fifo,keyvar,keylenvar,datavar,datalenvar,typevar,msnvar,xidsvar,is_freshvar,body) ({ \ - for (int fifo_iterate_off = toku_fifo_iterate_internal_start(fifo); \ - toku_fifo_iterate_internal_has_more(fifo, fifo_iterate_off); \ - fifo_iterate_off = toku_fifo_iterate_internal_next(fifo, fifo_iterate_off)) { \ - struct fifo_entry *e = toku_fifo_iterate_internal_get_entry(fifo, fifo_iterate_off); \ - ITEMLEN keylenvar = e->keylen; \ - ITEMLEN datalenvar = e->vallen; \ - enum ft_msg_type typevar = fifo_entry_get_msg_type(e); \ - MSN msnvar = e->msn; \ - XIDS xidsvar = &e->xids_s; \ - bytevec keyvar = xids_get_end_of_array(xidsvar); \ - bytevec datavar = (const uint8_t*)keyvar + e->keylen; \ - bool is_freshvar = e->is_fresh; \ - body; \ - } }) - -#define FIFO_CURRENT_ENTRY_MEMSIZE 
toku_fifo_internal_entry_memsize(e) - -// Internal functions for the iterator. -int toku_fifo_iterate_internal_start(FIFO fifo); -int toku_fifo_iterate_internal_has_more(FIFO fifo, int off); -int toku_fifo_iterate_internal_next(FIFO fifo, int off); -struct fifo_entry * toku_fifo_iterate_internal_get_entry(FIFO fifo, int off); -size_t toku_fifo_internal_entry_memsize(struct fifo_entry *e) __attribute__((const,nonnull)); -size_t toku_ft_msg_memsize_in_fifo(FT_MSG msg) __attribute__((const,nonnull)); - -DBT *fill_dbt_for_fifo_entry(DBT *dbt, const struct fifo_entry *entry); -struct fifo_entry *toku_fifo_get_entry(FIFO fifo, int off); - -void toku_fifo_clone(FIFO orig_fifo, FIFO* cloned_fifo); - -bool toku_are_fifos_same(FIFO fifo1, FIFO fifo2); - - - - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-cachetable-wrappers.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-cachetable-wrappers.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-cachetable-wrappers.cc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-cachetable-wrappers.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,12 +89,15 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include +#include + +#include "ft/serialize/block_table.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-flusher.h" +#include "ft/ft-internal.h" +#include "ft/ft.h" +#include "ft/node.h" -#include -#include -#include -#include #include static void @@ -103,23 +106,23 @@ uint32_t* fullhash, void* extra) { - FT h = (FT) extra; - BLOCKNUM name; - toku_allocate_blocknum(h->blocktable, &name, h); - *cachekey = name; - *fullhash = toku_cachetable_hash(h->cf, name); + FT ft = (FT) extra; + BLOCKNUM blocknum; + ft->blocktable.allocate_blocknum(&blocknum, ft); + *cachekey = blocknum; + *fullhash = toku_cachetable_hash(ft->cf, blocknum); } void cachetable_put_empty_node_with_dep_nodes( - FT h, + FT ft, uint32_t num_dependent_nodes, FTNODE* dependent_nodes, - BLOCKNUM* name, //output + BLOCKNUM* blocknum, //output uint32_t* fullhash, //output FTNODE* result) { - FTNODE XMALLOC(new_node); + FTNODE XCALLOC(new_node); PAIR dependent_pairs[num_dependent_nodes]; enum cachetable_dirty dependent_dirty_bits[num_dependent_nodes]; for (uint32_t i = 0; i < num_dependent_nodes; i++) { @@ -128,18 +131,18 @@ } toku_cachetable_put_with_dep_pairs( - h->cf, + ft->cf, ftnode_get_key_and_fullhash, new_node, make_pair_attr(sizeof(FTNODE)), - get_write_callbacks_for_node(h), - h, + get_write_callbacks_for_node(ft), + ft, num_dependent_nodes, dependent_pairs, dependent_dirty_bits, - name, + blocknum, fullhash, - toku_node_save_ct_pair); + toku_ftnode_save_ct_pair); *result = new_node; } @@ -153,13 +156,13 @@ FTNODE* dependent_nodes) { uint32_t fullhash = 0; - BLOCKNUM name; + BLOCKNUM blocknum; cachetable_put_empty_node_with_dep_nodes( ft, num_dependent_nodes, dependent_nodes, - &name, + &blocknum, &fullhash, result); @@ -170,7 +173,7 @@ toku_initialize_empty_ftnode( *result, - name, + blocknum, height, n_children, ft->h->layout_version, @@ -207,8 +210,8 @@ uint32_t fullhash, UNLOCKERS unlockers, ANCESTORS ancestors, - const PIVOT_BOUNDS bounds, - FTNODE_FETCH_EXTRA bfe, + const 
pivot_bounds &bounds, + ftnode_fetch_extra *bfe, bool apply_ancestor_messages, // this bool is probably temporary, for #3972, once we know how range query estimates work, will revisit this FTNODE *node_p, bool* msgs_applied) @@ -318,10 +321,10 @@ void toku_pin_ftnode_with_dep_nodes( - FT h, + FT ft, BLOCKNUM blocknum, uint32_t fullhash, - FTNODE_FETCH_EXTRA bfe, + ftnode_fetch_extra *bfe, pair_lock_type lock_type, uint32_t num_dependent_nodes, FTNODE *dependent_nodes, @@ -337,12 +340,12 @@ } int r = toku_cachetable_get_and_pin_with_dep_pairs( - h->cf, + ft->cf, blocknum, fullhash, &node_v, NULL, - get_write_callbacks_for_node(h), + get_write_callbacks_for_node(ft), toku_ftnode_fetch_callback, toku_ftnode_pf_req_callback, toku_ftnode_pf_callback, @@ -355,7 +358,7 @@ invariant_zero(r); FTNODE node = (FTNODE) node_v; if (lock_type != PL_READ && node->height > 0 && move_messages) { - toku_move_ftnode_messages_to_stale(h, node); + toku_move_ftnode_messages_to_stale(ft, node); } *node_p = node; } @@ -363,7 +366,7 @@ void toku_pin_ftnode(FT ft, BLOCKNUM blocknum, uint32_t fullhash, - FTNODE_FETCH_EXTRA bfe, + ftnode_fetch_extra *bfe, pair_lock_type lock_type, FTNODE *node_p, bool move_messages) { @@ -408,15 +411,15 @@ // Effect: Swap the blocknum, fullhash, and PAIR for for a and b // Requires: Both nodes are pinned { - BLOCKNUM tmp_blocknum = a->thisnodename; + BLOCKNUM tmp_blocknum = a->blocknum; uint32_t tmp_fullhash = a->fullhash; PAIR tmp_pair = a->ct_pair; - a->thisnodename = b->thisnodename; + a->blocknum = b->blocknum; a->fullhash = b->fullhash; a->ct_pair = b->ct_pair; - b->thisnodename = tmp_blocknum; + b->blocknum = tmp_blocknum; b->fullhash = tmp_fullhash; b->ct_pair = tmp_pair; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-cachetable-wrappers.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-cachetable-wrappers.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-cachetable-wrappers.h 2014-08-03 12:00:38.000000000 +0000 +++ 
mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-cachetable-wrappers.h 2014-10-08 13:19:51.000000000 +0000 @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_CACHETABLE_WRAPPERS_H -#define FT_CACHETABLE_WRAPPERS_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,11 +87,14 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include -#include "cachetable.h" +#include "ft/cachetable/cachetable.h" +#include "ft/ft-internal.h" +#include "ft/node.h" /** * Put an empty node (that is, no fields filled) into the cachetable. 
@@ -102,7 +103,7 @@ */ void cachetable_put_empty_node_with_dep_nodes( - FT h, + FT ft, uint32_t num_dependent_nodes, FTNODE* dependent_nodes, BLOCKNUM* name, //output @@ -117,7 +118,7 @@ */ void create_new_ftnode_with_dep_nodes( - FT h, + FT ft, FTNODE *result, int height, int n_children, @@ -146,8 +147,8 @@ uint32_t fullhash, UNLOCKERS unlockers, ANCESTORS ancestors, - const PIVOT_BOUNDS pbounds, - FTNODE_FETCH_EXTRA bfe, + const pivot_bounds &bounds, + ftnode_fetch_extra *bfe, bool apply_ancestor_messages, // this bool is probably temporary, for #3972, once we know how range query estimates work, will revisit this FTNODE *node_p, bool* msgs_applied @@ -155,10 +156,10 @@ // Pins an ftnode without dependent pairs void toku_pin_ftnode( - FT h, + FT ft, BLOCKNUM blocknum, uint32_t fullhash, - FTNODE_FETCH_EXTRA bfe, + ftnode_fetch_extra *bfe, pair_lock_type lock_type, FTNODE *node_p, bool move_messages @@ -167,10 +168,10 @@ // Pins an ftnode with dependent pairs // Unlike toku_pin_ftnode_for_query, this function blocks until the node is pinned. void toku_pin_ftnode_with_dep_nodes( - FT h, + FT ft, BLOCKNUM blocknum, uint32_t fullhash, - FTNODE_FETCH_EXTRA bfe, + ftnode_fetch_extra *bfe, pair_lock_type lock_type, uint32_t num_dependent_nodes, FTNODE *dependent_nodes, @@ -187,10 +188,8 @@ /** * Effect: Unpin an ftnode. */ -void toku_unpin_ftnode(FT h, FTNODE node); +void toku_unpin_ftnode(FT ft, FTNODE node); void toku_unpin_ftnode_read_only(FT ft, FTNODE node); // Effect: Swaps pair values of two pinned nodes void toku_ftnode_swap_pair_values(FTNODE nodea, FTNODE nodeb); - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,12 +89,17 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "ft.h" -#include "ft-internal.h" -#include "ft-cachetable-wrappers.h" -#include "log-internal.h" +#include -#include +#include "ft/serialize/block_table.h" +#include "ft/ft.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-internal.h" +#include "ft/logger/log-internal.h" +#include "ft/log_header.h" +#include "ft/node.h" +#include "ft/serialize/ft-serialize.h" +#include "ft/serialize/ft_node-serialize.h" #include #include @@ -107,10 +112,10 @@ // hold lock around setting and clearing of dirty bit // (see cooperative use of dirty bit in ft_begin_checkpoint()) - toku_ft_lock (ft); + toku_ft_lock(ft); ft->h->root_xid_that_created = new_root_xid_that_created; ft->h->dirty = 1; - toku_ft_unlock (ft); + toku_ft_unlock(ft); } static void @@ -118,9 +123,10 @@ //header and checkpoint_header have same Blocktable pointer //cannot destroy since it is still in use by CURRENT assert(ft->h->type == FT_CURRENT); - toku_blocktable_destroy(&ft->blocktable); - if (ft->descriptor.dbt.data) toku_free(ft->descriptor.dbt.data); - if (ft->cmp_descriptor.dbt.data) toku_free(ft->cmp_descriptor.dbt.data); + ft->blocktable.destroy(); + ft->cmp.destroy(); + toku_destroy_dbt(&ft->descriptor.dbt); + toku_destroy_dbt(&ft->cmp_descriptor.dbt); toku_ft_destroy_reflock(ft); toku_free(ft->h); } @@ -187,7 +193,7 @@ } // Maps to cf->begin_checkpoint_userdata -// Create checkpoint-in-progress versions of header and translation (btt) (and fifo for now...). 
+// Create checkpoint-in-progress versions of header and translation (btt) // Has access to fd (it is protected). // // Not reentrant for a single FT (see ft_checkpoint) @@ -199,7 +205,7 @@ assert(ft->checkpoint_header == NULL); ft_copy_for_checkpoint_unlocked(ft, checkpoint_lsn); ft->h->dirty = 0; // this is only place this bit is cleared (in currentheader) - toku_block_translation_note_start_checkpoint_unlocked(ft->blocktable); + ft->blocktable.note_start_checkpoint_unlocked(); toku_ft_unlock (ft); } @@ -235,8 +241,6 @@ static void ft_checkpoint (CACHEFILE cf, int fd, void *header_v) { FT ft = (FT) header_v; FT_HEADER ch = ft->checkpoint_header; - //printf("%s:%d allocated_limit=%lu writing queue to %lu\n", __FILE__, __LINE__, - // block_allocator_allocated_limit(h->block_allocator), h->unused_blocks.b*h->nodesize); assert(ch); assert(ch->type == FT_CHECKPOINT_INPROGRESS); if (ch->dirty) { // this is only place this bit is tested (in checkpoint_header) @@ -251,16 +255,15 @@ ft_hack_highest_unused_msn_for_upgrade_for_checkpoint(ft); // write translation and header to disk (or at least to OS internal buffer) - toku_serialize_ft_to(fd, ch, ft->blocktable, ft->cf); + toku_serialize_ft_to(fd, ch, &ft->blocktable, ft->cf); ch->dirty = 0; // this is only place this bit is cleared (in checkpoint_header) // fsync the cachefile toku_cachefile_fsync(cf); ft->h->checkpoint_count++; // checkpoint succeeded, next checkpoint will save to alternate header location ft->h->checkpoint_lsn = ch->checkpoint_lsn; //Header updated. - } - else { - toku_block_translation_note_skipped_checkpoint(ft->blocktable); + } else { + ft->blocktable.note_skipped_checkpoint(); } } @@ -268,14 +271,12 @@ // free unused disk space // (i.e. tell BlockAllocator to liberate blocks used by previous checkpoint). 
// Must have access to fd (protected) -static void ft_end_checkpoint (CACHEFILE UU(cachefile), int fd, void *header_v) { +static void ft_end_checkpoint(CACHEFILE UU(cf), int fd, void *header_v) { FT ft = (FT) header_v; assert(ft->h->type == FT_CURRENT); - toku_block_translation_note_end_checkpoint(ft->blocktable, fd); - if (ft->checkpoint_header) { - toku_free(ft->checkpoint_header); - ft->checkpoint_header = NULL; - } + ft->blocktable.note_end_checkpoint(fd); + toku_free(ft->checkpoint_header); + ft->checkpoint_header = nullptr; } // maps to cf->close_userdata @@ -309,14 +310,16 @@ } } if (ft->h->dirty) { // this is the only place this bit is tested (in currentheader) - if (logger) { //Rollback cachefile MUST NOT BE CLOSED DIRTY - //It can be checkpointed only via 'checkpoint' - assert(logger->rollback_cachefile != cachefile); + bool do_checkpoint = true; + if (logger && logger->rollback_cachefile == cachefile) { + do_checkpoint = false; + } + if (do_checkpoint) { + ft_begin_checkpoint(lsn, header_v); + ft_checkpoint(cachefile, fd, ft); + ft_end_checkpoint(cachefile, fd, header_v); + assert(!ft->h->dirty); // dirty bit should be cleared by begin_checkpoint and never set again (because we're closing the dictionary) } - ft_begin_checkpoint(lsn, header_v); - ft_checkpoint(cachefile, fd, ft); - ft_end_checkpoint(cachefile, fd, header_v); - assert(!ft->h->dirty); // dirty bit should be cleared by begin_checkpoint and never set again (because we're closing the dictionary) } } @@ -358,11 +361,6 @@ // End of Functions that are callbacks to the cachefile ///////////////////////////////////////////////////////////////////////// -void toku_node_save_ct_pair(CACHEKEY UU(key), void *value_data, PAIR p) { - FTNODE CAST_FROM_VOIDP(node, value_data); - node->ct_pair = p; -} - static void setup_initial_ft_root_node(FT ft, BLOCKNUM blocknum) { FTNODE XCALLOC(node); toku_initialize_empty_ftnode(node, blocknum, 0, 1, ft->h->layout_version, ft->h->flags); @@ -373,7 +371,7 @@ 
toku_cachetable_put(ft->cf, blocknum, fullhash, node, make_ftnode_pair_attr(node), get_write_callbacks_for_node(ft), - toku_node_save_ct_pair); + toku_ftnode_save_ct_pair); toku_unpin_ftnode(ft, node); } @@ -384,7 +382,8 @@ toku_list_init(&ft->live_ft_handles); - ft->compare_fun = options->compare_fun; + // intuitively, the comparator points to the FT's cmp descriptor + ft->cmp.create(options->compare_fun, &ft->cmp_descriptor, options->memcmp_magic); ft->update_fun = options->update_fun; if (ft->cf != NULL) { @@ -405,7 +404,7 @@ ft_note_pin_by_checkpoint, ft_note_unpin_by_checkpoint); - toku_block_verify_no_free_blocknums(ft->blocktable); + ft->blocktable.verify_no_free_blocknums(); } @@ -449,16 +448,13 @@ invariant(ftp); FT XCALLOC(ft); - memset(&ft->descriptor, 0, sizeof(ft->descriptor)); - memset(&ft->cmp_descriptor, 0, sizeof(ft->cmp_descriptor)); - ft->h = ft_header_create(options, make_blocknum(0), (txn ? txn->txnid.parent_id64: TXNID_NONE)); toku_ft_init_reflock(ft); // Assign blocknum for root block, also dirty the header - toku_blocktable_create_new(&ft->blocktable); - toku_allocate_blocknum(ft->blocktable, &ft->h->root_blocknum, ft); + ft->blocktable.create(); + ft->blocktable.allocate_blocknum(&ft->h->root_blocknum, ft); ft_init(ft, options, cf); @@ -471,33 +467,29 @@ // If the cachefile has not been initialized, then don't modify anything. // max_acceptable_lsn is the latest acceptable checkpointed version of the file. 
{ - { - FT h; - if ((h = (FT) toku_cachefile_get_userdata(cf))!=0) { - *header = h; - assert(ft_handle->options.update_fun == h->update_fun); - assert(ft_handle->options.compare_fun == h->compare_fun); - return 0; - } - } - FT h = nullptr; - int r; - { - int fd = toku_cachefile_get_fd(cf); - r = toku_deserialize_ft_from(fd, max_acceptable_lsn, &h); - if (r == TOKUDB_BAD_CHECKSUM) { - fprintf(stderr, "Checksum failure while reading header in file %s.\n", toku_cachefile_fname_in_env(cf)); - assert(false); // make absolutely sure we crash before doing anything else - } - } - if (r!=0) return r; - // GCC 4.8 seems to get confused by the gotos in the deserialize code and think h is maybe uninitialized. - invariant_notnull(h); - h->cf = cf; - h->compare_fun = ft_handle->options.compare_fun; - h->update_fun = ft_handle->options.update_fun; + FT ft = nullptr; + if ((ft = (FT) toku_cachefile_get_userdata(cf)) != nullptr) { + *header = ft; + assert(ft_handle->options.update_fun == ft->update_fun); + return 0; + } + + int fd = toku_cachefile_get_fd(cf); + int r = toku_deserialize_ft_from(fd, max_acceptable_lsn, &ft); + if (r == TOKUDB_BAD_CHECKSUM) { + fprintf(stderr, "Checksum failure while reading header in file %s.\n", toku_cachefile_fname_in_env(cf)); + assert(false); // make absolutely sure we crash before doing anything else + } else if (r != 0) { + return r; + } + + invariant_notnull(ft); + // intuitively, the comparator points to the FT's cmp descriptor + ft->cmp.create(ft_handle->options.compare_fun, &ft->cmp_descriptor, ft_handle->options.memcmp_magic); + ft->update_fun = ft_handle->options.update_fun; + ft->cf = cf; toku_cachefile_set_userdata(cf, - (void*)h, + reinterpret_cast(ft), ft_log_fassociate_during_checkpoint, ft_close, ft_free, @@ -506,7 +498,7 @@ ft_end_checkpoint, ft_note_pin_by_checkpoint, ft_note_unpin_by_checkpoint); - *header = h; + *header = ft; return 0; } @@ -548,12 +540,12 @@ } // Verifies there exists exactly one ft handle and returns it. 
-FT_HANDLE toku_ft_get_only_existing_ft_handle(FT h) { +FT_HANDLE toku_ft_get_only_existing_ft_handle(FT ft) { FT_HANDLE ft_handle_ret = NULL; - toku_ft_grab_reflock(h); - assert(toku_list_num_elements_est(&h->live_ft_handles) == 1); - ft_handle_ret = toku_list_struct(toku_list_head(&h->live_ft_handles), struct ft_handle, live_ft_handle_link); - toku_ft_release_reflock(h); + toku_ft_grab_reflock(ft); + assert(toku_list_num_elements_est(&ft->live_ft_handles) == 1); + ft_handle_ret = toku_list_struct(toku_list_head(&ft->live_ft_handles), struct ft_handle, live_ft_handle_link); + toku_ft_release_reflock(ft); return ft_handle_ret; } @@ -618,6 +610,7 @@ .compression_method = compression_method, .fanout = fanout, .flags = 0, + .memcmp_magic = 0, .compare_fun = NULL, .update_fun = NULL }; @@ -628,27 +621,27 @@ // Open an ft for use by redirect. The new ft must have the same dict_id as the old_ft passed in. (FILENUM is assigned by the ft_handle_open() function.) static int -ft_handle_open_for_redirect(FT_HANDLE *new_ftp, const char *fname_in_env, TOKUTXN txn, FT old_h) { - FT_HANDLE t; - assert(old_h->dict_id.dictid != DICTIONARY_ID_NONE.dictid); - toku_ft_handle_create(&t); - toku_ft_set_bt_compare(t, old_h->compare_fun); - toku_ft_set_update(t, old_h->update_fun); - toku_ft_handle_set_nodesize(t, old_h->h->nodesize); - toku_ft_handle_set_basementnodesize(t, old_h->h->basementnodesize); - toku_ft_handle_set_compression_method(t, old_h->h->compression_method); - toku_ft_handle_set_fanout(t, old_h->h->fanout); - CACHETABLE ct = toku_cachefile_get_cachetable(old_h->cf); - int r = toku_ft_handle_open_with_dict_id(t, fname_in_env, 0, 0, ct, txn, old_h->dict_id); +ft_handle_open_for_redirect(FT_HANDLE *new_ftp, const char *fname_in_env, TOKUTXN txn, FT old_ft) { + FT_HANDLE ft_handle; + assert(old_ft->dict_id.dictid != DICTIONARY_ID_NONE.dictid); + toku_ft_handle_create(&ft_handle); + toku_ft_set_bt_compare(ft_handle, old_ft->cmp.get_compare_func()); + 
toku_ft_set_update(ft_handle, old_ft->update_fun); + toku_ft_handle_set_nodesize(ft_handle, old_ft->h->nodesize); + toku_ft_handle_set_basementnodesize(ft_handle, old_ft->h->basementnodesize); + toku_ft_handle_set_compression_method(ft_handle, old_ft->h->compression_method); + toku_ft_handle_set_fanout(ft_handle, old_ft->h->fanout); + CACHETABLE ct = toku_cachefile_get_cachetable(old_ft->cf); + int r = toku_ft_handle_open_with_dict_id(ft_handle, fname_in_env, 0, 0, ct, txn, old_ft->dict_id); if (r != 0) { goto cleanup; } - assert(t->ft->dict_id.dictid == old_h->dict_id.dictid); - *new_ftp = t; + assert(ft_handle->ft->dict_id.dictid == old_ft->dict_id.dictid); + *new_ftp = ft_handle; cleanup: if (r != 0) { - toku_ft_handle_close(t); + toku_ft_handle_close(ft_handle); } return r; } @@ -656,81 +649,81 @@ // This function performs most of the work to redirect a dictionary to different file. // It is called for redirect and to abort a redirect. (This function is almost its own inverse.) static int -dictionary_redirect_internal(const char *dst_fname_in_env, FT src_h, TOKUTXN txn, FT *dst_hp) { +dictionary_redirect_internal(const char *dst_fname_in_env, FT src_ft, TOKUTXN txn, FT *dst_ftp) { int r; - FILENUM src_filenum = toku_cachefile_filenum(src_h->cf); + FILENUM src_filenum = toku_cachefile_filenum(src_ft->cf); FILENUM dst_filenum = FILENUM_NONE; - FT dst_h = NULL; + FT dst_ft = NULL; struct toku_list *list; // open a dummy ft based off of // dst_fname_in_env to get the header // then we will change all the ft's to have - // their headers point to dst_h instead of src_h + // their headers point to dst_ft instead of src_ft FT_HANDLE tmp_dst_ft = NULL; - r = ft_handle_open_for_redirect(&tmp_dst_ft, dst_fname_in_env, txn, src_h); + r = ft_handle_open_for_redirect(&tmp_dst_ft, dst_fname_in_env, txn, src_ft); if (r != 0) { goto cleanup; } - dst_h = tmp_dst_ft->ft; + dst_ft = tmp_dst_ft->ft; // some sanity checks on dst_filenum - dst_filenum = 
toku_cachefile_filenum(dst_h->cf); + dst_filenum = toku_cachefile_filenum(dst_ft->cf); assert(dst_filenum.fileid!=FILENUM_NONE.fileid); assert(dst_filenum.fileid!=src_filenum.fileid); //Cannot be same file. - // for each live ft_handle, ft_handle->ft is currently src_h + // for each live ft_handle, ft_handle->ft is currently src_ft // we want to change it to dummy_dst - toku_ft_grab_reflock(src_h); - while (!toku_list_empty(&src_h->live_ft_handles)) { - list = src_h->live_ft_handles.next; + toku_ft_grab_reflock(src_ft); + while (!toku_list_empty(&src_ft->live_ft_handles)) { + list = src_ft->live_ft_handles.next; FT_HANDLE src_handle = NULL; src_handle = toku_list_struct(list, struct ft_handle, live_ft_handle_link); toku_list_remove(&src_handle->live_ft_handle_link); - toku_ft_note_ft_handle_open(dst_h, src_handle); + toku_ft_note_ft_handle_open(dst_ft, src_handle); if (src_handle->redirect_callback) { src_handle->redirect_callback(src_handle, src_handle->redirect_callback_extra); } } - assert(dst_h); - // making sure that we are not leaking src_h - assert(toku_ft_needed_unlocked(src_h)); - toku_ft_release_reflock(src_h); + assert(dst_ft); + // making sure that we are not leaking src_ft + assert(toku_ft_needed_unlocked(src_ft)); + toku_ft_release_reflock(src_ft); toku_ft_handle_close(tmp_dst_ft); - *dst_hp = dst_h; + *dst_ftp = dst_ft; cleanup: return r; } -//This is the 'abort redirect' function. The redirect of old_h to new_h was done -//and now must be undone, so here we redirect new_h back to old_h. +//This is the 'abort redirect' function. The redirect of old_ft to new_ft was done +//and now must be undone, so here we redirect new_ft back to old_ft. 
int -toku_dictionary_redirect_abort(FT old_h, FT new_h, TOKUTXN txn) { - char *old_fname_in_env = toku_cachefile_fname_in_env(old_h->cf); +toku_dictionary_redirect_abort(FT old_ft, FT new_ft, TOKUTXN txn) { + char *old_fname_in_env = toku_cachefile_fname_in_env(old_ft->cf); int r; { - FILENUM old_filenum = toku_cachefile_filenum(old_h->cf); - FILENUM new_filenum = toku_cachefile_filenum(new_h->cf); + FILENUM old_filenum = toku_cachefile_filenum(old_ft->cf); + FILENUM new_filenum = toku_cachefile_filenum(new_ft->cf); assert(old_filenum.fileid!=new_filenum.fileid); //Cannot be same file. //No living fts in old header. - toku_ft_grab_reflock(old_h); - assert(toku_list_empty(&old_h->live_ft_handles)); - toku_ft_release_reflock(old_h); + toku_ft_grab_reflock(old_ft); + assert(toku_list_empty(&old_ft->live_ft_handles)); + toku_ft_release_reflock(old_ft); } - FT dst_h; - // redirect back from new_h to old_h - r = dictionary_redirect_internal(old_fname_in_env, new_h, txn, &dst_h); + FT dst_ft; + // redirect back from new_ft to old_ft + r = dictionary_redirect_internal(old_fname_in_env, new_ft, txn, &dst_ft); if (r == 0) { - assert(dst_h == old_h); + assert(dst_ft == old_ft); } return r; } @@ -879,18 +872,17 @@ s->verify_time_sec = ft->h->time_of_last_verification; } -void -toku_ft_get_fractal_tree_info64(FT ft, struct ftinfo64 *s) { - toku_blocktable_get_info64(ft->blocktable, s); +void toku_ft_get_fractal_tree_info64(FT ft, struct ftinfo64 *info) { + ft->blocktable.get_info64(info); } int toku_ft_iterate_fractal_tree_block_map(FT ft, int (*iter)(uint64_t,int64_t,int64_t,int64_t,int64_t,void*), void *iter_extra) { uint64_t this_checkpoint_count = ft->h->checkpoint_count; - return toku_blocktable_iterate_translation_tables(ft->blocktable, this_checkpoint_count, iter, iter_extra); + return ft->blocktable.iterate_translation_tables(this_checkpoint_count, iter, iter_extra); } void -toku_ft_update_descriptor(FT ft, DESCRIPTOR d) +toku_ft_update_descriptor(FT ft, DESCRIPTOR 
desc) // Effect: Changes the descriptor in a tree (log the change, make sure it makes it to disk eventually). // requires: the ft is fully user-opened with a valid cachefile. // descriptor updates cannot happen in parallel for an FT @@ -898,7 +890,7 @@ { assert(ft->cf); int fd = toku_cachefile_get_fd(ft->cf); - toku_ft_update_descriptor_with_fd(ft, d, fd); + toku_ft_update_descriptor_with_fd(ft, desc, fd); } // upadate the descriptor for an ft and serialize it using @@ -907,41 +899,30 @@ // update a descriptor before the ft is fully opened and has // a valid cachefile. void -toku_ft_update_descriptor_with_fd(FT ft, DESCRIPTOR d, int fd) { +toku_ft_update_descriptor_with_fd(FT ft, DESCRIPTOR desc, int fd) { // the checksum is four bytes, so that's where the magic number comes from // make space for the new descriptor and write it out to disk DISKOFF offset, size; - size = toku_serialize_descriptor_size(d) + 4; - toku_realloc_descriptor_on_disk(ft->blocktable, size, &offset, ft, fd); - toku_serialize_descriptor_contents_to_fd(fd, d, offset); + size = toku_serialize_descriptor_size(desc) + 4; + ft->blocktable.realloc_descriptor_on_disk(size, &offset, ft, fd); + toku_serialize_descriptor_contents_to_fd(fd, desc, offset); // cleanup the old descriptor and set the in-memory descriptor to the new one - if (ft->descriptor.dbt.data) { - toku_free(ft->descriptor.dbt.data); - } - ft->descriptor.dbt.size = d->dbt.size; - ft->descriptor.dbt.data = toku_memdup(d->dbt.data, d->dbt.size); + toku_destroy_dbt(&ft->descriptor.dbt); + toku_clone_dbt(&ft->descriptor.dbt, desc->dbt); } -void -toku_ft_update_cmp_descriptor(FT ft) { - if (ft->cmp_descriptor.dbt.data != NULL) { - toku_free(ft->cmp_descriptor.dbt.data); - } - ft->cmp_descriptor.dbt.size = ft->descriptor.dbt.size; - ft->cmp_descriptor.dbt.data = toku_xmemdup( - ft->descriptor.dbt.data, - ft->descriptor.dbt.size - ); +void toku_ft_update_cmp_descriptor(FT ft) { + // cleanup the old cmp descriptor and clone it as the in-memory 
descriptor + toku_destroy_dbt(&ft->cmp_descriptor.dbt); + toku_clone_dbt(&ft->cmp_descriptor.dbt, ft->descriptor.dbt); } -DESCRIPTOR -toku_ft_get_descriptor(FT_HANDLE ft_handle) { +DESCRIPTOR toku_ft_get_descriptor(FT_HANDLE ft_handle) { return &ft_handle->ft->descriptor; } -DESCRIPTOR -toku_ft_get_cmp_descriptor(FT_HANDLE ft_handle) { +DESCRIPTOR toku_ft_get_cmp_descriptor(FT_HANDLE ft_handle) { return &ft_handle->ft->cmp_descriptor; } @@ -1066,8 +1047,8 @@ struct garbage_helper_extra *CAST_FROM_VOIDP(info, extra); FTNODE node; FTNODE_DISK_DATA ndd; - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, info->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(info->ft); int fd = toku_cachefile_get_fd(info->ft->cf); int r = toku_deserialize_ftnode_from(fd, blocknum, 0, &node, &ndd, &bfe); if (r != 0) { @@ -1101,7 +1082,7 @@ .total_space = 0, .used_space = 0 }; - toku_blocktable_iterate(ft->blocktable, TRANSLATION_CHECKPOINTED, garbage_helper, &info, true, true); + ft->blocktable.iterate(block_table::TRANSLATION_CHECKPOINTED, garbage_helper, &info, true, true); *total_space = info.total_space; *used_space = info.used_space; } @@ -1111,8 +1092,6 @@ #error #endif - - #define xstr(X) str(X) #define str(X) #X #define static_version_string xstr(DB_VERSION_MAJOR) "." \ @@ -1122,10 +1101,9 @@ struct toku_product_name_strings_struct toku_product_name_strings; char toku_product_name[TOKU_MAX_PRODUCT_NAME_LENGTH]; -void -tokudb_update_product_name_strings(void) { - //DO ALL STRINGS HERE.. maybe have a separate FT layer version as well - { // Version string +void tokuft_update_product_name_strings(void) { + // DO ALL STRINGS HERE.. 
maybe have a separate FT layer version as well + { int n = snprintf(toku_product_name_strings.db_version, sizeof(toku_product_name_strings.db_version), "%s %s", toku_product_name, static_version_string); @@ -1177,7 +1155,7 @@ *lockfd = toku_os_lock_file(lockfname); if (*lockfd < 0) { int e = get_error_errno(); - fprintf(stderr, "Couldn't start tokudb because some other tokudb process is using the same directory [%s] for [%s]\n", lock_dir, which); + fprintf(stderr, "Couldn't start tokuft because some other tokuft process is using the same directory [%s] for [%s]\n", lock_dir, which); return e; } return 0; @@ -1195,10 +1173,10 @@ return 0; } -int tokudb_num_envs = 0; +int tokuft_num_envs = 0; int db_env_set_toku_product_name(const char *name) { - if (tokudb_num_envs > 0) { + if (tokuft_num_envs > 0) { return EINVAL; } if (!name || strlen(name) < 1) { @@ -1209,7 +1187,7 @@ } if (strncmp(toku_product_name, name, sizeof(toku_product_name))) { strcpy(toku_product_name, name); - tokudb_update_product_name_strings(); + tokuft_update_product_name_strings(); } return 0; } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-flusher.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-flusher.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-flusher.cc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-flusher.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,22 +89,27 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include -#include -#include -#include -#include -#include -#include -#include -#include +#include + +#include "ft/ft.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-internal.h" +#include "ft/ft-flusher.h" +#include "ft/ft-flusher-internal.h" +#include "ft/node.h" +#include "ft/serialize/block_table.h" +#include "ft/serialize/ft_node-serialize.h" +#include "portability/toku_assert.h" +#include "portability/toku_atomic.h" +#include "util/status.h" +#include "util/context.h" /* Status is intended for display to humans to help understand system behavior. * It does not need to be perfectly thread-safe. */ static FT_FLUSHER_STATUS_S ft_flusher_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ft_flusher_status, k, c, t, "ft flusher: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ft_flusher_status, k, c, t, "ft flusher: " l, inc) #define STATUS_VALUE(x) ft_flusher_status.status[x].value.num void toku_ft_flusher_status_init(void) { @@ -179,25 +184,21 @@ find_heaviest_child(FTNODE node) { int max_child = 0; - int max_weight = toku_bnc_nbytesinbuf(BNC(node, 0)) + BP_WORKDONE(node, 0); - int i; + uint64_t max_weight = toku_bnc_nbytesinbuf(BNC(node, 0)) + BP_WORKDONE(node, 0); - if (0) printf("%s:%d weights: %d", __FILE__, __LINE__, max_weight); - paranoid_invariant(node->n_children>0); - for (i=1; in_children; i++) { -#ifdef TOKU_DEBUG_PARANOID - if (BP_WORKDONE(node,i)) { - assert(toku_bnc_nbytesinbuf(BNC(node,i)) > 0); + invariant(node->n_children > 0); + for (int i = 1; i < node->n_children; i++) { + uint64_t bytes_in_buf = toku_bnc_nbytesinbuf(BNC(node, i)); + uint64_t workdone = BP_WORKDONE(node, i); + if (workdone > 0) { + invariant(bytes_in_buf > 0); } -#endif - int this_weight = toku_bnc_nbytesinbuf(BNC(node,i)) + BP_WORKDONE(node,i);; - if (0) printf(" %d", this_weight); + uint64_t this_weight = bytes_in_buf + workdone; if (max_weight < this_weight) { max_child = i; max_weight = this_weight; } } - if (0) printf("\n"); return 
max_child; } @@ -235,7 +236,7 @@ } static void -maybe_destroy_child_blbs(FTNODE node, FTNODE child, FT h) +maybe_destroy_child_blbs(FTNODE node, FTNODE child, FT ft) { // If the node is already fully in memory, as in upgrade, we don't // need to destroy the basement nodes because they are all equally @@ -247,7 +248,7 @@ if (BP_STATE(child, i) == PT_AVAIL && node->max_msn_applied_to_node_on_disk.msn < BLB_MAX_MSN_APPLIED(child, i).msn) { - toku_evict_bn_from_memory(child, i, h); + toku_evict_bn_from_memory(child, i, ft); } } } @@ -255,14 +256,14 @@ static void ft_merge_child( - FT h, + FT ft, FTNODE node, int childnum_to_merge, bool *did_react, struct flusher_advice *fa); static int -pick_heaviest_child(FT UU(h), +pick_heaviest_child(FT UU(ft), FTNODE parent, void* UU(extra)) { @@ -307,11 +308,11 @@ recurse_if_child_is_gorged(FTNODE child, void* extra) { struct flush_status_update_extra *fste = (flush_status_update_extra *)extra; - return toku_ft_nonleaf_is_gorged(child, fste->nodesize); + return toku_ftnode_nonleaf_is_gorged(child, fste->nodesize); } int -default_pick_child_after_split(FT UU(h), +default_pick_child_after_split(FT UU(ft), FTNODE UU(parent), int UU(childnuma), int UU(childnumb), @@ -322,7 +323,7 @@ void default_merge_child(struct flusher_advice *fa, - FT h, + FT ft, FTNODE parent, int childnum, FTNODE child, @@ -334,13 +335,13 @@ // we are just going to unpin child and // let ft_merge_child pin it again // - toku_unpin_ftnode(h, child); + toku_unpin_ftnode(ft, child); // // // it is responsibility of ft_merge_child to unlock parent // bool did_react; - ft_merge_child(h, parent, childnum, &did_react, fa); + ft_merge_child(ft, parent, childnum, &did_react, fa); } void @@ -397,7 +398,7 @@ }; static int -ctm_pick_child(FT h, +ctm_pick_child(FT ft, FTNODE parent, void* extra) { @@ -405,13 +406,8 @@ int childnum; if (parent->height == 1 && ctme->is_last_child) { childnum = parent->n_children - 1; - } - else { - childnum = toku_ftnode_which_child( - parent, 
- &ctme->target_key, - &h->cmp_descriptor, - h->compare_fun); + } else { + childnum = toku_ftnode_which_child(parent, &ctme->target_key, ft->cmp); } return childnum; } @@ -428,7 +424,7 @@ static void ctm_maybe_merge_child(struct flusher_advice *fa, - FT h, + FT ft, FTNODE parent, int childnum, FTNODE child, @@ -437,19 +433,19 @@ if (child->height == 0) { (void) toku_sync_fetch_and_add(&STATUS_VALUE(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_COMPLETED), 1); } - default_merge_child(fa, h, parent, childnum, child, extra); + default_merge_child(fa, ft, parent, childnum, child, extra); } static void ct_maybe_merge_child(struct flusher_advice *fa, - FT h, + FT ft, FTNODE parent, int childnum, FTNODE child, void* extra) { if (child->height > 0) { - default_merge_child(fa, h, parent, childnum, child, extra); + default_merge_child(fa, ft, parent, childnum, child, extra); } else { struct ctm_extra ctme; @@ -471,8 +467,7 @@ ctme.is_last_child = false; pivot_to_save = childnum; } - const DBT *pivot = &parent->childkeys[pivot_to_save]; - toku_clone_dbt(&ctme.target_key, *pivot); + toku_clone_dbt(&ctme.target_key, parent->pivotkeys.get_pivot(pivot_to_save)); // at this point, ctme is properly setup, now we can do the merge struct flusher_advice new_fa; @@ -486,24 +481,24 @@ default_pick_child_after_split, &ctme); - toku_unpin_ftnode(h, parent); - toku_unpin_ftnode(h, child); + toku_unpin_ftnode(ft, parent); + toku_unpin_ftnode(ft, child); FTNODE root_node = NULL; { uint32_t fullhash; CACHEKEY root; - toku_calculate_root_offset_pointer(h, &root, &fullhash); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, h); - toku_pin_ftnode(h, root, fullhash, &bfe, PL_WRITE_EXPENSIVE, &root_node, true); - toku_assert_entire_node_in_memory(root_node); + toku_calculate_root_offset_pointer(ft, &root, &fullhash); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + toku_pin_ftnode(ft, root, fullhash, &bfe, PL_WRITE_EXPENSIVE, &root_node, true); + 
toku_ftnode_assert_fully_in_memory(root_node); } (void) toku_sync_fetch_and_add(&STATUS_VALUE(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_STARTED), 1); (void) toku_sync_fetch_and_add(&STATUS_VALUE(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING), 1); - toku_ft_flush_some_child(h, root_node, &new_fa); + toku_ft_flush_some_child(ft, root_node, &new_fa); (void) toku_sync_fetch_and_sub(&STATUS_VALUE(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING), 1); @@ -545,13 +540,12 @@ // a leaf node that is not entirely in memory. If so, then // we cannot be sure if the node is reactive. // -static bool may_node_be_reactive(FT ft, FTNODE node) +static bool ft_ftnode_may_be_reactive(FT ft, FTNODE node) { if (node->height == 0) { return true; - } - else { - return (get_nonleaf_reactivity(node, ft->h->fanout) != RE_STABLE); + } else { + return toku_ftnode_get_nonleaf_reactivity(node, ft->h->fanout) != RE_STABLE; } } @@ -576,51 +570,48 @@ paranoid_invariant(node->height>0); paranoid_invariant(0 <= childnum); paranoid_invariant(childnum < node->n_children); - toku_assert_entire_node_in_memory(node); - toku_assert_entire_node_in_memory(childa); - toku_assert_entire_node_in_memory(childb); + toku_ftnode_assert_fully_in_memory(node); + toku_ftnode_assert_fully_in_memory(childa); + toku_ftnode_assert_fully_in_memory(childb); NONLEAF_CHILDINFO old_bnc = BNC(node, childnum); paranoid_invariant(toku_bnc_nbytesinbuf(old_bnc)==0); - int cnum; WHEN_NOT_GCOV( - if (toku_ft_debug_mode) { - int i; - printf("%s:%d Child %d splitting on %s\n", __FILE__, __LINE__, childnum, (char*)splitk->data); - printf("%s:%d oldsplitkeys:", __FILE__, __LINE__); - for(i=0; in_children-1; i++) printf(" %s", (char *) node->childkeys[i].data); - printf("\n"); - } - ) + if (toku_ft_debug_mode) { + printf("%s:%d Child %d splitting on %s\n", __FILE__, __LINE__, childnum, (char*)splitk->data); + printf("%s:%d oldsplitkeys:", __FILE__, __LINE__); + for(int i = 0; i < node->n_children - 1; i++) printf(" %s", (char *) 
node->pivotkeys.get_pivot(i).data); + printf("\n"); + } + ) node->dirty = 1; XREALLOC_N(node->n_children+1, node->bp); - XREALLOC_N(node->n_children, node->childkeys); // Slide the children over. // suppose n_children is 10 and childnum is 5, meaning node->childnum[5] just got split // this moves node->bp[6] through node->bp[9] over to // node->bp[7] through node->bp[10] - for (cnum=node->n_children; cnum>childnum+1; cnum--) { + for (int cnum=node->n_children; cnum>childnum+1; cnum--) { node->bp[cnum] = node->bp[cnum-1]; } memset(&node->bp[childnum+1],0,sizeof(node->bp[0])); node->n_children++; - paranoid_invariant(BP_BLOCKNUM(node, childnum).b==childa->thisnodename.b); // use the same child + paranoid_invariant(BP_BLOCKNUM(node, childnum).b==childa->blocknum.b); // use the same child // We never set the rightmost blocknum to be the root. // Instead, we wait for the root to split and let promotion initialize the rightmost // blocknum to be the first non-root leaf node on the right extreme to recieve an insert. invariant(ft->h->root_blocknum.b != ft->rightmost_blocknum.b); - if (childa->thisnodename.b == ft->rightmost_blocknum.b) { + if (childa->blocknum.b == ft->rightmost_blocknum.b) { // The rightmost leaf (a) split into (a) and (b). We want (b) to swap pair values // with (a), now that it is the new rightmost leaf. This keeps the rightmost blocknum // constant, the same the way we keep the root blocknum constant. 
toku_ftnode_swap_pair_values(childa, childb); - BP_BLOCKNUM(node, childnum) = childa->thisnodename; + BP_BLOCKNUM(node, childnum) = childa->blocknum; } - BP_BLOCKNUM(node, childnum+1) = childb->thisnodename; + BP_BLOCKNUM(node, childnum+1) = childb->blocknum; BP_WORKDONE(node, childnum+1) = 0; BP_STATE(node,childnum+1) = PT_AVAIL; @@ -633,29 +624,21 @@ } set_BNC(node, childnum+1, new_bnc); - // Slide the keys over - { - for (cnum=node->n_children-2; cnum>childnum; cnum--) { - toku_copy_dbt(&node->childkeys[cnum], node->childkeys[cnum-1]); - } - //if (logger) assert((t->flags&TOKU_DB_DUPSORT)==0); // the setpivot is wrong for TOKU_DB_DUPSORT, so recovery will be broken. - toku_copy_dbt(&node->childkeys[childnum], *splitk); - node->totalchildkeylens += splitk->size; - } + // Insert the new split key , sliding the other keys over + node->pivotkeys.insert_at(splitk, childnum); WHEN_NOT_GCOV( - if (toku_ft_debug_mode) { - int i; - printf("%s:%d splitkeys:", __FILE__, __LINE__); - for(i=0; in_children-2; i++) printf(" %s", (char*)node->childkeys[i].data); - printf("\n"); - } - ) + if (toku_ft_debug_mode) { + printf("%s:%d splitkeys:", __FILE__, __LINE__); + for (int i = 0; i < node->n_children - 2; i++) printf(" %s", (char *) node->pivotkeys.get_pivot(i).data); + printf("\n"); + } + ) /* Keep pushing to the children, but not if the children would require a pushdown */ - toku_assert_entire_node_in_memory(node); - toku_assert_entire_node_in_memory(childa); - toku_assert_entire_node_in_memory(childb); + toku_ftnode_assert_fully_in_memory(node); + toku_ftnode_assert_fully_in_memory(childa); + toku_ftnode_assert_fully_in_memory(childb); VERIFY_NODE(t, node); VERIFY_NODE(t, childa); @@ -680,7 +663,7 @@ // Effect: get the disk size of a leafentry { paranoid_invariant(node->height == 0); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); uint64_t retval = 0; for (int i = 0; i < node->n_children; i++) { retval += BLB_DATA(node, 
i)->get_disk_size(); @@ -771,8 +754,8 @@ static void ftnode_finalize_split(FTNODE node, FTNODE B, MSN max_msn_applied_to_node) { // Effect: Finalizes a split by updating some bits and dirtying both nodes - toku_assert_entire_node_in_memory(node); - toku_assert_entire_node_in_memory(B); + toku_ftnode_assert_fully_in_memory(node); + toku_ftnode_assert_fully_in_memory(B); verify_all_in_mempool(node); verify_all_in_mempool(B); @@ -788,7 +771,7 @@ void ftleaf_split( - FT h, + FT ft, FTNODE node, FTNODE *nodea, FTNODE *nodeb, @@ -837,7 +820,7 @@ // So, we must call this before evaluating // those two values cachetable_put_empty_node_with_dep_nodes( - h, + ft, num_dependent_nodes, dependent_nodes, &name, @@ -851,7 +834,7 @@ paranoid_invariant(node->height==0); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); verify_all_in_mempool(node); MSN max_msn_applied_to_node = node->max_msn_applied_to_node_on_disk; @@ -893,13 +876,12 @@ name, 0, num_children_in_b, - h->h->layout_version, - h->h->flags); + ft->h->layout_version, + ft->h->flags); B->fullhash = fullhash; } else { B = *nodeb; - REALLOC_N(num_children_in_b-1, B->childkeys); REALLOC_N(num_children_in_b, B->bp); B->n_children = num_children_in_b; for (int i = 0; i < num_children_in_b; i++) { @@ -951,20 +933,10 @@ // the child index in the original node that corresponds to the // first node in the right node of the split - int base_index = num_left_bns - (split_on_boundary ? 
0 : 1); - // make pivots in B - for (int i=0; i < num_children_in_b-1; i++) { - toku_copy_dbt(&B->childkeys[i], node->childkeys[i+base_index]); - B->totalchildkeylens += node->childkeys[i+base_index].size; - node->totalchildkeylens -= node->childkeys[i+base_index].size; - toku_init_dbt(&node->childkeys[i+base_index]); - } - if (split_on_boundary && num_left_bns < node->n_children) { - if (splitk) { - toku_copy_dbt(splitk, node->childkeys[num_left_bns - 1]); - } else { - toku_destroy_dbt(&node->childkeys[num_left_bns - 1]); - } + int split_idx = num_left_bns - (split_on_boundary ? 0 : 1); + node->pivotkeys.split_at(split_idx, &B->pivotkeys); + if (split_on_boundary && num_left_bns < node->n_children && splitk) { + toku_copyref_dbt(splitk, node->pivotkeys.get_pivot(num_left_bns - 1)); } else if (splitk) { bn_data* bd = BLB_DATA(node, num_left_bns - 1); uint32_t keylen; @@ -976,7 +948,6 @@ node->n_children = num_children_in_node; REALLOC_N(num_children_in_node, node->bp); - REALLOC_N(num_children_in_node-1, node->childkeys); } ftnode_finalize_split(node, B, max_msn_applied_to_node); @@ -986,7 +957,7 @@ void ft_nonleaf_split( - FT h, + FT ft, FTNODE node, FTNODE *nodea, FTNODE *nodeb, @@ -996,7 +967,7 @@ { //VERIFY_NODE(t,node); STATUS_VALUE(FT_FLUSHER_SPLIT_NONLEAF)++; - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); int old_n_children = node->n_children; int n_children_in_a = old_n_children/2; int n_children_in_b = old_n_children-n_children_in_a; @@ -1004,14 +975,12 @@ FTNODE B; paranoid_invariant(node->height>0); paranoid_invariant(node->n_children>=2); // Otherwise, how do we split? We need at least two children to split. */ - create_new_ftnode_with_dep_nodes(h, &B, node->height, n_children_in_b, num_dependent_nodes, dependent_nodes); + create_new_ftnode_with_dep_nodes(ft, &B, node->height, n_children_in_b, num_dependent_nodes, dependent_nodes); { /* The first n_children_in_a go into node a. 
* That means that the first n_children_in_a-1 keys go into node a. * The splitter key is key number n_children_in_a */ - int i; - - for (i=n_children_in_a; ibp[targchild] = node->bp[i]; memset(&node->bp[i], 0, sizeof(node->bp[0])); - - // Delete a child, removing the preceeding pivot key. The child number must be > 0 - { - paranoid_invariant(i>0); - if (i>n_children_in_a) { - toku_copy_dbt(&B->childkeys[targchild-1], node->childkeys[i-1]); - B->totalchildkeylens += node->childkeys[i-1].size; - node->totalchildkeylens -= node->childkeys[i-1].size; - toku_init_dbt(&node->childkeys[i-1]); - } - } } - node->n_children=n_children_in_a; + // the split key for our parent is the rightmost pivot key in node + node->pivotkeys.split_at(n_children_in_a, &B->pivotkeys); + toku_clone_dbt(splitk, node->pivotkeys.get_pivot(n_children_in_a - 1)); + node->pivotkeys.delete_at(n_children_in_a - 1); - toku_copy_dbt(splitk, node->childkeys[n_children_in_a-1]); - node->totalchildkeylens -= node->childkeys[n_children_in_a-1].size; - - REALLOC_N(n_children_in_a, node->bp); - REALLOC_N(n_children_in_a-1, node->childkeys); + node->n_children = n_children_in_a; + REALLOC_N(node->n_children, node->bp); } ftnode_finalize_split(node, B, max_msn_applied_to_node); @@ -1060,7 +1018,7 @@ // static void ft_split_child( - FT h, + FT ft, FTNODE node, int childnum, FTNODE child, @@ -1079,12 +1037,12 @@ dep_nodes[0] = node; dep_nodes[1] = child; if (child->height==0) { - ftleaf_split(h, child, &nodea, &nodeb, &splitk, true, split_mode, 2, dep_nodes); + ftleaf_split(ft, child, &nodea, &nodeb, &splitk, true, split_mode, 2, dep_nodes); } else { - ft_nonleaf_split(h, child, &nodea, &nodeb, &splitk, 2, dep_nodes); + ft_nonleaf_split(ft, child, &nodea, &nodeb, &splitk, 2, dep_nodes); } // printf("%s:%d child did split\n", __FILE__, __LINE__); - handle_split_of_child (h, node, childnum, nodea, nodeb, &splitk); + handle_split_of_child (ft, node, childnum, nodea, nodeb, &splitk); // for test 
call_flusher_thread_callback(flt_flush_during_split); @@ -1093,42 +1051,44 @@ // now we need to unlock node, // and possibly continue // flushing one of the children - int picked_child = fa->pick_child_after_split(h, node, childnum, childnum + 1, fa->extra); - toku_unpin_ftnode(h, node); + int picked_child = fa->pick_child_after_split(ft, node, childnum, childnum + 1, fa->extra); + toku_unpin_ftnode(ft, node); if (picked_child == childnum || (picked_child < 0 && nodea->height > 0 && fa->should_recursively_flush(nodea, fa->extra))) { - toku_unpin_ftnode(h, nodeb); - toku_ft_flush_some_child(h, nodea, fa); + toku_unpin_ftnode(ft, nodeb); + toku_ft_flush_some_child(ft, nodea, fa); } else if (picked_child == childnum + 1 || (picked_child < 0 && nodeb->height > 0 && fa->should_recursively_flush(nodeb, fa->extra))) { - toku_unpin_ftnode(h, nodea); - toku_ft_flush_some_child(h, nodeb, fa); + toku_unpin_ftnode(ft, nodea); + toku_ft_flush_some_child(ft, nodeb, fa); } else { - toku_unpin_ftnode(h, nodea); - toku_unpin_ftnode(h, nodeb); + toku_unpin_ftnode(ft, nodea); + toku_unpin_ftnode(ft, nodeb); } + + toku_destroy_dbt(&splitk); } static void bring_node_fully_into_memory(FTNODE node, FT ft) { - if (!is_entire_node_in_memory(node)) { - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); + if (!toku_ftnode_fully_in_memory(node)) { + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); toku_cachetable_pf_pinned_pair( node, toku_ftnode_pf_callback, &bfe, ft->cf, - node->thisnodename, - toku_cachetable_hash(ft->cf, node->thisnodename) + node->blocknum, + toku_cachetable_hash(ft->cf, node->blocknum) ); } } static void flush_this_child( - FT h, + FT ft, FTNODE node, FTNODE child, int childnum, @@ -1136,14 +1096,14 @@ // Effect: Push everything in the CHILDNUMth buffer of node down into the child. 
{ update_flush_status(child, 0); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); if (fa->should_destroy_basement_nodes(fa)) { - maybe_destroy_child_blbs(node, child, h); + maybe_destroy_child_blbs(node, child, ft); } - bring_node_fully_into_memory(child, h); - toku_assert_entire_node_in_memory(child); + bring_node_fully_into_memory(child, ft); + toku_ftnode_assert_fully_in_memory(child); paranoid_invariant(node->height>0); - paranoid_invariant(child->thisnodename.b!=0); + paranoid_invariant(child->blocknum.b!=0); // VERIFY_NODE does not work off client thread as of now //VERIFY_NODE(t, child); node->dirty = 1; @@ -1155,7 +1115,7 @@ // now we have a bnc to flush to the child. pass down the parent's // oldest known referenced xid as we flush down to the child. - toku_bnc_flush_to_child(h, bnc, child, node->oldest_referenced_xid_known); + toku_bnc_flush_to_child(ft, bnc, child, node->oldest_referenced_xid_known); destroy_nonleaf_childinfo(bnc); } @@ -1163,8 +1123,8 @@ merge_leaf_nodes(FTNODE a, FTNODE b) { STATUS_VALUE(FT_FLUSHER_MERGE_LEAF)++; - toku_assert_entire_node_in_memory(a); - toku_assert_entire_node_in_memory(b); + toku_ftnode_assert_fully_in_memory(a); + toku_ftnode_assert_fully_in_memory(b); paranoid_invariant(a->height == 0); paranoid_invariant(b->height == 0); paranoid_invariant(a->n_children > 0); @@ -1186,52 +1146,47 @@ // of a gets eliminated because we do not have a pivot to store for it (because it has no elements) const bool a_has_tail = a_last_bd->num_klpairs() > 0; - // move each basement node from b to a - // move the pivots, adding one of what used to be max(a) - // move the estimates int num_children = a->n_children + b->n_children; if (!a_has_tail) { - uint lastchild = a->n_children-1; + int lastchild = a->n_children - 1; BASEMENTNODE bn = BLB(a, lastchild); - { - // verify that last basement in a is empty, then destroy mempool - size_t used_space = a_last_bd->get_disk_size(); - 
invariant_zero(used_space); - } + + // verify that last basement in a is empty, then destroy mempool + size_t used_space = a_last_bd->get_disk_size(); + invariant_zero(used_space); destroy_basement_node(bn); - set_BNULL(a, a->n_children-1); + set_BNULL(a, lastchild); num_children--; - } - - //realloc pivots and basement nodes in a - REALLOC_N(num_children, a->bp); - REALLOC_N(num_children-1, a->childkeys); - - // fill in pivot for what used to be max of node 'a', if it is needed - if (a_has_tail) { + if (lastchild < a->pivotkeys.num_pivots()) { + a->pivotkeys.delete_at(lastchild); + } + } else { + // fill in pivot for what used to be max of node 'a', if it is needed uint32_t keylen; void *key; - int rr = a_last_bd->fetch_key_and_len(a_last_bd->num_klpairs() - 1, &keylen, &key); - invariant_zero(rr); - toku_memdup_dbt(&a->childkeys[a->n_children-1], key, keylen); - a->totalchildkeylens += keylen; + int r = a_last_bd->fetch_key_and_len(a_last_bd->num_klpairs() - 1, &keylen, &key); + invariant_zero(r); + DBT pivotkey; + toku_fill_dbt(&pivotkey, key, keylen); + a->pivotkeys.replace_at(&pivotkey, a->n_children - 1); } + // realloc basement nodes in `a' + REALLOC_N(num_children, a->bp); + + // move each basement node from b to a uint32_t offset = a_has_tail ? 
a->n_children : a->n_children - 1; for (int i = 0; i < b->n_children; i++) { - a->bp[i+offset] = b->bp[i]; - memset(&b->bp[i],0,sizeof(b->bp[0])); - if (i < (b->n_children-1)) { - toku_copy_dbt(&a->childkeys[i+offset], b->childkeys[i]); - toku_init_dbt(&b->childkeys[i]); - } + a->bp[i + offset] = b->bp[i]; + memset(&b->bp[i], 0, sizeof(b->bp[0])); } - a->totalchildkeylens += b->totalchildkeylens; - a->n_children = num_children; + + // append b's pivots to a's pivots + a->pivotkeys.append(b->pivotkeys); // now that all the data has been moved from b to a, we can destroy the data in b - // b can remain untouched, as it will be destroyed later - b->totalchildkeylens = 0; + a->n_children = num_children; + b->pivotkeys.destroy(); b->n_children = 0; } @@ -1255,7 +1210,7 @@ maybe_merge_pinned_leaf_nodes( FTNODE a, FTNODE b, - DBT *parent_splitk, + const DBT *parent_splitk, bool *did_merge, bool *did_rebalance, DBT *splitk, @@ -1268,7 +1223,7 @@ { unsigned int sizea = toku_serialize_ftnode_size(a); unsigned int sizeb = toku_serialize_ftnode_size(b); - uint32_t num_leafentries = get_leaf_num_entries(a) + get_leaf_num_entries(b); + uint32_t num_leafentries = toku_ftnode_leaf_num_entries(a) + toku_ftnode_leaf_num_entries(b); if (num_leafentries > 1 && (sizea + sizeb)*4 > (nodesize*3)) { // the combined size is more than 3/4 of a node, so don't merge them. *did_merge = false; @@ -1279,7 +1234,6 @@ return; } // one is less than 1/4 of a node, and together they are more than 3/4 of a node. - toku_destroy_dbt(parent_splitk); // We don't need the parent_splitk any more. If we need a splitk (if we don't merge) we'll malloc a new one. *did_rebalance = true; balance_leaf_nodes(a, b, splitk); } else { @@ -1287,7 +1241,6 @@ *did_merge = true; *did_rebalance = false; toku_init_dbt(splitk); - toku_destroy_dbt(parent_splitk); // if we are merging, the splitk gets freed. 
merge_leaf_nodes(a, b); } } @@ -1301,28 +1254,20 @@ bool *did_rebalance, DBT *splitk) { - toku_assert_entire_node_in_memory(a); - toku_assert_entire_node_in_memory(b); - paranoid_invariant(parent_splitk->data); + toku_ftnode_assert_fully_in_memory(a); + toku_ftnode_assert_fully_in_memory(b); + invariant_notnull(parent_splitk->data); + int old_n_children = a->n_children; int new_n_children = old_n_children + b->n_children; + XREALLOC_N(new_n_children, a->bp); - memcpy(a->bp + old_n_children, - b->bp, - b->n_children*sizeof(b->bp[0])); - memset(b->bp,0,b->n_children*sizeof(b->bp[0])); - - XREALLOC_N(new_n_children-1, a->childkeys); - toku_copy_dbt(&a->childkeys[old_n_children-1], *parent_splitk); - a->totalchildkeylens += parent_splitk->size; - for (int i = 0; i < b->n_children - 1; ++i) { - toku_copy_dbt(&a->childkeys[old_n_children + i], b->childkeys[i]); - a->totalchildkeylens += b->childkeys[i].size; - toku_init_dbt(&b->childkeys[i]); - } - a->n_children = new_n_children; + memcpy(a->bp + old_n_children, b->bp, b->n_children * sizeof(b->bp[0])); + memset(b->bp, 0, b->n_children * sizeof(b->bp[0])); - b->totalchildkeylens = 0; + a->pivotkeys.insert_at(parent_splitk, old_n_children - 1); + a->pivotkeys.append(b->pivotkeys); + a->n_children = new_n_children; b->n_children = 0; a->dirty = 1; @@ -1338,7 +1283,7 @@ static void maybe_merge_pinned_nodes( FTNODE parent, - DBT *parent_splitk, + const DBT *parent_splitk, FTNODE a, FTNODE b, bool *did_merge, @@ -1366,9 +1311,9 @@ { MSN msn_max; paranoid_invariant(a->height == b->height); - toku_assert_entire_node_in_memory(parent); - toku_assert_entire_node_in_memory(a); - toku_assert_entire_node_in_memory(b); + toku_ftnode_assert_fully_in_memory(parent); + toku_ftnode_assert_fully_in_memory(a); + toku_ftnode_assert_fully_in_memory(b); parent->dirty = 1; // just to make sure { MSN msna = a->max_msn_applied_to_node_on_disk; @@ -1389,13 +1334,9 @@ } } -static void merge_remove_key_callback( - BLOCKNUM *bp, - bool 
for_checkpoint, - void *extra) -{ - FT h = (FT) extra; - toku_free_blocknum(h->blocktable, bp, h, for_checkpoint); +static void merge_remove_key_callback(BLOCKNUM *bp, bool for_checkpoint, void *extra) { + FT ft = (FT) extra; + ft->blocktable.free_blocknum(bp, ft, for_checkpoint); } // @@ -1404,7 +1345,7 @@ // static void ft_merge_child( - FT h, + FT ft, FTNODE node, int childnum_to_merge, bool *did_react, @@ -1413,7 +1354,7 @@ // this function should not be called // if the child is not mergable paranoid_invariant(node->n_children > 1); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); int childnuma,childnumb; if (childnum_to_merge > 0) { @@ -1435,10 +1376,10 @@ FTNODE childa, childb; { - uint32_t childfullhash = compute_child_fullhash(h->cf, node, childnuma); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, h); - toku_pin_ftnode_with_dep_nodes(h, BP_BLOCKNUM(node, childnuma), childfullhash, &bfe, PL_WRITE_EXPENSIVE, 1, &node, &childa, true); + uint32_t childfullhash = compute_child_fullhash(ft->cf, node, childnuma); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + toku_pin_ftnode_with_dep_nodes(ft, BP_BLOCKNUM(node, childnuma), childfullhash, &bfe, PL_WRITE_EXPENSIVE, 1, &node, &childa, true); } // for test call_flusher_thread_callback(flt_flush_before_pin_second_node_for_merge); @@ -1446,17 +1387,17 @@ FTNODE dep_nodes[2]; dep_nodes[0] = node; dep_nodes[1] = childa; - uint32_t childfullhash = compute_child_fullhash(h->cf, node, childnumb); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, h); - toku_pin_ftnode_with_dep_nodes(h, BP_BLOCKNUM(node, childnumb), childfullhash, &bfe, PL_WRITE_EXPENSIVE, 2, dep_nodes, &childb, true); + uint32_t childfullhash = compute_child_fullhash(ft->cf, node, childnumb); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + toku_pin_ftnode_with_dep_nodes(ft, BP_BLOCKNUM(node, childnumb), childfullhash, &bfe, PL_WRITE_EXPENSIVE, 2, dep_nodes, &childb, 
true); } if (toku_bnc_n_entries(BNC(node,childnuma))>0) { - flush_this_child(h, node, childa, childnuma, fa); + flush_this_child(ft, node, childa, childnuma, fa); } if (toku_bnc_n_entries(BNC(node,childnumb))>0) { - flush_this_child(h, node, childb, childnumb, fa); + flush_this_child(ft, node, childb, childnumb, fa); } // now we have both children pinned in main memory, and cachetable locked, @@ -1466,26 +1407,14 @@ { DBT splitk; toku_init_dbt(&splitk); - DBT *old_split_key = &node->childkeys[childnuma]; - unsigned int deleted_size = old_split_key->size; - maybe_merge_pinned_nodes(node, &node->childkeys[childnuma], childa, childb, &did_merge, &did_rebalance, &splitk, h->h->nodesize); - if (childa->height>0) { - for (int i=0; i+1n_children; i++) { - paranoid_invariant(childa->childkeys[i].data); - } - } + const DBT old_split_key = node->pivotkeys.get_pivot(childnuma); + maybe_merge_pinned_nodes(node, &old_split_key, childa, childb, &did_merge, &did_rebalance, &splitk, ft->h->nodesize); //toku_verify_estimates(t,childa); // the tree did react if a merge (did_merge) or rebalance (new spkit key) occurred *did_react = (bool)(did_merge || did_rebalance); - if (did_merge) { - paranoid_invariant(!splitk.data); - } else { - paranoid_invariant(splitk.data); - } - - node->totalchildkeylens -= deleted_size; // The key was free()'d inside the maybe_merge_pinned_nodes. 
if (did_merge) { + invariant_null(splitk.data); NONLEAF_CHILDINFO remaining_bnc = BNC(node, childnuma); NONLEAF_CHILDINFO merged_bnc = BNC(node, childnumb); for (unsigned int i = 0; i < (sizeof remaining_bnc->flow) / (sizeof remaining_bnc->flow[0]); ++i) { @@ -1498,19 +1427,16 @@ &node->bp[childnumb+1], (node->n_children-childnumb)*sizeof(node->bp[0])); REALLOC_N(node->n_children, node->bp); - memmove(&node->childkeys[childnuma], - &node->childkeys[childnuma+1], - (node->n_children-childnumb)*sizeof(node->childkeys[0])); - REALLOC_N(node->n_children-1, node->childkeys); + node->pivotkeys.delete_at(childnuma); // Handle a merge of the rightmost leaf node. - if (did_merge && childb->thisnodename.b == h->rightmost_blocknum.b) { - invariant(childb->thisnodename.b != h->h->root_blocknum.b); + if (did_merge && childb->blocknum.b == ft->rightmost_blocknum.b) { + invariant(childb->blocknum.b != ft->h->root_blocknum.b); toku_ftnode_swap_pair_values(childa, childb); - BP_BLOCKNUM(node, childnuma) = childa->thisnodename; + BP_BLOCKNUM(node, childnuma) = childa->blocknum; } - paranoid_invariant(BP_BLOCKNUM(node, childnuma).b == childa->thisnodename.b); + paranoid_invariant(BP_BLOCKNUM(node, childnuma).b == childa->blocknum.b); childa->dirty = 1; // just to make sure childb->dirty = 1; // just to make sure } else { @@ -1519,10 +1445,11 @@ // pretty far down the tree) // If we didn't merge the nodes, then we need the correct pivot. 
- toku_copy_dbt(&node->childkeys[childnuma], splitk); - node->totalchildkeylens += node->childkeys[childnuma].size; + invariant_notnull(splitk.data); + node->pivotkeys.replace_at(&splitk, childnuma); node->dirty = 1; } + toku_destroy_dbt(&splitk); } // // now we possibly flush the children @@ -1533,10 +1460,10 @@ // merge_remove_key_callback will free the blocknum int rrb = toku_cachetable_unpin_and_remove( - h->cf, + ft->cf, childb->ct_pair, merge_remove_key_callback, - h + ft ); assert_zero(rrb); @@ -1545,7 +1472,7 @@ // unlock the parent paranoid_invariant(node->dirty); - toku_unpin_ftnode(h, node); + toku_unpin_ftnode(ft, node); } else { // for test @@ -1553,14 +1480,14 @@ // unlock the parent paranoid_invariant(node->dirty); - toku_unpin_ftnode(h, node); - toku_unpin_ftnode(h, childb); + toku_unpin_ftnode(ft, node); + toku_unpin_ftnode(ft, childb); } if (childa->height > 0 && fa->should_recursively_flush(childa, fa->extra)) { - toku_ft_flush_some_child(h, childa, fa); + toku_ft_flush_some_child(ft, childa, fa); } else { - toku_unpin_ftnode(h, childa); + toku_unpin_ftnode(ft, childa); } } @@ -1577,7 +1504,7 @@ int dirtied = 0; NONLEAF_CHILDINFO bnc = NULL; paranoid_invariant(parent->height>0); - toku_assert_entire_node_in_memory(parent); + toku_ftnode_assert_fully_in_memory(parent); TXNID parent_oldest_referenced_xid_known = parent->oldest_referenced_xid_known; // pick the child we want to flush to @@ -1588,13 +1515,13 @@ // get the child into memory BLOCKNUM targetchild = BP_BLOCKNUM(parent, childnum); - toku_verify_blocknum_allocated(ft->blocktable, targetchild); + ft->blocktable.verify_blocknum_allocated(targetchild); uint32_t childfullhash = compute_child_fullhash(ft->cf, parent, childnum); FTNODE child; - struct ftnode_fetch_extra bfe; + ftnode_fetch_extra bfe; // Note that we don't read the entire node into memory yet. 
// The idea is let's try to do the minimum work before releasing the parent lock - fill_bfe_for_min_read(&bfe, ft); + bfe.create_for_min_read(ft); toku_pin_ftnode_with_dep_nodes(ft, targetchild, childfullhash, &bfe, PL_WRITE_EXPENSIVE, 1, &parent, &child, true); // for test @@ -1608,9 +1535,9 @@ // Let's do a quick check to see if the child may be reactive // If the child cannot be reactive, then we can safely unlock // the parent before finishing reading in the entire child node. - bool may_child_be_reactive = may_node_be_reactive(ft, child); + bool may_child_be_reactive = ft_ftnode_may_be_reactive(ft, child); - paranoid_invariant(child->thisnodename.b!=0); + paranoid_invariant(child->blocknum.b!=0); // only do the following work if there is a flush to perform if (toku_bnc_n_entries(BNC(parent, childnum)) > 0 || parent->height == 1) { @@ -1649,7 +1576,7 @@ // we wont be splitting/merging child // and we have already replaced the bnc // for the root with a fresh one - enum reactivity child_re = get_node_reactivity(ft, child); + enum reactivity child_re = toku_ftnode_get_reactivity(ft, child); if (parent && child_re == RE_STABLE) { toku_unpin_ftnode(ft, parent); parent = NULL; @@ -1679,7 +1606,7 @@ // let's get the reactivity of the child again, // it is possible that the flush got rid of some values // and now the parent is no longer reactive - child_re = get_node_reactivity(ft, child); + child_re = toku_ftnode_get_reactivity(ft, child); // if the parent has been unpinned above, then // this is our only option, even if the child is not stable // if the child is not stable, we'll handle it the next @@ -1724,6 +1651,78 @@ } } +void toku_bnc_flush_to_child(FT ft, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID parent_oldest_referenced_xid_known) { + paranoid_invariant(bnc); + + TOKULOGGER logger = toku_cachefile_logger(ft->cf); + TXN_MANAGER txn_manager = logger != nullptr ? 
toku_logger_get_txn_manager(logger) : nullptr; + TXNID oldest_referenced_xid_for_simple_gc = TXNID_NONE; + + txn_manager_state txn_state_for_gc(txn_manager); + bool do_garbage_collection = child->height == 0 && txn_manager != nullptr; + if (do_garbage_collection) { + txn_state_for_gc.init(); + oldest_referenced_xid_for_simple_gc = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager); + } + txn_gc_info gc_info(&txn_state_for_gc, + oldest_referenced_xid_for_simple_gc, + child->oldest_referenced_xid_known, + true); + struct flush_msg_fn { + FT ft; + FTNODE child; + NONLEAF_CHILDINFO bnc; + txn_gc_info *gc_info; + + STAT64INFO_S stats_delta; + size_t remaining_memsize = bnc->msg_buffer.buffer_size_in_use(); + + flush_msg_fn(FT t, FTNODE n, NONLEAF_CHILDINFO nl, txn_gc_info *g) : + ft(t), child(n), bnc(nl), gc_info(g), remaining_memsize(bnc->msg_buffer.buffer_size_in_use()) { + stats_delta = { 0, 0 }; + } + int operator()(const ft_msg &msg, bool is_fresh) { + size_t flow_deltas[] = { 0, 0 }; + size_t memsize_in_buffer = message_buffer::msg_memsize_in_buffer(msg); + if (remaining_memsize <= bnc->flow[0]) { + // this message is in the current checkpoint's worth of + // the end of the message buffer + flow_deltas[0] = memsize_in_buffer; + } else if (remaining_memsize <= bnc->flow[0] + bnc->flow[1]) { + // this message is in the last checkpoint's worth of the + // end of the message buffer + flow_deltas[1] = memsize_in_buffer; + } + toku_ftnode_put_msg( + ft->cmp, + ft->update_fun, + child, + -1, + msg, + is_fresh, + gc_info, + flow_deltas, + &stats_delta + ); + remaining_memsize -= memsize_in_buffer; + return 0; + } + } flush_fn(ft, child, bnc, &gc_info); + bnc->msg_buffer.iterate(flush_fn); + + child->oldest_referenced_xid_known = parent_oldest_referenced_xid_known; + + invariant(flush_fn.remaining_memsize == 0); + if (flush_fn.stats_delta.numbytes || flush_fn.stats_delta.numrows) { + toku_ft_update_stats(&ft->in_memory_stats, flush_fn.stats_delta); + } + if 
(do_garbage_collection) { + size_t buffsize = bnc->msg_buffer.buffer_size_in_use(); + // may be misleading if there's a broadcast message in there + toku_ft_status_note_msg_bytes_out(buffsize); + } +} + static void update_cleaner_status( FTNODE node, @@ -1841,11 +1840,11 @@ void *extraargs) { FTNODE node = (FTNODE) ftnode_pv; - invariant(node->thisnodename.b == blocknum.b); + invariant(node->blocknum.b == blocknum.b); invariant(node->fullhash == fullhash); invariant(node->height > 0); // we should never pick a leaf node (for now at least) - FT h = (FT) extraargs; - bring_node_fully_into_memory(node, h); + FT ft = (FT) extraargs; + bring_node_fully_into_memory(node, ft); int childnum = find_heaviest_child(node); update_cleaner_status(node, childnum); @@ -1853,16 +1852,16 @@ if (toku_bnc_nbytesinbuf(BNC(node, childnum)) > 0) { struct flusher_advice fa; struct flush_status_update_extra fste; - ct_flusher_advice_init(&fa, &fste, h->h->nodesize); - toku_ft_flush_some_child(h, node, &fa); + ct_flusher_advice_init(&fa, &fste, ft->h->nodesize); + toku_ft_flush_some_child(ft, node, &fa); } else { - toku_unpin_ftnode(h, node); + toku_unpin_ftnode(ft, node); } return 0; } struct flusher_extra { - FT h; + FT ft; FTNODE node; NONLEAF_CHILDINFO bnc; TXNID parent_oldest_referenced_xid_known; @@ -1887,12 +1886,12 @@ // destroyed its basement nodes if necessary, so we now need to either // read them back in, or just do the regular partial fetch. If we // don't, that means fe->node is a parent, so we need to do this anyway. 
- bring_node_fully_into_memory(fe->node,fe->h); + bring_node_fully_into_memory(fe->node,fe->ft); fe->node->dirty = 1; struct flusher_advice fa; struct flush_status_update_extra fste; - flt_flusher_advice_init(&fa, &fste, fe->h->h->nodesize); + flt_flusher_advice_init(&fa, &fste, fe->ft->h->nodesize); if (fe->bnc) { // In this case, we have a bnc to flush to a node @@ -1901,7 +1900,7 @@ call_flusher_thread_callback(flt_flush_before_applying_inbox); toku_bnc_flush_to_child( - fe->h, + fe->ft, fe->bnc, fe->node, fe->parent_oldest_referenced_xid_known @@ -1912,11 +1911,11 @@ // If so, call toku_ft_flush_some_child on the node (because this flush intends to // pass a meaningful oldest referenced xid for simple garbage collection), and it is the // responsibility of the flush to unlock the node. otherwise, we unlock it here. - if (fe->node->height > 0 && toku_ft_nonleaf_is_gorged(fe->node, fe->h->h->nodesize)) { - toku_ft_flush_some_child(fe->h, fe->node, &fa); + if (fe->node->height > 0 && toku_ftnode_nonleaf_is_gorged(fe->node, fe->ft->h->nodesize)) { + toku_ft_flush_some_child(fe->ft, fe->node, &fa); } else { - toku_unpin_ftnode(fe->h,fe->node); + toku_unpin_ftnode(fe->ft,fe->node); } } else { @@ -1924,25 +1923,25 @@ // bnc, which means we are tasked with flushing some // buffer in the node. 
// It is the responsibility of flush some child to unlock the node - toku_ft_flush_some_child(fe->h, fe->node, &fa); + toku_ft_flush_some_child(fe->ft, fe->node, &fa); } - remove_background_job_from_cf(fe->h->cf); + remove_background_job_from_cf(fe->ft->cf); toku_free(fe); } static void place_node_and_bnc_on_background_thread( - FT h, + FT ft, FTNODE node, NONLEAF_CHILDINFO bnc, TXNID parent_oldest_referenced_xid_known) { struct flusher_extra *XMALLOC(fe); - fe->h = h; + fe->ft = ft; fe->node = node; fe->bnc = bnc; fe->parent_oldest_referenced_xid_known = parent_oldest_referenced_xid_known; - cachefile_kibbutz_enq(h->cf, flush_node_fun, fe); + cachefile_kibbutz_enq(ft->cf, flush_node_fun, fe); } // @@ -1958,7 +1957,7 @@ // child needs to be split/merged), then we place the parent on the background thread. // The parent will be unlocked on the background thread // -void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent) +void toku_ft_flush_node_on_background_thread(FT ft, FTNODE parent) { toku::context flush_ctx(CTX_FLUSH); TXNID parent_oldest_referenced_xid_known = parent->oldest_referenced_xid_known; @@ -1972,24 +1971,24 @@ // see if we can pin the child // FTNODE child; - uint32_t childfullhash = compute_child_fullhash(h->cf, parent, childnum); - int r = toku_maybe_pin_ftnode_clean(h, BP_BLOCKNUM(parent, childnum), childfullhash, PL_WRITE_EXPENSIVE, &child); + uint32_t childfullhash = compute_child_fullhash(ft->cf, parent, childnum); + int r = toku_maybe_pin_ftnode_clean(ft, BP_BLOCKNUM(parent, childnum), childfullhash, PL_WRITE_EXPENSIVE, &child); if (r != 0) { // In this case, we could not lock the child, so just place the parent on the background thread // In the callback, we will use toku_ft_flush_some_child, which checks to // see if we should blow away the old basement nodes. 
- place_node_and_bnc_on_background_thread(h, parent, NULL, parent_oldest_referenced_xid_known); + place_node_and_bnc_on_background_thread(ft, parent, NULL, parent_oldest_referenced_xid_known); } else { // // successfully locked child // - bool may_child_be_reactive = may_node_be_reactive(h, child); + bool may_child_be_reactive = ft_ftnode_may_be_reactive(ft, child); if (!may_child_be_reactive) { // We're going to unpin the parent, so before we do, we must // check to see if we need to blow away the basement nodes to // keep the MSN invariants intact. - maybe_destroy_child_blbs(parent, child, h); + maybe_destroy_child_blbs(parent, child, ft); // // can detach buffer and unpin root here @@ -2007,17 +2006,17 @@ // so, because we know for sure the child is not // reactive, we can unpin the parent // - place_node_and_bnc_on_background_thread(h, child, bnc, parent_oldest_referenced_xid_known); - toku_unpin_ftnode(h, parent); + place_node_and_bnc_on_background_thread(ft, child, bnc, parent_oldest_referenced_xid_known); + toku_unpin_ftnode(ft, parent); } else { // because the child may be reactive, we need to // put parent on background thread. // As a result, we unlock the child here. - toku_unpin_ftnode(h, child); + toku_unpin_ftnode(ft, child); // Again, we'll have the parent on the background thread, so // we don't need to destroy the basement nodes yet. 
- place_node_and_bnc_on_background_thread(h, parent, NULL, parent_oldest_referenced_xid_known); + place_node_and_bnc_on_background_thread(ft, parent, NULL, parent_oldest_referenced_xid_known); } } } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-flusher.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-flusher.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-flusher.h 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-flusher.h 2014-10-08 13:19:51.000000000 +0000 @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_FLUSHER_H -#define FT_FLUSHER_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,11 +86,12 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -// This must be first to make the 64-bit file mode work right in Linux -#include "fttypes.h" +#include "ft/ft-internal.h" typedef enum { FT_FLUSHER_CLEANER_TOTAL_NODES = 0, // total number of nodes whose buffers are potentially flushed by cleaner thread @@ -152,10 +151,31 @@ * Puts a workitem on the flusher thread queue, scheduling the node to be * flushed by toku_ft_flush_some_child. 
*/ -void -toku_ft_flush_node_on_background_thread( +void toku_ft_flush_node_on_background_thread(FT ft, FTNODE parent); + +enum split_mode { + SPLIT_EVENLY, + SPLIT_LEFT_HEAVY, + SPLIT_RIGHT_HEAVY +}; + + +// Given pinned node and pinned child, split child into two +// and update node with information about its new child. +void toku_ft_split_child( FT ft, - FTNODE parent + FTNODE node, + int childnum, + FTNODE child, + enum split_mode split_mode + ); + +// Given pinned node, merge childnum with a neighbor and update node with +// information about the change +void toku_ft_merge_child( + FT ft, + FTNODE node, + int childnum ); /** @@ -166,9 +186,10 @@ * nodea is the left node that results from the split * splitk is the right-most key of nodea */ +// TODO: Rename toku_ft_leaf_split void ftleaf_split( - FT h, + FT ft, FTNODE node, FTNODE *nodea, FTNODE *nodeb, @@ -189,8 +210,9 @@ * but it does not guarantee that the resulting nodes are smaller than nodesize. */ void +// TODO: Rename toku_ft_nonleaf_split ft_nonleaf_split( - FT h, + FT ft, FTNODE node, FTNODE *nodea, FTNODE *nodeb, @@ -199,8 +221,6 @@ FTNODE* dependent_nodes ); - - /************************************************************************ * HOT optimize, should perhaps be factored out to its own header file * ************************************************************************ @@ -231,7 +251,5 @@ */ int toku_ft_hot_optimize(FT_HANDLE ft_h, DBT* left, DBT* right, - int (*progress_callback)(void *extra, float progress), - void *progress_extra, uint64_t* loops_run); - -#endif // End of header guardian. 
+ int (*progress_callback)(void *extra, float progress), + void *progress_extra, uint64_t* loops_run); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-flusher-internal.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-flusher-internal.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-flusher-internal.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-flusher-internal.h 2014-10-08 13:19:51.000000000 +0000 @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_FLUSHER_INTERNAL_H -#define FT_FLUSHER_INTERNAL_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,11 +86,11 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include - #define flt_flush_before_applying_inbox 1 #define flt_flush_before_child_pin 2 #define ft_flush_aflter_child_pin 3 @@ -115,7 +113,7 @@ * Cleaner thread merging leaf nodes: follow down to a key * Hot optimize table: follow down to the right of a key */ -typedef int (*FA_PICK_CHILD)(FT h, FTNODE parent, void* extra); +typedef int (*FA_PICK_CHILD)(FT ft, FTNODE parent, void* extra); /** * Decide whether to call `toku_ft_flush_some_child` on the child if it is @@ -139,7 +137,7 @@ * Hot optimize table: just do the merge */ typedef void (*FA_MAYBE_MERGE_CHILD)(struct flusher_advice *fa, - FT h, + FT ft, FTNODE parent, int childnum, FTNODE child, @@ -172,7 +170,7 @@ * by `ft_split_child`. If -1 is returned, `ft_split_child` defaults to * the old behavior. */ -typedef int (*FA_PICK_CHILD_AFTER_SPLIT)(FT h, +typedef int (*FA_PICK_CHILD_AFTER_SPLIT)(FT ft, FTNODE node, int childnuma, int childnumb, @@ -223,18 +221,16 @@ void default_merge_child(struct flusher_advice *fa, - FT h, + FT ft, FTNODE parent, int childnum, FTNODE child, void* extra); int -default_pick_child_after_split(FT h, +default_pick_child_after_split(FT ft, FTNODE parent, int childnuma, int childnumb, void *extra); - -#endif // End of header guardian. diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft.h 2014-10-08 13:19:51.000000000 +0000 @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_H -#define FT_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,17 +86,20 @@ under this License. 
*/ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "fttypes.h" -#include "ybt.h" #include -#include "cachetable.h" -#include "log.h" -#include "ft-search.h" -#include "ft-ops.h" -#include "compress.h" + +#include "ft/cachetable/cachetable.h" +#include "ft/ft-ops.h" +#include "ft/logger/log.h" +#include "util/dbt.h" + +typedef struct ft *FT; +typedef struct ft_options *FT_OPTIONS; // unlink a ft from the filesystem with or without a txn. // if with a txn, then the unlink happens on commit. @@ -110,8 +111,11 @@ void toku_ft_grab_reflock(FT ft); void toku_ft_release_reflock(FT ft); +void toku_ft_lock(struct ft *ft); +void toku_ft_unlock(struct ft *ft); + void toku_ft_create(FT *ftp, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn); -void toku_ft_free (FT h); +void toku_ft_free (FT ft); int toku_read_ft_and_store_in_cachefile (FT_HANDLE ft_h, CACHEFILE cf, LSN max_acceptable_lsn, FT *header); void toku_ft_note_ft_handle_open(FT ft, FT_HANDLE live); @@ -123,7 +127,7 @@ // will have to read in the ft in a new cachefile and new FT object. 
void toku_ft_evict_from_memory(FT ft, bool oplsn_valid, LSN oplsn); -FT_HANDLE toku_ft_get_only_existing_ft_handle(FT h); +FT_HANDLE toku_ft_get_only_existing_ft_handle(FT ft); void toku_ft_note_hot_begin(FT_HANDLE ft_h); void toku_ft_note_hot_complete(FT_HANDLE ft_h, bool success, MSN msn_at_start_of_hot); @@ -142,29 +146,29 @@ int toku_dictionary_redirect_abort(FT old_h, FT new_h, TOKUTXN txn) __attribute__ ((warn_unused_result)); int toku_dictionary_redirect (const char *dst_fname_in_env, FT_HANDLE old_ft, TOKUTXN txn); -void toku_reset_root_xid_that_created(FT h, TXNID new_root_xid_that_created); +void toku_reset_root_xid_that_created(FT ft, TXNID new_root_xid_that_created); // Reset the root_xid_that_created field to the given value. // This redefines which xid created the dictionary. -void toku_ft_add_txn_ref(FT h); -void toku_ft_remove_txn_ref(FT h); +void toku_ft_add_txn_ref(FT ft); +void toku_ft_remove_txn_ref(FT ft); -void toku_calculate_root_offset_pointer ( FT h, CACHEKEY* root_key, uint32_t *roothash); -void toku_ft_set_new_root_blocknum(FT h, CACHEKEY new_root_key); -LSN toku_ft_checkpoint_lsn(FT h) __attribute__ ((warn_unused_result)); -void toku_ft_stat64 (FT h, struct ftstat64_s *s); -void toku_ft_get_fractal_tree_info64 (FT h, struct ftinfo64 *s); +void toku_calculate_root_offset_pointer (FT ft, CACHEKEY* root_key, uint32_t *roothash); +void toku_ft_set_new_root_blocknum(FT ft, CACHEKEY new_root_key); +LSN toku_ft_checkpoint_lsn(FT ft) __attribute__ ((warn_unused_result)); +void toku_ft_stat64 (FT ft, struct ftstat64_s *s); +void toku_ft_get_fractal_tree_info64 (FT ft, struct ftinfo64 *s); int toku_ft_iterate_fractal_tree_block_map(FT ft, int (*iter)(uint64_t,int64_t,int64_t,int64_t,int64_t,void*), void *iter_extra); // unconditionally set the descriptor for an open FT. can't do this when // any operation has already occurred on the ft. // see toku_ft_change_descriptor(), which is the transactional version // used by the ydb layer. 
it better describes the client contract. -void toku_ft_update_descriptor(FT ft, DESCRIPTOR d); +void toku_ft_update_descriptor(FT ft, DESCRIPTOR desc); // use this version if the FT is not fully user-opened with a valid cachefile. // this is a clean hack to get deserialization code to update a descriptor // while the FT and cf are in the process of opening, for upgrade purposes -void toku_ft_update_descriptor_with_fd(FT ft, DESCRIPTOR d, int fd); +void toku_ft_update_descriptor_with_fd(FT ft, DESCRIPTOR desc, int fd); void toku_ft_update_cmp_descriptor(FT ft); // get the descriptor for a ft. safe to read as long as clients honor the @@ -174,9 +178,17 @@ DESCRIPTOR toku_ft_get_descriptor(FT_HANDLE ft_handle); DESCRIPTOR toku_ft_get_cmp_descriptor(FT_HANDLE ft_handle); +typedef struct { + // delta versions in basements could be negative + int64_t numrows; + int64_t numbytes; +} STAT64INFO_S, *STAT64INFO; +static const STAT64INFO_S ZEROSTATS = { .numrows = 0, .numbytes = 0}; + void toku_ft_update_stats(STAT64INFO headerstats, STAT64INFO_S delta); void toku_ft_decrease_stats(STAT64INFO headerstats, STAT64INFO_S delta); +typedef void (*remove_ft_ref_callback)(FT ft, void *extra); void toku_ft_remove_reference(FT ft, bool oplsn_valid, LSN oplsn, remove_ft_ref_callback remove_ref, void *extra); @@ -189,7 +201,6 @@ void toku_ft_get_compression_method(FT ft, enum toku_compression_method *methodp); void toku_ft_set_fanout(FT ft, unsigned int fanout); void toku_ft_get_fanout(FT ft, unsigned int *fanout); -void toku_node_save_ct_pair(CACHEKEY UU(key), void *value_data, PAIR p); // mark the ft as a blackhole. any message injections will be a no op. void toku_ft_set_blackhole(FT_HANDLE ft_handle); @@ -198,15 +209,17 @@ // The difference between the two is MVCC garbage. 
void toku_ft_get_garbage(FT ft, uint64_t *total_space, uint64_t *used_space); +// TODO: Should be in portability int get_num_cores(void); + +// TODO: Use the cachetable's worker pool instead of something managed by the FT... struct toku_thread_pool *get_ft_pool(void); -void dump_bad_block(unsigned char *vp, uint64_t size); +// TODO: Should be in portability int toku_single_process_lock(const char *lock_dir, const char *which, int *lockfd); - int toku_single_process_unlock(int *lockfd); -void tokudb_update_product_name_strings(void); +void tokuft_update_product_name_strings(void); #define TOKU_MAX_PRODUCT_NAME_LENGTH (256) extern char toku_product_name[TOKU_MAX_PRODUCT_NAME_LENGTH]; @@ -219,5 +232,4 @@ }; extern struct toku_product_name_strings_struct toku_product_name_strings; -extern int tokudb_num_envs; -#endif +extern int tokuft_num_envs; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-hot-flusher.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-hot-flusher.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-hot-flusher.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-hot-flusher.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,14 +89,17 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include -#include -#include -#include -#include -#include -#include -#include +#include + +#include "ft/ft.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-flusher.h" +#include "ft/ft-flusher-internal.h" +#include "ft/ft-internal.h" +#include "ft/node.h" +#include "portability/toku_atomic.h" +#include "util/context.h" +#include "util/status.h" // Member Descirption: // 1. highest_pivot_key - this is the key that corresponds to the @@ -119,7 +122,7 @@ static FT_HOT_STATUS_S hot_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(hot_status, k, c, t, "hot: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(hot_status, k, c, t, "hot: " l, inc) #define STATUS_VALUE(x) hot_status.status[x].value.num @@ -168,7 +171,7 @@ } static int -hot_just_pick_child(FT h, +hot_just_pick_child(FT ft, FTNODE parent, struct hot_flusher_extra *flusher) { @@ -183,10 +186,7 @@ childnum = 0; } else { // Find the pivot boundary. - childnum = toku_ftnode_hot_next_child(parent, - &flusher->highest_pivot_key, - &h->cmp_descriptor, - h->compare_fun); + childnum = toku_ftnode_hot_next_child(parent, &flusher->highest_pivot_key, ft->cmp); } return childnum; @@ -201,19 +201,19 @@ // child node. if (childnum < (parent->n_children - 1)) { toku_destroy_dbt(&flusher->max_current_key); - toku_clone_dbt(&flusher->max_current_key, parent->childkeys[childnum]); + toku_clone_dbt(&flusher->max_current_key, parent->pivotkeys.get_pivot(childnum)); } } // Picks which child toku_ft_flush_some_child will use for flushing and // recursion. static int -hot_pick_child(FT h, +hot_pick_child(FT ft, FTNODE parent, void *extra) { struct hot_flusher_extra *flusher = (struct hot_flusher_extra *) extra; - int childnum = hot_just_pick_child(h, parent, flusher); + int childnum = hot_just_pick_child(ft, parent, flusher); // Now we determine the percentage of the tree flushed so far. @@ -243,14 +243,14 @@ // one to flush into. 
This gives it a chance to do that, and update the // keys it maintains. static int -hot_pick_child_after_split(FT h, +hot_pick_child_after_split(FT ft, FTNODE parent, int childnuma, int childnumb, void *extra) { struct hot_flusher_extra *flusher = (struct hot_flusher_extra *) extra; - int childnum = hot_just_pick_child(h, parent, flusher); + int childnum = hot_just_pick_child(ft, parent, flusher); assert(childnum == childnuma || childnum == childnumb); hot_update_flusher_keys(parent, childnum, flusher); if (parent->height == 1) { @@ -330,8 +330,8 @@ // Get root node (the first parent of each successive HOT // call.) toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &fullhash); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft_handle->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); toku_pin_ftnode(ft_handle->ft, (BLOCKNUM) root_key, fullhash, @@ -339,7 +339,7 @@ PL_WRITE_EXPENSIVE, &root, true); - toku_assert_entire_node_in_memory(root); + toku_ftnode_assert_fully_in_memory(root); } // Prepare HOT diagnostics. 
@@ -385,8 +385,7 @@ else if (right) { // if we have flushed past the bounds set for us, // set rightmost_leaf_seen so we exit - FAKE_DB(db, &ft_handle->ft->cmp_descriptor); - int cmp = ft_handle->ft->compare_fun(&db, &flusher.max_current_key, right); + int cmp = ft_handle->ft->cmp(&flusher.max_current_key, right); if (cmp > 0) { flusher.rightmost_leaf_seen = 1; } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-internal.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-internal.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-internal.h 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-internal.h 2014-10-08 13:19:51.000000000 +0000 @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_INTERNAL_H -#define FT_INTERNAL_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,11 +87,22 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include -#include +#include "portability/toku_config.h" +#include "portability/toku_list.h" +#include "portability/toku_race_tools.h" + +#include "ft/cachetable/cachetable.h" +#include "ft/comparator.h" +#include "ft/ft.h" +#include "ft/ft-ops.h" +#include "ft/node.h" +#include "ft/serialize/block_table.h" +#include "ft/txn/rollback.h" // Symbol TOKUDB_REVISION is not defined by fractal-tree makefiles, so // BUILD_ID of 1000 indicates development build of main, not a release build. @@ -103,22 +112,8 @@ #error #endif -#include "ft_layout_version.h" -#include "block_allocator.h" -#include "cachetable.h" -#include "fifo.h" -#include "ft-ops.h" -#include "toku_list.h" -#include -#include "leafentry.h" -#include "block_table.h" -#include "compress.h" -#include -#include -#include "bndata.h" +struct ft_search; -enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */ -enum { FT_MSG_OVERHEAD = (2 + sizeof(MSN)) }; // the type plus freshness plus MSN enum { FT_DEFAULT_FANOUT = 16 }; enum { FT_DEFAULT_NODE_SIZE = 4 * 1024 * 1024 }; enum { FT_DEFAULT_BASEMENT_NODE_SIZE = 128 * 1024 }; @@ -127,341 +122,14 @@ // happen into the rightmost leaf node due to promotion. enum { FT_SEQINSERT_SCORE_THRESHOLD = 100 }; -// -// Field in ftnode_fetch_extra that tells the -// partial fetch callback what piece of the node -// is needed by the ydb -// -enum ftnode_fetch_type { - ftnode_fetch_none=1, // no partitions needed. 
- ftnode_fetch_subset, // some subset of partitions needed - ftnode_fetch_prefetch, // this is part of a prefetch call - ftnode_fetch_all, // every partition is needed - ftnode_fetch_keymatch, // one child is needed if it holds both keys -}; - -static bool is_valid_ftnode_fetch_type(enum ftnode_fetch_type type) UU(); -static bool is_valid_ftnode_fetch_type(enum ftnode_fetch_type type) { - switch (type) { - case ftnode_fetch_none: - case ftnode_fetch_subset: - case ftnode_fetch_prefetch: - case ftnode_fetch_all: - case ftnode_fetch_keymatch: - return true; - default: - return false; - } -} - -// -// An extra parameter passed to cachetable functions -// That is used in all types of fetch callbacks. -// The contents help the partial fetch and fetch -// callbacks retrieve the pieces of a node necessary -// for the ensuing operation (flush, query, ...) -// -struct ftnode_fetch_extra { - enum ftnode_fetch_type type; - // needed for reading a node off disk - FT h; - // used in the case where type == ftnode_fetch_subset - // parameters needed to find out which child needs to be decompressed (so it can be read) - ft_search_t* search; - DBT range_lock_left_key, range_lock_right_key; - bool left_is_neg_infty, right_is_pos_infty; - // states if we should try to aggressively fetch basement nodes - // that are not specifically needed for current query, - // but may be needed for other cursor operations user is doing - // For example, if we have not disabled prefetching, - // and the user is doing a dictionary wide scan, then - // even though a query may only want one basement node, - // we fetch all basement nodes in a leaf node. 
- bool disable_prefetching; - // this value will be set during the fetch_callback call by toku_ftnode_fetch_callback or toku_ftnode_pf_req_callback - // thi callbacks need to evaluate this anyway, so we cache it here so the search code does not reevaluate it - int child_to_read; - // when we read internal nodes, we want to read all the data off disk in one I/O - // then we'll treat it as normal and only decompress the needed partitions etc. - - bool read_all_partitions; - // Accounting: How many bytes were read, and how much time did we spend doing I/O? - uint64_t bytes_read; - tokutime_t io_time; - tokutime_t decompress_time; - tokutime_t deserialize_time; -}; - -struct toku_fifo_entry_key_msn_heaviside_extra { - DESCRIPTOR desc; - ft_compare_func cmp; - FIFO fifo; - const DBT *key; - MSN msn; -}; - -// comparison function for inserting messages into a -// ftnode_nonleaf_childinfo's message_tree -int -toku_fifo_entry_key_msn_heaviside(const int32_t &v, const struct toku_fifo_entry_key_msn_heaviside_extra &extra); - -struct toku_fifo_entry_key_msn_cmp_extra { - DESCRIPTOR desc; - ft_compare_func cmp; - FIFO fifo; -}; - -// same thing for qsort_r -int -toku_fifo_entry_key_msn_cmp(const struct toku_fifo_entry_key_msn_cmp_extra &extrap, const int &a, const int &b); - -typedef toku::omt off_omt_t; -typedef toku::omt marked_off_omt_t; - -// data of an available partition of a nonleaf ftnode -struct ftnode_nonleaf_childinfo { - FIFO buffer; - off_omt_t broadcast_list; - marked_off_omt_t fresh_message_tree; - off_omt_t stale_message_tree; - uint64_t flow[2]; // current and last checkpoint -}; - -unsigned int toku_bnc_nbytesinbuf(NONLEAF_CHILDINFO bnc); -int toku_bnc_n_entries(NONLEAF_CHILDINFO bnc); -long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc); -long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc); -void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, 
DESCRIPTOR desc, ft_compare_func cmp); -void toku_bnc_empty(NONLEAF_CHILDINFO bnc); -void toku_bnc_flush_to_child(FT h, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID parent_oldest_referenced_xid_known); -bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) __attribute__((const, nonnull)); -bool toku_ft_nonleaf_is_gorged(FTNODE node, uint32_t nodesize); - -enum reactivity get_nonleaf_reactivity(FTNODE node, unsigned int fanout); -enum reactivity get_node_reactivity(FT ft, FTNODE node); -uint32_t get_leaf_num_entries(FTNODE node); - -// data of an available partition of a leaf ftnode -struct ftnode_leaf_basement_node { - bn_data data_buffer; - unsigned int seqinsert; // number of sequential inserts to this leaf - MSN max_msn_applied; // max message sequence number applied - bool stale_ancestor_messages_applied; - STAT64INFO_S stat64_delta; // change in stat64 counters since basement was last written to disk -}; - -enum pt_state { // declare this to be packed so that when used below it will only take 1 byte. - PT_INVALID = 0, - PT_ON_DISK = 1, - PT_COMPRESSED = 2, - PT_AVAIL = 3}; - -enum ftnode_child_tag { - BCT_INVALID = 0, - BCT_NULL, - BCT_SUBBLOCK, - BCT_LEAF, - BCT_NONLEAF -}; - -typedef struct ftnode_child_pointer { - union { - struct sub_block *subblock; - struct ftnode_nonleaf_childinfo *nonleaf; - struct ftnode_leaf_basement_node *leaf; - } u; - enum ftnode_child_tag tag; -} FTNODE_CHILD_POINTER; - - -struct ftnode_disk_data { - // - // stores the offset to the beginning of the partition on disk from the ftnode, and the length, needed to read a partition off of disk - // the value is only meaningful if the node is clean. If the node is dirty, then the value is meaningless - // The START is the distance from the end of the compressed node_info data, to the beginning of the compressed partition - // The SIZE is the size of the compressed partition. 
- // Rationale: We cannot store the size from the beginning of the node since we don't know how big the header will be. - // However, later when we are doing aligned writes, we won't be able to store the size from the end since we want things to align. - uint32_t start; - uint32_t size; -}; -#define BP_START(node_dd,i) ((node_dd)[i].start) -#define BP_SIZE(node_dd,i) ((node_dd)[i].size) - - -// a ftnode partition, associated with a child of a node -struct ftnode_partition { - // the following three variables are used for nonleaf nodes - // for leaf nodes, they are meaningless - BLOCKNUM blocknum; // blocknum of child - - // How many bytes worth of work was performed by messages in each buffer. - uint64_t workdone; - - // - // pointer to the partition. Depending on the state, they may be different things - // if state == PT_INVALID, then the node was just initialized and ptr == NULL - // if state == PT_ON_DISK, then ptr == NULL - // if state == PT_COMPRESSED, then ptr points to a struct sub_block* - // if state == PT_AVAIL, then ptr is: - // a struct ftnode_nonleaf_childinfo for internal nodes, - // a struct ftnode_leaf_basement_node for leaf nodes - // - struct ftnode_child_pointer ptr; - // - // at any time, the partitions may be in one of the following three states (stored in pt_state): - // PT_INVALID - means that the partition was just initialized - // PT_ON_DISK - means that the partition is not in memory and needs to be read from disk. To use, must read off disk and decompress - // PT_COMPRESSED - means that the partition is compressed in memory. To use, must decompress - // PT_AVAIL - means the partition is decompressed and in memory - // - enum pt_state state; // make this an enum to make debugging easier. 
- - // clock count used to for pe_callback to determine if a node should be evicted or not - // for now, saturating the count at 1 - uint8_t clock_count; -}; - -struct ftnode { - MSN max_msn_applied_to_node_on_disk; // max_msn_applied that will be written to disk - unsigned int flags; - BLOCKNUM thisnodename; // Which block number is this node? - int layout_version; // What version of the data structure? - int layout_version_original; // different (<) from layout_version if upgraded from a previous version (useful for debugging) - int layout_version_read_from_disk; // transient, not serialized to disk, (useful for debugging) - uint32_t build_id; // build_id (svn rev number) of software that wrote this node to disk - int height; /* height is always >= 0. 0 for leaf, >0 for nonleaf. */ - int dirty; - uint32_t fullhash; - int n_children; //for internal nodes, if n_children==fanout+1 then the tree needs to be rebalanced. - // for leaf nodes, represents number of basement nodes - unsigned int totalchildkeylens; - DBT *childkeys; /* Pivot keys. Child 0's keys are <= childkeys[0]. Child 1's keys are <= childkeys[1]. - Child 1's keys are > childkeys[0]. */ - - // What's the oldest referenced xid that this node knows about? The real oldest - // referenced xid might be younger, but this is our best estimate. We use it - // as a heuristic to transition provisional mvcc entries from provisional to - // committed (from implicity committed to really committed). - // - // A better heuristic would be the oldest live txnid, but we use this since it - // still works well most of the time, and its readily available on the inject - // code path. 
- TXNID oldest_referenced_xid_known; - - // array of size n_children, consisting of ftnode partitions - // each one is associated with a child - // for internal nodes, the ith partition corresponds to the ith message buffer - // for leaf nodes, the ith partition corresponds to the ith basement node - struct ftnode_partition *bp; - PAIR ct_pair; -}; - -// ftnode partition macros -// BP stands for ftnode_partition -#define BP_BLOCKNUM(node,i) ((node)->bp[i].blocknum) -#define BP_STATE(node,i) ((node)->bp[i].state) -#define BP_WORKDONE(node, i)((node)->bp[i].workdone) - -// -// macros for managing a node's clock -// Should be managed by ft-ops.c, NOT by serialize/deserialize -// +uint32_t compute_child_fullhash (CACHEFILE cf, FTNODE node, int childnum); -// -// BP_TOUCH_CLOCK uses a compare and swap because multiple threads -// that have a read lock on an internal node may try to touch the clock -// simultaneously -// -#define BP_TOUCH_CLOCK(node, i) ((node)->bp[i].clock_count = 1) -#define BP_SWEEP_CLOCK(node, i) ((node)->bp[i].clock_count = 0) -#define BP_SHOULD_EVICT(node, i) ((node)->bp[i].clock_count == 0) -// not crazy about having these two here, one is for the case where we create new -// nodes, such as in splits and creating new roots, and the other is for when -// we are deserializing a node and not all bp's are touched -#define BP_INIT_TOUCHED_CLOCK(node, i) ((node)->bp[i].clock_count = 1) -#define BP_INIT_UNTOUCHED_CLOCK(node, i) ((node)->bp[i].clock_count = 0) - -// internal node macros -static inline void set_BNULL(FTNODE node, int i) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - node->bp[i].ptr.tag = BCT_NULL; -} -static inline bool is_BNULL (FTNODE node, int i) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - return node->bp[i].ptr.tag == BCT_NULL; -} -static inline NONLEAF_CHILDINFO BNC(FTNODE node, int i) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - 
FTNODE_CHILD_POINTER p = node->bp[i].ptr; - paranoid_invariant(p.tag==BCT_NONLEAF); - return p.u.nonleaf; -} -static inline void set_BNC(FTNODE node, int i, NONLEAF_CHILDINFO nl) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - FTNODE_CHILD_POINTER *p = &node->bp[i].ptr; - p->tag = BCT_NONLEAF; - p->u.nonleaf = nl; -} - -static inline BASEMENTNODE BLB(FTNODE node, int i) { - paranoid_invariant(i >= 0); - // The optimizer really doesn't like it when we compare - // i to n_children as signed integers. So we assert that - // n_children is in fact positive before doing a comparison - // on the values forcibly cast to unsigned ints. - paranoid_invariant(node->n_children > 0); - paranoid_invariant((unsigned) i < (unsigned) node->n_children); - FTNODE_CHILD_POINTER p = node->bp[i].ptr; - paranoid_invariant(p.tag==BCT_LEAF); - return p.u.leaf; -} -static inline void set_BLB(FTNODE node, int i, BASEMENTNODE bn) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - FTNODE_CHILD_POINTER *p = &node->bp[i].ptr; - p->tag = BCT_LEAF; - p->u.leaf = bn; -} - -static inline SUB_BLOCK BSB(FTNODE node, int i) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - FTNODE_CHILD_POINTER p = node->bp[i].ptr; - paranoid_invariant(p.tag==BCT_SUBBLOCK); - return p.u.subblock; -} -static inline void set_BSB(FTNODE node, int i, SUB_BLOCK sb) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - FTNODE_CHILD_POINTER *p = &node->bp[i].ptr; - p->tag = BCT_SUBBLOCK; - p->u.subblock = sb; -} - -// ftnode leaf basementnode macros, -#define BLB_MAX_MSN_APPLIED(node,i) (BLB(node,i)->max_msn_applied) -#define BLB_MAX_DSN_APPLIED(node,i) (BLB(node,i)->max_dsn_applied) -#define BLB_DATA(node,i) (&(BLB(node,i)->data_buffer)) -#define BLB_NBYTESINDATA(node,i) (BLB_DATA(node,i)->get_disk_size()) -#define BLB_SEQINSERT(node,i) (BLB(node,i)->seqinsert) - -/* pivot flags (must fit in 8 bits) */ -enum { - 
FT_PIVOT_TRUNC = 4, - FT_PIVOT_FRONT_COMPRESS = 8, +enum ft_type { + FT_CURRENT = 1, + FT_CHECKPOINT_INPROGRESS }; -uint32_t compute_child_fullhash (CACHEFILE cf, FTNODE node, int childnum); - // The ft_header is not managed by the cachetable. Instead, it hangs off the cachefile as userdata. - -enum ft_type {FT_CURRENT=1, FT_CHECKPOINT_INPROGRESS}; - struct ft_header { enum ft_type type; @@ -474,7 +142,7 @@ // LSN of creation of "checkpoint-begin" record in log. LSN checkpoint_lsn; - // see ft_layout_version.h. maybe don't need this if we assume + // see serialize/ft_layout_version.h. maybe don't need this if we assume // it's always the current version after deserializing const int layout_version; // different (<) from layout_version if upgraded from a previous @@ -529,6 +197,7 @@ STAT64INFO_S on_disk_stats; }; +typedef struct ft_header *FT_HEADER; // ft_header is always the current version. struct ft { @@ -540,20 +209,23 @@ CACHEFILE cf; // unique id for dictionary DICTIONARY_ID dict_id; - ft_compare_func compare_fun; - ft_update_func update_fun; // protected by locktree DESCRIPTOR_S descriptor; - // protected by locktree and user. User - // makes sure this is only changed - // when no activity on tree + + // protected by locktree and user. + // User makes sure this is only changed when no activity on tree DESCRIPTOR_S cmp_descriptor; + // contains a pointer to cmp_descriptor (above) - their lifetimes are bound + toku::comparator cmp; + + // the update function always utilizes the cmp_descriptor, not the regular one + ft_update_func update_fun; // These are not read-only: // protected by blocktable lock - BLOCK_TABLE blocktable; + block_table blocktable; // protected by atomic builtins STAT64INFO_S in_memory_stats; @@ -598,7 +270,7 @@ // descriptor. We don't bother setting any other fields because // the comparison function doesn't need it, and we would like to // reduce the CPU work done per comparison. 
-#define FAKE_DB(db, desc) struct __toku_db db; do { db.cmp_descriptor = desc; } while (0) +#define FAKE_DB(db, desc) struct __toku_db db; do { db.cmp_descriptor = const_cast(desc); } while (0) struct ft_options { unsigned int nodesize; @@ -606,6 +278,7 @@ enum toku_compression_method compression_method; unsigned int fanout; unsigned int flags; + uint8_t memcmp_magic; ft_compare_func compare_fun; ft_update_func update_fun; }; @@ -625,435 +298,169 @@ PAIR_ATTR make_ftnode_pair_attr(FTNODE node); PAIR_ATTR make_invalid_pair_attr(void); -/* serialization code */ -void -toku_create_compressed_partition_from_available( - FTNODE node, - int childnum, - enum toku_compression_method compression_method, - SUB_BLOCK sb - ); -void rebalance_ftnode_leaf(FTNODE node, unsigned int basementnodesize); -int toku_serialize_ftnode_to_memory (FTNODE node, - FTNODE_DISK_DATA* ndd, - unsigned int basementnodesize, - enum toku_compression_method compression_method, - bool do_rebalancing, - bool in_parallel, - /*out*/ size_t *n_bytes_to_write, - /*out*/ size_t *n_uncompressed_bytes, - /*out*/ char **bytes_to_write); -int toku_serialize_ftnode_to(int fd, BLOCKNUM, FTNODE node, FTNODE_DISK_DATA* ndd, bool do_rebalancing, FT h, bool for_checkpoint); -int toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized, - FT h, bool for_checkpoint); -void toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized); -int toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT h); -int toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, struct ftnode_fetch_extra* bfe); -int toku_deserialize_bp_from_compressed(FTNODE node, int childnum, struct ftnode_fetch_extra *bfe); -int toku_deserialize_ftnode_from (int fd, BLOCKNUM off, uint32_t /*fullhash*/, FTNODE *ftnode, FTNODE_DISK_DATA* ndd, struct ftnode_fetch_extra* 
bfe); - -// For verifying old, non-upgraded nodes (versions 13 and 14). -int -decompress_from_raw_block_into_rbuf(uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum); -// - -//////////////// TODO: Move these function declarations -int -deserialize_ft_from_fd_into_rbuf(int fd, - toku_off_t offset_of_header, - struct rbuf *rb, - uint64_t *checkpoint_count, - LSN *checkpoint_lsn, - uint32_t * version_p); - -int -deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version); - -void read_block_from_fd_into_rbuf( - int fd, - BLOCKNUM blocknum, - FT h, - struct rbuf *rb - ); +// +// Field in ftnode_fetch_extra that tells the +// partial fetch callback what piece of the node +// is needed by the ydb +// +enum ftnode_fetch_type { + ftnode_fetch_none = 1, // no partitions needed. + ftnode_fetch_subset, // some subset of partitions needed + ftnode_fetch_prefetch, // this is part of a prefetch call + ftnode_fetch_all, // every partition is needed + ftnode_fetch_keymatch, // one child is needed if it holds both keys +}; -int -read_compressed_sub_block(struct rbuf *rb, struct sub_block *sb); +// Info passed to cachetable fetch callbacks to say which parts of a node +// should be fetched (perhaps a subset, perhaps the whole thing, depending +// on operation) +class ftnode_fetch_extra { +public: + // Used when the whole node must be in memory, such as for flushes. + void create_for_full_read(FT ft); + + // A subset of children are necessary. Used by point queries. + void create_for_subset_read(FT ft, ft_search *search, const DBT *left, const DBT *right, + bool left_is_neg_infty, bool right_is_pos_infty, + bool disable_prefetching, bool read_all_partitions); + + // No partitions are necessary - only pivots and/or subtree estimates. + // Currently used for stat64. + void create_for_min_read(FT ft); + + // Used to prefetch partitions that fall within the bounds given by the cursor. 
+ void create_for_prefetch(FT ft, struct ft_cursor *cursor); + + // Only a portion of the node (within a keyrange) is required. + // Used by keysrange when the left and right key are in the same basement node. + void create_for_keymatch(FT ft, const DBT *left, const DBT *right, + bool disable_prefetching, bool read_all_partitions); + + void destroy(void); -int -verify_ftnode_sub_block (struct sub_block *sb); + // return: true if a specific childnum is required to be in memory + bool wants_child_available(int childnum) const; -void -just_decompress_sub_block(struct sub_block *sb); + // return: the childnum of the leftmost child that is required to be in memory + int leftmost_child_wanted(FTNODE node) const; -/* Beginning of ft-node-deserialize.c helper functions. */ -void initialize_ftnode(FTNODE node, BLOCKNUM blocknum); -int read_and_check_magic(struct rbuf *rb); -int read_and_check_version(FTNODE node, struct rbuf *rb); -void read_node_info(FTNODE node, struct rbuf *rb, int version); -void allocate_and_read_partition_offsets(FTNODE node, struct rbuf *rb, FTNODE_DISK_DATA *ndd); -int check_node_info_checksum(struct rbuf *rb); -void read_legacy_node_info(FTNODE node, struct rbuf *rb, int version); -int check_legacy_end_checksum(struct rbuf *rb); -/* End of ft-node-deserialization.c helper functions. */ + // return: the childnum of the rightmost child that is required to be in memory + int rightmost_child_wanted(FTNODE node) const; -unsigned int toku_serialize_ftnode_size(FTNODE node); /* How much space will it take? 
*/ + // needed for reading a node off disk + FT ft; -void toku_verify_or_set_counts(FTNODE); + enum ftnode_fetch_type type; -size_t toku_serialize_ft_size (FT_HEADER h); -void toku_serialize_ft_to (int fd, FT_HEADER h, BLOCK_TABLE blocktable, CACHEFILE cf); -void toku_serialize_ft_to_wbuf ( - struct wbuf *wbuf, - FT_HEADER h, - DISKOFF translation_location_on_disk, - DISKOFF translation_size_on_disk - ); -int toku_deserialize_ft_from (int fd, LSN max_acceptable_lsn, FT *ft); -void toku_serialize_descriptor_contents_to_fd(int fd, const DESCRIPTOR desc, DISKOFF offset); -void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, const DESCRIPTOR desc); -BASEMENTNODE toku_create_empty_bn(void); -BASEMENTNODE toku_create_empty_bn_no_buffer(void); // create a basement node with a null buffer. -NONLEAF_CHILDINFO toku_clone_nl(NONLEAF_CHILDINFO orig_childinfo); -BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn); -NONLEAF_CHILDINFO toku_create_empty_nl(void); -// FIXME needs toku prefix -void destroy_basement_node (BASEMENTNODE bn); -// FIXME needs toku prefix -void destroy_nonleaf_childinfo (NONLEAF_CHILDINFO nl); -void toku_destroy_ftnode_internals(FTNODE node); -void toku_ftnode_free (FTNODE *node); -bool is_entire_node_in_memory(FTNODE node); -void toku_assert_entire_node_in_memory(FTNODE node); + // used in the case where type == ftnode_fetch_subset + // parameters needed to find out which child needs to be decompressed (so it can be read) + ft_search *search; + DBT range_lock_left_key, range_lock_right_key; + bool left_is_neg_infty, right_is_pos_infty; -// append a child node to a parent node -void toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey); + // states if we should try to aggressively fetch basement nodes + // that are not specifically needed for current query, + // but may be needed for other cursor operations user is doing + // For example, if we have not disabled prefetching, + // and the user is doing a dictionary wide 
scan, then + // even though a query may only want one basement node, + // we fetch all basement nodes in a leaf node. + bool disable_prefetching; -// append a message to a nonleaf node child buffer -void toku_ft_append_to_child_buffer(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int childnum, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const DBT *key, const DBT *val); + // this value will be set during the fetch_callback call by toku_ftnode_fetch_callback or toku_ftnode_pf_req_callback + // thi callbacks need to evaluate this anyway, so we cache it here so the search code does not reevaluate it + int child_to_read; -STAT64INFO_S toku_get_and_clear_basement_stats(FTNODE leafnode); + // when we read internal nodes, we want to read all the data off disk in one I/O + // then we'll treat it as normal and only decompress the needed partitions etc. + bool read_all_partitions; -//#define SLOW -#ifdef SLOW -#define VERIFY_NODE(t,n) (toku_verify_or_set_counts(n), toku_verify_estimates(t,n)) -#else -#define VERIFY_NODE(t,n) ((void)0) -#endif + // Accounting: How many bytes were read, and how much time did we spend doing I/O? + uint64_t bytes_read; + tokutime_t io_time; + tokutime_t decompress_time; + tokutime_t deserialize_time; -void toku_ft_status_update_pivot_fetch_reason(struct ftnode_fetch_extra *bfe); -void toku_ft_status_update_flush_reason(FTNODE node, uint64_t uncompressed_bytes_flushed, uint64_t bytes_written, tokutime_t write_time, bool for_checkpoint); -void toku_ft_status_update_serialize_times(FTNODE node, tokutime_t serialize_time, tokutime_t compress_time); -void toku_ft_status_update_deserialize_times(FTNODE node, tokutime_t deserialize_time, tokutime_t decompress_time); +private: + void _create_internal(FT ft_); +}; +// Only exported for tests. +// Cachetable callbacks for ftnodes. 
void toku_ftnode_clone_callback(void* value_data, void** cloned_value_data, long* clone_size, PAIR_ATTR* new_attr, bool for_checkpoint, void* write_extraargs); void toku_ftnode_checkpoint_complete_callback(void *value_data); -void toku_ftnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename, void *ftnode_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool is_clone); -int toku_ftnode_fetch_callback (CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM nodename, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int*dirty, void*extraargs); +void toku_ftnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM blocknum, void *ftnode_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool is_clone); +int toku_ftnode_fetch_callback (CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM blocknum, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int*dirty, void*extraargs); void toku_ftnode_pe_est_callback(void* ftnode_pv, void* disk_data, long* bytes_freed_estimate, enum partial_eviction_cost *cost, void* write_extraargs); int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *extraargs, void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra); bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs); int toku_ftnode_pf_callback(void* ftnode_pv, void* UU(disk_data), void* read_extraargs, int fd, PAIR_ATTR* sizep); int toku_ftnode_cleaner_callback( void *ftnode_pv, BLOCKNUM blocknum, uint32_t fullhash, void *extraargs); -void toku_evict_bn_from_memory(FTNODE node, int childnum, FT h); -BASEMENTNODE toku_detach_bn(FTNODE node, int childnum); - -// Given pinned node and pinned child, split child into two -// and update node with information about its new child. 
-void toku_ft_split_child( - FT h, - FTNODE node, - int childnum, - FTNODE child, - enum split_mode split_mode - ); -// Given pinned node, merge childnum with a neighbor and update node with -// information about the change -void toku_ft_merge_child( - FT ft, - FTNODE node, - int childnum - ); -static inline CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_node(FT h) { - CACHETABLE_WRITE_CALLBACK wc; - wc.flush_callback = toku_ftnode_flush_callback; - wc.pe_est_callback = toku_ftnode_pe_est_callback; - wc.pe_callback = toku_ftnode_pe_callback; - wc.cleaner_callback = toku_ftnode_cleaner_callback; - wc.clone_callback = toku_ftnode_clone_callback; - wc.checkpoint_complete_callback = toku_ftnode_checkpoint_complete_callback; - wc.write_extraargs = h; - return wc; -} - -static const FTNODE null_ftnode=0; - -/* an ft cursor is represented as a kv pair in a tree */ -struct ft_cursor { - struct toku_list cursors_link; - FT_HANDLE ft_handle; - DBT key, val; // The key-value pair that the cursor currently points to - DBT range_lock_left_key, range_lock_right_key; - bool prefetching; - bool left_is_neg_infty, right_is_pos_infty; - bool is_snapshot_read; // true if query is read_committed, false otherwise - bool is_leaf_mode; - bool disable_prefetching; - bool is_temporary; - int out_of_range_error; - int direction; - TOKUTXN ttxn; - FT_CHECK_INTERRUPT_CALLBACK interrupt_cb; - void *interrupt_cb_extra; -}; -// -// Helper function to fill a ftnode_fetch_extra with data -// that will tell the fetch callback that the entire node is -// necessary. Used in cases where the entire node -// is required, such as for flushes. 
-// -static inline void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT h) { - bfe->type = ftnode_fetch_all; - bfe->h = h; - bfe->search = NULL; - toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - bfe->left_is_neg_infty = false; - bfe->right_is_pos_infty = false; - bfe->child_to_read = -1; - bfe->disable_prefetching = false; - bfe->read_all_partitions = false; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - bfe->decompress_time = 0; -} +CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_node(FT ft); -// -// Helper function to fill a ftnode_fetch_extra with data -// that will tell the fetch callback that an explicit range of children is -// necessary. Used in cases where the portion of the node that is required -// is known in advance, e.g. for keysrange when the left and right key -// are in the same basement node. -// -static inline void fill_bfe_for_keymatch( - struct ftnode_fetch_extra *bfe, - FT h, - const DBT *left, - const DBT *right, - bool disable_prefetching, - bool read_all_partitions - ) -{ - paranoid_invariant(h->h->type == FT_CURRENT); - bfe->type = ftnode_fetch_keymatch; - bfe->h = h; - bfe->search = nullptr; - toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - if (left) { - toku_copyref_dbt(&bfe->range_lock_left_key, *left); - } - - if (right) { - toku_copyref_dbt(&bfe->range_lock_right_key, *right); - } - bfe->left_is_neg_infty = left == nullptr; - bfe->right_is_pos_infty = right == nullptr; - bfe->child_to_read = -1; - bfe->disable_prefetching = disable_prefetching; - bfe->read_all_partitions = read_all_partitions; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - bfe->decompress_time = 0; -} - -// -// Helper function to fill a ftnode_fetch_extra with data -// that will tell the fetch callback that some subset of the node -// necessary. Used in cases where some of the node is required -// such as for a point query. 
-// -static inline void fill_bfe_for_subset_read( - struct ftnode_fetch_extra *bfe, - FT h, - ft_search_t* search, - const DBT *left, - const DBT *right, - bool left_is_neg_infty, - bool right_is_pos_infty, - bool disable_prefetching, - bool read_all_partitions - ) -{ - paranoid_invariant(h->h->type == FT_CURRENT); - bfe->type = ftnode_fetch_subset; - bfe->h = h; - bfe->search = search; - toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - if (left) { - toku_copyref_dbt(&bfe->range_lock_left_key, *left); - } - if (right) { - toku_copyref_dbt(&bfe->range_lock_right_key, *right); - } - bfe->left_is_neg_infty = left_is_neg_infty; - bfe->right_is_pos_infty = right_is_pos_infty; - bfe->child_to_read = -1; - bfe->disable_prefetching = disable_prefetching; - bfe->read_all_partitions = read_all_partitions; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - bfe->decompress_time = 0; -} - -// -// Helper function to fill a ftnode_fetch_extra with data -// that will tell the fetch callback that no partitions are -// necessary, only the pivots and/or subtree estimates. -// Currently used for stat64. 
-// -static inline void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT h) { - paranoid_invariant(h->h->type == FT_CURRENT); - bfe->type = ftnode_fetch_none; - bfe->h = h; - bfe->search = NULL; - toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - bfe->left_is_neg_infty = false; - bfe->right_is_pos_infty = false; - bfe->child_to_read = -1; - bfe->disable_prefetching = false; - bfe->read_all_partitions = false; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - bfe->decompress_time = 0; -} - -static inline void destroy_bfe_for_prefetch(struct ftnode_fetch_extra *bfe) { - paranoid_invariant(bfe->type == ftnode_fetch_prefetch); - toku_destroy_dbt(&bfe->range_lock_left_key); - toku_destroy_dbt(&bfe->range_lock_right_key); -} - -// this is in a strange place because it needs the cursor struct to be defined -static inline void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe, - FT h, - FT_CURSOR c) { - paranoid_invariant(h->h->type == FT_CURRENT); - bfe->type = ftnode_fetch_prefetch; - bfe->h = h; - bfe->search = NULL; - toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - const DBT *left = &c->range_lock_left_key; - if (left->data) { - toku_clone_dbt(&bfe->range_lock_left_key, *left); - } - const DBT *right = &c->range_lock_right_key; - if (right->data) { - toku_clone_dbt(&bfe->range_lock_right_key, *right); - } - bfe->left_is_neg_infty = c->left_is_neg_infty; - bfe->right_is_pos_infty = c->right_is_pos_infty; - bfe->child_to_read = -1; - bfe->disable_prefetching = c->disable_prefetching; - bfe->read_all_partitions = false; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - bfe->decompress_time = 0; -} - -struct ancestors { - FTNODE node; // This is the root node if next is NULL. - int childnum; // which buffer holds messages destined to the node whose ancestors this list represents. 
- ANCESTORS next; // Parent of this node (so next->node.(next->childnum) refers to this node). -}; -struct pivot_bounds { - const DBT * const lower_bound_exclusive; - const DBT * const upper_bound_inclusive; // NULL to indicate negative or positive infinity (which are in practice exclusive since there are now transfinite keys in messages). -}; +// This is only exported for tests. +// append a child node to a parent node +void toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey); -__attribute__((nonnull)) -void toku_move_ftnode_messages_to_stale(FT ft, FTNODE node); -void toku_apply_ancestors_messages_to_node (FT_HANDLE t, FTNODE node, ANCESTORS ancestors, struct pivot_bounds const * const bounds, bool* msgs_applied, int child_to_read); -__attribute__((nonnull)) -bool toku_ft_leaf_needs_ancestors_messages(FT ft, FTNODE node, ANCESTORS ancestors, struct pivot_bounds const * const bounds, MSN *const max_msn_in_path, int child_to_read); -__attribute__((nonnull)) -void toku_ft_bn_update_max_msn(FTNODE node, MSN max_msn_applied, int child_to_read); +// This is only exported for tests. 
+// append a message to a nonleaf node child buffer +void toku_ft_append_to_child_buffer(const toku::comparator &cmp, FTNODE node, int childnum, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const DBT *key, const DBT *val); -__attribute__((const,nonnull)) -size_t toku_ft_msg_memsize_in_fifo(FT_MSG msg); +STAT64INFO_S toku_get_and_clear_basement_stats(FTNODE leafnode); -int -toku_ft_search_which_child( - DESCRIPTOR desc, - ft_compare_func cmp, - FTNODE node, - ft_search_t *search - ); +//#define SLOW +#ifdef SLOW +#define VERIFY_NODE(t,n) (toku_verify_or_set_counts(n), toku_verify_estimates(t,n)) +#else +#define VERIFY_NODE(t,n) ((void)0) +#endif -bool -toku_bfe_wants_child_available (struct ftnode_fetch_extra* bfe, int childnum); +void toku_verify_or_set_counts(FTNODE); -int -toku_bfe_leftmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node); -int -toku_bfe_rightmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node); +// TODO: consider moving this to ft/pivotkeys.cc +class pivot_bounds { +public: + pivot_bounds(const DBT &lbe_dbt, const DBT &ubi_dbt); + + pivot_bounds next_bounds(FTNODE node, int childnum) const; + + const DBT *lbe() const; + const DBT *ubi() const; + + static pivot_bounds infinite_bounds(); + +private: + DBT _prepivotkey(FTNODE node, int childnum, const DBT &lbe_dbt) const; + DBT _postpivotkey(FTNODE node, int childnum, const DBT &ubi_dbt) const; + + // if toku_dbt_is_empty() is true for either bound, then it represents + // negative or positive infinity (which are exclusive in practice) + const DBT _lower_bound_exclusive; + const DBT _upper_bound_inclusive; +}; // allocate a block number // allocate and initialize a ftnode // put the ftnode into the cache table -void toku_create_new_ftnode (FT_HANDLE t, FTNODE *result, int height, int n_children); - -// Effect: Fill in N as an empty ftnode. 
-void toku_initialize_empty_ftnode (FTNODE n, BLOCKNUM nodename, int height, int num_children, - int layout_version, unsigned int flags); - -int toku_ftnode_which_child(FTNODE node, const DBT *k, - DESCRIPTOR desc, ft_compare_func cmp) - __attribute__((__warn_unused_result__)); - -/** - * Finds the next child for HOT to flush to, given that everything up to - * and including k has been flattened. - * - * If k falls between pivots in node, then we return the childnum where k - * lies. - * - * If k is equal to some pivot, then we return the next (to the right) - * childnum. - */ -int toku_ftnode_hot_next_child(FTNODE node, - const DBT *k, - DESCRIPTOR desc, - ft_compare_func cmp); +void toku_create_new_ftnode(FT_HANDLE ft_handle, FTNODE *result, int height, int n_children); /* Stuff for testing */ // toku_testsetup_initialize() must be called before any other test_setup_xxx() functions are called. void toku_testsetup_initialize(void); int toku_testsetup_leaf(FT_HANDLE ft_h, BLOCKNUM *blocknum, int n_children, char **keys, int *keylens); -int toku_testsetup_nonleaf (FT_HANDLE ft_h, int height, BLOCKNUM *diskoff, int n_children, BLOCKNUM *children, char **keys, int *keylens); +int toku_testsetup_nonleaf (FT_HANDLE ft_h, int height, BLOCKNUM *blocknum, int n_children, BLOCKNUM *children, char **keys, int *keylens); int toku_testsetup_root(FT_HANDLE ft_h, BLOCKNUM); int toku_testsetup_get_sersize(FT_HANDLE ft_h, BLOCKNUM); // Return the size on disk. 
int toku_testsetup_insert_to_leaf (FT_HANDLE ft_h, BLOCKNUM, const char *key, int keylen, const char *val, int vallen); int toku_testsetup_insert_to_nonleaf (FT_HANDLE ft_h, BLOCKNUM, enum ft_msg_type, const char *key, int keylen, const char *val, int vallen); void toku_pin_node_with_min_bfe(FTNODE* node, BLOCKNUM b, FT_HANDLE t); -void toku_ft_root_put_msg(FT h, FT_MSG msg, txn_gc_info *gc_info); +void toku_ft_root_put_msg(FT ft, const ft_msg &msg, txn_gc_info *gc_info); -void -toku_get_node_for_verify( - BLOCKNUM blocknum, - FT_HANDLE ft_h, - FTNODE* nodep - ); +// TODO: Rename +void toku_get_node_for_verify(BLOCKNUM blocknum, FT_HANDLE ft_h, FTNODE* nodep); int toku_verify_ftnode (FT_HANDLE ft_h, @@ -1217,61 +624,37 @@ TOKU_ENGINE_STATUS_ROW_S status[FT_STATUS_NUM_ROWS]; } FT_STATUS_S, *FT_STATUS; -void toku_ft_get_status(FT_STATUS); +void toku_ft_status_update_pivot_fetch_reason(ftnode_fetch_extra *bfe); +void toku_ft_status_update_flush_reason(FTNODE node, uint64_t uncompressed_bytes_flushed, uint64_t bytes_written, tokutime_t write_time, bool for_checkpoint); +void toku_ft_status_update_serialize_times(FTNODE node, tokutime_t serialize_time, tokutime_t compress_time); +void toku_ft_status_update_deserialize_times(FTNODE node, tokutime_t deserialize_time, tokutime_t decompress_time); +void toku_ft_status_note_msn_discard(void); +void toku_ft_status_note_update(bool broadcast); +void toku_ft_status_note_msg_bytes_out(size_t buffsize); +void toku_ft_status_note_ftnode(int height, bool created); // created = false means destroyed -void -toku_ft_bn_apply_msg_once( - BASEMENTNODE bn, - const FT_MSG msg, - uint32_t idx, - LEAFENTRY le, - txn_gc_info *gc_info, - uint64_t *workdonep, - STAT64INFO stats_to_update - ); - -void -toku_ft_bn_apply_msg( - ft_compare_func compare_fun, - ft_update_func update_fun, - DESCRIPTOR desc, - BASEMENTNODE bn, - FT_MSG msg, - txn_gc_info *gc_info, - uint64_t *workdone, - STAT64INFO stats_to_update - ); - -void -toku_ft_leaf_apply_msg( 
- ft_compare_func compare_fun, - ft_update_func update_fun, - DESCRIPTOR desc, - FTNODE node, - int target_childnum, - FT_MSG msg, - txn_gc_info *gc_info, - uint64_t *workdone, - STAT64INFO stats_to_update - ); - -void -toku_ft_node_put_msg( - ft_compare_func compare_fun, - ft_update_func update_fun, - DESCRIPTOR desc, - FTNODE node, - int target_childnum, - FT_MSG msg, - bool is_fresh, - txn_gc_info *gc_info, - size_t flow_deltas[], - STAT64INFO stats_to_update - ); +void toku_ft_get_status(FT_STATUS); void toku_flusher_thread_set_callback(void (*callback_f)(int, void*), void* extra); -int toku_upgrade_subtree_estimates_to_stat64info(int fd, FT h) __attribute__((nonnull)); -int toku_upgrade_msn_from_root_to_header(int fd, FT h) __attribute__((nonnull)); +// For upgrade +int toku_upgrade_subtree_estimates_to_stat64info(int fd, FT ft) __attribute__((nonnull)); +int toku_upgrade_msn_from_root_to_header(int fd, FT ft) __attribute__((nonnull)); + +// A callback function is invoked with the key, and the data. +// The pointers (to the bytevecs) must not be modified. The data must be copied out before the callback function returns. +// Note: In the thread-safe version, the ftnode remains locked while the callback function runs. So return soon, and don't call the ft code from the callback function. +// If the callback function returns a nonzero value (an error code), then that error code is returned from the get function itself. +// The cursor object will have been updated (so that if result==0 the current value is the value being passed) +// (If r!=0 then the cursor won't have been updated.) +// If r!=0, it's up to the callback function to return that value of r. +// A 'key' pointer of NULL means that element is not found (effectively infinity or +// -infinity depending on direction) +// When lock_only is false, the callback does optional lock tree locking and then processes the key and val. +// When lock_only is true, the callback only does optional lock tree locking. 
+typedef int (*FT_GET_CALLBACK_FUNCTION)(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only); -#endif +typedef bool (*FT_CHECK_INTERRUPT_CALLBACK)(void *extra); + +struct ft_cursor; +int toku_ft_search(FT_HANDLE ft_handle, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, struct ft_cursor *ftcursor, bool can_bulk_fetch); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft_layout_version.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft_layout_version.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft_layout_version.h 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft_layout_version.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,134 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_LAYOUT_VERSION_H -#define FT_LAYOUT_VERSION_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -//Must be defined before other recursive headers could include logger.h -enum ft_layout_version_e { - FT_LAYOUT_VERSION_5 = 5, - FT_LAYOUT_VERSION_6 = 6, // Diff from 5 to 6: Add leafentry_estimate - FT_LAYOUT_VERSION_7 = 7, // Diff from 6 to 7: Add exact-bit to leafentry_estimate #818, add magic to header #22, add per-subdatase flags #333 - FT_LAYOUT_VERSION_8 = 8, // Diff from 7 to 8: Use murmur instead of crc32. 
We are going to make a simplification and stop supporting version 7 and before. Current As of Beta 1.0.6 - FT_LAYOUT_VERSION_9 = 9, // Diff from 8 to 9: Variable-sized blocks and compression. - FT_LAYOUT_VERSION_10 = 10, // Diff from 9 to 10: Variable number of compressed sub-blocks per block, disk byte order == intel byte order, Subtree estimates instead of just leafentry estimates, translation table, dictionary descriptors, checksum in header, subdb support removed from ft layer - FT_LAYOUT_VERSION_11 = 11, // Diff from 10 to 11: Nested transaction leafentries (completely redesigned). FT_CMDs on disk now support XIDS (multiple txnids) instead of exactly one. - FT_LAYOUT_VERSION_12 = 12, // Diff from 11 to 12: Added FT_CMD 'FT_INSERT_NO_OVERWRITE', compressed block format, num old blocks - FT_LAYOUT_VERSION_13 = 13, // Diff from 12 to 13: Fixed loader pivot bug, added build_id to every node, timestamps to ft - FT_LAYOUT_VERSION_14 = 14, // Diff from 13 to 14: Added MVCC; deprecated TOKU_DB_VALCMP_BUILTIN(_13); Remove fingerprints; Support QUICKLZ; add end-to-end checksum on uncompressed data. - FT_LAYOUT_VERSION_15 = 15, // Diff from 14 to 15: basement nodes, last verification time - FT_LAYOUT_VERSION_16 = 16, // Dr. No: No subtree estimates, partition layout information represented more transparently. - // ALERT ALERT ALERT: version 16 never released to customers, internal and beta use only - FT_LAYOUT_VERSION_17 = 17, // Dr. No: Add STAT64INFO_S to ft header - FT_LAYOUT_VERSION_18 = 18, // Dr. 
No: Add HOT info to ft header - FT_LAYOUT_VERSION_19 = 19, // Doofenshmirtz: Add compression method, highest_unused_msn_for_upgrade - FT_LAYOUT_VERSION_20 = 20, // Deadshot: Add compression method to log_fcreate, - // mgr_last_xid after begin checkpoint, - // last_xid to shutdown - FT_LAYOUT_VERSION_21 = 21, // Ming: Add max_msn_in_ft to header, - // Removed log suppression logentry - FT_LAYOUT_VERSION_22 = 22, // Ming: Add oldest known referenced xid to each ftnode, for better garbage collection - FT_LAYOUT_VERSION_23 = 23, // Ming: Fix upgrade path #5902 - FT_LAYOUT_VERSION_24 = 24, // Riddler: change logentries that log transactions to store TXNID_PAIRs instead of TXNIDs - FT_LAYOUT_VERSION_25 = 25, // SecretSquirrel: ROLLBACK_LOG_NODES (on disk and in memory) now just use blocknum (instead of blocknum + hash) to point to other log nodes. same for xstillopen log entry - FT_LAYOUT_VERSION_26 = 26, // Hojo: basements store key/vals separately on disk for fixed klpair length BNs - FT_LAYOUT_VERSION_27 = 27, // serialize message trees with nonleaf buffers to avoid key, msn sort on deserialize - FT_NEXT_VERSION, // the version after the current version - FT_LAYOUT_VERSION = FT_NEXT_VERSION-1, // A hack so I don't have to change this line. - FT_LAYOUT_MIN_SUPPORTED_VERSION = FT_LAYOUT_VERSION_13, // Minimum version supported - - // Define these symbolically so the knowledge of exactly which layout version got rid of fingerprints isn't spread all over the code. 
- FT_LAST_LAYOUT_VERSION_WITH_FINGERPRINT = FT_LAYOUT_VERSION_13, - FT_FIRST_LAYOUT_VERSION_WITH_END_TO_END_CHECKSUM = FT_LAYOUT_VERSION_14, - FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES = FT_LAYOUT_VERSION_15, -}; - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ftloader-callback.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ftloader-callback.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ftloader-callback.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ftloader-callback.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,199 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include -#include -#include -#include -#include - -#include "ftloader-internal.h" -#include "ybt.h" - -static void error_callback_lock(ft_loader_error_callback loader_error) { - toku_mutex_lock(&loader_error->mutex); -} - -static void error_callback_unlock(ft_loader_error_callback loader_error) { - toku_mutex_unlock(&loader_error->mutex); -} - -void ft_loader_init_error_callback(ft_loader_error_callback loader_error) { - memset(loader_error, 0, sizeof *loader_error); - toku_init_dbt(&loader_error->key); - toku_init_dbt(&loader_error->val); - toku_mutex_init(&loader_error->mutex, NULL); -} - -void ft_loader_destroy_error_callback(ft_loader_error_callback loader_error) { - toku_mutex_destroy(&loader_error->mutex); - toku_destroy_dbt(&loader_error->key); - toku_destroy_dbt(&loader_error->val); - memset(loader_error, 0, sizeof *loader_error); -} - -int ft_loader_get_error(ft_loader_error_callback loader_error) { - error_callback_lock(loader_error); - int r = loader_error->error; - error_callback_unlock(loader_error); - return r; -} - -void ft_loader_set_error_function(ft_loader_error_callback loader_error, ft_loader_error_func error_function, void *error_extra) { - loader_error->error_callback = error_function; - loader_error->extra = error_extra; -} - -int ft_loader_set_error(ft_loader_error_callback loader_error, int error, DB *db, int which_db, DBT *key, DBT *val) { - int r; - error_callback_lock(loader_error); - if (loader_error->error) { // there can be only one - r = EEXIST; - } else { - r = 0; - loader_error->error = error; // set the error - loader_error->db = db; - loader_error->which_db = which_db; - if (key != nullptr) { - toku_clone_dbt(&loader_error->key, *key); - } - if (val != nullptr) { - toku_clone_dbt(&loader_error->val, *val); - } - } - error_callback_unlock(loader_error); - return r; -} - -int ft_loader_call_error_function(ft_loader_error_callback loader_error) { - int r; - error_callback_lock(loader_error); - r = loader_error->error; - if (r && 
loader_error->error_callback && !loader_error->did_callback) { - loader_error->did_callback = true; - loader_error->error_callback(loader_error->db, - loader_error->which_db, - loader_error->error, - &loader_error->key, - &loader_error->val, - loader_error->extra); - } - error_callback_unlock(loader_error); - return r; -} - -int ft_loader_set_error_and_callback(ft_loader_error_callback loader_error, int error, DB *db, int which_db, DBT *key, DBT *val) { - int r = ft_loader_set_error(loader_error, error, db, which_db, key, val); - if (r == 0) - r = ft_loader_call_error_function(loader_error); - return r; -} - -int ft_loader_init_poll_callback(ft_loader_poll_callback p) { - memset(p, 0, sizeof *p); - return 0; -} - -void ft_loader_destroy_poll_callback(ft_loader_poll_callback p) { - memset(p, 0, sizeof *p); -} - -void ft_loader_set_poll_function(ft_loader_poll_callback p, ft_loader_poll_func poll_function, void *poll_extra) { - p->poll_function = poll_function; - p->poll_extra = poll_extra; -} - -int ft_loader_call_poll_function(ft_loader_poll_callback p, float progress) { - int r = 0; - if (p->poll_function) - r = p->poll_function(p->poll_extra, progress); - return r; -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ftloader.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ftloader.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ftloader.cc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ftloader.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,3345 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING 
- CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. 
- - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include - -#include - -#include -#include -#include -#include -#include -#include - -#include - -#include "ftloader-internal.h" -#include "ft-internal.h" -#include "sub_block.h" -#include "sub_block_map.h" -#include "pqueue.h" -#include "dbufio.h" -#include "leafentry.h" -#include "log-internal.h" -#include "ft.h" - -static size_t (*os_fwrite_fun)(const void *,size_t,size_t,FILE*)=NULL; -void ft_loader_set_os_fwrite (size_t (*fwrite_fun)(const void*,size_t,size_t,FILE*)) { - os_fwrite_fun=fwrite_fun; -} - -static size_t do_fwrite (const void *ptr, size_t size, size_t nmemb, FILE *stream) { - if (os_fwrite_fun) { - return os_fwrite_fun(ptr, size, nmemb, stream); - } else { - return fwrite(ptr, size, nmemb, stream); - } -} - - -// 1024 is the right size_factor for production. -// Different values for these sizes may be used for testing. -static uint32_t size_factor = 1024; -static uint32_t default_loader_nodesize = FT_DEFAULT_NODE_SIZE; -static uint32_t default_loader_basementnodesize = FT_DEFAULT_BASEMENT_NODE_SIZE; - -void -toku_ft_loader_set_size_factor(uint32_t factor) { -// For test purposes only - size_factor = factor; - default_loader_nodesize = (size_factor==1) ? (1<<15) : FT_DEFAULT_NODE_SIZE; -} - -uint64_t -toku_ft_loader_get_rowset_budget_for_testing (void) -// For test purposes only. In production, the rowset size is determined by negotation with the cachetable for some memory. (See #2613). 
-{ - return 16ULL*size_factor*1024ULL; -} - -void ft_loader_lock_init(FTLOADER bl) { - invariant(!bl->mutex_init); - toku_mutex_init(&bl->mutex, NULL); - bl->mutex_init = true; -} - -void ft_loader_lock_destroy(FTLOADER bl) { - if (bl->mutex_init) { - toku_mutex_destroy(&bl->mutex); - bl->mutex_init = false; - } -} - -static void ft_loader_lock(FTLOADER bl) { - invariant(bl->mutex_init); - toku_mutex_lock(&bl->mutex); -} - -static void ft_loader_unlock(FTLOADER bl) { - invariant(bl->mutex_init); - toku_mutex_unlock(&bl->mutex); -} - -static int add_big_buffer(struct file_info *file) { - int result = 0; - bool newbuffer = false; - if (file->buffer == NULL) { - file->buffer = toku_malloc(file->buffer_size); - if (file->buffer == NULL) - result = get_error_errno(); - else - newbuffer = true; - } - if (result == 0) { - int r = setvbuf(file->file, (char *) file->buffer, _IOFBF, file->buffer_size); - if (r != 0) { - result = get_error_errno(); - if (newbuffer) { - toku_free(file->buffer); - file->buffer = NULL; - } - } - } - return result; -} - -static void cleanup_big_buffer(struct file_info *file) { - if (file->buffer) { - toku_free(file->buffer); - file->buffer = NULL; - } -} - -int ft_loader_init_file_infos (struct file_infos *fi) { - int result = 0; - toku_mutex_init(&fi->lock, NULL); - fi->n_files = 0; - fi->n_files_limit = 1; - fi->n_files_open = 0; - fi->n_files_extant = 0; - MALLOC_N(fi->n_files_limit, fi->file_infos); - if (fi->file_infos == NULL) - result = get_error_errno(); - return result; -} - -void ft_loader_fi_destroy (struct file_infos *fi, bool is_error) -// Effect: Free the resources in the fi. -// If is_error then we close and unlink all the temp files. -// If !is_error then requires that all the temp files have been closed and destroyed -// No error codes are returned. If anything goes wrong with closing and unlinking then it's only in an is_error case, so we don't care. 
-{ - if (fi->file_infos == NULL) { - // ft_loader_init_file_infos guarantees this isn't null, so if it is, we know it hasn't been inited yet and we don't need to destroy it. - return; - } - toku_mutex_destroy(&fi->lock); - if (!is_error) { - invariant(fi->n_files_open==0); - invariant(fi->n_files_extant==0); - } - for (int i=0; in_files; i++) { - if (fi->file_infos[i].is_open) { - invariant(is_error); - toku_os_fclose(fi->file_infos[i].file); // don't check for errors, since we are in an error case. - } - if (fi->file_infos[i].is_extant) { - invariant(is_error); - unlink(fi->file_infos[i].fname); - toku_free(fi->file_infos[i].fname); - } - cleanup_big_buffer(&fi->file_infos[i]); - } - toku_free(fi->file_infos); - fi->n_files=0; - fi->n_files_limit=0; - fi->file_infos = NULL; -} - -static int open_file_add (struct file_infos *fi, - FILE *file, - char *fname, - /* out */ FIDX *idx) -{ - int result = 0; - toku_mutex_lock(&fi->lock); - if (fi->n_files >= fi->n_files_limit) { - fi->n_files_limit *=2; - XREALLOC_N(fi->n_files_limit, fi->file_infos); - } - invariant(fi->n_files < fi->n_files_limit); - fi->file_infos[fi->n_files].is_open = true; - fi->file_infos[fi->n_files].is_extant = true; - fi->file_infos[fi->n_files].fname = fname; - fi->file_infos[fi->n_files].file = file; - fi->file_infos[fi->n_files].n_rows = 0; - fi->file_infos[fi->n_files].buffer_size = FILE_BUFFER_SIZE; - fi->file_infos[fi->n_files].buffer = NULL; - result = add_big_buffer(&fi->file_infos[fi->n_files]); - if (result == 0) { - idx->idx = fi->n_files; - fi->n_files++; - fi->n_files_extant++; - fi->n_files_open++; - } - toku_mutex_unlock(&fi->lock); - return result; -} - -int ft_loader_fi_reopen (struct file_infos *fi, FIDX idx, const char *mode) { - int result = 0; - toku_mutex_lock(&fi->lock); - int i = idx.idx; - invariant(i>=0 && in_files); - invariant(!fi->file_infos[i].is_open); - invariant(fi->file_infos[i].is_extant); - fi->file_infos[i].file = toku_os_fopen(fi->file_infos[i].fname, mode); 
- if (fi->file_infos[i].file == NULL) { - result = get_error_errno(); - } else { - fi->file_infos[i].is_open = true; - // No longer need the big buffer for reopened files. Don't allocate the space, we need it elsewhere. - //add_big_buffer(&fi->file_infos[i]); - fi->n_files_open++; - } - toku_mutex_unlock(&fi->lock); - return result; -} - -int ft_loader_fi_close (struct file_infos *fi, FIDX idx, bool require_open) -{ - int result = 0; - toku_mutex_lock(&fi->lock); - invariant(idx.idx >=0 && idx.idx < fi->n_files); - if (fi->file_infos[idx.idx].is_open) { - invariant(fi->n_files_open>0); // loader-cleanup-test failure - fi->n_files_open--; - fi->file_infos[idx.idx].is_open = false; - int r = toku_os_fclose(fi->file_infos[idx.idx].file); - if (r) - result = get_error_errno(); - cleanup_big_buffer(&fi->file_infos[idx.idx]); - } else if (require_open) - result = EINVAL; - toku_mutex_unlock(&fi->lock); - return result; -} - -int ft_loader_fi_unlink (struct file_infos *fi, FIDX idx) { - int result = 0; - toku_mutex_lock(&fi->lock); - int id = idx.idx; - invariant(id >=0 && id < fi->n_files); - if (fi->file_infos[id].is_extant) { // must still exist - invariant(fi->n_files_extant>0); - fi->n_files_extant--; - invariant(!fi->file_infos[id].is_open); // must be closed before we unlink - fi->file_infos[id].is_extant = false; - int r = unlink(fi->file_infos[id].fname); - if (r != 0) - result = get_error_errno(); - toku_free(fi->file_infos[id].fname); - fi->file_infos[id].fname = NULL; - } else - result = EINVAL; - toku_mutex_unlock(&fi->lock); - return result; -} - -int -ft_loader_fi_close_all(struct file_infos *fi) { - int rval = 0; - for (int i = 0; i < fi->n_files; i++) { - int r; - FIDX idx = { i }; - r = ft_loader_fi_close(fi, idx, false); // ignore files that are already closed - if (rval == 0 && r) - rval = r; // capture first error - } - return rval; -} - -int ft_loader_open_temp_file (FTLOADER bl, FIDX *file_idx) -/* Effect: Open a temporary file in read-write mode. 
Save enough information to close and delete the file later. - * Return value: 0 on success, an error number otherwise. - * On error, *file_idx and *fnamep will be unmodified. - * The open file will be saved in bl->file_infos so that even if errors happen we can free them all. - */ -{ - int result = 0; - if (result) // debug hack - return result; - FILE *f = NULL; - int fd = -1; - char *fname = toku_strdup(bl->temp_file_template); - if (fname == NULL) - result = get_error_errno(); - else { - fd = mkstemp(fname); - if (fd < 0) { - result = get_error_errno(); - } else { - f = toku_os_fdopen(fd, "r+"); - if (f == NULL) - result = get_error_errno(); - else - result = open_file_add(&bl->file_infos, f, fname, file_idx); - } - } - if (result != 0) { - if (fd >= 0) { - toku_os_close(fd); - unlink(fname); - } - if (f != NULL) - toku_os_fclose(f); // don't check for error because we're already in an error case - if (fname != NULL) - toku_free(fname); - } - return result; -} - -void toku_ft_loader_internal_destroy (FTLOADER bl, bool is_error) { - ft_loader_lock_destroy(bl); - - // These frees rely on the fact that if you free a NULL pointer then nothing bad happens. 
- toku_free(bl->dbs); - toku_free(bl->descriptors); - toku_free(bl->root_xids_that_created); - if (bl->new_fnames_in_env) { - for (int i = 0; i < bl->N; i++) - toku_free((char*)bl->new_fnames_in_env[i]); - toku_free(bl->new_fnames_in_env); - } - toku_free(bl->extracted_datasizes); - toku_free(bl->bt_compare_funs); - toku_free((char*)bl->temp_file_template); - ft_loader_fi_destroy(&bl->file_infos, is_error); - - for (int i = 0; i < bl->N; i++) - destroy_rowset(&bl->rows[i]); - toku_free(bl->rows); - - for (int i = 0; i < bl->N; i++) - destroy_merge_fileset(&bl->fs[i]); - toku_free(bl->fs); - - if (bl->last_key) { - for (int i=0; i < bl->N; i++) { - toku_free(bl->last_key[i].data); - } - toku_free(bl->last_key); - bl->last_key = NULL; - } - - destroy_rowset(&bl->primary_rowset); - if (bl->primary_rowset_queue) { - queue_destroy(bl->primary_rowset_queue); - bl->primary_rowset_queue = nullptr; - } - - for (int i=0; iN; i++) { - if ( bl->fractal_queues ) { - invariant(bl->fractal_queues[i]==NULL); - } - } - toku_free(bl->fractal_threads); - toku_free(bl->fractal_queues); - toku_free(bl->fractal_threads_live); - - if (bl->did_reserve_memory) { - invariant(bl->cachetable); - toku_cachetable_release_reserved_memory(bl->cachetable, bl->reserved_memory); - } - - ft_loader_destroy_error_callback(&bl->error_callback); - ft_loader_destroy_poll_callback(&bl->poll_callback); - - //printf("Progress=%d/%d\n", bl->progress, PROGRESS_MAX); - - toku_free(bl); -} - -static void *extractor_thread (void*); - -#define MAX(a,b) (((a)<(b)) ? (b) : (a)) - -static uint64_t memory_per_rowset_during_extract (FTLOADER bl) -// Return how much memory can be allocated for each rowset. -{ - if (size_factor==1) { - return 16*1024; - } else { - // There is a primary rowset being maintained by the foreground thread. - // There could be two more in the queue. - // There is one rowset for each index (bl->N) being filled in. 
- // Later we may have sort_and_write operations spawning in parallel, and will need to account for that. - int n_copies = (1 // primary rowset - +EXTRACTOR_QUEUE_DEPTH // the number of primaries in the queue - +bl->N // the N rowsets being constructed by the extractor thread. - +bl->N // the N sort buffers - +1 // Give the extractor thread one more so that it can have temporary space for sorting. This is overkill. - ); - int64_t extra_reserved_memory = bl->N * FILE_BUFFER_SIZE; // for each index we are writing to a file at any given time. - int64_t tentative_rowset_size = ((int64_t)(bl->reserved_memory - extra_reserved_memory))/(n_copies); - return MAX(tentative_rowset_size, (int64_t)MIN_ROWSET_MEMORY); - } -} - -static unsigned ft_loader_get_fractal_workers_count(FTLOADER bl) { - unsigned w = 0; - while (1) { - ft_loader_lock(bl); - w = bl->fractal_workers; - ft_loader_unlock(bl); - if (w != 0) - break; - toku_pthread_yield(); // maybe use a cond var instead - } - return w; -} - -static void ft_loader_set_fractal_workers_count(FTLOADER bl) { - ft_loader_lock(bl); - if (bl->fractal_workers == 0) - bl->fractal_workers = 1; - ft_loader_unlock(bl); -} - -// To compute a merge, we have a certain amount of memory to work with. -// We perform only one fanin at a time. -// If the fanout is F then we are using -// F merges. Each merge uses -// DBUFIO_DEPTH buffers for double buffering. Each buffer is of size at least MERGE_BUF_SIZE -// so the memory is -// F*MERGE_BUF_SIZE*DBUFIO_DEPTH storage. -// We use some additional space to buffer the outputs. -// That's FILE_BUFFER_SIZE for writing to a merge file if we are writing to a mergefile. -// And we have FRACTAL_WRITER_ROWSETS*MERGE_BUF_SIZE per queue -// And if we are doing a fractal, each worker could have have a fractal tree that it's working on. 
-// -// DBUFIO_DEPTH*F*MERGE_BUF_SIZE + FRACTAL_WRITER_ROWSETS*MERGE_BUF_SIZE + WORKERS*NODESIZE*2 <= RESERVED_MEMORY - -static int64_t memory_avail_during_merge(FTLOADER bl, bool is_fractal_node) { - // avail memory = reserved memory - WORKERS*NODESIZE*2 for the last merge stage only - int64_t avail_memory = bl->reserved_memory; - if (is_fractal_node) { - // reserve space for the fractal writer thread buffers - avail_memory -= (int64_t)ft_loader_get_fractal_workers_count(bl) * (int64_t)default_loader_nodesize * 2; // compressed and uncompressed buffers - } - return avail_memory; -} - -static int merge_fanin (FTLOADER bl, bool is_fractal_node) { - // return number of temp files to read in this pass - int64_t memory_avail = memory_avail_during_merge(bl, is_fractal_node); - int64_t nbuffers = memory_avail / (int64_t)TARGET_MERGE_BUF_SIZE; - if (is_fractal_node) - nbuffers -= FRACTAL_WRITER_ROWSETS; - return MAX(nbuffers / (int64_t)DBUFIO_DEPTH, (int)MIN_MERGE_FANIN); -} - -static uint64_t memory_per_rowset_during_merge (FTLOADER bl, int merge_factor, bool is_fractal_node // if it is being sent to a q - ) { - int64_t memory_avail = memory_avail_during_merge(bl, is_fractal_node); - int64_t nbuffers = DBUFIO_DEPTH * merge_factor; - if (is_fractal_node) - nbuffers += FRACTAL_WRITER_ROWSETS; - return MAX(memory_avail / nbuffers, (int64_t)MIN_MERGE_BUF_SIZE); -} - -int toku_ft_loader_internal_init (/* out */ FTLOADER *blp, - CACHETABLE cachetable, - generate_row_for_put_func g, - DB *src_db, - int N, FT_HANDLE fts[/*N*/], DB* dbs[/*N*/], - const char *new_fnames_in_env[/*N*/], - ft_compare_func bt_compare_functions[/*N*/], - const char *temp_file_template, - LSN load_lsn, - TOKUTXN txn, - bool reserve_memory, - uint64_t reserve_memory_size, - bool compress_intermediates, - bool allow_puts) -// Effect: Allocate and initialize a FTLOADER, but do not create the extractor thread. 
-{ - FTLOADER CALLOC(bl); // initialized to all zeros (hence CALLOC) - if (!bl) return get_error_errno(); - - bl->generate_row_for_put = g; - bl->cachetable = cachetable; - if (reserve_memory && bl->cachetable) { - bl->did_reserve_memory = true; - bl->reserved_memory = toku_cachetable_reserve_memory(bl->cachetable, 2.0/3.0, reserve_memory_size); // allocate 2/3 of the unreserved part (which is 3/4 of the memory to start with). - } - else { - bl->did_reserve_memory = false; - bl->reserved_memory = 512*1024*1024; // if no cache table use 512MB. - } - bl->compress_intermediates = compress_intermediates; - bl->allow_puts = allow_puts; - bl->src_db = src_db; - bl->N = N; - bl->load_lsn = load_lsn; - if (txn) { - bl->load_root_xid = txn->txnid.parent_id64; - } - else { - bl->load_root_xid = TXNID_NONE; - } - - ft_loader_init_error_callback(&bl->error_callback); - ft_loader_init_poll_callback(&bl->poll_callback); - -#define MY_CALLOC_N(n,v) CALLOC_N(n,v); if (!v) { int r = get_error_errno(); toku_ft_loader_internal_destroy(bl, true); return r; } -#define SET_TO_MY_STRDUP(lval, s) do { char *v = toku_strdup(s); if (!v) { int r = get_error_errno(); toku_ft_loader_internal_destroy(bl, true); return r; } lval = v; } while (0) - - MY_CALLOC_N(N, bl->root_xids_that_created); - for (int i=0; iroot_xids_that_created[i]=fts[i]->ft->h->root_xid_that_created; - MY_CALLOC_N(N, bl->dbs); - for (int i=0; idbs[i]=dbs[i]; - MY_CALLOC_N(N, bl->descriptors); - for (int i=0; idescriptors[i]=&fts[i]->ft->descriptor; - MY_CALLOC_N(N, bl->new_fnames_in_env); - for (int i=0; inew_fnames_in_env[i], new_fnames_in_env[i]); - MY_CALLOC_N(N, bl->extracted_datasizes); // the calloc_n zeroed everything, which is what we want - MY_CALLOC_N(N, bl->bt_compare_funs); - for (int i=0; ibt_compare_funs[i] = bt_compare_functions[i]; - - MY_CALLOC_N(N, bl->fractal_queues); - for (int i=0; ifractal_queues[i]=NULL; - MY_CALLOC_N(N, bl->fractal_threads); - MY_CALLOC_N(N, bl->fractal_threads_live); - for (int i=0; 
ifractal_threads_live[i] = false; - - { - int r = ft_loader_init_file_infos(&bl->file_infos); - if (r!=0) { toku_ft_loader_internal_destroy(bl, true); return r; } - } - - SET_TO_MY_STRDUP(bl->temp_file_template, temp_file_template); - - bl->n_rows = 0; - bl->progress = 0; - bl->progress_callback_result = 0; - - MY_CALLOC_N(N, bl->rows); - MY_CALLOC_N(N, bl->fs); - MY_CALLOC_N(N, bl->last_key); - for(int i=0;irows[i], memory_per_rowset_during_extract(bl)); - if (r!=0) { toku_ft_loader_internal_destroy(bl, true); return r; } - } - init_merge_fileset(&bl->fs[i]); - bl->last_key[i].flags = DB_DBT_REALLOC; // don't really need this, but it's nice to maintain it. We use ulen to keep track of the realloced space. - } - - { - int r = init_rowset(&bl->primary_rowset, memory_per_rowset_during_extract(bl)); - if (r!=0) { toku_ft_loader_internal_destroy(bl, true); return r; } - } - { int r = queue_create(&bl->primary_rowset_queue, EXTRACTOR_QUEUE_DEPTH); - if (r!=0) { toku_ft_loader_internal_destroy(bl, true); return r; } - } - { - ft_loader_lock_init(bl); - } - - *blp = bl; - - return 0; -} - -int toku_ft_loader_open (/* out */ FTLOADER *blp, - CACHETABLE cachetable, - generate_row_for_put_func g, - DB *src_db, - int N, FT_HANDLE fts[/*N*/], DB* dbs[/*N*/], - const char *new_fnames_in_env[/*N*/], - ft_compare_func bt_compare_functions[/*N*/], - const char *temp_file_template, - LSN load_lsn, - TOKUTXN txn, - bool reserve_memory, - uint64_t reserve_memory_size, - bool compress_intermediates, - bool allow_puts) { -// Effect: called by DB_ENV->create_loader to create a brt loader. -// Arguments: -// blp Return the brt loader here. -// g The function for generating a row -// src_db The source database. Needed by g. May be NULL if that's ok with g. -// N The number of dbs to create. -// dbs An array of open databases. Used by g. The data will be put in these database. -// new_fnames The file names (these strings are owned by the caller: we make a copy for our own purposes). 
-// temp_file_template A template suitable for mkstemp() -// reserve_memory Cause the loader to reserve memory for its use from the cache table. -// compress_intermediates Cause the loader to compress intermediate loader files. -// allow_puts Prepare the loader for rows to insert. When puts are disabled, the loader does not run the -// extractor or the fractal tree writer threads. -// Return value: 0 on success, an error number otherwise. - int result = 0; - { - int r = toku_ft_loader_internal_init(blp, cachetable, g, src_db, - N, fts, dbs, - new_fnames_in_env, - bt_compare_functions, - temp_file_template, - load_lsn, - txn, - reserve_memory, - reserve_memory_size, - compress_intermediates, - allow_puts); - if (r!=0) result = r; - } - if (result==0 && allow_puts) { - FTLOADER bl = *blp; - int r = toku_pthread_create(&bl->extractor_thread, NULL, extractor_thread, (void*)bl); - if (r==0) { - bl->extractor_live = true; - } else { - result = r; - (void) toku_ft_loader_internal_destroy(bl, true); - } - } - return result; -} - -static void ft_loader_set_panic(FTLOADER bl, int error, bool callback, int which_db, DBT *key, DBT *val) { - DB *db = nullptr; - if (bl && bl->dbs && which_db >= 0 && which_db < bl->N) { - db = bl->dbs[which_db]; - } - int r = ft_loader_set_error(&bl->error_callback, error, db, which_db, key, val); - if (r == 0 && callback) - ft_loader_call_error_function(&bl->error_callback); -} - -// One of the tests uses this. 
-FILE *toku_bl_fidx2file (FTLOADER bl, FIDX i) { - toku_mutex_lock(&bl->file_infos.lock); - invariant(i.idx >=0 && i.idx < bl->file_infos.n_files); - invariant(bl->file_infos.file_infos[i.idx].is_open); - FILE *result=bl->file_infos.file_infos[i.idx].file; - toku_mutex_unlock(&bl->file_infos.lock); - return result; -} - -static int bl_finish_compressed_write(FILE *stream, struct wbuf *wb) { - int r; - char *compressed_buf = NULL; - const size_t data_size = wb->ndone; - invariant(data_size > 0); - invariant(data_size <= MAX_UNCOMPRESSED_BUF); - - int n_sub_blocks = 0; - int sub_block_size = 0; - - r = choose_sub_block_size(wb->ndone, max_sub_blocks, &sub_block_size, &n_sub_blocks); - invariant(r==0); - invariant(0 < n_sub_blocks && n_sub_blocks <= max_sub_blocks); - invariant(sub_block_size > 0); - - struct sub_block sub_block[max_sub_blocks]; - // set the initial sub block size for all of the sub blocks - for (int i = 0; i < n_sub_blocks; i++) { - sub_block_init(&sub_block[i]); - } - set_all_sub_block_sizes(data_size, sub_block_size, n_sub_blocks, sub_block); - - size_t compressed_len = get_sum_compressed_size_bound(n_sub_blocks, sub_block, TOKU_DEFAULT_COMPRESSION_METHOD); - const size_t sub_block_header_len = sub_block_header_size(n_sub_blocks); - const size_t other_overhead = sizeof(uint32_t); //total_size - const size_t header_len = sub_block_header_len + other_overhead; - MALLOC_N(header_len + compressed_len, compressed_buf); - if (compressed_buf == nullptr) { - return ENOMEM; - } - - // compress all of the sub blocks - char *uncompressed_ptr = (char*)wb->buf; - char *compressed_ptr = compressed_buf + header_len; - compressed_len = compress_all_sub_blocks(n_sub_blocks, sub_block, uncompressed_ptr, compressed_ptr, - get_num_cores(), get_ft_pool(), TOKU_DEFAULT_COMPRESSION_METHOD); - - //total_size does NOT include itself - uint32_t total_size = compressed_len + sub_block_header_len; - // serialize the sub block header - uint32_t *ptr = (uint32_t 
*)(compressed_buf); - *ptr++ = toku_htod32(total_size); - *ptr++ = toku_htod32(n_sub_blocks); - for (int i=0; indone = 0; - - size_t size_to_write = total_size + 4; // Includes writing total_size - - { - size_t written = do_fwrite(compressed_buf, 1, size_to_write, stream); - if (written!=size_to_write) { - if (os_fwrite_fun) // if using hook to induce artificial errors (for testing) ... - r = get_maybe_error_errno(); // ... then there is no error in the stream, but there is one in errno - else - r = ferror(stream); - invariant(r!=0); - goto exit; - } - } - r = 0; -exit: - if (compressed_buf) { - toku_free(compressed_buf); - } - return r; -} - -static int bl_compressed_write(void *ptr, size_t nbytes, FILE *stream, struct wbuf *wb) { - invariant(wb->size <= MAX_UNCOMPRESSED_BUF); - size_t bytes_left = nbytes; - char *buf = (char*)ptr; - - while (bytes_left > 0) { - size_t bytes_to_copy = bytes_left; - if (wb->ndone + bytes_to_copy > wb->size) { - bytes_to_copy = wb->size - wb->ndone; - } - wbuf_nocrc_literal_bytes(wb, buf, bytes_to_copy); - if (wb->ndone == wb->size) { - //Compress, write to disk, and empty out wb - int r = bl_finish_compressed_write(stream, wb); - if (r != 0) { - errno = r; - return -1; - } - wb->ndone = 0; - } - bytes_left -= bytes_to_copy; - buf += bytes_to_copy; - } - return 0; -} - -static int bl_fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream, struct wbuf *wb, FTLOADER bl) -/* Effect: this is a wrapper for fwrite that returns 0 on success, otherwise returns an error number. - * Arguments: - * ptr the data to be writen. - * size the amount of data to be written. - * nmemb the number of units of size to be written. - * stream write the data here. - * wb where to write uncompressed data (if we're compressing) or ignore if NULL - * bl passed so we can panic the ft_loader if something goes wrong (recording the error number). - * Return value: 0 on success, an error number otherwise. 
- */ -{ - if (!bl->compress_intermediates || !wb) { - size_t r = do_fwrite(ptr, size, nmemb, stream); - if (r!=nmemb) { - int e; - if (os_fwrite_fun) // if using hook to induce artificial errors (for testing) ... - e = get_maybe_error_errno(); // ... then there is no error in the stream, but there is one in errno - else - e = ferror(stream); - invariant(e!=0); - return e; - } - } else { - size_t num_bytes = size * nmemb; - int r = bl_compressed_write(ptr, num_bytes, stream, wb); - if (r != 0) { - return r; - } - } - return 0; -} - -static int bl_fread (void *ptr, size_t size, size_t nmemb, FILE *stream) -/* Effect: this is a wrapper for fread that returns 0 on success, otherwise returns an error number. - * Arguments: - * ptr read data into here. - * size size of data element to be read. - * nmemb number of data elements to be read. - * stream where to read the data from. - * Return value: 0 on success, an error number otherwise. - */ -{ - size_t r = fread(ptr, size, nmemb, stream); - if (r==0) { - if (feof(stream)) return EOF; - else { - do_error: ; - int e = ferror(stream); - // r == 0 && !feof && e == 0, how does this happen? 
invariant(e!=0); - return e; - } - } else if (rsize; - if ((r=bl_fwrite(&dlen, sizeof(dlen), 1, datafile, wb, bl))) return r; - if ((r=bl_fwrite(dbt->data, 1, dlen, datafile, wb, bl))) return r; - if (dataoff) - *dataoff += dlen + sizeof(dlen); - return 0; -} - -static int bl_read_dbt (/*in*/DBT *dbt, FILE *stream) -{ - int len; - { - int r; - if ((r = bl_fread(&len, sizeof(len), 1, stream))) return r; - invariant(len>=0); - } - if ((int)dbt->ulenulen=len; dbt->data=toku_xrealloc(dbt->data, len); } - { - int r; - if ((r = bl_fread(dbt->data, 1, len, stream))) return r; - } - dbt->size = len; - return 0; -} - -static int bl_read_dbt_from_dbufio (/*in*/DBT *dbt, DBUFIO_FILESET bfs, int filenum) -{ - int result = 0; - uint32_t len; - { - size_t n_read; - int r = dbufio_fileset_read(bfs, filenum, &len, sizeof(len), &n_read); - if (r!=0) { - result = r; - } else if (n_readulendata, len); - if (data==NULL) { - result = get_error_errno(); - } else { - dbt->ulen=len; - dbt->data=data; - } - } - } - if (result==0) { - size_t n_read; - int r = dbufio_fileset_read(bfs, filenum, dbt->data, len, &n_read); - if (r!=0) { - result = r; - } else if (n_readsize = len; - } - } - return result; -} - - -int loader_write_row(DBT *key, DBT *val, FIDX data, FILE *dataf, uint64_t *dataoff, struct wbuf *wb, FTLOADER bl) -/* Effect: Given a key and a val (both DBTs), write them to a file. Increment *dataoff so that it's up to date. - * Arguments: - * key, val write these. - * data the file to write them to - * dataoff a pointer to a counter that keeps track of the amount of data written so far. - * wb a pointer (possibly NULL) to buffer uncompressed output - * bl the ft_loader (passed so we can panic if needed). - * Return value: 0 on success, an error number otherwise. - */ -{ - //int klen = key->size; - //int vlen = val->size; - int r; - // we have a chance to handle the errors because when we close we can delete all the files. 
- if ((r=bl_write_dbt(key, dataf, dataoff, wb, bl))) return r; - if ((r=bl_write_dbt(val, dataf, dataoff, wb, bl))) return r; - toku_mutex_lock(&bl->file_infos.lock); - bl->file_infos.file_infos[data.idx].n_rows++; - toku_mutex_unlock(&bl->file_infos.lock); - return 0; -} - -int loader_read_row (FILE *f, DBT *key, DBT *val) -/* Effect: Read a key value pair from a file. The DBTs must have DB_DBT_REALLOC set. - * Arguments: - * f where to read it from. - * key, val read it into these. - * bl passed so we can panic if needed. - * Return value: 0 on success, an error number otherwise. - * Requires: The DBTs must have DB_DBT_REALLOC - */ -{ - { - int r = bl_read_dbt(key, f); - if (r!=0) return r; - } - { - int r = bl_read_dbt(val, f); - if (r!=0) return r; - } - return 0; -} - -static int loader_read_row_from_dbufio (DBUFIO_FILESET bfs, int filenum, DBT *key, DBT *val) -/* Effect: Read a key value pair from a file. The DBTs must have DB_DBT_REALLOC set. - * Arguments: - * f where to read it from. - * key, val read it into these. - * bl passed so we can panic if needed. - * Return value: 0 on success, an error number otherwise. - * Requires: The DBTs must have DB_DBT_REALLOC - */ -{ - { - int r = bl_read_dbt_from_dbufio(key, bfs, filenum); - if (r!=0) return r; - } - { - int r = bl_read_dbt_from_dbufio(val, bfs, filenum); - if (r!=0) return r; - } - return 0; -} - - -int init_rowset (struct rowset *rows, uint64_t memory_budget) -/* Effect: Initialize a collection of rows to be empty. */ -{ - int result = 0; - - rows->memory_budget = memory_budget; - - rows->rows = NULL; - rows->data = NULL; - - rows->n_rows = 0; - rows->n_rows_limit = 100; - MALLOC_N(rows->n_rows_limit, rows->rows); - if (rows->rows == NULL) - result = get_error_errno(); - rows->n_bytes = 0; - rows->n_bytes_limit = (size_factor==1) ? 
1024*size_factor*16 : memory_budget; - //printf("%s:%d n_bytes_limit=%ld (size_factor based limit=%d)\n", __FILE__, __LINE__, rows->n_bytes_limit, 1024*size_factor*16); - rows->data = (char *) toku_malloc(rows->n_bytes_limit); - if (rows->rows==NULL || rows->data==NULL) { - if (result == 0) - result = get_error_errno(); - toku_free(rows->rows); - toku_free(rows->data); - rows->rows = NULL; - rows->data = NULL; - } - return result; -} - -static void zero_rowset (struct rowset *rows) { - memset(rows, 0, sizeof(*rows)); -} - -void destroy_rowset (struct rowset *rows) { - if ( rows ) { - toku_free(rows->data); - toku_free(rows->rows); - zero_rowset(rows); - } -} - -static int row_wont_fit (struct rowset *rows, size_t size) -/* Effect: Return nonzero if adding a row of size SIZE would be too big (bigger than the buffer limit) */ -{ - // Account for the memory used by the data and also the row structures. - size_t memory_in_use = (rows->n_rows*sizeof(struct row) - + rows->n_bytes); - return (rows->memory_budget < memory_in_use + size); -} - -int add_row (struct rowset *rows, DBT *key, DBT *val) -/* Effect: add a row to a collection. 
*/ -{ - int result = 0; - if (rows->n_rows >= rows->n_rows_limit) { - struct row *old_rows = rows->rows; - size_t old_n_rows_limit = rows->n_rows_limit; - rows->n_rows_limit *= 2; - REALLOC_N(rows->n_rows_limit, rows->rows); - if (rows->rows == NULL) { - result = get_error_errno(); - rows->rows = old_rows; - rows->n_rows_limit = old_n_rows_limit; - return result; - } - } - size_t off = rows->n_bytes; - size_t next_off = off + key->size + val->size; - - struct row newrow; - memset(&newrow, 0, sizeof newrow); newrow.off = off; newrow.klen = key->size; newrow.vlen = val->size; - - rows->rows[rows->n_rows++] = newrow; - if (next_off > rows->n_bytes_limit) { - size_t old_n_bytes_limit = rows->n_bytes_limit; - while (next_off > rows->n_bytes_limit) { - rows->n_bytes_limit = rows->n_bytes_limit*2; - } - invariant(next_off <= rows->n_bytes_limit); - char *old_data = rows->data; - REALLOC_N(rows->n_bytes_limit, rows->data); - if (rows->data == NULL) { - result = get_error_errno(); - rows->data = old_data; - rows->n_bytes_limit = old_n_bytes_limit; - return result; - } - } - memcpy(rows->data+off, key->data, key->size); - memcpy(rows->data+off+key->size, val->data, val->size); - rows->n_bytes = next_off; - return result; -} - -static int process_primary_rows (FTLOADER bl, struct rowset *primary_rowset); - -static int finish_primary_rows_internal (FTLOADER bl) -// now we have been asked to finish up. -// Be sure to destroy the rowsets. -{ - int *MALLOC_N(bl->N, ra); - if (ra==NULL) return get_error_errno(); - - for (int i = 0; i < bl->N; i++) { - //printf("%s:%d extractor finishing index %d with %ld rows\n", __FILE__, __LINE__, i, rows->n_rows); - ra[i] = sort_and_write_rows(bl->rows[i], &(bl->fs[i]), bl, i, bl->dbs[i], bl->bt_compare_funs[i]); - zero_rowset(&bl->rows[i]); - } - - // accept any of the error codes (in this case, the last one). 
- int r = 0; - for (int i = 0; i < bl->N; i++) - if (ra[i] != 0) - r = ra[i]; - - toku_free(ra); - return r; -} - -static int finish_primary_rows (FTLOADER bl) { - return finish_primary_rows_internal (bl); -} - -static void* extractor_thread (void *blv) { - FTLOADER bl = (FTLOADER)blv; - int r = 0; - while (1) { - void *item; - { - int rq = queue_deq(bl->primary_rowset_queue, &item, NULL, NULL); - if (rq==EOF) break; - invariant(rq==0); // other errors are arbitrarily bad. - } - struct rowset *primary_rowset = (struct rowset *)item; - - //printf("%s:%d extractor got %ld rows\n", __FILE__, __LINE__, primary_rowset.n_rows); - - // Now we have some rows to output - { - r = process_primary_rows(bl, primary_rowset); - if (r) - ft_loader_set_panic(bl, r, false, 0, nullptr, nullptr); - } - } - - //printf("%s:%d extractor finishing\n", __FILE__, __LINE__); - if (r == 0) { - r = finish_primary_rows(bl); - if (r) - ft_loader_set_panic(bl, r, false, 0, nullptr, nullptr); - - } - return NULL; -} - -static void enqueue_for_extraction (FTLOADER bl) { - //printf("%s:%d enqueing %ld items\n", __FILE__, __LINE__, bl->primary_rowset.n_rows); - struct rowset *XMALLOC(enqueue_me); - *enqueue_me = bl->primary_rowset; - zero_rowset(&bl->primary_rowset); - int r = queue_enq(bl->primary_rowset_queue, (void*)enqueue_me, 1, NULL); - resource_assert_zero(r); -} - -static int loader_do_put(FTLOADER bl, - DBT *pkey, - DBT *pval) -{ - int result; - result = add_row(&bl->primary_rowset, pkey, pval); - if (result == 0 && row_wont_fit(&bl->primary_rowset, 0)) { - // queue the rows for further processing by the extractor thread. 
- //printf("%s:%d please extract %ld\n", __FILE__, __LINE__, bl->primary_rowset.n_rows); - enqueue_for_extraction(bl); - { - int r = init_rowset(&bl->primary_rowset, memory_per_rowset_during_extract(bl)); - // bl->primary_rowset will get destroyed by toku_ft_loader_abort - if (r != 0) - result = r; - } - } - return result; -} - -static int -finish_extractor (FTLOADER bl) { - //printf("%s:%d now finishing extraction\n", __FILE__, __LINE__); - - int rval; - - if (bl->primary_rowset.n_rows>0) { - enqueue_for_extraction(bl); - } else { - destroy_rowset(&bl->primary_rowset); - } - //printf("%s:%d please finish extraction\n", __FILE__, __LINE__); - { - int r = queue_eof(bl->primary_rowset_queue); - invariant(r==0); - } - //printf("%s:%d joining\n", __FILE__, __LINE__); - { - void *toku_pthread_retval; - int r = toku_pthread_join(bl->extractor_thread, &toku_pthread_retval); - resource_assert_zero(r); - invariant(toku_pthread_retval == NULL); - bl->extractor_live = false; - } - { - int r = queue_destroy(bl->primary_rowset_queue); - invariant(r==0); - bl->primary_rowset_queue = nullptr; - } - - rval = ft_loader_fi_close_all(&bl->file_infos); - - //printf("%s:%d joined\n", __FILE__, __LINE__); - return rval; -} - -static const DBT zero_dbt = {0,0,0,0}; - -static DBT make_dbt (void *data, uint32_t size) { - DBT result = zero_dbt; - result.data = data; - result.size = size; - return result; -} - -#define inc_error_count() error_count++ - -static TXNID leafentry_xid(FTLOADER bl, int which_db) { - TXNID le_xid = TXNID_NONE; - if (bl->root_xids_that_created && bl->load_root_xid != bl->root_xids_that_created[which_db]) - le_xid = bl->load_root_xid; - return le_xid; -} - -size_t ft_loader_leafentry_size(size_t key_size, size_t val_size, TXNID xid) { - size_t s = 0; - if (xid == TXNID_NONE) - s = LE_CLEAN_MEMSIZE(val_size) + key_size + sizeof(uint32_t); - else - s = LE_MVCC_COMMITTED_MEMSIZE(val_size) + key_size + sizeof(uint32_t); - return s; -} - -static int 
process_primary_rows_internal (FTLOADER bl, struct rowset *primary_rowset) -// process the rows in primary_rowset, and then destroy the rowset. -// if FLUSH is true then write all the buffered rows out. -// if primary_rowset is NULL then treat it as empty. -{ - int error_count = 0; - int *XMALLOC_N(bl->N, error_codes); - - // If we parallelize the first for loop, dest_keys/dest_vals init&cleanup need to move inside - DBT_ARRAY dest_keys; - DBT_ARRAY dest_vals; - toku_dbt_array_init(&dest_keys, 1); - toku_dbt_array_init(&dest_vals, 1); - - for (int i = 0; i < bl->N; i++) { - unsigned int klimit,vlimit; // maximum row sizes. - toku_ft_get_maximum_advised_key_value_lengths(&klimit, &vlimit); - - error_codes[i] = 0; - struct rowset *rows = &(bl->rows[i]); - struct merge_fileset *fs = &(bl->fs[i]); - ft_compare_func compare = bl->bt_compare_funs[i]; - - // Don't parallelize this loop, or we have to lock access to add_row() which would be a lot of overehad. - // Also this way we can reuse the DB_DBT_REALLOC'd values inside dest_keys/dest_vals without a race. 
- for (size_t prownum=0; prownumn_rows; prownum++) { - if (error_count) break; - - struct row *prow = &primary_rowset->rows[prownum]; - DBT pkey = zero_dbt; - DBT pval = zero_dbt; - pkey.data = primary_rowset->data + prow->off; - pkey.size = prow->klen; - pval.data = primary_rowset->data + prow->off + prow->klen; - pval.size = prow->vlen; - - - DBT_ARRAY key_array; - DBT_ARRAY val_array; - if (bl->dbs[i] != bl->src_db) { - int r = bl->generate_row_for_put(bl->dbs[i], bl->src_db, &dest_keys, &dest_vals, &pkey, &pval); - if (r != 0) { - error_codes[i] = r; - inc_error_count(); - break; - } - paranoid_invariant(dest_keys.size <= dest_keys.capacity); - paranoid_invariant(dest_vals.size <= dest_vals.capacity); - paranoid_invariant(dest_keys.size == dest_vals.size); - - key_array = dest_keys; - val_array = dest_vals; - } else { - key_array.size = key_array.capacity = 1; - key_array.dbts = &pkey; - - val_array.size = val_array.capacity = 1; - val_array.dbts = &pval; - } - for (uint32_t row = 0; row < key_array.size; row++) { - DBT *dest_key = &key_array.dbts[row]; - DBT *dest_val = &val_array.dbts[row]; - if (dest_key->size > klimit) { - error_codes[i] = EINVAL; - fprintf(stderr, "Key too big (keysize=%d bytes, limit=%d bytes)\n", dest_key->size, klimit); - inc_error_count(); - break; - } - if (dest_val->size > vlimit) { - error_codes[i] = EINVAL; - fprintf(stderr, "Row too big (rowsize=%d bytes, limit=%d bytes)\n", dest_val->size, vlimit); - inc_error_count(); - break; - } - - bl->extracted_datasizes[i] += ft_loader_leafentry_size(dest_key->size, dest_val->size, leafentry_xid(bl, i)); - - if (row_wont_fit(rows, dest_key->size + dest_val->size)) { - //printf("%s:%d rows.n_rows=%ld rows.n_bytes=%ld\n", __FILE__, __LINE__, rows->n_rows, rows->n_bytes); - int r = sort_and_write_rows(*rows, fs, bl, i, bl->dbs[i], compare); // cannot spawn this because of the race on rows. 
If we were to create a new rows, and if sort_and_write_rows were to destroy the rows it is passed, we could spawn it, however. - // If we do spawn this, then we must account for the additional storage in the memory_per_rowset() function. - init_rowset(rows, memory_per_rowset_during_extract(bl)); // we passed the contents of rows to sort_and_write_rows. - if (r != 0) { - error_codes[i] = r; - inc_error_count(); - break; - } - } - int r = add_row(rows, dest_key, dest_val); - if (r != 0) { - error_codes[i] = r; - inc_error_count(); - break; - } - } - } - } - toku_dbt_array_destroy(&dest_keys); - toku_dbt_array_destroy(&dest_vals); - - destroy_rowset(primary_rowset); - toku_free(primary_rowset); - int r = 0; - if (error_count > 0) { - for (int i=0; iN; i++) { - if (error_codes[i]) { - r = error_codes[i]; - ft_loader_set_panic(bl, r, false, i, nullptr, nullptr); - } - } - invariant(r); // found the error - } - toku_free(error_codes); - return r; -} - -static int process_primary_rows (FTLOADER bl, struct rowset *primary_rowset) { - int r = process_primary_rows_internal (bl, primary_rowset); - return r; -} - -int toku_ft_loader_put (FTLOADER bl, DBT *key, DBT *val) -/* Effect: Put a key-value pair into the ft loader. Called by DB_LOADER->put(). - * Return value: 0 on success, an error number otherwise. - */ -{ - if (!bl->allow_puts || ft_loader_get_error(&bl->error_callback)) - return EINVAL; // previous panic - bl->n_rows++; - return loader_do_put(bl, key, val); -} - -void toku_ft_loader_set_n_rows(FTLOADER bl, uint64_t n_rows) { - bl->n_rows = n_rows; -} - -uint64_t toku_ft_loader_get_n_rows(FTLOADER bl) { - return bl->n_rows; -} - -int merge_row_arrays_base (struct row dest[/*an+bn*/], struct row a[/*an*/], int an, struct row b[/*bn*/], int bn, - int which_db, DB *dest_db, ft_compare_func compare, - - FTLOADER bl, - struct rowset *rowset) -/* Effect: Given two arrays of rows, a and b, merge them using the comparison function, and write them into dest. 
- * This function is suitable for use in a mergesort. - * If a pair of duplicate keys is ever noticed, then call the error_callback function (if it exists), and return DB_KEYEXIST. - * Arguments: - * dest write the rows here - * a,b the rows being merged - * an,bn the lenth of a and b respectively. - * dest_db We need the dest_db to run the comparison function. - * compare We need the compare function for the dest_db. - */ -{ - while (an>0 && bn>0) { - DBT akey; memset(&akey, 0, sizeof akey); akey.data=rowset->data+a->off; akey.size=a->klen; - DBT bkey; memset(&bkey, 0, sizeof bkey); bkey.data=rowset->data+b->off; bkey.size=b->klen; - - int compare_result = compare(dest_db, &akey, &bkey); - if (compare_result==0) { - if (bl->error_callback.error_callback) { - DBT aval; memset(&aval, 0, sizeof aval); aval.data=rowset->data + a->off + a->klen; aval.size = a->vlen; - ft_loader_set_error(&bl->error_callback, DB_KEYEXIST, dest_db, which_db, &akey, &aval); - } - return DB_KEYEXIST; - } else if (compare_result<0) { - // a is smaller - *dest = *a; - dest++; a++; an--; - } else { - *dest = *b; - dest++; b++; bn--; - } - } - while (an>0) { - *dest = *a; - dest++; a++; an--; - } - while (bn>0) { - *dest = *b; - dest++; b++; bn--; - } - return 0; -} - -static int binary_search (int *location, - const DBT *key, - struct row a[/*an*/], int an, - int abefore, - int which_db, DB *dest_db, ft_compare_func compare, - FTLOADER bl, - struct rowset *rowset) -// Given a sorted array of rows a, and a dbt key, find the first row in a that is > key. -// If no such row exists, then consider the result to be equal to an. -// On success store abefore+the index into *location -// Return 0 on success. -// Return DB_KEYEXIST if we find a row that is equal to key. 
-{ - if (an==0) { - *location = abefore; - return 0; - } else { - int a2 = an/2; - DBT akey = make_dbt(rowset->data+a[a2].off, a[a2].klen); - int compare_result = compare(dest_db, key, &akey); - if (compare_result==0) { - if (bl->error_callback.error_callback) { - DBT aval = make_dbt(rowset->data + a[a2].off + a[a2].klen, a[a2].vlen); - ft_loader_set_error(&bl->error_callback, DB_KEYEXIST, dest_db, which_db, &akey, &aval); - } - return DB_KEYEXIST; - } else if (compare_result<0) { - // key is before a2 - if (an==1) { - *location = abefore; - return 0; - } else { - return binary_search(location, key, - a, a2, - abefore, - which_db, dest_db, compare, bl, rowset); - } - } else { - // key is after a2 - if (an==1) { - *location = abefore + 1; - return 0; - } else { - return binary_search(location, key, - a+a2, an-a2, - abefore+a2, - which_db, dest_db, compare, bl, rowset); - } - } - } -} - - -#define SWAP(typ,x,y) { typ tmp = x; x=y; y=tmp; } - -static int merge_row_arrays (struct row dest[/*an+bn*/], struct row a[/*an*/], int an, struct row b[/*bn*/], int bn, - int which_db, DB *dest_db, ft_compare_func compare, - FTLOADER bl, - struct rowset *rowset) -/* Effect: Given two sorted arrays of rows, a and b, merge them using the comparison function, and write them into dest. - * Arguments: - * dest write the rows here - * a,b the rows being merged - * an,bn the lenth of a and b respectively. - * dest_db We need the dest_db to run the comparison function. - * compare We need the compare function for the dest_db. - */ -{ - if (an + bn < 10000) { - return merge_row_arrays_base(dest, a, an, b, bn, which_db, dest_db, compare, bl, rowset); - } - if (an < bn) { - SWAP(struct row *,a, b) - SWAP(int ,an,bn) - } - // an >= bn - int a2 = an/2; - DBT akey = make_dbt(rowset->data+a[a2].off, a[a2].klen); - int b2 = 0; // initialize to zero so we can add the answer in. 
- { - int r = binary_search(&b2, &akey, b, bn, 0, which_db, dest_db, compare, bl, rowset); - if (r!=0) return r; // for example if we found a duplicate, called the error_callback, and now we return an error code. - } - int ra, rb; - ra = merge_row_arrays(dest, a, a2, b, b2, which_db, dest_db, compare, bl, rowset); - rb = merge_row_arrays(dest+a2+b2, a+a2, an-a2, b+b2, bn-b2, which_db, dest_db, compare, bl, rowset); - if (ra!=0) return ra; - else return rb; -} - -int mergesort_row_array (struct row rows[/*n*/], int n, int which_db, DB *dest_db, ft_compare_func compare, FTLOADER bl, struct rowset *rowset) -/* Sort an array of rows (using mergesort). - * Arguments: - * rows sort this array of rows. - * n the length of the array. - * dest_db used by the comparison function. - * compare the compare function - */ -{ - if (n<=1) return 0; // base case is sorted - int mid = n/2; - int r1, r2; - r1 = mergesort_row_array (rows, mid, which_db, dest_db, compare, bl, rowset); - - // Don't spawn this one explicitly - r2 = mergesort_row_array (rows+mid, n-mid, which_db, dest_db, compare, bl, rowset); - - if (r1!=0) return r1; - if (r2!=0) return r2; - - struct row *MALLOC_N(n, tmp); - if (tmp == NULL) return get_error_errno(); - { - int r = merge_row_arrays(tmp, rows, mid, rows+mid, n-mid, which_db, dest_db, compare, bl, rowset); - if (r!=0) { - toku_free(tmp); - return r; - } - } - memcpy(rows, tmp, sizeof(*tmp)*n); - toku_free(tmp); - return 0; -} - -// C function for testing mergesort_row_array -int ft_loader_mergesort_row_array (struct row rows[/*n*/], int n, int which_db, DB *dest_db, ft_compare_func compare, FTLOADER bl, struct rowset *rowset) { - return mergesort_row_array (rows, n, which_db, dest_db, compare, bl, rowset); -} - -static int sort_rows (struct rowset *rows, int which_db, DB *dest_db, ft_compare_func compare, - FTLOADER bl) -/* Effect: Sort a collection of rows. - * If any duplicates are found, then call the error_callback function and return non zero. 
- * Otherwise return 0. - * Arguments: - * rowset the */ -{ - return mergesort_row_array(rows->rows, rows->n_rows, which_db, dest_db, compare, bl, rows); -} - -/* filesets Maintain a collection of files. Typically these files are each individually sorted, and we will merge them. - * These files have two parts, one is for the data rows, and the other is a collection of offsets so we an more easily parallelize the manipulation (e.g., by allowing us to find the offset of the ith row quickly). */ - -void init_merge_fileset (struct merge_fileset *fs) -/* Effect: Initialize a fileset */ -{ - fs->have_sorted_output = false; - fs->sorted_output = FIDX_NULL; - fs->prev_key = zero_dbt; - fs->prev_key.flags = DB_DBT_REALLOC; - - fs->n_temp_files = 0; - fs->n_temp_files_limit = 0; - fs->data_fidxs = NULL; -} - -void destroy_merge_fileset (struct merge_fileset *fs) -/* Effect: Destroy a fileset. */ -{ - if ( fs ) { - toku_destroy_dbt(&fs->prev_key); - fs->n_temp_files = 0; - fs->n_temp_files_limit = 0; - toku_free(fs->data_fidxs); - fs->data_fidxs = NULL; - } -} - - -static int extend_fileset (FTLOADER bl, struct merge_fileset *fs, FIDX*ffile) -/* Effect: Add two files (one for data and one for idx) to the fileset. - * Arguments: - * bl the ft_loader (needed to panic if anything goes wrong, and also to get the temp_file_template. 
- * fs the fileset - * ffile the data file (which will be open) - * fidx the index file (which will be open) - */ -{ - FIDX sfile; - int r; - r = ft_loader_open_temp_file(bl, &sfile); if (r!=0) return r; - - if (fs->n_temp_files+1 > fs->n_temp_files_limit) { - fs->n_temp_files_limit = (fs->n_temp_files+1)*2; - XREALLOC_N(fs->n_temp_files_limit, fs->data_fidxs); - } - fs->data_fidxs[fs->n_temp_files] = sfile; - fs->n_temp_files++; - - *ffile = sfile; - return 0; -} - -// RFP maybe this should be buried in the ft_loader struct -static toku_mutex_t update_progress_lock = TOKU_MUTEX_INITIALIZER; - -static int update_progress (int N, - FTLOADER bl, - const char *UU(message)) -{ - // Must protect the increment and the call to the poll_function. - toku_mutex_lock(&update_progress_lock); - bl->progress+=N; - - int result; - if (bl->progress_callback_result == 0) { - //printf(" %20s: %d ", message, bl->progress); - result = ft_loader_call_poll_function(&bl->poll_callback, (float)bl->progress/(float)PROGRESS_MAX); - if (result!=0) { - bl->progress_callback_result = result; - } - } else { - result = bl->progress_callback_result; - } - toku_mutex_unlock(&update_progress_lock); - return result; -} - - -static int write_rowset_to_file (FTLOADER bl, FIDX sfile, const struct rowset rows) { - int r = 0; - // Allocate a buffer if we're compressing intermediates. 
- char *uncompressed_buffer = nullptr; - if (bl->compress_intermediates) { - MALLOC_N(MAX_UNCOMPRESSED_BUF, uncompressed_buffer); - if (uncompressed_buffer == nullptr) { - return ENOMEM; - } - } - struct wbuf wb; - wbuf_init(&wb, uncompressed_buffer, MAX_UNCOMPRESSED_BUF); - - FILE *sstream = toku_bl_fidx2file(bl, sfile); - for (size_t i=0; icompress_intermediates && wb.ndone > 0) { - r = bl_finish_compressed_write(sstream, &wb); - if (r != 0) { - goto exit; - } - } - r = 0; -exit: - if (uncompressed_buffer) { - toku_free(uncompressed_buffer); - } - return r; -} - - -int sort_and_write_rows (struct rowset rows, struct merge_fileset *fs, FTLOADER bl, int which_db, DB *dest_db, ft_compare_func compare) -/* Effect: Given a rowset, sort it and write it to a temporary file. - * Note: The loader maintains for each index the most recently written-to file, as well as the DBT for the last key written into that file. - * If this rowset is sorted and all greater than that dbt, then we append to the file (skipping the sort, and reducing the number of temporary files). - * Arguments: - * rows the rowset - * fs the fileset into which the sorted data will be added - * bl the ft_loader - * dest_db the DB, needed for the comparison function. - * compare The comparison function. - * Returns 0 on success, otherwise an error number. - * Destroy the rowset after finishing it. - * Note: There is no sense in trying to calculate progress by this function since it's done concurrently with the loader->put operation. 
- * Note first time called: invariant: fs->have_sorted_output == false - */ -{ - //printf(" sort_and_write use %d progress=%d fin at %d\n", progress_allocation, bl->progress, bl->progress+progress_allocation); - - // TODO: erase the files, and deal with all the cleanup on error paths - //printf("%s:%d sort_rows n_rows=%ld\n", __FILE__, __LINE__, rows->n_rows); - //bl_time_t before_sort = bl_time_now(); - - int result; - if (rows.n_rows == 0) { - result = 0; - } else { - result = sort_rows(&rows, which_db, dest_db, compare, bl); - - //bl_time_t after_sort = bl_time_now(); - - if (result == 0) { - DBT min_rowset_key = make_dbt(rows.data+rows.rows[0].off, rows.rows[0].klen); - if (fs->have_sorted_output && compare(dest_db, &fs->prev_key, &min_rowset_key) < 0) { - // write everything to the same output if the max key in the temp file (prev_key) is < min of the sorted rowset - result = write_rowset_to_file(bl, fs->sorted_output, rows); - if (result == 0) { - // set the max key in the temp file to the max key in the sorted rowset - result = toku_dbt_set(rows.rows[rows.n_rows-1].klen, rows.data + rows.rows[rows.n_rows-1].off, &fs->prev_key, NULL); - } - } else { - // write the sorted rowset into a new temp file - if (fs->have_sorted_output) { - fs->have_sorted_output = false; - result = ft_loader_fi_close(&bl->file_infos, fs->sorted_output, true); - } - if (result == 0) { - FIDX sfile = FIDX_NULL; - result = extend_fileset(bl, fs, &sfile); - if (result == 0) { - result = write_rowset_to_file(bl, sfile, rows); - if (result == 0) { - fs->have_sorted_output = true; fs->sorted_output = sfile; - // set the max key in the temp file to the max key in the sorted rowset - result = toku_dbt_set(rows.rows[rows.n_rows-1].klen, rows.data + rows.rows[rows.n_rows-1].off, &fs->prev_key, NULL); - } - } - } - // Note: if result == 0 then invariant fs->have_sorted_output == true - } - } - } - - destroy_rowset(&rows); - - //bl_time_t after_write = bl_time_now(); - - return result; -} - -// C 
function for testing sort_and_write_rows -int ft_loader_sort_and_write_rows (struct rowset *rows, struct merge_fileset *fs, FTLOADER bl, int which_db, DB *dest_db, ft_compare_func compare) { - return sort_and_write_rows (*rows, fs, bl, which_db, dest_db, compare); -} - -int toku_merge_some_files_using_dbufio (const bool to_q, FIDX dest_data, QUEUE q, int n_sources, DBUFIO_FILESET bfs, FIDX srcs_fidxs[/*n_sources*/], FTLOADER bl, int which_db, DB *dest_db, ft_compare_func compare, int progress_allocation) -/* Effect: Given an array of FILE*'s each containing sorted, merge the data and write it to an output. All the files remain open after the merge. - * This merge is performed in one pass, so don't pass too many files in. If you need a tree of merges do it elsewhere. - * If TO_Q is true then we write rowsets into queue Q. Otherwise we write into dest_data. - * Modifies: May modify the arrays of files (but if modified, it must be a permutation so the caller can use that array to close everything.) - * Requires: The number of sources is at least one, and each of the input files must have at least one row in it. - * Arguments: - * to_q boolean indicating that output is queue (true) or a file (false) - * dest_data where to write the sorted data - * q where to write the sorted data - * n_sources how many source files. - * srcs_data the array of source data files. - * bl the ft_loader. - * dest_db the destination DB (used in the comparison function). - * Return value: 0 on success, otherwise an error number. - * The fidxs are not closed by this function. - */ -{ - int result = 0; - - FILE *dest_stream = to_q ? 
NULL : toku_bl_fidx2file(bl, dest_data); - - //printf(" merge_some_files progress=%d fin at %d\n", bl->progress, bl->progress+progress_allocation); - DBT keys[n_sources]; - DBT vals[n_sources]; - uint64_t dataoff[n_sources]; - DBT zero = zero_dbt; zero.flags=DB_DBT_REALLOC; - - for (int i=0; ierror_callback); - if (r!=0) result = r; - } - - uint64_t n_rows = 0; - if (result==0) { - // load pqueue with first value from each source - for (int i=0; ifile_infos.lock); - n_rows += bl->file_infos.file_infos[srcs_fidxs[i].idx].n_rows; - toku_mutex_unlock(&bl->file_infos.lock); - } - } - uint64_t n_rows_done = 0; - - struct rowset *output_rowset = NULL; - if (result==0 && to_q) { - XMALLOC(output_rowset); // freed in cleanup - int r = init_rowset(output_rowset, memory_per_rowset_during_merge(bl, n_sources, to_q)); - if (r!=0) result = r; - } - - // Allocate a buffer if we're compressing intermediates. - char *uncompressed_buffer = nullptr; - struct wbuf wb; - if (bl->compress_intermediates && !to_q) { - MALLOC_N(MAX_UNCOMPRESSED_BUF, uncompressed_buffer); - if (uncompressed_buffer == nullptr) { - result = ENOMEM; - } - } - wbuf_init(&wb, uncompressed_buffer, MAX_UNCOMPRESSED_BUF); - - //printf(" n_rows=%ld\n", n_rows); - while (result==0 && pqueue_size(pq)>0) { - int mini; - { - // get the minimum - pqueue_node_t *node; - int r = pqueue_pop(pq, &node); - if (r!=0) { - result = r; - invariant(0); - break; - } - mini = node->i; - } - if (to_q) { - if (row_wont_fit(output_rowset, keys[mini].size + vals[mini].size)) { - { - int r = queue_enq(q, (void*)output_rowset, 1, NULL); - if (r!=0) { - result = r; - break; - } - } - XMALLOC(output_rowset); // freed in cleanup - { - int r = init_rowset(output_rowset, memory_per_rowset_during_merge(bl, n_sources, to_q)); - if (r!=0) { - result = r; - break; - } - } - } - { - int r = add_row(output_rowset, &keys[mini], &vals[mini]); - if (r!=0) { - result = r; - break; - } - } - } else { - // write it to the dest file - int r = 
loader_write_row(&keys[mini], &vals[mini], dest_data, dest_stream, &dataoff[mini], &wb, bl); - if (r!=0) { - result = r; - break; - } - } - - { - // read next row from file that just sourced min value - int r = loader_read_row_from_dbufio(bfs, mini, &keys[mini], &vals[mini]); - if (r!=0) { - if (r==EOF) { - // on feof, queue size permanently smaller - toku_free(keys[mini].data); keys[mini].data = NULL; - toku_free(vals[mini].data); vals[mini].data = NULL; - } else { - fprintf(stderr, "%s:%d r=%d errno=%d bfs=%p mini=%d\n", __FILE__, __LINE__, r, get_maybe_error_errno(), bfs, mini); - dbufio_print(bfs); - result = r; - break; - } - } else { - // insert value into queue (re-populate queue) - pq_nodes[mini].key = &keys[mini]; - r = pqueue_insert(pq, &pq_nodes[mini]); - if (r!=0) { - // Note: This error path tested by loader-dup-test1.tdbrun (and by loader-dup-test4) - result = r; - // printf("%s:%d returning\n", __FILE__, __LINE__); - break; - } - } - } - - n_rows_done++; - const uint64_t rows_per_report = size_factor*1024; - if (n_rows_done%rows_per_report==0) { - // need to update the progress. - double fraction_of_remaining_we_just_did = (double)rows_per_report / (double)(n_rows - n_rows_done + rows_per_report); - invariant(0<= fraction_of_remaining_we_just_did && fraction_of_remaining_we_just_did<=1); - int progress_just_done = fraction_of_remaining_we_just_did * progress_allocation; - progress_allocation -= progress_just_done; - // ignore the result from update_progress here, we'll call update_progress again below, which will give us the nonzero result. 
- int r = update_progress(progress_just_done, bl, "in file merge"); - if (0) printf("%s:%d Progress=%d\n", __FILE__, __LINE__, r); - } - } - if (result == 0 && uncompressed_buffer != nullptr && wb.ndone > 0) { - result = bl_finish_compressed_write(dest_stream, &wb); - } - - if (result==0 && to_q) { - int r = queue_enq(q, (void*)output_rowset, 1, NULL); - if (r!=0) - result = r; - else - output_rowset = NULL; - } - - // cleanup - if (uncompressed_buffer) { - toku_free(uncompressed_buffer); - } - for (int i=0; icompress_intermediates); - if (r!=0) { result = r; } - } - - if (result==0) { - int r = toku_merge_some_files_using_dbufio (to_q, dest_data, q, n_sources, bfs, srcs_fidxs, bl, which_db, dest_db, compare, progress_allocation); - if (r!=0) { result = r; } - } - - if (bfs!=NULL) { - if (result != 0) - (void) panic_dbufio_fileset(bfs, result); - int r = destroy_dbufio_fileset(bfs); - if (r!=0 && result==0) result=r; - bfs = NULL; - } - if (fds!=NULL) { - toku_free(fds); - fds = NULL; - } - return result; -} - -static int int_min (int a, int b) -{ - if (a1) { - N = (N+B-1)/B; - result++; - } - return result; -} - -int merge_files (struct merge_fileset *fs, - FTLOADER bl, - // These are needed for the comparison function and error callback. - int which_db, DB *dest_db, ft_compare_func compare, - int progress_allocation, - // Write rowsets into this queue. - QUEUE output_q - ) -/* Effect: Given a fileset, merge all the files writing all the answers into a queue. - * All the files in fs, and any temporary files will be closed and unlinked (and the fileset will be empty) - * Return value: 0 on success, otherwise an error number. - * On error *fs will contain no open files. All the files (including any temporary files) will be closed and unlinked. - * (however the fs will still need to be deallocated.) 
- */ -{ - //printf(" merge_files %d files\n", fs->n_temp_files); - //printf(" merge_files use %d progress=%d fin at %d\n", progress_allocation, bl->progress, bl->progress+progress_allocation); - const int final_mergelimit = (size_factor == 1) ? 4 : merge_fanin(bl, true); // try for a merge to the leaf level - const int earlier_mergelimit = (size_factor == 1) ? 4 : merge_fanin(bl, false); // try for a merge at nonleaf. - int n_passes_left = (fs->n_temp_files<=final_mergelimit) - ? 1 - : 1+n_passes((fs->n_temp_files+final_mergelimit-1)/final_mergelimit, earlier_mergelimit); - // printf("%d files, %d on last pass, %d on earlier passes, %d passes\n", fs->n_temp_files, final_mergelimit, earlier_mergelimit, n_passes_left); - int result = 0; - while (fs->n_temp_files > 0) { - int progress_allocation_for_this_pass = progress_allocation/n_passes_left; - progress_allocation -= progress_allocation_for_this_pass; - //printf("%s:%d n_passes_left=%d progress_allocation_for_this_pass=%d\n", __FILE__, __LINE__, n_passes_left, progress_allocation_for_this_pass); - - invariant(fs->n_temp_files>0); - struct merge_fileset next_file_set; - bool to_queue = (bool)(fs->n_temp_files <= final_mergelimit); - init_merge_fileset(&next_file_set); - while (fs->n_temp_files>0) { - // grab some files and merge them. - int n_to_merge = int_min(to_queue?final_mergelimit:earlier_mergelimit, fs->n_temp_files); - - // We are about to do n_to_merge/n_temp_files of the remaining for this pass. 
- int progress_allocation_for_this_subpass = progress_allocation_for_this_pass * (double)n_to_merge / (double)fs->n_temp_files; - // printf("%s:%d progress_allocation_for_this_subpass=%d n_temp_files=%d b=%llu\n", __FILE__, __LINE__, progress_allocation_for_this_subpass, fs->n_temp_files, (long long unsigned) memory_per_rowset_during_merge(bl, n_to_merge, to_queue)); - progress_allocation_for_this_pass -= progress_allocation_for_this_subpass; - - //printf("%s:%d merging\n", __FILE__, __LINE__); - FIDX merged_data = FIDX_NULL; - - FIDX *XMALLOC_N(n_to_merge, data_fidxs); - for (int i=0; in_temp_files -1 -i; - FIDX fidx = fs->data_fidxs[idx]; - result = ft_loader_fi_reopen(&bl->file_infos, fidx, "r"); - if (result) break; - data_fidxs[i] = fidx; - } - if (result==0 && !to_queue) { - result = extend_fileset(bl, &next_file_set, &merged_data); - } - - if (result==0) { - result = merge_some_files(to_queue, merged_data, output_q, n_to_merge, data_fidxs, bl, which_db, dest_db, compare, progress_allocation_for_this_subpass); - // if result!=0, fall through - if (result==0) { - /*nothing*/;// this is gratuitous, but we need something to give code coverage tools to help us know that it's important to distinguish between result==0 and result!=0 - } - } - - //printf("%s:%d merged\n", __FILE__, __LINE__); - for (int i=0; ifile_infos, data_fidxs[i], true); - if (r!=0 && result==0) result = r; - } - { - int r = ft_loader_fi_unlink(&bl->file_infos, data_fidxs[i]); - if (r!=0 && result==0) result = r; - } - data_fidxs[i] = FIDX_NULL; - } - } - - fs->n_temp_files -= n_to_merge; - if (!to_queue && !fidx_is_null(merged_data)) { - int r = ft_loader_fi_close(&bl->file_infos, merged_data, true); - if (r!=0 && result==0) result = r; - } - toku_free(data_fidxs); - - if (result!=0) break; - } - - destroy_merge_fileset(fs); - *fs = next_file_set; - - // Update the progress - n_passes_left--; - - if (result==0) { invariant(progress_allocation_for_this_pass==0); } - - if (result!=0) break; - } 
- if (result) ft_loader_set_panic(bl, result, true, which_db, nullptr, nullptr); - - { - int r = queue_eof(output_q); - if (r!=0 && result==0) result = r; - } - // It's conceivable that the progress_allocation could be nonzero (for example if bl->N==0) - { - int r = update_progress(progress_allocation, bl, "did merge_files"); - if (r!=0 && result==0) result = r; - } - return result; -} - -struct subtree_info { - int64_t block; -}; - -struct subtrees_info { - int64_t next_free_block; - int64_t n_subtrees; // was n_blocks - int64_t n_subtrees_limit; - struct subtree_info *subtrees; -}; - -static void subtrees_info_init(struct subtrees_info *p) { - p->next_free_block = p->n_subtrees = p->n_subtrees_limit = 0; - p->subtrees = NULL; -} - -static void subtrees_info_destroy(struct subtrees_info *p) { - toku_free(p->subtrees); - p->subtrees = NULL; -} - -static void allocate_node (struct subtrees_info *sts, int64_t b) { - if (sts->n_subtrees >= sts->n_subtrees_limit) { - sts->n_subtrees_limit *= 2; - XREALLOC_N(sts->n_subtrees_limit, sts->subtrees); - } - sts->subtrees[sts->n_subtrees].block = b; - sts->n_subtrees++; -} - -// dbuf will always contained 512-byte aligned buffer, but the length might not be a multiple of 512 bytes. If that's what you want, then pad it. 
-struct dbuf { - unsigned char *buf; - int buflen; - int off; - int error; -}; - -struct leaf_buf { - BLOCKNUM blocknum; - TXNID xid; - uint64_t nkeys, ndata, dsize; - FTNODE node; - XIDS xids; - uint64_t off; -}; - -struct translation { - int64_t off, size; -}; - -struct dbout { - int fd; - toku_off_t current_off; - - int64_t n_translations; - int64_t n_translations_limit; - struct translation *translation; - toku_mutex_t mutex; - FT h; -}; - -static inline void dbout_init(struct dbout *out, FT h) { - out->fd = -1; - out->current_off = 0; - out->n_translations = out->n_translations_limit = 0; - out->translation = NULL; - toku_mutex_init(&out->mutex, NULL); - out->h = h; -} - -static inline void dbout_destroy(struct dbout *out) { - if (out->fd >= 0) { - toku_os_close(out->fd); - out->fd = -1; - } - toku_free(out->translation); - out->translation = NULL; - toku_mutex_destroy(&out->mutex); -} - -static inline void dbout_lock(struct dbout *out) { - toku_mutex_lock(&out->mutex); -} - -static inline void dbout_unlock(struct dbout *out) { - toku_mutex_unlock(&out->mutex); -} - -static void seek_align_locked(struct dbout *out) { - toku_off_t old_current_off = out->current_off; - int alignment = 4096; - out->current_off += alignment-1; - out->current_off &= ~(alignment-1); - toku_off_t r = lseek(out->fd, out->current_off, SEEK_SET); - invariant(r==out->current_off); - invariant(out->current_off >= old_current_off); - invariant(out->current_off < old_current_off+alignment); - invariant(out->current_off % alignment == 0); -} - -static void seek_align(struct dbout *out) { - dbout_lock(out); - seek_align_locked(out); - dbout_unlock(out); -} - -static void dbuf_init (struct dbuf *dbuf) { - dbuf->buf = 0; - dbuf->buflen = 0; - dbuf->off = 0; - dbuf->error = 0; -} - -static void dbuf_destroy (struct dbuf *dbuf) { - toku_free(dbuf->buf); dbuf->buf = NULL; -} - -static int allocate_block (struct dbout *out, int64_t *ret_block_number) -// Return the new block number -{ - int result 
= 0; - dbout_lock(out); - int64_t block_number = out->n_translations; - if (block_number >= out->n_translations_limit) { - int64_t old_n_translations_limit = out->n_translations_limit; - struct translation *old_translation = out->translation; - if (out->n_translations_limit==0) { - out->n_translations_limit = 1; - } else { - out->n_translations_limit *= 2; - } - REALLOC_N(out->n_translations_limit, out->translation); - if (out->translation == NULL) { - result = get_error_errno(); - invariant(result); - out->n_translations_limit = old_n_translations_limit; - out->translation = old_translation; - goto cleanup; - } - } - out->n_translations++; - *ret_block_number = block_number; -cleanup: - dbout_unlock(out); - return result; -} - -static void putbuf_bytes (struct dbuf *dbuf, const void *bytes, int nbytes) { - if (!dbuf->error && dbuf->off + nbytes > dbuf->buflen) { - unsigned char *oldbuf = dbuf->buf; - int oldbuflen = dbuf->buflen; - dbuf->buflen += dbuf->off + nbytes; - dbuf->buflen *= 2; - REALLOC_N_ALIGNED(512, dbuf->buflen, dbuf->buf); - if (dbuf->buf == NULL) { - dbuf->error = get_error_errno(); - dbuf->buf = oldbuf; - dbuf->buflen = oldbuflen; - } - } - if (!dbuf->error) { - memcpy(dbuf->buf + dbuf->off, bytes, nbytes); - dbuf->off += nbytes; - } -} - -static void putbuf_int32 (struct dbuf *dbuf, int v) { - putbuf_bytes(dbuf, &v, 4); -} - -static void putbuf_int64 (struct dbuf *dbuf, long long v) { - putbuf_int32(dbuf, v>>32); - putbuf_int32(dbuf, v&0xFFFFFFFF); -} - -static struct leaf_buf *start_leaf (struct dbout *out, const DESCRIPTOR UU(desc), int64_t lblocknum, TXNID xid, uint32_t UU(target_nodesize)) { - invariant(lblocknum < out->n_translations_limit); - - struct leaf_buf *XMALLOC(lbuf); - lbuf->blocknum.b = lblocknum; - lbuf->xid = xid; - lbuf->nkeys = lbuf->ndata = lbuf->dsize = 0; - lbuf->off = 0; - - lbuf->xids = xids_get_root_xids(); - if (xid != TXNID_NONE) { - XIDS new_xids = NULL; - int r = xids_create_child(lbuf->xids, &new_xids, xid); - 
assert(r == 0 && new_xids); - xids_destroy(&lbuf->xids); - lbuf->xids = new_xids; - } - - FTNODE XMALLOC(node); - toku_initialize_empty_ftnode(node, lbuf->blocknum, 0 /*height*/, 1 /*basement nodes*/, FT_LAYOUT_VERSION, 0); - BP_STATE(node, 0) = PT_AVAIL; - lbuf->node = node; - - return lbuf; -} - -static void finish_leafnode (struct dbout *out, struct leaf_buf *lbuf, int progress_allocation, FTLOADER bl, uint32_t target_basementnodesize, enum toku_compression_method target_compression_method); -static int write_nonleaves (FTLOADER bl, FIDX pivots_fidx, struct dbout *out, struct subtrees_info *sts, const DESCRIPTOR descriptor, uint32_t target_nodesize, uint32_t target_basementnodesize, enum toku_compression_method target_compression_method); -static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int keylen, unsigned char *val, int vallen, int this_leafentry_size, STAT64INFO stats_to_update); -static int write_translation_table (struct dbout *out, long long *off_of_translation_p); -static int write_header (struct dbout *out, long long translation_location_on_disk, long long translation_size_on_disk); - -static void drain_writer_q(QUEUE q) { - void *item; - while (1) { - int r = queue_deq(q, &item, NULL, NULL); - if (r == EOF) - break; - invariant(r == 0); - struct rowset *rowset = (struct rowset *) item; - destroy_rowset(rowset); - toku_free(rowset); - } -} - -static void cleanup_maxkey(DBT *maxkey) { - if (maxkey->flags == DB_DBT_REALLOC) { - toku_free(maxkey->data); - maxkey->data = NULL; - maxkey->flags = 0; - } -} - -static void update_maxkey(DBT *maxkey, DBT *key) { - cleanup_maxkey(maxkey); - *maxkey = *key; -} - -static int copy_maxkey(DBT *maxkey) { - DBT newkey; - toku_init_dbt_flags(&newkey, DB_DBT_REALLOC); - int r = toku_dbt_set(maxkey->size, maxkey->data, &newkey, NULL); - if (r == 0) - update_maxkey(maxkey, &newkey); - return r; -} - -static int toku_loader_write_ft_from_q (FTLOADER bl, - const DESCRIPTOR descriptor, - int fd, // 
write to here - int progress_allocation, - QUEUE q, - uint64_t total_disksize_estimate, - int which_db, - uint32_t target_nodesize, - uint32_t target_basementnodesize, - enum toku_compression_method target_compression_method, - uint32_t target_fanout) -// Effect: Consume a sequence of rowsets work from a queue, creating a fractal tree. Closes fd. -{ - // set the number of fractal tree writer threads so that we can partition memory in the merger - ft_loader_set_fractal_workers_count(bl); - - int result = 0; - int r; - - // The pivots file will contain all the pivot strings (in the form ) - // The pivots_fname is the name of the pivots file. - // Note that the pivots file will have one extra pivot in it (the last key in the dictionary) which will not appear in the tree. - int64_t n_pivots=0; // number of pivots in pivots_file - FIDX pivots_file; // the file - - r = ft_loader_open_temp_file (bl, &pivots_file); - if (r) { - result = r; - drain_writer_q(q); - r = toku_os_close(fd); - assert_zero(r); - return result; - } - FILE *pivots_stream = toku_bl_fidx2file(bl, pivots_file); - - TXNID root_xid_that_created = TXNID_NONE; - if (bl->root_xids_that_created) - root_xid_that_created = bl->root_xids_that_created[which_db]; - - // TODO: (Zardosht/Yoni/Leif), do this code properly - struct ft ft; - toku_ft_init(&ft, (BLOCKNUM){0}, bl->load_lsn, root_xid_that_created, target_nodesize, target_basementnodesize, target_compression_method, target_fanout); - - struct dbout out; - ZERO_STRUCT(out); - dbout_init(&out, &ft); - out.fd = fd; - out.current_off = 8192; // leave 8K reserved at beginning - out.n_translations = 3; // 3 translations reserved at the beginning - out.n_translations_limit = 4; - MALLOC_N(out.n_translations_limit, out.translation); - if (out.translation == NULL) { - result = get_error_errno(); - dbout_destroy(&out); - drain_writer_q(q); - toku_free(ft.h); - return result; - } - - // The blocks_array will contain all the block numbers that correspond to the 
pivots. Generally there should be one more block than pivot. - struct subtrees_info sts; - subtrees_info_init(&sts); - sts.next_free_block = 3; - sts.n_subtrees = 0; - sts.n_subtrees_limit = 1; - MALLOC_N(sts.n_subtrees_limit, sts.subtrees); - if (sts.subtrees == NULL) { - result = get_error_errno(); - subtrees_info_destroy(&sts); - dbout_destroy(&out); - drain_writer_q(q); - toku_free(ft.h); - return result; - } - - out.translation[0].off = -2LL; out.translation[0].size = 0; // block 0 is NULL - invariant(1==RESERVED_BLOCKNUM_TRANSLATION); - invariant(2==RESERVED_BLOCKNUM_DESCRIPTOR); - out.translation[1].off = -1; // block 1 is the block translation, filled in later - out.translation[2].off = -1; // block 2 is the descriptor - seek_align(&out); - int64_t lblock = 0; // make gcc --happy - result = allocate_block(&out, &lblock); - invariant(result == 0); // can not fail since translations reserved above - - TXNID le_xid = leafentry_xid(bl, which_db); - struct leaf_buf *lbuf = start_leaf(&out, descriptor, lblock, le_xid, target_nodesize); - uint64_t n_rows_remaining = bl->n_rows; - uint64_t old_n_rows_remaining = bl->n_rows; - - uint64_t used_estimate = 0; // how much diskspace have we used up? 
- - DBT maxkey = make_dbt(0, 0); // keep track of the max key of the current node - - STAT64INFO_S deltas = ZEROSTATS; - while (result == 0) { - void *item; - { - int rr = queue_deq(q, &item, NULL, NULL); - if (rr == EOF) break; - if (rr != 0) { - ft_loader_set_panic(bl, rr, true, which_db, nullptr, nullptr); - break; - } - } - struct rowset *output_rowset = (struct rowset *)item; - - for (unsigned int i = 0; i < output_rowset->n_rows; i++) { - DBT key = make_dbt(output_rowset->data+output_rowset->rows[i].off, output_rowset->rows[i].klen); - DBT val = make_dbt(output_rowset->data+output_rowset->rows[i].off + output_rowset->rows[i].klen, output_rowset->rows[i].vlen); - - size_t this_leafentry_size = ft_loader_leafentry_size(key.size, val.size, le_xid); - - used_estimate += this_leafentry_size; - - // Spawn off a node if - // a) there is at least one row in it, and - // b) this item would make the nodesize too big, or - // c) the remaining amount won't fit in the current node and the current node's data is more than the remaining amount - uint64_t remaining_amount = total_disksize_estimate - used_estimate; - uint64_t used_here = lbuf->off + 1000; // leave 1000 for various overheads. - uint64_t target_size = (target_nodesize*7L)/8; // use only 7/8 of the node. 
- uint64_t used_here_with_next_key = used_here + this_leafentry_size; - if (lbuf->nkeys > 0 && - ((used_here_with_next_key >= target_size) || (used_here + remaining_amount >= target_size && lbuf->off > remaining_amount))) { - - int progress_this_node = progress_allocation * (double)(old_n_rows_remaining - n_rows_remaining)/(double)old_n_rows_remaining; - progress_allocation -= progress_this_node; - old_n_rows_remaining = n_rows_remaining; - - allocate_node(&sts, lblock); - - n_pivots++; - - invariant(maxkey.data != NULL); - if ((r = bl_write_dbt(&maxkey, pivots_stream, NULL, nullptr, bl))) { - ft_loader_set_panic(bl, r, true, which_db, nullptr, nullptr); - if (result == 0) result = r; - break; - } - - finish_leafnode(&out, lbuf, progress_this_node, bl, target_basementnodesize, target_compression_method); - lbuf = NULL; - - r = allocate_block(&out, &lblock); - if (r != 0) { - ft_loader_set_panic(bl, r, true, which_db, nullptr, nullptr); - if (result == 0) result = r; - break; - } - lbuf = start_leaf(&out, descriptor, lblock, le_xid, target_nodesize); - } - - add_pair_to_leafnode(lbuf, (unsigned char *) key.data, key.size, (unsigned char *) val.data, val.size, this_leafentry_size, &deltas); - n_rows_remaining--; - - update_maxkey(&maxkey, &key); // set the new maxkey to the current key - } - - r = copy_maxkey(&maxkey); // make a copy of maxkey before the rowset is destroyed - if (result == 0) - result = r; - destroy_rowset(output_rowset); - toku_free(output_rowset); - - if (result == 0) - result = ft_loader_get_error(&bl->error_callback); // check if an error was posted and terminate this quickly - } - - if (deltas.numrows || deltas.numbytes) { - toku_ft_update_stats(&ft.in_memory_stats, deltas); - } - - cleanup_maxkey(&maxkey); - - if (lbuf) { - allocate_node(&sts, lblock); - { - int p = progress_allocation/2; - finish_leafnode(&out, lbuf, p, bl, target_basementnodesize, target_compression_method); - progress_allocation -= p; - } - } - - - if (result == 0) { - 
result = ft_loader_get_error(&bl->error_callback); // if there were any prior errors then exit - } - - if (result != 0) goto error; - - // We haven't paniced, so the sum should add up. - invariant(used_estimate == total_disksize_estimate); - - n_pivots++; - - { - DBT key = make_dbt(0,0); // must write an extra DBT into the pivots file. - r = bl_write_dbt(&key, pivots_stream, NULL, nullptr, bl); - if (r) { - result = r; goto error; - } - } - - r = write_nonleaves(bl, pivots_file, &out, &sts, descriptor, target_nodesize, target_basementnodesize, target_compression_method); - if (r) { - result = r; goto error; - } - - { - invariant(sts.n_subtrees==1); - out.h->h->root_blocknum = make_blocknum(sts.subtrees[0].block); - toku_free(sts.subtrees); sts.subtrees = NULL; - - // write the descriptor - { - seek_align(&out); - invariant(out.n_translations >= RESERVED_BLOCKNUM_DESCRIPTOR); - invariant(out.translation[RESERVED_BLOCKNUM_DESCRIPTOR].off == -1); - out.translation[RESERVED_BLOCKNUM_DESCRIPTOR].off = out.current_off; - size_t desc_size = 4+toku_serialize_descriptor_size(descriptor); - invariant(desc_size>0); - out.translation[RESERVED_BLOCKNUM_DESCRIPTOR].size = desc_size; - struct wbuf wbuf; - char *XMALLOC_N(desc_size, buf); - wbuf_init(&wbuf, buf, desc_size); - toku_serialize_descriptor_contents_to_wbuf(&wbuf, descriptor); - uint32_t checksum = toku_x1764_finish(&wbuf.checksum); - wbuf_int(&wbuf, checksum); - invariant(wbuf.ndone==desc_size); - r = toku_os_write(out.fd, wbuf.buf, wbuf.ndone); - out.current_off += desc_size; - toku_free(buf); // wbuf_destroy - if (r) { - result = r; goto error; - } - } - - long long off_of_translation; - r = write_translation_table(&out, &off_of_translation); - if (r) { - result = r; goto error; - } - - r = write_header(&out, off_of_translation, (out.n_translations+1)*16+4); - if (r) { - result = r; goto error; - } - - r = update_progress(progress_allocation, bl, "wrote tdb file"); - if (r) { - result = r; goto error; - } - } - - r = 
fsync(out.fd); - if (r) { - result = get_error_errno(); goto error; - } - - // Do we need to pay attention to user_said_stop? Or should the guy at the other end of the queue pay attention and send in an EOF. - - error: - { - int rr = toku_os_close(fd); - if (rr) - result = get_error_errno(); - } - out.fd = -1; - - subtrees_info_destroy(&sts); - dbout_destroy(&out); - drain_writer_q(q); - toku_free(ft.h); - - return result; -} - -int toku_loader_write_ft_from_q_in_C (FTLOADER bl, - const DESCRIPTOR descriptor, - int fd, // write to here - int progress_allocation, - QUEUE q, - uint64_t total_disksize_estimate, - int which_db, - uint32_t target_nodesize, - uint32_t target_basementnodesize, - enum toku_compression_method target_compression_method, - uint32_t target_fanout) -// This is probably only for testing. -{ - target_nodesize = target_nodesize == 0 ? default_loader_nodesize : target_nodesize; - target_basementnodesize = target_basementnodesize == 0 ? default_loader_basementnodesize : target_basementnodesize; - return toku_loader_write_ft_from_q (bl, descriptor, fd, progress_allocation, q, total_disksize_estimate, which_db, target_nodesize, target_basementnodesize, target_compression_method, target_fanout); -} - - -static void* fractal_thread (void *ftav) { - struct fractal_thread_args *fta = (struct fractal_thread_args *)ftav; - int r = toku_loader_write_ft_from_q (fta->bl, fta->descriptor, fta->fd, fta->progress_allocation, fta->q, fta->total_disksize_estimate, fta->which_db, fta->target_nodesize, fta->target_basementnodesize, fta->target_compression_method, fta->target_fanout); - fta->errno_result = r; - return NULL; -} - -static int loader_do_i (FTLOADER bl, - int which_db, - DB *dest_db, - ft_compare_func compare, - const DESCRIPTOR descriptor, - const char *new_fname, - int progress_allocation // how much progress do I need to add into bl->progress by the end.. - ) -/* Effect: Handle the file creating for one particular DB in the bulk loader. 
*/ -/* Requires: The data is fully extracted, so we can do merges out of files and write the ft file. */ -{ - //printf("doing i use %d progress=%d fin at %d\n", progress_allocation, bl->progress, bl->progress+progress_allocation); - struct merge_fileset *fs = &(bl->fs[which_db]); - struct rowset *rows = &(bl->rows[which_db]); - invariant(rows->data==NULL); // the rows should be all cleaned up already - - int r = queue_create(&bl->fractal_queues[which_db], FRACTAL_WRITER_QUEUE_DEPTH); - if (r) goto error; - - { - mode_t mode = S_IRWXU|S_IRWXG|S_IRWXO; - int fd = toku_os_open(new_fname, O_RDWR| O_CREAT | O_BINARY, mode); // #2621 - if (fd < 0) { - r = get_error_errno(); goto error; - } - - uint32_t target_nodesize, target_basementnodesize, target_fanout; - enum toku_compression_method target_compression_method; - r = dest_db->get_pagesize(dest_db, &target_nodesize); - invariant_zero(r); - r = dest_db->get_readpagesize(dest_db, &target_basementnodesize); - invariant_zero(r); - r = dest_db->get_compression_method(dest_db, &target_compression_method); - invariant_zero(r); - r = dest_db->get_fanout(dest_db, &target_fanout); - invariant_zero(r); - - if (bl->allow_puts) { - // a better allocation would be to figure out roughly how many merge passes we'll need. - int allocation_for_merge = (2*progress_allocation)/3; - progress_allocation -= allocation_for_merge; - - // This structure must stay live until the join below. 
- struct fractal_thread_args fta = { - bl, - descriptor, - fd, - progress_allocation, - bl->fractal_queues[which_db], - bl->extracted_datasizes[which_db], - 0, - which_db, - target_nodesize, - target_basementnodesize, - target_compression_method, - target_fanout - }; - - r = toku_pthread_create(bl->fractal_threads+which_db, NULL, fractal_thread, (void*)&fta); - if (r) { - int r2 __attribute__((__unused__)) = queue_destroy(bl->fractal_queues[which_db]); - // ignore r2, since we already have an error - bl->fractal_queues[which_db] = nullptr; - goto error; - } - invariant(bl->fractal_threads_live[which_db]==false); - bl->fractal_threads_live[which_db] = true; - - r = merge_files(fs, bl, which_db, dest_db, compare, allocation_for_merge, bl->fractal_queues[which_db]); - - { - void *toku_pthread_retval; - int r2 = toku_pthread_join(bl->fractal_threads[which_db], &toku_pthread_retval); - invariant(fta.bl==bl); // this is a gratuitous assertion to make sure that the fta struct is still live here. A previous bug put that struct into a C block statement. - resource_assert_zero(r2); - invariant(toku_pthread_retval==NULL); - invariant(bl->fractal_threads_live[which_db]); - bl->fractal_threads_live[which_db] = false; - if (r == 0) r = fta.errno_result; - } - } else { - queue_eof(bl->fractal_queues[which_db]); - r = toku_loader_write_ft_from_q(bl, descriptor, fd, progress_allocation, - bl->fractal_queues[which_db], bl->extracted_datasizes[which_db], which_db, - target_nodesize, target_basementnodesize, target_compression_method, target_fanout); - } - } - - error: // this is the cleanup code. Even if r==0 (no error) we fall through to here. 
- if (bl->fractal_queues[which_db]) { - int r2 = queue_destroy(bl->fractal_queues[which_db]); - invariant(r2==0); - bl->fractal_queues[which_db] = nullptr; - } - - // if we get here we need to free up the merge_fileset and the rowset, as well as the keys - toku_free(rows->data); rows->data = NULL; - toku_free(rows->rows); rows->rows = NULL; - toku_free(fs->data_fidxs); fs->data_fidxs = NULL; - return r; -} - -static int toku_ft_loader_close_internal (FTLOADER bl) -/* Effect: Close the bulk loader. - * Return all the file descriptors in the array fds. */ -{ - int result = 0; - if (bl->N == 0) - result = update_progress(PROGRESS_MAX, bl, "done"); - else { - int remaining_progress = PROGRESS_MAX; - for (int i = 0; i < bl->N; i++) { - // Take the unallocated progress and divide it among the unfinished jobs. - // This calculation allocates all of the PROGRESS_MAX bits of progress to some job. - int allocate_here = remaining_progress/(bl->N - i); - remaining_progress -= allocate_here; - char *fname_in_cwd = toku_cachetable_get_fname_in_cwd(bl->cachetable, bl->new_fnames_in_env[i]); - result = loader_do_i(bl, i, bl->dbs[i], bl->bt_compare_funs[i], bl->descriptors[i], fname_in_cwd, allocate_here); - toku_free(fname_in_cwd); - if (result != 0) - goto error; - invariant(0 <= bl->progress && bl->progress <= PROGRESS_MAX); - } - if (result==0) invariant(remaining_progress==0); - - // fsync the directory containing the new tokudb files. 
- char *fname0 = toku_cachetable_get_fname_in_cwd(bl->cachetable, bl->new_fnames_in_env[0]); - int r = toku_fsync_directory(fname0); - toku_free(fname0); - if (r != 0) { - result = r; goto error; - } - } - invariant(bl->file_infos.n_files_open == 0); - invariant(bl->file_infos.n_files_extant == 0); - invariant(bl->progress == PROGRESS_MAX); - error: - toku_ft_loader_internal_destroy(bl, (bool)(result!=0)); - return result; -} - -int toku_ft_loader_close (FTLOADER bl, - ft_loader_error_func error_function, void *error_extra, - ft_loader_poll_func poll_function, void *poll_extra - ) -{ - int result = 0; - - int r; - - //printf("Closing\n"); - - ft_loader_set_error_function(&bl->error_callback, error_function, error_extra); - - ft_loader_set_poll_function(&bl->poll_callback, poll_function, poll_extra); - - if (bl->extractor_live) { - r = finish_extractor(bl); - if (r) - result = r; - invariant(!bl->extractor_live); - } else { - r = finish_primary_rows(bl); - if (r) - result = r; - } - - // check for an error during extraction - if (result == 0) { - r = ft_loader_call_error_function(&bl->error_callback); - if (r) - result = r; - } - - if (result == 0) { - r = toku_ft_loader_close_internal(bl); - if (r && result == 0) - result = r; - } else - toku_ft_loader_internal_destroy(bl, true); - - return result; -} - -int toku_ft_loader_finish_extractor(FTLOADER bl) { - int result = 0; - if (bl->extractor_live) { - int r = finish_extractor(bl); - if (r) - result = r; - invariant(!bl->extractor_live); - } else - result = EINVAL; - return result; -} - -int toku_ft_loader_abort(FTLOADER bl, bool is_error) -/* Effect : Abort the bulk loader, free ft_loader resources */ -{ - int result = 0; - - // cleanup the extractor thread - if (bl->extractor_live) { - int r = finish_extractor(bl); - if (r) - result = r; - invariant(!bl->extractor_live); - } - - for (int i = 0; i < bl->N; i++) - invariant(!bl->fractal_threads_live[i]); - - toku_ft_loader_internal_destroy(bl, is_error); - return 
result; -} - -int toku_ft_loader_get_error(FTLOADER bl, int *error) { - *error = ft_loader_get_error(&bl->error_callback); - return 0; -} - -static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int keylen, unsigned char *val, int vallen, int this_leafentry_size, STAT64INFO stats_to_update) { - lbuf->nkeys++; - lbuf->ndata++; - lbuf->dsize += keylen + vallen; - lbuf->off += this_leafentry_size; - - // append this key val pair to the leafnode - // #3588 TODO just make a clean ule and append it to the omt - // #3588 TODO can do the rebalancing here and avoid a lot of work later - FTNODE leafnode = lbuf->node; - uint32_t idx = BLB_DATA(leafnode, 0)->num_klpairs(); - DBT thekey = { .data = key, .size = (uint32_t) keylen }; - DBT theval = { .data = val, .size = (uint32_t) vallen }; - FT_MSG_S msg = { .type = FT_INSERT, - .msn = ZERO_MSN, - .xids = lbuf->xids, - .u = { .id = { &thekey, &theval } } }; - uint64_t workdone=0; - // there's no mvcc garbage in a bulk-loaded FT, so there's no need to pass useful gc info - txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true); - toku_ft_bn_apply_msg_once(BLB(leafnode,0), &msg, idx, NULL, &gc_info, &workdone, stats_to_update); -} - -static int write_literal(struct dbout *out, void*data, size_t len) { - invariant(out->current_off%4096==0); - int result = toku_os_write(out->fd, data, len); - if (result == 0) - out->current_off+=len; - return result; -} - -static void finish_leafnode (struct dbout *out, struct leaf_buf *lbuf, int progress_allocation, FTLOADER bl, uint32_t target_basementnodesize, enum toku_compression_method target_compression_method) { - int result = 0; - - // serialize leaf to buffer - size_t serialized_leaf_size = 0; - size_t uncompressed_serialized_leaf_size = 0; - char *serialized_leaf = NULL; - FTNODE_DISK_DATA ndd = NULL; - result = toku_serialize_ftnode_to_memory(lbuf->node, &ndd, target_basementnodesize, target_compression_method, true, true, &serialized_leaf_size, 
&uncompressed_serialized_leaf_size, &serialized_leaf); - - // write it out - if (result == 0) { - dbout_lock(out); - long long off_of_leaf = out->current_off; - result = write_literal(out, serialized_leaf, serialized_leaf_size); - if (result == 0) { - out->translation[lbuf->blocknum.b].off = off_of_leaf; - out->translation[lbuf->blocknum.b].size = serialized_leaf_size; - seek_align_locked(out); - } - dbout_unlock(out); - } - - // free the node - if (serialized_leaf) { - toku_free(ndd); - toku_free(serialized_leaf); - } - toku_ftnode_free(&lbuf->node); - xids_destroy(&lbuf->xids); - toku_free(lbuf); - - //printf("Nodewrite %d (%.1f%%):", progress_allocation, 100.0*progress_allocation/PROGRESS_MAX); - if (result == 0) - result = update_progress(progress_allocation, bl, "wrote node"); - - if (result) - ft_loader_set_panic(bl, result, true, 0, nullptr, nullptr); -} - -static int write_translation_table (struct dbout *out, long long *off_of_translation_p) { - seek_align(out); - struct dbuf ttable; - dbuf_init(&ttable); - long long off_of_translation = out->current_off; - long long bt_size_on_disk = out->n_translations * 16 + 20; - putbuf_int64(&ttable, out->n_translations); // number of records - putbuf_int64(&ttable, -1LL); // the linked list - out->translation[1].off = off_of_translation; - out->translation[1].size = bt_size_on_disk; - for (int i=0; in_translations; i++) { - putbuf_int64(&ttable, out->translation[i].off); - putbuf_int64(&ttable, out->translation[i].size); - } - unsigned int checksum = toku_x1764_memory(ttable.buf, ttable.off); - putbuf_int32(&ttable, checksum); - // pad it to 512 zeros - long long encoded_length = ttable.off; - { - int nbytes_to_add = roundup_to_multiple(512, ttable.off) - encoded_length; - char zeros[nbytes_to_add]; - for (int i=0; ifd, ttable.buf, ttable.off, off_of_translation); - } - dbuf_destroy(&ttable); - *off_of_translation_p = off_of_translation; - return result; -} - -static int -write_header (struct dbout *out, long long 
translation_location_on_disk, long long translation_size_on_disk) { - int result = 0; - size_t size = toku_serialize_ft_size(out->h->h); - size_t alloced_size = roundup_to_multiple(512, size); - struct wbuf wbuf; - char *MALLOC_N_ALIGNED(512, alloced_size, buf); - if (buf == NULL) { - result = get_error_errno(); - } else { - wbuf_init(&wbuf, buf, size); - out->h->h->on_disk_stats = out->h->in_memory_stats; - toku_serialize_ft_to_wbuf(&wbuf, out->h->h, translation_location_on_disk, translation_size_on_disk); - for (size_t i=size; ifd, wbuf.buf, alloced_size, 0); - } - toku_free(buf); - } - return result; -} - -static int read_some_pivots (FIDX pivots_file, int n_to_read, FTLOADER bl, - /*out*/ DBT pivots[/*n_to_read*/]) -// pivots is an array to be filled in. The pivots array is uninitialized. -{ - for (int i = 0; i < n_to_read; i++) - pivots[i] = zero_dbt; - - FILE *pivots_stream = toku_bl_fidx2file(bl, pivots_file); - - int result = 0; - for (int i = 0; i < n_to_read; i++) { - int r = bl_read_dbt(&pivots[i], pivots_stream); - if (r != 0) { - result = r; - break; - } - } - return result; -} - -static void delete_pivots(DBT pivots[], int n) { - for (int i = 0; i < n; i++) - toku_free(pivots[i].data); - toku_free(pivots); -} - -static int setup_nonleaf_block (int n_children, - struct subtrees_info *subtrees, FIDX pivots_file, int64_t first_child_offset_in_subtrees, - struct subtrees_info *next_subtrees, FIDX next_pivots_file, - struct dbout *out, FTLOADER bl, - /*out*/int64_t *blocknum, - /*out*/struct subtree_info **subtrees_info_p, - /*out*/DBT **pivots_p) -// Do the serial part of setting up a non leaf block. -// Read the pivots out of the file, and store them in a newly allocated array of DBTs (returned in *pivots_p) There are (n_blocks_to_use-1) of these. -// Copy the final pivot into the next_pivots file instead of returning it. 
-// Copy the subtree_info from the subtrees structure, and store them in a newly allocated array of subtree_infos (return in *subtrees_info_p). There are n_blocks_to_use of these. -// Allocate a block number and return it in *blocknum. -// Store the blocknum in the next_blocks structure, so it can be combined with the pivots at the next level of the tree. -// Update n_blocks_used and n_translations. -// This code cannot be called in parallel because of all the race conditions. -// The actual creation of the node can be called in parallel after this work is done. -{ - //printf("Nonleaf has children :"); for(int i=0; isubtrees[i].block); printf("\n"); - - int result = 0; - - DBT *MALLOC_N(n_children, pivots); - if (pivots == NULL) { - result = get_error_errno(); - } - - if (result == 0) { - int r = read_some_pivots(pivots_file, n_children, bl, pivots); - if (r) - result = r; - } - - if (result == 0) { - FILE *next_pivots_stream = toku_bl_fidx2file(bl, next_pivots_file); - int r = bl_write_dbt(&pivots[n_children-1], next_pivots_stream, NULL, nullptr, bl); - if (r) - result = r; - } - - if (result == 0) { - // The last pivot was written to the next_pivots file, so we free it now instead of returning it. 
- toku_free(pivots[n_children-1].data); - pivots[n_children-1] = zero_dbt; - - struct subtree_info *XMALLOC_N(n_children, subtrees_array); - for (int i = 0; i < n_children; i++) { - int64_t from_blocknum = first_child_offset_in_subtrees + i; - subtrees_array[i] = subtrees->subtrees[from_blocknum]; - } - - int r = allocate_block(out, blocknum); - if (r) { - toku_free(subtrees_array); - result = r; - } else { - allocate_node(next_subtrees, *blocknum); - - *pivots_p = pivots; - *subtrees_info_p = subtrees_array; - } - } - - if (result != 0) { - if (pivots) { - delete_pivots(pivots, n_children); pivots = NULL; - } - } - - return result; -} - -static void write_nonleaf_node (FTLOADER bl, struct dbout *out, int64_t blocknum_of_new_node, int n_children, - DBT *pivots, /* must free this array, as well as the things it points t */ - struct subtree_info *subtree_info, int height, const DESCRIPTOR UU(desc), uint32_t UU(target_nodesize), uint32_t target_basementnodesize, enum toku_compression_method target_compression_method) -{ - //Nodes do not currently touch descriptors - invariant(height > 0); - - int result = 0; - - FTNODE XMALLOC(node); - toku_initialize_empty_ftnode(node, make_blocknum(blocknum_of_new_node), height, n_children, - FT_LAYOUT_VERSION, 0); - node->totalchildkeylens = 0; - for (int i=0; ichildkeys[i], pivots[i]); - node->totalchildkeylens += pivots[i].size; - } - assert(node->bp); - for (int i=0; itranslation[blocknum_of_new_node].off = out->current_off; - out->translation[blocknum_of_new_node].size = n_bytes; - //fprintf(stderr, "Wrote internal node at %ld (%ld bytes)\n", out->current_off, n_bytes); - //for (uint32_t i=0; i=' ' && b<128) ? 
b : '*'); } - r = write_literal(out, bytes, n_bytes); - if (r) - result = r; - else - seek_align_locked(out); - dbout_unlock(out); - toku_free(bytes); - } - } - - for (int i=0; ichildkeys[i].data); - } - for (int i=0; ibp); - toku_free(node->childkeys); - toku_free(node); - toku_free(ndd); - toku_free(subtree_info); - - if (result != 0) - ft_loader_set_panic(bl, result, true, 0, nullptr, nullptr); -} - -static int write_nonleaves (FTLOADER bl, FIDX pivots_fidx, struct dbout *out, struct subtrees_info *sts, const DESCRIPTOR descriptor, uint32_t target_nodesize, uint32_t target_basementnodesize, enum toku_compression_method target_compression_method) { - int result = 0; - int height = 1; - - // Watch out for the case where we saved the last pivot but didn't write any more nodes out. - // The trick is not to look at n_pivots, but to look at blocks.n_blocks - while (sts->n_subtrees > 1) { - // If there is more than one block in blocks, then we must build another level of the tree. - - // we need to create a pivots file for the pivots of the next level. - // and a blocks_array - // So for example. - // 1) we grab 16 pivots and 16 blocks. - // 2) We put the 15 pivots and 16 blocks into an non-leaf node. - // 3) We put the 16th pivot into the next pivots file. - { - int r = fseek(toku_bl_fidx2file(bl, pivots_fidx), 0, SEEK_SET); - if (r!=0) { return get_error_errno(); } - } - - FIDX next_pivots_file; - { - int r = ft_loader_open_temp_file (bl, &next_pivots_file); - if (r != 0) { result = r; break; } - } - - struct subtrees_info next_sts; - subtrees_info_init(&next_sts); - next_sts.n_subtrees = 0; - next_sts.n_subtrees_limit = 1; - XMALLOC_N(next_sts.n_subtrees_limit, next_sts.subtrees); - - const int n_per_block = 15; - int64_t n_subtrees_used = 0; - while (sts->n_subtrees - n_subtrees_used >= n_per_block*2) { - // grab the first N_PER_BLOCK and build a node. 
- DBT *pivots; - int64_t blocknum_of_new_node; - struct subtree_info *subtree_info; - int r = setup_nonleaf_block (n_per_block, - sts, pivots_fidx, n_subtrees_used, - &next_sts, next_pivots_file, - out, bl, - &blocknum_of_new_node, &subtree_info, &pivots); - if (r) { - result = r; - break; - } else { - write_nonleaf_node(bl, out, blocknum_of_new_node, n_per_block, pivots, subtree_info, height, descriptor, target_nodesize, target_basementnodesize, target_compression_method); // frees all the data structures that go into making the node. - n_subtrees_used += n_per_block; - } - } - - int64_t n_blocks_left = sts->n_subtrees - n_subtrees_used; - if (result == 0) { - // Now we have a one or two blocks at the end to handle. - invariant(n_blocks_left>=2); - if (n_blocks_left > n_per_block) { - // Write half the remaining blocks - int64_t n_first = n_blocks_left/2; - DBT *pivots; - int64_t blocknum_of_new_node; - struct subtree_info *subtree_info; - int r = setup_nonleaf_block(n_first, - sts, pivots_fidx, n_subtrees_used, - &next_sts, next_pivots_file, - out, bl, - &blocknum_of_new_node, &subtree_info, &pivots); - if (r) { - result = r; - } else { - write_nonleaf_node(bl, out, blocknum_of_new_node, n_first, pivots, subtree_info, height, descriptor, target_nodesize, target_basementnodesize, target_compression_method); - n_blocks_left -= n_first; - n_subtrees_used += n_first; - } - } - } - if (result == 0) { - // Write the last block. 
- DBT *pivots; - int64_t blocknum_of_new_node; - struct subtree_info *subtree_info; - int r = setup_nonleaf_block(n_blocks_left, - sts, pivots_fidx, n_subtrees_used, - &next_sts, next_pivots_file, - out, bl, - &blocknum_of_new_node, &subtree_info, &pivots); - if (r) { - result = r; - } else { - write_nonleaf_node(bl, out, blocknum_of_new_node, n_blocks_left, pivots, subtree_info, height, descriptor, target_nodesize, target_basementnodesize, target_compression_method); - n_subtrees_used += n_blocks_left; - } - } - if (result == 0) - invariant(n_subtrees_used == sts->n_subtrees); - - - if (result == 0) // pick up write_nonleaf_node errors - result = ft_loader_get_error(&bl->error_callback); - - // Now set things up for the next iteration. - int r = ft_loader_fi_close(&bl->file_infos, pivots_fidx, true); if (r != 0 && result == 0) result = r; - r = ft_loader_fi_unlink(&bl->file_infos, pivots_fidx); if (r != 0 && result == 0) result = r; - pivots_fidx = next_pivots_file; - toku_free(sts->subtrees); sts->subtrees = NULL; - *sts = next_sts; - height++; - - if (result) - break; - } - { int r = ft_loader_fi_close (&bl->file_infos, pivots_fidx, true); if (r != 0 && result == 0) result = r; } - { int r = ft_loader_fi_unlink(&bl->file_infos, pivots_fidx); if (r != 0 && result == 0) result = r; } - return result; -} - -void ft_loader_set_fractal_workers_count_from_c(FTLOADER bl) { - ft_loader_set_fractal_workers_count (bl); -} - - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ftloader.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ftloader.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ftloader.h 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ftloader.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,135 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FTLOADER_H -#define FTLOADER_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This 
program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. 
- -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
-#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -// The loader callbacks are C functions and need to be defined as such - -typedef void (*ft_loader_error_func)(DB *, int which_db, int err, DBT *key, DBT *val, void *extra); - -typedef int (*ft_loader_poll_func)(void *extra, float progress); - -typedef struct ft_loader_s *FTLOADER; - -int toku_ft_loader_open (FTLOADER *bl, - CACHETABLE cachetable, - generate_row_for_put_func g, - DB *src_db, - int N, - FT_HANDLE ft_hs[/*N*/], DB* dbs[/*N*/], - const char * new_fnames_in_env[/*N*/], - ft_compare_func bt_compare_functions[/*N*/], - const char *temp_file_template, - LSN load_lsn, - TOKUTXN txn, - bool reserve_memory, - uint64_t reserve_memory_size, - bool compress_intermediates, - bool allow_puts); - -int toku_ft_loader_put (FTLOADER bl, DBT *key, DBT *val); - -int toku_ft_loader_close (FTLOADER bl, - ft_loader_error_func error_callback, void *error_callback_extra, - ft_loader_poll_func poll_callback, void *poll_callback_extra); - -int toku_ft_loader_abort(FTLOADER bl, - bool is_error); - -// For test purposes only -void toku_ft_loader_set_size_factor (uint32_t factor); - -void ft_loader_set_os_fwrite (size_t (*fwrite_fun)(const void*,size_t,size_t,FILE*)); - -size_t ft_loader_leafentry_size(size_t key_size, size_t val_size, TXNID xid); - -#endif // FTLOADER_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ftloader-internal.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ftloader-internal.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ftloader-internal.h 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ftloader-internal.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,366 +0,0 @@ -/* -*- mode: C++; 
c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FTLOADER_INTERNAL_H -#define FTLOADER_INTERNAL_H -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. 
- -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." 
- -#include -#include "fttypes.h" -#include "ftloader.h" -#include "queue.h" -#include -#include "dbufio.h" - -enum { EXTRACTOR_QUEUE_DEPTH = 2, - FILE_BUFFER_SIZE = 1<<24, - MIN_ROWSET_MEMORY = 1<<23, - MIN_MERGE_FANIN = 2, - FRACTAL_WRITER_QUEUE_DEPTH = 3, - FRACTAL_WRITER_ROWSETS = FRACTAL_WRITER_QUEUE_DEPTH + 2, - DBUFIO_DEPTH = 2, - TARGET_MERGE_BUF_SIZE = 1<<24, // we'd like the merge buffer to be this big. - MIN_MERGE_BUF_SIZE = 1<<20, // always use at least this much - MAX_UNCOMPRESSED_BUF = MIN_MERGE_BUF_SIZE -}; - - -/* These functions are exported to allow the tests to compile. */ - -/* These structures maintain a collection of all the open temporary files used by the loader. */ -struct file_info { - bool is_open; - bool is_extant; // if true, the file must be unlinked. - char *fname; - FILE *file; - uint64_t n_rows; // how many rows were written into that file - size_t buffer_size; - void *buffer; -}; -struct file_infos { - int n_files; - int n_files_limit; - struct file_info *file_infos; - int n_files_open, n_files_extant; - toku_mutex_t lock; // must protect this data structure because current activity performs a REALLOC(fi->file_infos). -}; -typedef struct fidx { int idx; } FIDX; -static const FIDX FIDX_NULL __attribute__((__unused__)) = {-1}; -static int fidx_is_null (const FIDX f) __attribute__((__unused__)); -static int fidx_is_null (const FIDX f) { return f.idx==-1; } -FILE *toku_bl_fidx2file (FTLOADER bl, FIDX i); - -int ft_loader_open_temp_file (FTLOADER bl, FIDX*file_idx); - -/* These data structures are used for manipulating a collection of rows in main memory. */ -struct row { - size_t off; // the offset in the data array. 
- int klen,vlen; -}; -struct rowset { - uint64_t memory_budget; - size_t n_rows, n_rows_limit; - struct row *rows; - size_t n_bytes, n_bytes_limit; - char *data; -}; - -int init_rowset (struct rowset *rows, uint64_t memory_budget); -void destroy_rowset (struct rowset *rows); -int add_row (struct rowset *rows, DBT *key, DBT *val); - -int loader_write_row(DBT *key, DBT *val, FIDX data, FILE*, uint64_t *dataoff, struct wbuf *wb, FTLOADER bl); -int loader_read_row (FILE *f, DBT *key, DBT *val); - -struct merge_fileset { - bool have_sorted_output; // Is there an previous key? - FIDX sorted_output; // this points to one of the data_fidxs. If output_is_sorted then this is the file containing sorted data. It's still open - DBT prev_key; // What is it? If it's here, its the last output in the merge fileset - - int n_temp_files, n_temp_files_limit; - FIDX *data_fidxs; -}; - -void init_merge_fileset (struct merge_fileset *fs); -void destroy_merge_fileset (struct merge_fileset *fs); - -struct poll_callback_s { - ft_loader_poll_func poll_function; - void *poll_extra; -}; -typedef struct poll_callback_s *ft_loader_poll_callback; - -int ft_loader_init_poll_callback(ft_loader_poll_callback); - -void ft_loader_destroy_poll_callback(ft_loader_poll_callback); - -void ft_loader_set_poll_function(ft_loader_poll_callback, ft_loader_poll_func poll_function, void *poll_extra); - -int ft_loader_call_poll_function(ft_loader_poll_callback, float progress); - -struct error_callback_s { - int error; - ft_loader_error_func error_callback; - void *extra; - DB *db; - int which_db; - DBT key; - DBT val; - bool did_callback; - toku_mutex_t mutex; -}; -typedef struct error_callback_s *ft_loader_error_callback; - -void ft_loader_init_error_callback(ft_loader_error_callback); - -void ft_loader_destroy_error_callback(ft_loader_error_callback); - -int ft_loader_get_error(ft_loader_error_callback); - -void ft_loader_set_error_function(ft_loader_error_callback, ft_loader_error_func error_function, void 
*extra); - -int ft_loader_set_error(ft_loader_error_callback, int error, DB *db, int which_db, DBT *key, DBT *val); - -int ft_loader_call_error_function(ft_loader_error_callback); - -int ft_loader_set_error_and_callback(ft_loader_error_callback, int error, DB *db, int which_db, DBT *key, DBT *val); - -struct ft_loader_s { - // These two are set in the close function, and used while running close - struct error_callback_s error_callback; - struct poll_callback_s poll_callback; - - generate_row_for_put_func generate_row_for_put; - ft_compare_func *bt_compare_funs; - - DB *src_db; - int N; - DB **dbs; // N of these - DESCRIPTOR *descriptors; // N of these. - TXNID *root_xids_that_created; // N of these. - const char **new_fnames_in_env; // N of these. The file names that the final data will be written to (relative to env). - - uint64_t *extracted_datasizes; // N of these. - - struct rowset primary_rowset; // the primary rows that have been put, but the secondary rows haven't been generated. - struct rowset primary_rowset_temp; // the primary rows that are being worked on by the extractor_thread. - - QUEUE primary_rowset_queue; // main thread enqueues rowsets in this queue (in maybe 64MB chunks). The extractor thread removes them, sorts them, adn writes to file. - toku_pthread_t extractor_thread; // the thread that takes primary rowset and does extraction and the first level sort and write to file. - bool extractor_live; - - DBT *last_key; // for each rowset, remember the most recently output key. The system may choose not to keep this up-to-date when a rowset is unsorted. These keys are malloced and ulen maintains the size of the malloced block. - - struct rowset *rows; // secondary rows that have been put, but haven't been sorted and written to a file. - uint64_t n_rows; // how many rows have been put? 
- struct merge_fileset *fs; - - const char *temp_file_template; - - CACHETABLE cachetable; - bool did_reserve_memory; - bool compress_intermediates; - bool allow_puts; - uint64_t reserved_memory; // how much memory are we allowed to use? - - /* To make it easier to recover from errors, we don't use FILE*, instead we use an index into the file_infos. */ - struct file_infos file_infos; - -#define PROGRESS_MAX (1<<16) - int progress; // Progress runs from 0 to PROGRESS_MAX. When we call the poll function we convert to a float from 0.0 to 1.0 - // We use an integer so that we can add to the progress using a fetch-and-add instruction. - - int progress_callback_result; // initially zero, if any call to the poll function callback returns nonzero, we save the result here (and don't call the poll callback function again). - - LSN load_lsn; //LSN of the fsynced 'load' log entry. Write this LSN (as checkpoint_lsn) in ft headers made by this loader. - TXNID load_root_xid; //(Root) transaction that performed the load. - - QUEUE *fractal_queues; // an array of work queues, one for each secondary index. - toku_pthread_t *fractal_threads; - bool *fractal_threads_live; // an array of bools indicating that fractal_threads[i] is a live thread. (There is no NULL for a pthread_t, so we have to maintain this separately). - - unsigned fractal_workers; // number of fractal tree writer threads - - toku_mutex_t mutex; - bool mutex_init; -}; - -// Set the number of rows in the loader. Used for test. -void toku_ft_loader_set_n_rows(FTLOADER bl, uint64_t n_rows); - -// Get the number of rows in the loader. Used for test. -uint64_t toku_ft_loader_get_n_rows(FTLOADER bl); - -// The data passed into a fractal_thread via pthread_create. -struct fractal_thread_args { - FTLOADER bl; - const DESCRIPTOR descriptor; - int fd; // write the ft into fd. - int progress_allocation; - QUEUE q; - uint64_t total_disksize_estimate; - int errno_result; // the final result. 
- int which_db; - uint32_t target_nodesize; - uint32_t target_basementnodesize; - enum toku_compression_method target_compression_method; - uint32_t target_fanout; -}; - -void toku_ft_loader_set_n_rows(FTLOADER bl, uint64_t n_rows); -uint64_t toku_ft_loader_get_n_rows(FTLOADER bl); - -int merge_row_arrays_base (struct row dest[/*an+bn*/], struct row a[/*an*/], int an, struct row b[/*bn*/], int bn, - int which_db, DB *dest_db, ft_compare_func, - FTLOADER, - struct rowset *); - -int merge_files (struct merge_fileset *fs, FTLOADER bl, int which_db, DB *dest_db, ft_compare_func, int progress_allocation, QUEUE); - -int sort_and_write_rows (struct rowset rows, struct merge_fileset *fs, FTLOADER bl, int which_db, DB *dest_db, ft_compare_func); - -int mergesort_row_array (struct row rows[/*n*/], int n, int which_db, DB *dest_db, ft_compare_func, FTLOADER, struct rowset *); - -//int write_file_to_dbfile (int outfile, FIDX infile, FTLOADER bl, const DESCRIPTOR descriptor, int progress_allocation); -int toku_merge_some_files_using_dbufio (const bool to_q, FIDX dest_data, QUEUE q, int n_sources, DBUFIO_FILESET bfs, FIDX srcs_fidxs[/*n_sources*/], FTLOADER bl, int which_db, DB *dest_db, ft_compare_func compare, int progress_allocation); - -int ft_loader_sort_and_write_rows (struct rowset *rows, struct merge_fileset *fs, FTLOADER bl, int which_db, DB *dest_db, ft_compare_func); - -// This is probably only for testing. 
-int toku_loader_write_ft_from_q_in_C (FTLOADER bl, - const DESCRIPTOR descriptor, - int fd, // write to here - int progress_allocation, - QUEUE q, - uint64_t total_disksize_estimate, - int which_db, - uint32_t target_nodesize, - uint32_t target_basementnodesize, - enum toku_compression_method target_compression_method, - uint32_t fanout); - -int ft_loader_mergesort_row_array (struct row rows[/*n*/], int n, int which_db, DB *dest_db, ft_compare_func, FTLOADER, struct rowset *); - -int ft_loader_write_file_to_dbfile (int outfile, FIDX infile, FTLOADER bl, const DESCRIPTOR descriptor, int progress_allocation); - -int ft_loader_init_file_infos (struct file_infos *fi); -void ft_loader_fi_destroy (struct file_infos *fi, bool is_error); -int ft_loader_fi_close (struct file_infos *fi, FIDX idx, bool require_open); -int ft_loader_fi_close_all (struct file_infos *fi); -int ft_loader_fi_reopen (struct file_infos *fi, FIDX idx, const char *mode); -int ft_loader_fi_unlink (struct file_infos *fi, FIDX idx); - -int toku_ft_loader_internal_init (/* out */ FTLOADER *blp, - CACHETABLE cachetable, - generate_row_for_put_func g, - DB *src_db, - int N, FT_HANDLE ft_hs[/*N*/], DB* dbs[/*N*/], - const char *new_fnames_in_env[/*N*/], - ft_compare_func bt_compare_functions[/*N*/], - const char *temp_file_template, - LSN load_lsn, - TOKUTXN txn, - bool reserve_memory, - uint64_t reserve_memory_size, - bool compress_intermediates, - bool allow_puts); - -void toku_ft_loader_internal_destroy (FTLOADER bl, bool is_error); - -// For test purposes only. (In production, the rowset size is determined by negotation with the cachetable for some memory. See #2613.) 
-uint64_t toku_ft_loader_get_rowset_budget_for_testing (void); - -int toku_ft_loader_finish_extractor(FTLOADER bl); - -int toku_ft_loader_get_error(FTLOADER bl, int *loader_errno); - -void ft_loader_lock_init(FTLOADER bl); -void ft_loader_lock_destroy(FTLOADER bl); -void ft_loader_set_fractal_workers_count_from_c(FTLOADER bl); - -#endif // FTLOADER_INTERNAL_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft_msg.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft_msg.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft_msg.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft_msg.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,133 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. 
- Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." - - -#include -#include "fttypes.h" -#include "xids.h" -#include "ft_msg.h" - - -uint32_t -ft_msg_get_keylen(FT_MSG ft_msg) { - uint32_t rval = ft_msg->u.id.key->size; - return rval; -} - -uint32_t -ft_msg_get_vallen(FT_MSG ft_msg) { - uint32_t rval = ft_msg->u.id.val->size; - return rval; -} - -XIDS -ft_msg_get_xids(FT_MSG ft_msg) { - XIDS rval = ft_msg->xids; - return rval; -} - -void * -ft_msg_get_key(FT_MSG ft_msg) { - void * rval = ft_msg->u.id.key->data; - return rval; -} - -void * -ft_msg_get_val(FT_MSG ft_msg) { - void * rval = ft_msg->u.id.val->data; - return rval; -} - -enum ft_msg_type -ft_msg_get_type(FT_MSG ft_msg) { - enum ft_msg_type rval = ft_msg->type; - return rval; -} - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft_msg.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft_msg.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft_msg.h 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft_msg.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,124 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: - -/* The purpose of this file is to provide access to the ft_msg, - * which is the ephemeral version of the fifo_msg. 
- */ - -#ifndef FT_MSG_H -#define FT_MSG_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. 
- -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
-#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - - -uint32_t ft_msg_get_keylen(FT_MSG ft_msg); - -uint32_t ft_msg_get_vallen(FT_MSG ft_msg); - -XIDS ft_msg_get_xids(FT_MSG ft_msg); - -void * ft_msg_get_key(FT_MSG ft_msg); - -void * ft_msg_get_val(FT_MSG ft_msg); - -enum ft_msg_type ft_msg_get_type(FT_MSG ft_msg); - -void ft_msg_from_fifo_msg(FT_MSG ft_msg, FIFO_MSG fifo_msg); - -#if 0 - -void ft_msg_from_dbts(FT_MSG ft_msg, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type); - -#endif - - - -#endif // FT_MSG_H - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-node-deserialize.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-node-deserialize.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-node-deserialize.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-node-deserialize.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,238 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include -#include - -/* - * ft-node-deserialize.c - - * This file contains functions used by deserializtion - * code paths in and out of the engine. The functions can, - * essentially, be broken up into two types. Some of these - * functions return error codes based expected values inside - * the fractal tree node, others merely read the specific - * quantities of bytes out of the buffer. 
It is expeceted - * that these will be called in the correct order by users - * of these functions/this API. - * - */ - -// Sets initial values for the given fractal tree node to be -// deserialized -void -initialize_ftnode(FTNODE node, BLOCKNUM blocknum) -{ - node->fullhash = 0xDEADBEEF; // Is this 'spoof' ok? - node->thisnodename = blocknum; - node->dirty = 0; - node->bp = NULL; - // Can we use this initialization as a correctness assert in - // a later function? - node->layout_version_read_from_disk = 0; -} - -/************************ - * TODO: In other deserialization code, we check the rb size member. We - * verify that it is greater than or equal to 24. Ignoring this magic - * number for a moment, should we put this check in its own function? * -*************************/ - - -// Read and check the 'magic' bytes on disk. Returns an error if -// the magic does not match. -int -read_and_check_magic(struct rbuf *rb) -{ - int r = 0; - bytevec magic; - rbuf_literal_bytes(rb, &magic, 8); - if (memcmp(magic, "tokuleaf", 8)!=0 && - memcmp(magic, "tokunode", 8)!=0) { - r = DB_BADFORMAT; // TODO: Return more meaningful error. - } - - return r; -} - -// Read the version number from the given buffer -// and returns an error if the version is too old. -int -read_and_check_version(FTNODE node, struct rbuf *rb) -{ - int r = 0; - int version = rbuf_int(rb); - node->layout_version_read_from_disk = version; - if (version < FT_LAYOUT_MIN_SUPPORTED_VERSION) { - r = 1; // TODO: Better error reporting. - } - - return r; -} - -// Reads the basic version, build, and child info from -// the given buffer. -void -read_node_info(FTNODE node, struct rbuf *rb, int version) -{ - node->layout_version = version; - node->layout_version_original = rbuf_int(rb); - node->build_id = rbuf_int(rb); - node->n_children = rbuf_int(rb); -} - -// Allocates the partitions based on the given node's nubmer -// of children. 
It then reads, out of the given buffer, -// the start and size of each child partition. -// TODO: Should these be two seperate functions? -void -allocate_and_read_partition_offsets(FTNODE node, struct rbuf *rb, FTNODE_DISK_DATA *ndd) -{ - XMALLOC_N(node->n_children, node->bp); - // TODO: Fix this to use xmalloc_n - XMALLOC_N(node->n_children, *ndd); - // Read the partition locations. - for (int i = 0; i < node->n_children; i++) { - BP_START(*ndd, i) = rbuf_int(rb); - BP_SIZE (*ndd, i) = rbuf_int(rb); - } -} - -// Compares checksum of stored (in the given buffer) checksum -// and the checksum of the buffer itself. If these are NOT -// equal, this function returns an appropriate error code. -int -check_node_info_checksum(struct rbuf *rb) -{ - int r = 0; - // Verify checksum of header stored. - uint32_t checksum = toku_x1764_memory(rb->buf, rb->ndone); - uint32_t stored_checksum = rbuf_int(rb); - - if (stored_checksum != checksum) { - // TODO: dump_bad_block(rb->buf, rb->size); - r = TOKUDB_BAD_CHECKSUM; - } - - return r; -} - -// Reads node info from older (13 and 14) fractal tree nodes -// out of the given buffer. -void -read_legacy_node_info(FTNODE node, struct rbuf *rb, int version) -{ - (void)rbuf_int(rb); // 1. nodesize - node->flags = rbuf_int(rb); // 2. flags - node->height = rbuf_int(rb); // 3. height - - // If the version is less than 14, there are two extra ints here. - // we would need to ignore them if they are there. - if (version == FT_LAYOUT_VERSION_13) { - (void) rbuf_int(rb); // 4. rand4 - (void) rbuf_int(rb); // 5. local - } -} - -// Assuming the given buffer is in the correct position, -// this checks to see if the stored checksum matches the -// checksum of the entire buffer. 
-int -check_legacy_end_checksum(struct rbuf *rb) -{ - int r = 0; - uint32_t expected_xsum = rbuf_int(rb); - uint32_t actual_xsum = toku_x1764_memory(rb->buf, rb->size - 4); - if (expected_xsum != actual_xsum) { - r = TOKUDB_BAD_CHECKSUM; - } - - return r; -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft_node-serialize.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft_node-serialize.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft_node-serialize.cc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft_node-serialize.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,3214 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "ft-internal.h" -#include "log-internal.h" -#include -#include -#include -#include -#include "ft.h" -#include -#include - -static FT_UPGRADE_STATUS_S ft_upgrade_status; - -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ft_upgrade_status, k, c, t, "ft upgrade: " l, inc) - -static void -status_init(void) -{ - // Note, this function initializes the keyname, type, and legend fields. - // Value fields are initialized to zero by compiler. 
- STATUS_INIT(FT_UPGRADE_FOOTPRINT, nullptr, UINT64, "footprint", TOKU_ENGINE_STATUS); - ft_upgrade_status.initialized = true; -} -#undef STATUS_INIT - -#define UPGRADE_STATUS_VALUE(x) ft_upgrade_status.status[x].value.num - -void -toku_ft_upgrade_get_status(FT_UPGRADE_STATUS s) { - if (!ft_upgrade_status.initialized) { - status_init(); - } - UPGRADE_STATUS_VALUE(FT_UPGRADE_FOOTPRINT) = toku_log_upgrade_get_footprint(); - *s = ft_upgrade_status; -} - -static int num_cores = 0; // cache the number of cores for the parallelization -static struct toku_thread_pool *ft_pool = NULL; - -int get_num_cores(void) { - return num_cores; -} - -struct toku_thread_pool *get_ft_pool(void) { - return ft_pool; -} - -void -toku_ft_serialize_layer_init(void) { - num_cores = toku_os_get_number_active_processors(); - int r = toku_thread_pool_create(&ft_pool, num_cores); lazy_assert_zero(r); -} - -void -toku_ft_serialize_layer_destroy(void) { - toku_thread_pool_destroy(&ft_pool); -} - -enum {FILE_CHANGE_INCREMENT = (16<<20)}; - -static inline uint64_t -alignup64(uint64_t a, uint64_t b) { - return ((a+b-1)/b)*b; -} - -// safe_file_size_lock must be held. -void -toku_maybe_truncate_file (int fd, uint64_t size_used, uint64_t expected_size, uint64_t *new_sizep) -// Effect: If file size >= SIZE+32MiB, reduce file size. -// (32 instead of 16.. hysteresis). -// Return 0 on success, otherwise an error number. -{ - int64_t file_size; - { - int r = toku_os_get_file_size(fd, &file_size); - lazy_assert_zero(r); - invariant(file_size >= 0); - } - invariant(expected_size == (uint64_t)file_size); - // If file space is overallocated by at least 32M - if ((uint64_t)file_size >= size_used + (2*FILE_CHANGE_INCREMENT)) { - toku_off_t new_size = alignup64(size_used, (2*FILE_CHANGE_INCREMENT)); //Truncate to new size_used. 
- invariant(new_size < file_size); - invariant(new_size >= 0); - int r = ftruncate(fd, new_size); - lazy_assert_zero(r); - *new_sizep = new_size; - } - else { - *new_sizep = file_size; - } - return; -} - -static int64_t -min64(int64_t a, int64_t b) { - if (a= 0); - invariant(expected_size == file_size); - // We want to double the size of the file, or add 16MiB, whichever is less. - // We emulate calling this function repeatedly until it satisfies the request. - int64_t to_write = 0; - if (file_size == 0) { - // Prevent infinite loop by starting with stripe_width as a base case. - to_write = stripe_width; - } - while (file_size + to_write < size) { - to_write += alignup64(min64(file_size + to_write, FILE_CHANGE_INCREMENT), stripe_width); - } - if (to_write > 0) { - assert(to_write%512==0); - toku::scoped_malloc_aligned wbuf_aligned(to_write, 512); - char *wbuf = reinterpret_cast(wbuf_aligned.get()); - memset(wbuf, 0, to_write); - toku_off_t start_write = alignup64(file_size, stripe_width); - invariant(start_write >= file_size); - toku_os_full_pwrite(fd, wbuf, to_write, start_write); - *new_size = start_write + to_write; - } - else { - *new_size = file_size; - } -} - -// Don't include the sub_block header -// Overhead calculated in same order fields are written to wbuf -enum { - node_header_overhead = (8+ // magic "tokunode" or "tokuleaf" or "tokuroll" - 4+ // layout_version - 4+ // layout_version_original - 4), // build_id -}; - -#include "sub_block.h" -#include "sub_block_map.h" - -// uncompressed header offsets -enum { - uncompressed_magic_offset = 0, - uncompressed_version_offset = 8, -}; - -static uint32_t -serialize_node_header_size(FTNODE node) { - uint32_t retval = 0; - retval += 8; // magic - retval += sizeof(node->layout_version); - retval += sizeof(node->layout_version_original); - retval += 4; // BUILD_ID - retval += 4; // n_children - retval += node->n_children*8; // encode start offset and length of each partition - retval += 4; // checksum - return 
retval; -} - -static void -serialize_node_header(FTNODE node, FTNODE_DISK_DATA ndd, struct wbuf *wbuf) { - if (node->height == 0) - wbuf_nocrc_literal_bytes(wbuf, "tokuleaf", 8); - else - wbuf_nocrc_literal_bytes(wbuf, "tokunode", 8); - paranoid_invariant(node->layout_version == FT_LAYOUT_VERSION); - wbuf_nocrc_int(wbuf, node->layout_version); - wbuf_nocrc_int(wbuf, node->layout_version_original); - wbuf_nocrc_uint(wbuf, BUILD_ID); - wbuf_nocrc_int (wbuf, node->n_children); - for (int i=0; in_children; i++) { - assert(BP_SIZE(ndd,i)>0); - wbuf_nocrc_int(wbuf, BP_START(ndd, i)); // save the beginning of the partition - wbuf_nocrc_int(wbuf, BP_SIZE (ndd, i)); // and the size - } - // checksum the header - uint32_t end_to_end_checksum = toku_x1764_memory(wbuf->buf, wbuf_get_woffset(wbuf)); - wbuf_nocrc_int(wbuf, end_to_end_checksum); - invariant(wbuf->ndone == wbuf->size); -} - -static uint32_t -serialize_ftnode_partition_size (FTNODE node, int i) -{ - uint32_t result = 0; - paranoid_invariant(node->bp[i].state == PT_AVAIL); - result++; // Byte that states what the partition is - if (node->height > 0) { - NONLEAF_CHILDINFO bnc = BNC(node, i); - // number of messages (4 bytes) plus size of the buffer - result += (4 + toku_bnc_nbytesinbuf(bnc)); - // number of offsets (4 bytes) plus an array of 4 byte offsets, for each message tree - result += (4 + (4 * bnc->fresh_message_tree.size())); - result += (4 + (4 * bnc->stale_message_tree.size())); - result += (4 + (4 * bnc->broadcast_list.size())); - } - else { - result += 4 + bn_data::HEADER_LENGTH; // n_entries in buffer table + basement header - result += BLB_NBYTESINDATA(node, i); - } - result += 4; // checksum - return result; -} - -#define FTNODE_PARTITION_DMT_LEAVES 0xaa -#define FTNODE_PARTITION_FIFO_MSG 0xbb - -UU() static int -assert_fresh(const int32_t &offset, const uint32_t UU(idx), struct fifo *const f) { - struct fifo_entry *entry = toku_fifo_get_entry(f, offset); - assert(entry->is_fresh); - return 0; -} - 
-UU() static int -assert_stale(const int32_t &offset, const uint32_t UU(idx), struct fifo *const f) { - struct fifo_entry *entry = toku_fifo_get_entry(f, offset); - assert(!entry->is_fresh); - return 0; -} - -static void bnc_verify_message_trees(NONLEAF_CHILDINFO UU(bnc)) { -#ifdef TOKU_DEBUG_PARANOID - bnc->fresh_message_tree.iterate(bnc->buffer); - bnc->stale_message_tree.iterate(bnc->buffer); -#endif -} - -static int -wbuf_write_offset(const int32_t &offset, const uint32_t UU(idx), struct wbuf *const wb) { - wbuf_nocrc_int(wb, offset); - return 0; -} - -static void -serialize_child_buffer(NONLEAF_CHILDINFO bnc, struct wbuf *wb) -{ - unsigned char ch = FTNODE_PARTITION_FIFO_MSG; - wbuf_nocrc_char(wb, ch); - // serialize the FIFO, first the number of entries, then the elements - wbuf_nocrc_int(wb, toku_bnc_n_entries(bnc)); - FIFO_ITERATE( - bnc->buffer, key, keylen, data, datalen, type, msn, xids, is_fresh, - { - paranoid_invariant((int) type >= 0 && (int) type < 256); - wbuf_nocrc_char(wb, (unsigned char) type); - wbuf_nocrc_char(wb, (unsigned char) is_fresh); - wbuf_MSN(wb, msn); - wbuf_nocrc_xids(wb, xids); - wbuf_nocrc_bytes(wb, key, keylen); - wbuf_nocrc_bytes(wb, data, datalen); - }); - - bnc_verify_message_trees(bnc); - - // serialize the message trees (num entries, offsets array): - // fresh, stale, broadcast - wbuf_nocrc_int(wb, bnc->fresh_message_tree.size()); - bnc->fresh_message_tree.iterate(wb); - - wbuf_nocrc_int(wb, bnc->stale_message_tree.size()); - bnc->stale_message_tree.iterate(wb); - - wbuf_nocrc_int(wb, bnc->broadcast_list.size()); - bnc->broadcast_list.iterate(wb); -} - -// -// Serialize the i'th partition of node into sb -// For leaf nodes, this would be the i'th basement node -// For internal nodes, this would be the i'th internal node -// -static void -serialize_ftnode_partition(FTNODE node, int i, struct sub_block *sb) { - if (sb->uncompressed_ptr == NULL) { - assert(sb->uncompressed_size == 0); - sb->uncompressed_size = 
serialize_ftnode_partition_size(node,i); - sb->uncompressed_ptr = toku_xmalloc(sb->uncompressed_size); - } else { - assert(sb->uncompressed_size > 0); - } - // - // Now put the data into sb->uncompressed_ptr - // - struct wbuf wb; - wbuf_init(&wb, sb->uncompressed_ptr, sb->uncompressed_size); - if (node->height > 0) { - // TODO: (Zardosht) possibly exit early if there are no messages - serialize_child_buffer(BNC(node, i), &wb); - } - else { - unsigned char ch = FTNODE_PARTITION_DMT_LEAVES; - bn_data* bd = BLB_DATA(node, i); - - wbuf_nocrc_char(&wb, ch); - wbuf_nocrc_uint(&wb, bd->num_klpairs()); - - bd->serialize_to_wbuf(&wb); - } - uint32_t end_to_end_checksum = toku_x1764_memory(sb->uncompressed_ptr, wbuf_get_woffset(&wb)); - wbuf_nocrc_int(&wb, end_to_end_checksum); - invariant(wb.ndone == wb.size); - invariant(sb->uncompressed_size==wb.ndone); -} - -// -// Takes the data in sb->uncompressed_ptr, and compresses it -// into a newly allocated buffer sb->compressed_ptr -// -static void -compress_ftnode_sub_block(struct sub_block *sb, enum toku_compression_method method) { - assert(sb->compressed_ptr == NULL); - set_compressed_size_bound(sb, method); - // add 8 extra bytes, 4 for compressed size, 4 for decompressed size - sb->compressed_ptr = toku_xmalloc(sb->compressed_size_bound + 8); - // - // This probably seems a bit complicated. Here is what is going on. - // In TokuDB 5.0, sub_blocks were compressed and the compressed data - // was checksummed. The checksum did NOT include the size of the compressed data - // and the size of the uncompressed data. The fields of sub_block only reference the - // compressed data, and it is the responsibility of the user of the sub_block - // to write the length - // - // For Dr. No, we want the checksum to also include the size of the compressed data, and the - // size of the decompressed data, because this data - // may be read off of disk alone, so it must be verifiable alone. 
- // - // So, we pass in a buffer to compress_nocrc_sub_block that starts 8 bytes after the beginning - // of sb->compressed_ptr, so we have space to put in the sizes, and then run the checksum. - // - sb->compressed_size = compress_nocrc_sub_block( - sb, - (char *)sb->compressed_ptr + 8, - sb->compressed_size_bound, - method - ); - - uint32_t* extra = (uint32_t *)(sb->compressed_ptr); - // store the compressed and uncompressed size at the beginning - extra[0] = toku_htod32(sb->compressed_size); - extra[1] = toku_htod32(sb->uncompressed_size); - // now checksum the entire thing - sb->compressed_size += 8; // now add the eight bytes that we saved for the sizes - sb->xsum = toku_x1764_memory(sb->compressed_ptr,sb->compressed_size); - - // - // This is the end result for Dr. No and forward. For ftnodes, sb->compressed_ptr contains - // two integers at the beginning, the size and uncompressed size, and then the compressed - // data. sb->xsum contains the checksum of this entire thing. - // - // In TokuDB 5.0, sb->compressed_ptr only contained the compressed data, sb->xsum - // checksummed only the compressed data, and the checksumming of the sizes were not - // done here. 
- // -} - -// -// Returns the size needed to serialize the ftnode info -// Does not include header information that is common with rollback logs -// such as the magic, layout_version, and build_id -// Includes only node specific info such as pivot information, n_children, and so on -// -static uint32_t -serialize_ftnode_info_size(FTNODE node) -{ - uint32_t retval = 0; - retval += 8; // max_msn_applied_to_node_on_disk - retval += 4; // nodesize - retval += 4; // flags - retval += 4; // height; - retval += 8; // oldest_referenced_xid_known - retval += node->totalchildkeylens; // total length of pivots - retval += (node->n_children-1)*4; // encode length of each pivot - if (node->height > 0) { - retval += node->n_children*8; // child blocknum's - } - retval += 4; // checksum - return retval; -} - -static void serialize_ftnode_info(FTNODE node, - SUB_BLOCK sb // output - ) { - assert(sb->uncompressed_size == 0); - assert(sb->uncompressed_ptr == NULL); - sb->uncompressed_size = serialize_ftnode_info_size(node); - sb->uncompressed_ptr = toku_xmalloc(sb->uncompressed_size); - struct wbuf wb; - wbuf_init(&wb, sb->uncompressed_ptr, sb->uncompressed_size); - - wbuf_MSN(&wb, node->max_msn_applied_to_node_on_disk); - wbuf_nocrc_uint(&wb, 0); // write a dummy value for where node->nodesize used to be - wbuf_nocrc_uint(&wb, node->flags); - wbuf_nocrc_int (&wb, node->height); - wbuf_TXNID(&wb, node->oldest_referenced_xid_known); - - // pivot information - for (int i = 0; i < node->n_children-1; i++) { - wbuf_nocrc_bytes(&wb, node->childkeys[i].data, node->childkeys[i].size); - } - // child blocks, only for internal nodes - if (node->height > 0) { - for (int i = 0; i < node->n_children; i++) { - wbuf_nocrc_BLOCKNUM(&wb, BP_BLOCKNUM(node,i)); - } - } - - uint32_t end_to_end_checksum = toku_x1764_memory(sb->uncompressed_ptr, wbuf_get_woffset(&wb)); - wbuf_nocrc_int(&wb, end_to_end_checksum); - invariant(wb.ndone == wb.size); - invariant(sb->uncompressed_size==wb.ndone); -} - -// 
This is the size of the uncompressed data, not including the compression headers -unsigned int -toku_serialize_ftnode_size (FTNODE node) { - unsigned int result = 0; - // - // As of now, this seems to be called if and only if the entire node is supposed - // to be in memory, so we will assert it. - // - toku_assert_entire_node_in_memory(node); - result += serialize_node_header_size(node); - result += serialize_ftnode_info_size(node); - for (int i = 0; i < node->n_children; i++) { - result += serialize_ftnode_partition_size(node,i); - } - return result; -} - -struct array_info { - uint32_t offset; - LEAFENTRY* le_array; - uint32_t* key_sizes_array; - const void** key_ptr_array; -}; - -static int -array_item(const void* key, const uint32_t keylen, const LEAFENTRY &le, const uint32_t idx, struct array_info *const ai) { - ai->le_array[idx+ai->offset] = le; - ai->key_sizes_array[idx+ai->offset] = keylen; - ai->key_ptr_array[idx+ai->offset] = key; - return 0; -} - -// There must still be at least one child -// Requires that all messages in buffers above have been applied. -// Because all messages above have been applied, setting msn of all new basements -// to max msn of existing basements is correct. (There cannot be any messages in -// buffers above that still need to be applied.) -void -rebalance_ftnode_leaf(FTNODE node, unsigned int basementnodesize) -{ - assert(node->height == 0); - assert(node->dirty); - - uint32_t num_orig_basements = node->n_children; - // Count number of leaf entries in this leaf (num_le). - uint32_t num_le = 0; - for (uint32_t i = 0; i < num_orig_basements; i++) { - num_le += BLB_DATA(node, i)->num_klpairs(); - } - - uint32_t num_alloc = num_le ? num_le : 1; // simplify logic below by always having at least one entry per array - - // Create an array of OMTVALUE's that store all the pointers to all the data. - // Each element in leafpointers is a pointer to a leaf. 
- toku::scoped_malloc leafpointers_buf(sizeof(LEAFENTRY) * num_alloc); - LEAFENTRY *leafpointers = reinterpret_cast(leafpointers_buf.get()); - leafpointers[0] = NULL; - - toku::scoped_malloc key_pointers_buf(sizeof(void *) * num_alloc); - const void **key_pointers = reinterpret_cast(key_pointers_buf.get()); - key_pointers[0] = NULL; - - toku::scoped_malloc key_sizes_buf(sizeof(uint32_t) * num_alloc); - uint32_t *key_sizes = reinterpret_cast(key_sizes_buf.get()); - - // Capture pointers to old mempools' buffers (so they can be destroyed) - toku::scoped_malloc old_bns_buf(sizeof(BASEMENTNODE) * num_orig_basements); - BASEMENTNODE *old_bns = reinterpret_cast(old_bns_buf.get()); - old_bns[0] = NULL; - - uint32_t curr_le = 0; - for (uint32_t i = 0; i < num_orig_basements; i++) { - bn_data* bd = BLB_DATA(node, i); - struct array_info ai {.offset = curr_le, .le_array = leafpointers, .key_sizes_array = key_sizes, .key_ptr_array = key_pointers }; - bd->iterate(&ai); - curr_le += bd->num_klpairs(); - } - - // Create an array that will store indexes of new pivots. - // Each element in new_pivots is the index of a pivot key. - // (Allocating num_le of them is overkill, but num_le is an upper bound.) - toku::scoped_malloc new_pivots_buf(sizeof(uint32_t) * num_alloc); - uint32_t *new_pivots = reinterpret_cast(new_pivots_buf.get()); - new_pivots[0] = 0; - - // Each element in le_sizes is the size of the leafentry pointed to by leafpointers. - toku::scoped_malloc le_sizes_buf(sizeof(size_t) * num_alloc); - size_t *le_sizes = reinterpret_cast(le_sizes_buf.get()); - le_sizes[0] = 0; - - // Create an array that will store the size of each basement. - // This is the sum of the leaf sizes of all the leaves in that basement. - // We don't know how many basements there will be, so we use num_le as the upper bound. 
- - // Sum of all le sizes in a single basement - toku::scoped_calloc bn_le_sizes_buf(sizeof(size_t) * num_alloc); - size_t *bn_le_sizes = reinterpret_cast(bn_le_sizes_buf.get()); - - // Sum of all key sizes in a single basement - toku::scoped_calloc bn_key_sizes_buf(sizeof(size_t) * num_alloc); - size_t *bn_key_sizes = reinterpret_cast(bn_key_sizes_buf.get()); - - // TODO 4050: All these arrays should be combined into a single array of some bn_info struct (pivot, msize, num_les). - // Each entry is the number of leafentries in this basement. (Again, num_le is overkill upper baound.) - toku::scoped_malloc num_les_this_bn_buf(sizeof(uint32_t) * num_alloc); - uint32_t *num_les_this_bn = reinterpret_cast(num_les_this_bn_buf.get()); - num_les_this_bn[0] = 0; - - // Figure out the new pivots. - // We need the index of each pivot, and for each basement we need - // the number of leaves and the sum of the sizes of the leaves (memory requirement for basement). - uint32_t curr_pivot = 0; - uint32_t num_le_in_curr_bn = 0; - uint32_t bn_size_so_far = 0; - for (uint32_t i = 0; i < num_le; i++) { - uint32_t curr_le_size = leafentry_disksize((LEAFENTRY) leafpointers[i]); - le_sizes[i] = curr_le_size; - if ((bn_size_so_far + curr_le_size + sizeof(uint32_t) + key_sizes[i] > basementnodesize) && (num_le_in_curr_bn != 0)) { - // cap off the current basement node to end with the element before i - new_pivots[curr_pivot] = i-1; - curr_pivot++; - num_le_in_curr_bn = 0; - bn_size_so_far = 0; - } - num_le_in_curr_bn++; - num_les_this_bn[curr_pivot] = num_le_in_curr_bn; - bn_le_sizes[curr_pivot] += curr_le_size; - bn_key_sizes[curr_pivot] += sizeof(uint32_t) + key_sizes[i]; // uint32_t le_offset - bn_size_so_far += curr_le_size + sizeof(uint32_t) + key_sizes[i]; - } - // curr_pivot is now the total number of pivot keys in the leaf node - int num_pivots = curr_pivot; - int num_children = num_pivots + 1; - - // now we need to fill in the new basement nodes and pivots - - // TODO: (Zardosht) 
this is an ugly thing right now - // Need to figure out how to properly deal with seqinsert. - // I am not happy with how this is being - // handled with basement nodes - uint32_t tmp_seqinsert = BLB_SEQINSERT(node, num_orig_basements - 1); - - // choose the max msn applied to any basement as the max msn applied to all new basements - MSN max_msn = ZERO_MSN; - for (uint32_t i = 0; i < num_orig_basements; i++) { - MSN curr_msn = BLB_MAX_MSN_APPLIED(node,i); - max_msn = (curr_msn.msn > max_msn.msn) ? curr_msn : max_msn; - } - // remove the basement node in the node, we've saved a copy - for (uint32_t i = 0; i < num_orig_basements; i++) { - // save a reference to the old basement nodes - // we will need them to ensure that the memory - // stays intact - old_bns[i] = toku_detach_bn(node, i); - } - // Now destroy the old basements, but do not destroy leaves - toku_destroy_ftnode_internals(node); - - // now reallocate pieces and start filling them in - invariant(num_children > 0); - node->totalchildkeylens = 0; - - XCALLOC_N(num_pivots, node->childkeys); // allocate pointers to pivot structs - node->n_children = num_children; - XCALLOC_N(num_children, node->bp); // allocate pointers to basements (bp) - for (int i = 0; i < num_children; i++) { - set_BLB(node, i, toku_create_empty_bn()); // allocate empty basements and set bp pointers - } - - // now we start to fill in the data - - // first the pivots - for (int i = 0; i < num_pivots; i++) { - uint32_t keylen = key_sizes[new_pivots[i]]; - const void *key = key_pointers[new_pivots[i]]; - toku_memdup_dbt(&node->childkeys[i], key, keylen); - node->totalchildkeylens += keylen; - } - - uint32_t baseindex_this_bn = 0; - // now the basement nodes - for (int i = 0; i < num_children; i++) { - // put back seqinsert - BLB_SEQINSERT(node, i) = tmp_seqinsert; - - // create start (inclusive) and end (exclusive) boundaries for data of basement node - uint32_t curr_start = (i==0) ? 
0 : new_pivots[i-1]+1; // index of first leaf in basement - uint32_t curr_end = (i==num_pivots) ? num_le : new_pivots[i]+1; // index of first leaf in next basement - uint32_t num_in_bn = curr_end - curr_start; // number of leaves in this basement - - // create indexes for new basement - invariant(baseindex_this_bn == curr_start); - uint32_t num_les_to_copy = num_les_this_bn[i]; - invariant(num_les_to_copy == num_in_bn); - - bn_data* bd = BLB_DATA(node, i); - bd->set_contents_as_clone_of_sorted_array( - num_les_to_copy, - &key_pointers[baseindex_this_bn], - &key_sizes[baseindex_this_bn], - &leafpointers[baseindex_this_bn], - &le_sizes[baseindex_this_bn], - bn_key_sizes[i], // Total key sizes - bn_le_sizes[i] // total le sizes - ); - - BP_STATE(node,i) = PT_AVAIL; - BP_TOUCH_CLOCK(node,i); - BLB_MAX_MSN_APPLIED(node,i) = max_msn; - baseindex_this_bn += num_les_to_copy; // set to index of next bn - } - node->max_msn_applied_to_node_on_disk = max_msn; - - // destroy buffers of old mempools - for (uint32_t i = 0; i < num_orig_basements; i++) { - destroy_basement_node(old_bns[i]); - } -} // end of rebalance_ftnode_leaf() - -struct serialize_times { - tokutime_t serialize_time; - tokutime_t compress_time; -}; - -static void -serialize_and_compress_partition(FTNODE node, - int childnum, - enum toku_compression_method compression_method, - SUB_BLOCK sb, - struct serialize_times *st) -{ - // serialize, compress, update status - tokutime_t t0 = toku_time_now(); - serialize_ftnode_partition(node, childnum, sb); - tokutime_t t1 = toku_time_now(); - compress_ftnode_sub_block(sb, compression_method); - tokutime_t t2 = toku_time_now(); - - st->serialize_time += t1 - t0; - st->compress_time += t2 - t1; -} - -void -toku_create_compressed_partition_from_available( - FTNODE node, - int childnum, - enum toku_compression_method compression_method, - SUB_BLOCK sb - ) -{ - tokutime_t t0 = toku_time_now(); - - // serialize - sb->uncompressed_size = serialize_ftnode_partition_size(node, 
childnum); - toku::scoped_malloc uncompressed_buf(sb->uncompressed_size); - sb->uncompressed_ptr = uncompressed_buf.get(); - serialize_ftnode_partition(node, childnum, sb); - - tokutime_t t1 = toku_time_now(); - - // compress. no need to pad with extra bytes for sizes/xsum - we're not storing them - set_compressed_size_bound(sb, compression_method); - sb->compressed_ptr = toku_xmalloc(sb->compressed_size_bound); - sb->compressed_size = compress_nocrc_sub_block( - sb, - sb->compressed_ptr, - sb->compressed_size_bound, - compression_method - ); - sb->uncompressed_ptr = NULL; - - tokutime_t t2 = toku_time_now(); - - toku_ft_status_update_serialize_times(node, t1 - t0, t2 - t1); -} - -static void -serialize_and_compress_serially(FTNODE node, - int npartitions, - enum toku_compression_method compression_method, - struct sub_block sb[], - struct serialize_times *st) { - for (int i = 0; i < npartitions; i++) { - serialize_and_compress_partition(node, i, compression_method, &sb[i], st); - } -} - -struct serialize_compress_work { - struct work base; - FTNODE node; - int i; - enum toku_compression_method compression_method; - struct sub_block *sb; - struct serialize_times st; -}; - -static void * -serialize_and_compress_worker(void *arg) { - struct workset *ws = (struct workset *) arg; - while (1) { - struct serialize_compress_work *w = (struct serialize_compress_work *) workset_get(ws); - if (w == NULL) - break; - int i = w->i; - serialize_and_compress_partition(w->node, i, w->compression_method, &w->sb[i], &w->st); - } - workset_release_ref(ws); - return arg; -} - -static void -serialize_and_compress_in_parallel(FTNODE node, - int npartitions, - enum toku_compression_method compression_method, - struct sub_block sb[], - struct serialize_times *st) { - if (npartitions == 1) { - serialize_and_compress_partition(node, 0, compression_method, &sb[0], st); - } else { - int T = num_cores; - if (T > npartitions) - T = npartitions; - if (T > 0) - T = T - 1; - struct workset ws; - 
ZERO_STRUCT(ws); - workset_init(&ws); - struct serialize_compress_work work[npartitions]; - workset_lock(&ws); - for (int i = 0; i < npartitions; i++) { - work[i] = (struct serialize_compress_work) { .base = {{NULL}}, - .node = node, - .i = i, - .compression_method = compression_method, - .sb = sb, - .st = { .serialize_time = 0, .compress_time = 0} }; - workset_put_locked(&ws, &work[i].base); - } - workset_unlock(&ws); - toku_thread_pool_run(ft_pool, 0, &T, serialize_and_compress_worker, &ws); - workset_add_ref(&ws, T); - serialize_and_compress_worker(&ws); - workset_join(&ws); - workset_destroy(&ws); - - // gather up the statistics from each thread's work item - for (int i = 0; i < npartitions; i++) { - st->serialize_time += work[i].st.serialize_time; - st->compress_time += work[i].st.compress_time; - } - } -} - -static void -serialize_and_compress_sb_node_info(FTNODE node, struct sub_block *sb, - enum toku_compression_method compression_method, struct serialize_times *st) { - // serialize, compress, update serialize times. - tokutime_t t0 = toku_time_now(); - serialize_ftnode_info(node, sb); - tokutime_t t1 = toku_time_now(); - compress_ftnode_sub_block(sb, compression_method); - tokutime_t t2 = toku_time_now(); - - st->serialize_time += t1 - t0; - st->compress_time += t2 - t1; -} - -int toku_serialize_ftnode_to_memory(FTNODE node, - FTNODE_DISK_DATA* ndd, - unsigned int basementnodesize, - enum toku_compression_method compression_method, - bool do_rebalancing, - bool in_parallel, // for loader is true, for toku_ftnode_flush_callback, is false - /*out*/ size_t *n_bytes_to_write, - /*out*/ size_t *n_uncompressed_bytes, - /*out*/ char **bytes_to_write) -// Effect: Writes out each child to a separate malloc'd buffer, then compresses -// all of them, and writes the uncompressed header, to bytes_to_write, -// which is malloc'd. 
-// -// The resulting buffer is guaranteed to be 512-byte aligned and the total length is a multiple of 512 (so we pad with zeros at the end if needed). -// 512-byte padding is for O_DIRECT to work. -{ - toku_assert_entire_node_in_memory(node); - - if (do_rebalancing && node->height == 0) { - rebalance_ftnode_leaf(node, basementnodesize); - } - const int npartitions = node->n_children; - - // Each partition represents a compressed sub block - // For internal nodes, a sub block is a message buffer - // For leaf nodes, a sub block is a basement node - toku::scoped_malloc sb_buf(sizeof(struct sub_block) * npartitions); - struct sub_block *sb = reinterpret_cast(sb_buf.get()); - XREALLOC_N(npartitions, *ndd); - struct sub_block sb_node_info; - for (int i = 0; i < npartitions; i++) { - sub_block_init(&sb[i]);; - } - sub_block_init(&sb_node_info); - - // - // First, let's serialize and compress the individual sub blocks - // - struct serialize_times st; - memset(&st, 0, sizeof(st)); - if (in_parallel) { - serialize_and_compress_in_parallel(node, npartitions, compression_method, sb, &st); - } - else { - serialize_and_compress_serially(node, npartitions, compression_method, sb, &st); - } - - // - // Now lets create a sub-block that has the common node information, - // This does NOT include the header - // - serialize_and_compress_sb_node_info(node, &sb_node_info, compression_method, &st); - - // update the serialize times, ignore the header for simplicity. we captured all - // of the partitions' serialize times so that's probably good enough. - toku_ft_status_update_serialize_times(node, st.serialize_time, st.compress_time); - - // now we have compressed each of our pieces into individual sub_blocks, - // we can put the header and all the subblocks into a single buffer - // and return it. 
- - // The total size of the node is: - // size of header + disk size of the n+1 sub_block's created above - uint32_t total_node_size = (serialize_node_header_size(node) // uncompressed header - + sb_node_info.compressed_size // compressed nodeinfo (without its checksum) - + 4); // nodeinfo's checksum - uint32_t total_uncompressed_size = (serialize_node_header_size(node) // uncompressed header - + sb_node_info.uncompressed_size // uncompressed nodeinfo (without its checksum) - + 4); // nodeinfo's checksum - // store the BP_SIZESs - for (int i = 0; i < node->n_children; i++) { - uint32_t len = sb[i].compressed_size + 4; // data and checksum - BP_SIZE (*ndd,i) = len; - BP_START(*ndd,i) = total_node_size; - total_node_size += sb[i].compressed_size + 4; - total_uncompressed_size += sb[i].uncompressed_size + 4; - } - - uint32_t total_buffer_size = roundup_to_multiple(512, total_node_size); // make the buffer be 512 bytes. - - char *XMALLOC_N_ALIGNED(512, total_buffer_size, data); - char *curr_ptr = data; - // now create the final serialized node - - // write the header - struct wbuf wb; - wbuf_init(&wb, curr_ptr, serialize_node_header_size(node)); - serialize_node_header(node, *ndd, &wb); - assert(wb.ndone == wb.size); - curr_ptr += serialize_node_header_size(node); - - // now write sb_node_info - memcpy(curr_ptr, sb_node_info.compressed_ptr, sb_node_info.compressed_size); - curr_ptr += sb_node_info.compressed_size; - // write the checksum - *(uint32_t *)curr_ptr = toku_htod32(sb_node_info.xsum); - curr_ptr += sizeof(sb_node_info.xsum); - - for (int i = 0; i < npartitions; i++) { - memcpy(curr_ptr, sb[i].compressed_ptr, sb[i].compressed_size); - curr_ptr += sb[i].compressed_size; - // write the checksum - *(uint32_t *)curr_ptr = toku_htod32(sb[i].xsum); - curr_ptr += sizeof(sb[i].xsum); - } - // Zero the rest of the buffer - for (uint32_t i=total_node_size; ih->basementnodesize, - h->h->compression_method, - do_rebalancing, - false, // in_parallel - &n_to_write, - 
&n_uncompressed_bytes, - &compressed_buf - ); - if (r != 0) { - return r; - } - - // If the node has never been written, then write the whole buffer, including the zeros - invariant(blocknum.b>=0); - DISKOFF offset; - - toku_blocknum_realloc_on_disk(h->blocktable, blocknum, n_to_write, &offset, - h, fd, for_checkpoint); //dirties h - - tokutime_t t0 = toku_time_now(); - toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); - tokutime_t t1 = toku_time_now(); - - tokutime_t io_time = t1 - t0; - toku_ft_status_update_flush_reason(node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint); - - toku_free(compressed_buf); - node->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction. - return 0; -} - -static void -deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf, - DESCRIPTOR desc, ft_compare_func cmp) { - int r; - int n_in_this_buffer = rbuf_int(rbuf); - int32_t *fresh_offsets = NULL, *stale_offsets = NULL; - int32_t *broadcast_offsets = NULL; - int nfresh = 0, nstale = 0; - int nbroadcast_offsets = 0; - if (cmp) { - XMALLOC_N(n_in_this_buffer, stale_offsets); - XMALLOC_N(n_in_this_buffer, fresh_offsets); - XMALLOC_N(n_in_this_buffer, broadcast_offsets); - } - toku_fifo_resize(bnc->buffer, rbuf->size + 64); - for (int i = 0; i < n_in_this_buffer; i++) { - bytevec key; ITEMLEN keylen; - bytevec val; ITEMLEN vallen; - // this is weird but it's necessary to pass icc and gcc together - unsigned char ctype = rbuf_char(rbuf); - enum ft_msg_type type = (enum ft_msg_type) ctype; - bool is_fresh = rbuf_char(rbuf); - MSN msn = rbuf_msn(rbuf); - XIDS xids; - xids_create_from_buffer(rbuf, &xids); - rbuf_bytes(rbuf, &key, &keylen); /* Returns a pointer into the rbuf. 
*/ - rbuf_bytes(rbuf, &val, &vallen); - int32_t *dest; - if (cmp) { - if (ft_msg_type_applies_once(type)) { - if (is_fresh) { - dest = &fresh_offsets[nfresh]; - nfresh++; - } else { - dest = &stale_offsets[nstale]; - nstale++; - } - } else if (ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)) { - dest = &broadcast_offsets[nbroadcast_offsets]; - nbroadcast_offsets++; - } else { - abort(); - } - } else { - dest = NULL; - } - r = toku_fifo_enq(bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, dest); /* Copies the data into the fifo */ - lazy_assert_zero(r); - xids_destroy(&xids); - } - invariant(rbuf->ndone == rbuf->size); - - if (cmp) { - struct toku_fifo_entry_key_msn_cmp_extra extra = { .desc = desc, .cmp = cmp, .fifo = bnc->buffer }; - r = toku::sort::mergesort_r(fresh_offsets, nfresh, extra); - assert_zero(r); - bnc->fresh_message_tree.destroy(); - bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer); - r = toku::sort::mergesort_r(stale_offsets, nstale, extra); - assert_zero(r); - bnc->stale_message_tree.destroy(); - bnc->stale_message_tree.create_steal_sorted_array(&stale_offsets, nstale, n_in_this_buffer); - bnc->broadcast_list.destroy(); - bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast_offsets, n_in_this_buffer); - } -} - -// effect: deserialize a single message from rbuf and enqueue the result into the given fifo -static void -fifo_deserialize_msg_from_rbuf(FIFO fifo, struct rbuf *rbuf) { - bytevec key, val; - ITEMLEN keylen, vallen; - enum ft_msg_type type = (enum ft_msg_type) rbuf_char(rbuf); - bool is_fresh = rbuf_char(rbuf); - MSN msn = rbuf_msn(rbuf); - XIDS xids; - xids_create_from_buffer(rbuf, &xids); - rbuf_bytes(rbuf, &key, &keylen); /* Returns a pointer into the rbuf. 
*/ - rbuf_bytes(rbuf, &val, &vallen); - int r = toku_fifo_enq(fifo, key, keylen, val, vallen, type, msn, xids, is_fresh, nullptr); - lazy_assert_zero(r); - xids_destroy(&xids); -} - -static void -deserialize_child_buffer(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf) { - int n_in_this_buffer = rbuf_int(rbuf); - int nfresh = 0, nstale = 0, nbroadcast_offsets = 0; - int32_t *XMALLOC_N(n_in_this_buffer, stale_offsets); - int32_t *XMALLOC_N(n_in_this_buffer, fresh_offsets); - int32_t *XMALLOC_N(n_in_this_buffer, broadcast_offsets); - - toku_fifo_resize(bnc->buffer, rbuf->size + 64); - for (int i = 0; i < n_in_this_buffer; i++) { - fifo_deserialize_msg_from_rbuf(bnc->buffer, rbuf); - } - - // read in each message tree (fresh, stale, broadcast) - nfresh = rbuf_int(rbuf); - bytevec fresh_offsets_src_v; - rbuf_literal_bytes(rbuf, &fresh_offsets_src_v, nfresh * (sizeof *fresh_offsets)); - const int32_t *fresh_offsets_src = (const int32_t *) fresh_offsets_src_v; - for (int i = 0; i < nfresh; i++) { - fresh_offsets[i] = toku_dtoh32(fresh_offsets_src[i]); - } - nstale = rbuf_int(rbuf); - bytevec stale_offsets_src_v; - rbuf_literal_bytes(rbuf, &stale_offsets_src_v, nstale * (sizeof *stale_offsets)); - const int32_t *stale_offsets_src = (const int32_t *) stale_offsets_src_v; - for (int i = 0; i < nstale; i++) { - stale_offsets[i] = toku_dtoh32(stale_offsets_src[i]); - } - nbroadcast_offsets = rbuf_int(rbuf); - bytevec broadcast_offsets_src_v; - rbuf_literal_bytes(rbuf, &broadcast_offsets_src_v, nbroadcast_offsets * (sizeof *broadcast_offsets)); - const int32_t *broadcast_offsets_src = (const int32_t *) broadcast_offsets_src_v; - for (int i = 0; i < nbroadcast_offsets; i++) { - broadcast_offsets[i] = toku_dtoh32(broadcast_offsets_src[i]); - } - - // build OMTs out of each offset array - bnc->fresh_message_tree.destroy(); - bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer); - bnc->stale_message_tree.destroy(); - 
bnc->stale_message_tree.create_steal_sorted_array(&stale_offsets, nstale, n_in_this_buffer); - bnc->broadcast_list.destroy(); - bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast_offsets, n_in_this_buffer); -} - -// dump a buffer to stderr -// no locking around this for now -void -dump_bad_block(unsigned char *vp, uint64_t size) { - const uint64_t linesize = 64; - uint64_t n = size / linesize; - for (uint64_t i = 0; i < n; i++) { - fprintf(stderr, "%p: ", vp); - for (uint64_t j = 0; j < linesize; j++) { - unsigned char c = vp[j]; - fprintf(stderr, "%2.2X", c); - } - fprintf(stderr, "\n"); - vp += linesize; - } - size = size % linesize; - for (uint64_t i=0; idata_buffer.initialize_empty(); - return bn; -} - -BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn) { - BASEMENTNODE bn = toku_create_empty_bn_no_buffer(); - bn->max_msn_applied = orig_bn->max_msn_applied; - bn->seqinsert = orig_bn->seqinsert; - bn->stale_ancestor_messages_applied = orig_bn->stale_ancestor_messages_applied; - bn->stat64_delta = orig_bn->stat64_delta; - bn->data_buffer.clone(&orig_bn->data_buffer); - return bn; -} - -BASEMENTNODE toku_create_empty_bn_no_buffer(void) { - BASEMENTNODE XMALLOC(bn); - bn->max_msn_applied.msn = 0; - bn->seqinsert = 0; - bn->stale_ancestor_messages_applied = false; - bn->stat64_delta = ZEROSTATS; - bn->data_buffer.init_zero(); - return bn; -} - -NONLEAF_CHILDINFO toku_create_empty_nl(void) { - NONLEAF_CHILDINFO XMALLOC(cn); - int r = toku_fifo_create(&cn->buffer); assert_zero(r); - cn->fresh_message_tree.create_no_array(); - cn->stale_message_tree.create_no_array(); - cn->broadcast_list.create_no_array(); - memset(cn->flow, 0, sizeof cn->flow); - return cn; -} - -// must clone the OMTs, since we serialize them along with the FIFO -NONLEAF_CHILDINFO toku_clone_nl(NONLEAF_CHILDINFO orig_childinfo) { - NONLEAF_CHILDINFO XMALLOC(cn); - toku_fifo_clone(orig_childinfo->buffer, &cn->buffer); - cn->fresh_message_tree.create_no_array(); - 
cn->fresh_message_tree.clone(orig_childinfo->fresh_message_tree); - cn->stale_message_tree.create_no_array(); - cn->stale_message_tree.clone(orig_childinfo->stale_message_tree); - cn->broadcast_list.create_no_array(); - cn->broadcast_list.clone(orig_childinfo->broadcast_list); - memset(cn->flow, 0, sizeof cn->flow); - return cn; -} - -void destroy_basement_node (BASEMENTNODE bn) -{ - bn->data_buffer.destroy(); - toku_free(bn); -} - -void destroy_nonleaf_childinfo (NONLEAF_CHILDINFO nl) -{ - toku_fifo_free(&nl->buffer); - nl->fresh_message_tree.destroy(); - nl->stale_message_tree.destroy(); - nl->broadcast_list.destroy(); - toku_free(nl); -} - -void read_block_from_fd_into_rbuf( - int fd, - BLOCKNUM blocknum, - FT h, - struct rbuf *rb - ) -{ - // get the file offset and block size for the block - DISKOFF offset, size; - toku_translate_blocknum_to_offset_size(h->blocktable, blocknum, &offset, &size); - DISKOFF size_aligned = roundup_to_multiple(512, size); - uint8_t *XMALLOC_N_ALIGNED(512, size_aligned, raw_block); - rbuf_init(rb, raw_block, size); - // read the block - ssize_t rlen = toku_os_pread(fd, raw_block, size_aligned, offset); - assert((DISKOFF)rlen >= size); - assert((DISKOFF)rlen <= size_aligned); -} - -static const int read_header_heuristic_max = 32*1024; - -#ifndef MIN -#define MIN(a,b) (((a)>(b)) ? (b) : (a)) -#endif - -static void read_ftnode_header_from_fd_into_rbuf_if_small_enough (int fd, BLOCKNUM blocknum, FT ft, struct rbuf *rb, struct ftnode_fetch_extra *bfe) -// Effect: If the header part of the node is small enough, then read it into the rbuf. The rbuf will be allocated to be big enough in any case. 
-{ - DISKOFF offset, size; - toku_translate_blocknum_to_offset_size(ft->blocktable, blocknum, &offset, &size); - DISKOFF read_size = roundup_to_multiple(512, MIN(read_header_heuristic_max, size)); - uint8_t *XMALLOC_N_ALIGNED(512, roundup_to_multiple(512, size), raw_block); - rbuf_init(rb, raw_block, read_size); - - // read the block - tokutime_t t0 = toku_time_now(); - ssize_t rlen = toku_os_pread(fd, raw_block, read_size, offset); - tokutime_t t1 = toku_time_now(); - - assert(rlen >= 0); - rbuf_init(rb, raw_block, rlen); - - bfe->bytes_read = rlen; - bfe->io_time = t1 - t0; - toku_ft_status_update_pivot_fetch_reason(bfe); -} - -// -// read the compressed partition into the sub_block, -// validate the checksum of the compressed data -// -int -read_compressed_sub_block(struct rbuf *rb, struct sub_block *sb) -{ - int r = 0; - sb->compressed_size = rbuf_int(rb); - sb->uncompressed_size = rbuf_int(rb); - bytevec* cp = (bytevec*)&sb->compressed_ptr; - rbuf_literal_bytes(rb, cp, sb->compressed_size); - sb->xsum = rbuf_int(rb); - // let's check the checksum - uint32_t actual_xsum = toku_x1764_memory((char *)sb->compressed_ptr-8, 8+sb->compressed_size); - if (sb->xsum != actual_xsum) { - r = TOKUDB_BAD_CHECKSUM; - } - return r; -} - -static int -read_and_decompress_sub_block(struct rbuf *rb, struct sub_block *sb) -{ - int r = 0; - r = read_compressed_sub_block(rb, sb); - if (r != 0) { - goto exit; - } - - just_decompress_sub_block(sb); -exit: - return r; -} - -// Allocates space for the sub-block and de-compresses the data from -// the supplied compressed pointer.. -void -just_decompress_sub_block(struct sub_block *sb) -{ - // TODO: Add assert that the subblock was read in. 
- sb->uncompressed_ptr = toku_xmalloc(sb->uncompressed_size); - - toku_decompress( - (Bytef *) sb->uncompressed_ptr, - sb->uncompressed_size, - (Bytef *) sb->compressed_ptr, - sb->compressed_size - ); -} - -// verify the checksum -int -verify_ftnode_sub_block (struct sub_block *sb) -{ - int r = 0; - // first verify the checksum - uint32_t data_size = sb->uncompressed_size - 4; // checksum is 4 bytes at end - uint32_t stored_xsum = toku_dtoh32(*((uint32_t *)((char *)sb->uncompressed_ptr + data_size))); - uint32_t actual_xsum = toku_x1764_memory(sb->uncompressed_ptr, data_size); - if (stored_xsum != actual_xsum) { - dump_bad_block((Bytef *) sb->uncompressed_ptr, sb->uncompressed_size); - r = TOKUDB_BAD_CHECKSUM; - } - return r; -} - -// This function deserializes the data stored by serialize_ftnode_info -static int -deserialize_ftnode_info( - struct sub_block *sb, - FTNODE node - ) -{ - // sb_node_info->uncompressed_ptr stores the serialized node information - // this function puts that information into node - - // first verify the checksum - int r = 0; - r = verify_ftnode_sub_block(sb); - if (r != 0) { - goto exit; - } - - uint32_t data_size; - data_size = sb->uncompressed_size - 4; // checksum is 4 bytes at end - - // now with the data verified, we can read the information into the node - struct rbuf rb; - rbuf_init(&rb, (unsigned char *) sb->uncompressed_ptr, data_size); - - node->max_msn_applied_to_node_on_disk = rbuf_msn(&rb); - (void)rbuf_int(&rb); - node->flags = rbuf_int(&rb); - node->height = rbuf_int(&rb); - if (node->layout_version_read_from_disk < FT_LAYOUT_VERSION_19) { - (void) rbuf_int(&rb); // optimized_for_upgrade - } - if (node->layout_version_read_from_disk >= FT_LAYOUT_VERSION_22) { - rbuf_TXNID(&rb, &node->oldest_referenced_xid_known); - } - - // now create the basement nodes or childinfos, depending on whether this is a - // leaf node or internal node - // now the subtree_estimates - - // n_children is now in the header, nd the allocatio of the 
node->bp is in deserialize_ftnode_from_rbuf. - - // now the pivots - node->totalchildkeylens = 0; - if (node->n_children > 1) { - XMALLOC_N(node->n_children - 1, node->childkeys); - for (int i=0; i < node->n_children-1; i++) { - bytevec childkeyptr; - unsigned int cklen; - rbuf_bytes(&rb, &childkeyptr, &cklen); - toku_memdup_dbt(&node->childkeys[i], childkeyptr, cklen); - node->totalchildkeylens += cklen; - } - } - else { - node->childkeys = NULL; - node->totalchildkeylens = 0; - } - - // if this is an internal node, unpack the block nums, and fill in necessary fields - // of childinfo - if (node->height > 0) { - for (int i = 0; i < node->n_children; i++) { - BP_BLOCKNUM(node,i) = rbuf_blocknum(&rb); - BP_WORKDONE(node, i) = 0; - } - } - - // make sure that all the data was read - if (data_size != rb.ndone) { - dump_bad_block(rb.buf, rb.size); - abort(); - } -exit: - return r; -} - -static void -setup_available_ftnode_partition(FTNODE node, int i) { - if (node->height == 0) { - set_BLB(node, i, toku_create_empty_bn()); - BLB_MAX_MSN_APPLIED(node,i) = node->max_msn_applied_to_node_on_disk; - } - else { - set_BNC(node, i, toku_create_empty_nl()); - } -} - -// Assign the child_to_read member of the bfe from the given ftnode -// that has been brought into memory. 
-static void -update_bfe_using_ftnode(FTNODE node, struct ftnode_fetch_extra *bfe) -{ - if (bfe->type == ftnode_fetch_subset && bfe->search != NULL) { - // we do not take into account prefetching yet - // as of now, if we need a subset, the only thing - // we can possibly require is a single basement node - // we find out what basement node the query cares about - // and check if it is available - bfe->child_to_read = toku_ft_search_which_child( - &bfe->h->cmp_descriptor, - bfe->h->compare_fun, - node, - bfe->search - ); - } else if (bfe->type == ftnode_fetch_keymatch) { - // we do not take into account prefetching yet - // as of now, if we need a subset, the only thing - // we can possibly require is a single basement node - // we find out what basement node the query cares about - // and check if it is available - paranoid_invariant(bfe->h->compare_fun); - if (node->height == 0) { - int left_child = toku_bfe_leftmost_child_wanted(bfe, node); - int right_child = toku_bfe_rightmost_child_wanted(bfe, node); - if (left_child == right_child) { - bfe->child_to_read = left_child; - } - } - } -} - -// Using the search parameters in the bfe, this function will -// initialize all of the given ftnode's partitions. -static void -setup_partitions_using_bfe(FTNODE node, - struct ftnode_fetch_extra *bfe, - bool data_in_memory) -{ - // Leftmost and Rightmost Child bounds. - int lc, rc; - if (bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch) { - lc = toku_bfe_leftmost_child_wanted(bfe, node); - rc = toku_bfe_rightmost_child_wanted(bfe, node); - } else { - lc = -1; - rc = -1; - } - - // - // setup memory needed for the node - // - //printf("node height %d, blocknum %" PRId64 ", type %d lc %d rc %d\n", node->height, node->thisnodename.b, bfe->type, lc, rc); - for (int i = 0; i < node->n_children; i++) { - BP_INIT_UNTOUCHED_CLOCK(node,i); - if (data_in_memory) { - BP_STATE(node, i) = ((toku_bfe_wants_child_available(bfe, i) || (lc <= i && i <= rc)) - ? 
PT_AVAIL : PT_COMPRESSED); - } else { - BP_STATE(node, i) = PT_ON_DISK; - } - BP_WORKDONE(node,i) = 0; - - switch (BP_STATE(node,i)) { - case PT_AVAIL: - setup_available_ftnode_partition(node, i); - BP_TOUCH_CLOCK(node,i); - break; - case PT_COMPRESSED: - set_BSB(node, i, sub_block_creat()); - break; - case PT_ON_DISK: - set_BNULL(node, i); - break; - case PT_INVALID: - abort(); - } - } -} - -static void setup_ftnode_partitions(FTNODE node, struct ftnode_fetch_extra* bfe, bool data_in_memory) -// Effect: Used when reading a ftnode into main memory, this sets up the partitions. -// We set bfe->child_to_read as well as the BP_STATE and the data pointers (e.g., with set_BSB or set_BNULL or other set_ operations). -// Arguments: Node: the node to set up. -// bfe: Describes the key range needed. -// data_in_memory: true if we have all the data (in which case we set the BP_STATE to be either PT_AVAIL or PT_COMPRESSED depending on the bfe. -// false if we don't have the partitions in main memory (in which case we set the state to PT_ON_DISK. -{ - // Set bfe->child_to_read. - update_bfe_using_ftnode(node, bfe); - - // Setup the partitions. 
- setup_partitions_using_bfe(node, bfe, data_in_memory); -} - -/* deserialize the partition from the sub-block's uncompressed buffer - * and destroy the uncompressed buffer - */ -static int -deserialize_ftnode_partition( - struct sub_block *sb, - FTNODE node, - int childnum, // which partition to deserialize - DESCRIPTOR desc, - ft_compare_func cmp - ) -{ - int r = 0; - r = verify_ftnode_sub_block(sb); - if (r != 0) { - goto exit; - } - uint32_t data_size; - data_size = sb->uncompressed_size - 4; // checksum is 4 bytes at end - - // now with the data verified, we can read the information into the node - struct rbuf rb; - rbuf_init(&rb, (unsigned char *) sb->uncompressed_ptr, data_size); - unsigned char ch; - ch = rbuf_char(&rb); - - if (node->height > 0) { - assert(ch == FTNODE_PARTITION_FIFO_MSG); - NONLEAF_CHILDINFO bnc = BNC(node, childnum); - if (node->layout_version_read_from_disk <= FT_LAYOUT_VERSION_26) { - // Layout version <= 26 did not serialize sorted message trees to disk. - deserialize_child_buffer_v26(bnc, &rb, desc, cmp); - } else { - deserialize_child_buffer(bnc, &rb); - } - BP_WORKDONE(node, childnum) = 0; - } - else { - assert(ch == FTNODE_PARTITION_DMT_LEAVES); - BLB_SEQINSERT(node, childnum) = 0; - uint32_t num_entries = rbuf_int(&rb); - // we are now at the first byte of first leafentry - data_size -= rb.ndone; // remaining bytes of leafentry data - - BASEMENTNODE bn = BLB(node, childnum); - bn->data_buffer.deserialize_from_rbuf(num_entries, &rb, data_size, node->layout_version_read_from_disk); - } - assert(rb.ndone == rb.size); -exit: - return r; -} - -static int -decompress_and_deserialize_worker(struct rbuf curr_rbuf, struct sub_block curr_sb, FTNODE node, int child, - DESCRIPTOR desc, ft_compare_func cmp, tokutime_t *decompress_time) -{ - int r = 0; - tokutime_t t0 = toku_time_now(); - r = read_and_decompress_sub_block(&curr_rbuf, &curr_sb); - tokutime_t t1 = toku_time_now(); - if (r == 0) { - // at this point, sb->uncompressed_ptr stores 
the serialized node partition - r = deserialize_ftnode_partition(&curr_sb, node, child, desc, cmp); - } - *decompress_time = t1 - t0; - - toku_free(curr_sb.uncompressed_ptr); - return r; -} - -static int -check_and_copy_compressed_sub_block_worker(struct rbuf curr_rbuf, struct sub_block curr_sb, FTNODE node, int child) -{ - int r = 0; - r = read_compressed_sub_block(&curr_rbuf, &curr_sb); - if (r != 0) { - goto exit; - } - - SUB_BLOCK bp_sb; - bp_sb = BSB(node, child); - bp_sb->compressed_size = curr_sb.compressed_size; - bp_sb->uncompressed_size = curr_sb.uncompressed_size; - bp_sb->compressed_ptr = toku_xmalloc(bp_sb->compressed_size); - memcpy(bp_sb->compressed_ptr, curr_sb.compressed_ptr, bp_sb->compressed_size); -exit: - return r; -} - -static FTNODE alloc_ftnode_for_deserialize(uint32_t fullhash, BLOCKNUM blocknum) { -// Effect: Allocate an FTNODE and fill in the values that are not read from - FTNODE XMALLOC(node); - node->fullhash = fullhash; - node->thisnodename = blocknum; - node->dirty = 0; - node->bp = nullptr; - node->oldest_referenced_xid_known = TXNID_NONE; - return node; -} - -static int -deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode, - FTNODE_DISK_DATA* ndd, - BLOCKNUM blocknum, - uint32_t fullhash, - struct ftnode_fetch_extra *bfe, - struct rbuf *rb, - int fd) -// If we have enough information in the rbuf to construct a header, then do so. -// Also fetch in the basement node if needed. -// Return 0 if it worked. If something goes wrong (including that we are looking at some old data format that doesn't have partitions) then return nonzero. -{ - int r = 0; - - tokutime_t t0, t1; - tokutime_t decompress_time = 0; - tokutime_t deserialize_time = 0; - - t0 = toku_time_now(); - - FTNODE node = alloc_ftnode_for_deserialize(fullhash, blocknum); - - if (rb->size < 24) { - // TODO: What error do we return here? - // Does it even matter? 
- r = toku_db_badformat(); - goto cleanup; - } - - bytevec magic; - rbuf_literal_bytes(rb, &magic, 8); - if (memcmp(magic, "tokuleaf", 8)!=0 && - memcmp(magic, "tokunode", 8)!=0) { - r = toku_db_badformat(); - goto cleanup; - } - - node->layout_version_read_from_disk = rbuf_int(rb); - if (node->layout_version_read_from_disk < FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) { - // This code path doesn't have to worry about upgrade. - r = toku_db_badformat(); - goto cleanup; - } - - // If we get here, we know the node is at least - // FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES. We haven't changed - // the serialization format since then (this comment is correct as of - // version 20, which is Deadshot) so we can go ahead and say the - // layout version is current (it will be as soon as we finish - // deserializing). - // TODO(leif): remove node->layout_version (#5174) - node->layout_version = FT_LAYOUT_VERSION; - - node->layout_version_original = rbuf_int(rb); - node->build_id = rbuf_int(rb); - node->n_children = rbuf_int(rb); - // Guaranteed to be have been able to read up to here. If n_children - // is too big, we may have a problem, so check that we won't overflow - // while reading the partition locations. - unsigned int nhsize; - nhsize = serialize_node_header_size(node); // we can do this because n_children is filled in. - unsigned int needed_size; - needed_size = nhsize + 12; // we need 12 more so that we can read the compressed block size information that follows for the nodeinfo. 
- if (needed_size > rb->size) { - r = toku_db_badformat(); - goto cleanup; - } - - XMALLOC_N(node->n_children, node->bp); - XMALLOC_N(node->n_children, *ndd); - // read the partition locations - for (int i=0; in_children; i++) { - BP_START(*ndd,i) = rbuf_int(rb); - BP_SIZE (*ndd,i) = rbuf_int(rb); - } - - uint32_t checksum; - checksum = toku_x1764_memory(rb->buf, rb->ndone); - uint32_t stored_checksum; - stored_checksum = rbuf_int(rb); - if (stored_checksum != checksum) { - dump_bad_block(rb->buf, rb->size); - r = TOKUDB_BAD_CHECKSUM; - goto cleanup; - } - - // Now we want to read the pivot information. - struct sub_block sb_node_info; - sub_block_init(&sb_node_info); - sb_node_info.compressed_size = rbuf_int(rb); // we'll be able to read these because we checked the size earlier. - sb_node_info.uncompressed_size = rbuf_int(rb); - if (rb->size-rb->ndone < sb_node_info.compressed_size + 8) { - r = toku_db_badformat(); - goto cleanup; - } - - // Finish reading compressed the sub_block - bytevec* cp; - cp = (bytevec*)&sb_node_info.compressed_ptr; - rbuf_literal_bytes(rb, cp, sb_node_info.compressed_size); - sb_node_info.xsum = rbuf_int(rb); - // let's check the checksum - uint32_t actual_xsum; - actual_xsum = toku_x1764_memory((char *)sb_node_info.compressed_ptr-8, 8+sb_node_info.compressed_size); - if (sb_node_info.xsum != actual_xsum) { - r = TOKUDB_BAD_CHECKSUM; - goto cleanup; - } - - // Now decompress the subblock - sb_node_info.uncompressed_ptr = toku_xmalloc(sb_node_info.uncompressed_size); - { - tokutime_t decompress_t0 = toku_time_now(); - toku_decompress( - (Bytef *) sb_node_info.uncompressed_ptr, - sb_node_info.uncompressed_size, - (Bytef *) sb_node_info.compressed_ptr, - sb_node_info.compressed_size - ); - tokutime_t decompress_t1 = toku_time_now(); - decompress_time = decompress_t1 - decompress_t0; - } - - // at this point sb->uncompressed_ptr stores the serialized node info. 
- r = deserialize_ftnode_info(&sb_node_info, node); - if (r != 0) { - goto cleanup; - } - - toku_free(sb_node_info.uncompressed_ptr); - sb_node_info.uncompressed_ptr = NULL; - - // Now we have the ftnode_info. We have a bunch more stuff in the - // rbuf, so we might be able to store the compressed data for some - // objects. - // We can proceed to deserialize the individual subblocks. - paranoid_invariant(is_valid_ftnode_fetch_type(bfe->type)); - - // setup the memory of the partitions - // for partitions being decompressed, create either FIFO or basement node - // for partitions staying compressed, create sub_block - setup_ftnode_partitions(node, bfe, false); - - // We must capture deserialize and decompression time before - // the pf_callback, otherwise we would double-count. - t1 = toku_time_now(); - deserialize_time = (t1 - t0) - decompress_time; - - // do partial fetch if necessary - if (bfe->type != ftnode_fetch_none) { - PAIR_ATTR attr; - r = toku_ftnode_pf_callback(node, *ndd, bfe, fd, &attr); - if (r != 0) { - goto cleanup; - } - } - - // handle clock - for (int i = 0; i < node->n_children; i++) { - if (toku_bfe_wants_child_available(bfe, i)) { - paranoid_invariant(BP_STATE(node,i) == PT_AVAIL); - BP_TOUCH_CLOCK(node,i); - } - } - *ftnode = node; - r = 0; - -cleanup: - if (r == 0) { - bfe->deserialize_time += deserialize_time; - bfe->decompress_time += decompress_time; - toku_ft_status_update_deserialize_times(node, deserialize_time, decompress_time); - } - if (r != 0) { - if (node) { - toku_free(*ndd); - toku_free(node->bp); - toku_free(node); - } - } - return r; -} - -// This function takes a deserialized version 13 or 14 buffer and -// constructs the associated internal, non-leaf ftnode object. It -// also creates MSN's for older messages created in older versions -// that did not generate MSN's for messages. These new MSN's are -// generated from the root downwards, counting backwards from MIN_MSN -// and persisted in the ft header. 
-static int -deserialize_and_upgrade_internal_node(FTNODE node, - struct rbuf *rb, - struct ftnode_fetch_extra* bfe, - STAT64INFO info) -{ - int r = 0; - int version = node->layout_version_read_from_disk; - - if(version == FT_LAST_LAYOUT_VERSION_WITH_FINGERPRINT) { - (void) rbuf_int(rb); // 10. fingerprint - } - - node->n_children = rbuf_int(rb); // 11. n_children - - // Sub-tree esitmates... - for (int i = 0; i < node->n_children; ++i) { - if (version == FT_LAST_LAYOUT_VERSION_WITH_FINGERPRINT) { - (void) rbuf_int(rb); // 12. fingerprint - } - uint64_t nkeys = rbuf_ulonglong(rb); // 13. nkeys - uint64_t ndata = rbuf_ulonglong(rb); // 14. ndata - uint64_t dsize = rbuf_ulonglong(rb); // 15. dsize - (void) rbuf_char(rb); // 16. exact (char) - invariant(nkeys == ndata); - if (info) { - // info is non-null if we're trying to upgrade old subtree - // estimates to stat64info - info->numrows += nkeys; - info->numbytes += dsize; - } - } - - node->childkeys = NULL; - node->totalchildkeylens = 0; - // I. Allocate keys based on number of children. - XMALLOC_N(node->n_children - 1, node->childkeys); - // II. Copy keys from buffer to allocated keys in ftnode. - for (int i = 0; i < node->n_children - 1; ++i) { - bytevec childkeyptr; - unsigned int cklen; - rbuf_bytes(rb, &childkeyptr, &cklen); // 17. child key pointers - toku_memdup_dbt(&node->childkeys[i], childkeyptr, cklen); - node->totalchildkeylens += cklen; - } - - // Create space for the child node buffers (a.k.a. partitions). - XMALLOC_N(node->n_children, node->bp); - - // Set the child blocknums. - for (int i = 0; i < node->n_children; ++i) { - BP_BLOCKNUM(node, i) = rbuf_blocknum(rb); // 18. blocknums - BP_WORKDONE(node, i) = 0; - } - - // Read in the child buffer maps. - struct sub_block_map child_buffer_map[node->n_children]; - for (int i = 0; i < node->n_children; ++i) { - // The following fields are read in the - // sub_block_map_deserialize() call: - // 19. index 20. offset 21. 
size - sub_block_map_deserialize(&child_buffer_map[i], rb); - } - - // We need to setup this node's partitions, but we can't call the - // existing call (setup_ftnode_paritions.) because there are - // existing optimizations that would prevent us from bringing all - // of this node's partitions into memory. Instead, We use the - // existing bfe and node to set the bfe's child_to_search member. - // Then we create a temporary bfe that needs all the nodes to make - // sure we properly intitialize our partitions before filling them - // in from our soon-to-be-upgraded node. - update_bfe_using_ftnode(node, bfe); - struct ftnode_fetch_extra temp_bfe; - temp_bfe.type = ftnode_fetch_all; - setup_partitions_using_bfe(node, &temp_bfe, true); - - // Cache the highest MSN generated for the message buffers. This - // will be set in the ftnode. - // - // The way we choose MSNs for upgraded messages is delicate. The - // field `highest_unused_msn_for_upgrade' in the header is always an - // MSN that no message has yet. So when we have N messages that need - // MSNs, we decrement it by N, and then use it and the N-1 MSNs less - // than it, but we do not use the value we decremented it to. - // - // In the code below, we initialize `lowest' with the value of - // `highest_unused_msn_for_upgrade' after it is decremented, so we - // need to be sure to increment it once before we enqueue our first - // message. - MSN highest_msn; - highest_msn.msn = 0; - - // Deserialize de-compressed buffers. - for (int i = 0; i < node->n_children; ++i) { - NONLEAF_CHILDINFO bnc = BNC(node, i); - int n_in_this_buffer = rbuf_int(rb); // 22. node count - - int32_t *fresh_offsets = NULL; - int32_t *broadcast_offsets = NULL; - int nfresh = 0; - int nbroadcast_offsets = 0; - - if (bfe->h->compare_fun) { - XMALLOC_N(n_in_this_buffer, fresh_offsets); - // We skip 'stale' offsets for upgraded nodes. 
- XMALLOC_N(n_in_this_buffer, broadcast_offsets); - } - - // Atomically decrement the header's MSN count by the number - // of messages in the buffer. - MSN lowest; - uint64_t amount = n_in_this_buffer; - lowest.msn = toku_sync_sub_and_fetch(&bfe->h->h->highest_unused_msn_for_upgrade.msn, amount); - if (highest_msn.msn == 0) { - highest_msn.msn = lowest.msn + n_in_this_buffer; - } - - // Create the FIFO entires from the deserialized buffer. - for (int j = 0; j < n_in_this_buffer; ++j) { - bytevec key; ITEMLEN keylen; - bytevec val; ITEMLEN vallen; - unsigned char ctype = rbuf_char(rb); // 23. message type - enum ft_msg_type type = (enum ft_msg_type) ctype; - XIDS xids; - xids_create_from_buffer(rb, &xids); // 24. XID - rbuf_bytes(rb, &key, &keylen); // 25. key - rbuf_bytes(rb, &val, &vallen); // 26. value - - // can we factor this out? - int32_t *dest; - if (bfe->h->compare_fun) { - if (ft_msg_type_applies_once(type)) { - dest = &fresh_offsets[nfresh]; - nfresh++; - } else if (ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)) { - dest = &broadcast_offsets[nbroadcast_offsets]; - nbroadcast_offsets++; - } else { - abort(); - } - } else { - dest = NULL; - } - - // Increment our MSN, the last message should have the - // newest/highest MSN. See above for a full explanation. 
- lowest.msn++; - r = toku_fifo_enq(bnc->buffer, - key, - keylen, - val, - vallen, - type, - lowest, - xids, - true, - dest); - lazy_assert_zero(r); - xids_destroy(&xids); - } - - if (bfe->h->compare_fun) { - struct toku_fifo_entry_key_msn_cmp_extra extra = { .desc = &bfe->h->cmp_descriptor, - .cmp = bfe->h->compare_fun, - .fifo = bnc->buffer }; - typedef toku::sort key_msn_sort; - r = key_msn_sort::mergesort_r(fresh_offsets, nfresh, extra); - assert_zero(r); - bnc->fresh_message_tree.destroy(); - bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer); - bnc->broadcast_list.destroy(); - bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast_offsets, n_in_this_buffer); - } - } - - // Assign the highest msn from our upgrade message FIFO queues. - node->max_msn_applied_to_node_on_disk = highest_msn; - // Since we assigned MSNs to this node's messages, we need to dirty it. - node->dirty = 1; - - // Must compute the checksum now (rather than at the end, while we - // still have the pointer to the buffer). - if (version >= FT_FIRST_LAYOUT_VERSION_WITH_END_TO_END_CHECKSUM) { - uint32_t expected_xsum = toku_dtoh32(*(uint32_t*)(rb->buf+rb->size-4)); // 27. checksum - uint32_t actual_xsum = toku_x1764_memory(rb->buf, rb->size-4); - if (expected_xsum != actual_xsum) { - fprintf(stderr, "%s:%d: Bad checksum: expected = %" PRIx32 ", actual= %" PRIx32 "\n", - __FUNCTION__, - __LINE__, - expected_xsum, - actual_xsum); - fprintf(stderr, - "Checksum failure while reading node in file %s.\n", - toku_cachefile_fname_in_env(bfe->h->cf)); - fflush(stderr); - return toku_db_badformat(); - } - } - - return r; -} - -// This function takes a deserialized version 13 or 14 buffer and -// constructs the associated leaf ftnode object. 
-static int -deserialize_and_upgrade_leaf_node(FTNODE node, - struct rbuf *rb, - struct ftnode_fetch_extra* bfe, - STAT64INFO info) -{ - int r = 0; - int version = node->layout_version_read_from_disk; - - // This is a leaf node, so the offsets in the buffer will be - // different from the internal node offsets above. - uint64_t nkeys = rbuf_ulonglong(rb); // 10. nkeys - uint64_t ndata = rbuf_ulonglong(rb); // 11. ndata - uint64_t dsize = rbuf_ulonglong(rb); // 12. dsize - invariant(nkeys == ndata); - if (info) { - // info is non-null if we're trying to upgrade old subtree - // estimates to stat64info - info->numrows += nkeys; - info->numbytes += dsize; - } - - // This is the optimized for upgrade field. - if (version == FT_LAYOUT_VERSION_14) { - (void) rbuf_int(rb); // 13. optimized - } - - // npartitions - This is really the number of leaf entries in - // our single basement node. There should only be 1 (ONE) - // partition, so there shouldn't be any pivot key stored. This - // means the loop will not iterate. We could remove the loop and - // assert that the value is indeed 1. - int npartitions = rbuf_int(rb); // 14. npartitions - assert(npartitions == 1); - - // Set number of children to 1, since we will only have one - // basement node. - node->n_children = 1; - XMALLOC_N(node->n_children, node->bp); - // This is a malloc(0), but we need to do it in order to get a pointer - // we can free() later. - XMALLOC_N(node->n_children - 1, node->childkeys); - node->totalchildkeylens = 0; - - // Create one basement node to contain all the leaf entries by - // setting up the single partition and updating the bfe. - update_bfe_using_ftnode(node, bfe); - struct ftnode_fetch_extra temp_bfe; - fill_bfe_for_full_read(&temp_bfe, bfe->h); - setup_partitions_using_bfe(node, &temp_bfe, true); - - // 11. Deserialize the partition maps, though they are not used in the - // newer versions of ftnodes. 
- struct sub_block_map part_map[npartitions]; - for (int i = 0; i < npartitions; ++i) { - sub_block_map_deserialize(&part_map[i], rb); - } - - // Copy all of the leaf entries into the single basement node. - - // The number of leaf entries in buffer. - int n_in_buf = rbuf_int(rb); // 15. # of leaves - BLB_SEQINSERT(node,0) = 0; - BASEMENTNODE bn = BLB(node, 0); - - // Read the leaf entries from the buffer, advancing the buffer - // as we go. - bool has_end_to_end_checksum = (version >= FT_FIRST_LAYOUT_VERSION_WITH_END_TO_END_CHECKSUM); - if (version <= FT_LAYOUT_VERSION_13) { - // Create our mempool. - // Loop through - for (int i = 0; i < n_in_buf; ++i) { - LEAFENTRY_13 le = reinterpret_cast(&rb->buf[rb->ndone]); - uint32_t disksize = leafentry_disksize_13(le); - rb->ndone += disksize; // 16. leaf entry (13) - invariant(rb->ndone<=rb->size); - LEAFENTRY new_le; - size_t new_le_size; - void* key = NULL; - uint32_t keylen = 0; - r = toku_le_upgrade_13_14(le, - &key, - &keylen, - &new_le_size, - &new_le); - assert_zero(r); - // Copy the pointer value straight into the OMT - LEAFENTRY new_le_in_bn = nullptr; - void *maybe_free; - bn->data_buffer.get_space_for_insert( - i, - key, - keylen, - new_le_size, - &new_le_in_bn, - &maybe_free - ); - if (maybe_free) { - toku_free(maybe_free); - } - memcpy(new_le_in_bn, new_le, new_le_size); - toku_free(new_le); - } - } else { - uint32_t data_size = rb->size - rb->ndone; - if (has_end_to_end_checksum) { - data_size -= sizeof(uint32_t); - } - bn->data_buffer.deserialize_from_rbuf(n_in_buf, rb, data_size, node->layout_version_read_from_disk); - } - - // Whatever this is must be less than the MSNs of every message above - // it, so it's ok to take it here. 
- bn->max_msn_applied = bfe->h->h->highest_unused_msn_for_upgrade; - bn->stale_ancestor_messages_applied = false; - node->max_msn_applied_to_node_on_disk = bn->max_msn_applied; - - // Checksum (end to end) is only on version 14 - if (has_end_to_end_checksum) { - uint32_t expected_xsum = rbuf_int(rb); // 17. checksum - uint32_t actual_xsum = toku_x1764_memory(rb->buf, rb->size - 4); - if (expected_xsum != actual_xsum) { - fprintf(stderr, "%s:%d: Bad checksum: expected = %" PRIx32 ", actual= %" PRIx32 "\n", - __FUNCTION__, - __LINE__, - expected_xsum, - actual_xsum); - fprintf(stderr, - "Checksum failure while reading node in file %s.\n", - toku_cachefile_fname_in_env(bfe->h->cf)); - fflush(stderr); - return toku_db_badformat(); - } - } - - // We should have read the whole block by this point. - if (rb->ndone != rb->size) { - // TODO: Error handling. - return 1; - } - - return r; -} - -static int -read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum, - DISKOFF offset, DISKOFF size, - FT h, - struct rbuf *rb, - /* out */ int *layout_version_p); - -// This function upgrades a version 14 or 13 ftnode to the current -// verison. NOTE: This code assumes the first field of the rbuf has -// already been read from the buffer (namely the layout_version of the -// ftnode.) -static int -deserialize_and_upgrade_ftnode(FTNODE node, - FTNODE_DISK_DATA* ndd, - BLOCKNUM blocknum, - struct ftnode_fetch_extra* bfe, - STAT64INFO info, - int fd) -{ - int r = 0; - int version; - - // I. First we need to de-compress the entire node, only then can - // we read the different sub-sections. 
- // get the file offset and block size for the block - DISKOFF offset, size; - toku_translate_blocknum_to_offset_size(bfe->h->blocktable, - blocknum, - &offset, - &size); - struct rbuf rb; - r = read_and_decompress_block_from_fd_into_rbuf(fd, - blocknum, - offset, - size, - bfe->h, - &rb, - &version); - if (r != 0) { - goto exit; - } - - // Re-read the magic field from the previous call, since we are - // restarting with a fresh rbuf. - { - bytevec magic; - rbuf_literal_bytes(&rb, &magic, 8); // 1. magic - } - - // II. Start reading ftnode fields out of the decompressed buffer. - - // Copy over old version info. - node->layout_version_read_from_disk = rbuf_int(&rb); // 2. layout version - version = node->layout_version_read_from_disk; - assert(version <= FT_LAYOUT_VERSION_14); - // Upgrade the current version number to the current version. - node->layout_version = FT_LAYOUT_VERSION; - - node->layout_version_original = rbuf_int(&rb); // 3. original layout - node->build_id = rbuf_int(&rb); // 4. build id - - // The remaining offsets into the rbuf do not map to the current - // version, so we need to fill in the blanks and ignore older - // fields. - (void)rbuf_int(&rb); // 5. nodesize - node->flags = rbuf_int(&rb); // 6. flags - node->height = rbuf_int(&rb); // 7. height - - // If the version is less than 14, there are two extra ints here. - // we would need to ignore them if they are there. - // These are the 'fingerprints'. - if (version == FT_LAYOUT_VERSION_13) { - (void) rbuf_int(&rb); // 8. rand4 - (void) rbuf_int(&rb); // 9. local - } - - // The next offsets are dependent on whether this is a leaf node - // or not. - - // III. Read in Leaf and Internal Node specific data. - - // Check height to determine whether this is a leaf node or not. 
- if (node->height > 0) { - r = deserialize_and_upgrade_internal_node(node, &rb, bfe, info); - } else { - r = deserialize_and_upgrade_leaf_node(node, &rb, bfe, info); - } - - XMALLOC_N(node->n_children, *ndd); - // Initialize the partition locations to zero, because version 14 - // and below have no notion of partitions on disk. - for (int i=0; in_children; i++) { - BP_START(*ndd,i) = 0; - BP_SIZE (*ndd,i) = 0; - } - - toku_free(rb.buf); -exit: - return r; -} - -static int -deserialize_ftnode_from_rbuf( - FTNODE *ftnode, - FTNODE_DISK_DATA* ndd, - BLOCKNUM blocknum, - uint32_t fullhash, - struct ftnode_fetch_extra* bfe, - STAT64INFO info, - struct rbuf *rb, - int fd - ) -// Effect: deserializes a ftnode that is in rb (with pointer of rb just past the magic) into a FTNODE. -{ - int r = 0; - struct sub_block sb_node_info; - - tokutime_t t0, t1; - tokutime_t decompress_time = 0; - tokutime_t deserialize_time = 0; - - t0 = toku_time_now(); - - FTNODE node = alloc_ftnode_for_deserialize(fullhash, blocknum); - - // now start reading from rbuf - // first thing we do is read the header information - bytevec magic; - rbuf_literal_bytes(rb, &magic, 8); - if (memcmp(magic, "tokuleaf", 8)!=0 && - memcmp(magic, "tokunode", 8)!=0) { - r = toku_db_badformat(); - goto cleanup; - } - - node->layout_version_read_from_disk = rbuf_int(rb); - lazy_assert(node->layout_version_read_from_disk >= FT_LAYOUT_MIN_SUPPORTED_VERSION); - - // Check if we are reading in an older node version. - if (node->layout_version_read_from_disk <= FT_LAYOUT_VERSION_14) { - int version = node->layout_version_read_from_disk; - // Perform the upgrade. - r = deserialize_and_upgrade_ftnode(node, ndd, blocknum, bfe, info, fd); - if (r != 0) { - goto cleanup; - } - - if (version <= FT_LAYOUT_VERSION_13) { - // deprecate 'TOKU_DB_VALCMP_BUILTIN'. just remove the flag - node->flags &= ~TOKU_DB_VALCMP_BUILTIN_13; - } - - // If everything is ok, just re-assign the ftnode and retrn. 
- *ftnode = node; - r = 0; - goto cleanup; - } - - // Upgrade versions after 14 to current. This upgrade is trivial, it - // removes the optimized for upgrade field, which has already been - // removed in the deserialization code (see - // deserialize_ftnode_info()). - node->layout_version = FT_LAYOUT_VERSION; - node->layout_version_original = rbuf_int(rb); - node->build_id = rbuf_int(rb); - node->n_children = rbuf_int(rb); - XMALLOC_N(node->n_children, node->bp); - XMALLOC_N(node->n_children, *ndd); - // read the partition locations - for (int i=0; in_children; i++) { - BP_START(*ndd,i) = rbuf_int(rb); - BP_SIZE (*ndd,i) = rbuf_int(rb); - } - // verify checksum of header stored - uint32_t checksum; - checksum = toku_x1764_memory(rb->buf, rb->ndone); - uint32_t stored_checksum; - stored_checksum = rbuf_int(rb); - if (stored_checksum != checksum) { - dump_bad_block(rb->buf, rb->size); - invariant(stored_checksum == checksum); - } - - // now we read and decompress the pivot and child information - sub_block_init(&sb_node_info); - { - tokutime_t sb_decompress_t0 = toku_time_now(); - r = read_and_decompress_sub_block(rb, &sb_node_info); - tokutime_t sb_decompress_t1 = toku_time_now(); - decompress_time += sb_decompress_t1 - sb_decompress_t0; - } - if (r != 0) { - goto cleanup; - } - - // at this point, sb->uncompressed_ptr stores the serialized node info - r = deserialize_ftnode_info(&sb_node_info, node); - if (r != 0) { - goto cleanup; - } - toku_free(sb_node_info.uncompressed_ptr); - - // now that the node info has been deserialized, we can proceed to deserialize - // the individual sub blocks - paranoid_invariant(is_valid_ftnode_fetch_type(bfe->type)); - - // setup the memory of the partitions - // for partitions being decompressed, create either FIFO or basement node - // for partitions staying compressed, create sub_block - setup_ftnode_partitions(node, bfe, true); - - // This loop is parallelizeable, since we don't have a dependency on the work done so far. 
- for (int i = 0; i < node->n_children; i++) { - uint32_t curr_offset = BP_START(*ndd,i); - uint32_t curr_size = BP_SIZE(*ndd,i); - // the compressed, serialized partitions start at where rb is currently pointing, - // which would be rb->buf + rb->ndone - // we need to intialize curr_rbuf to point to this place - struct rbuf curr_rbuf = {.buf = NULL, .size = 0, .ndone = 0}; - rbuf_init(&curr_rbuf, rb->buf + curr_offset, curr_size); - - // - // now we are at the point where we have: - // - read the entire compressed node off of disk, - // - decompressed the pivot and offset information, - // - have arrived at the individual partitions. - // - // Based on the information in bfe, we want to decompress a subset of - // of the compressed partitions (also possibly none or possibly all) - // The partitions that we want to decompress and make available - // to the node, we do, the rest we simply copy in compressed - // form into the node, and set the state of the partition to PT_COMPRESSED - // - - struct sub_block curr_sb; - sub_block_init(&curr_sb); - - // curr_rbuf is passed by value to decompress_and_deserialize_worker, so there's no ugly race condition. - // This would be more obvious if curr_rbuf were an array. 
- - // deserialize_ftnode_info figures out what the state - // should be and sets up the memory so that we are ready to use it - - switch (BP_STATE(node,i)) { - case PT_AVAIL: { - // case where we read and decompress the partition - tokutime_t partition_decompress_time; - r = decompress_and_deserialize_worker(curr_rbuf, curr_sb, node, i, - &bfe->h->cmp_descriptor, bfe->h->compare_fun, &partition_decompress_time); - decompress_time += partition_decompress_time; - if (r != 0) { - goto cleanup; - } - break; - } - case PT_COMPRESSED: - // case where we leave the partition in the compressed state - r = check_and_copy_compressed_sub_block_worker(curr_rbuf, curr_sb, node, i); - if (r != 0) { - goto cleanup; - } - break; - case PT_INVALID: // this is really bad - case PT_ON_DISK: // it's supposed to be in memory. - abort(); - } - } - *ftnode = node; - r = 0; - -cleanup: - if (r == 0) { - t1 = toku_time_now(); - deserialize_time = (t1 - t0) - decompress_time; - bfe->deserialize_time += deserialize_time; - bfe->decompress_time += decompress_time; - toku_ft_status_update_deserialize_times(node, deserialize_time, decompress_time); - } - if (r != 0) { - // NOTE: Right now, callers higher in the stack will assert on - // failure, so this is OK for production. However, if we - // create tools that use this function to search for errors in - // the FT, then we will leak memory. 
- if (node) { - toku_free(node); - } - } - return r; -} - -int -toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, struct ftnode_fetch_extra* bfe) { - int r = 0; - assert(BP_STATE(node,childnum) == PT_ON_DISK); - assert(node->bp[childnum].ptr.tag == BCT_NULL); - - // - // setup the partition - // - setup_available_ftnode_partition(node, childnum); - BP_STATE(node,childnum) = PT_AVAIL; - - // - // read off disk and make available in memory - // - // get the file offset and block size for the block - DISKOFF node_offset, total_node_disk_size; - toku_translate_blocknum_to_offset_size( - bfe->h->blocktable, - node->thisnodename, - &node_offset, - &total_node_disk_size - ); - - uint32_t curr_offset = BP_START(ndd, childnum); - uint32_t curr_size = BP_SIZE (ndd, childnum); - struct rbuf rb = {.buf = NULL, .size = 0, .ndone = 0}; - - uint32_t pad_at_beginning = (node_offset+curr_offset)%512; - uint32_t padded_size = roundup_to_multiple(512, pad_at_beginning + curr_size); - - toku::scoped_malloc_aligned raw_block_buf(padded_size, 512); - uint8_t *raw_block = reinterpret_cast(raw_block_buf.get()); - rbuf_init(&rb, pad_at_beginning+raw_block, curr_size); - tokutime_t t0 = toku_time_now(); - - // read the block - assert(0==((unsigned long long)raw_block)%512); // for O_DIRECT - assert(0==(padded_size)%512); - assert(0==(node_offset+curr_offset-pad_at_beginning)%512); - ssize_t rlen = toku_os_pread(fd, raw_block, padded_size, node_offset+curr_offset-pad_at_beginning); - assert((DISKOFF)rlen >= pad_at_beginning + curr_size); // we read in at least enough to get what we wanted - assert((DISKOFF)rlen <= padded_size); // we didn't read in too much. 
- - tokutime_t t1 = toku_time_now(); - - // read sub block - struct sub_block curr_sb; - sub_block_init(&curr_sb); - r = read_compressed_sub_block(&rb, &curr_sb); - if (r != 0) { - return r; - } - invariant(curr_sb.compressed_ptr != NULL); - - // decompress - toku::scoped_malloc uncompressed_buf(curr_sb.uncompressed_size); - curr_sb.uncompressed_ptr = uncompressed_buf.get(); - toku_decompress((Bytef *) curr_sb.uncompressed_ptr, curr_sb.uncompressed_size, - (Bytef *) curr_sb.compressed_ptr, curr_sb.compressed_size); - - // deserialize - tokutime_t t2 = toku_time_now(); - - r = deserialize_ftnode_partition(&curr_sb, node, childnum, &bfe->h->cmp_descriptor, bfe->h->compare_fun); - - tokutime_t t3 = toku_time_now(); - - // capture stats - tokutime_t io_time = t1 - t0; - tokutime_t decompress_time = t2 - t1; - tokutime_t deserialize_time = t3 - t2; - bfe->deserialize_time += deserialize_time; - bfe->decompress_time += decompress_time; - toku_ft_status_update_deserialize_times(node, deserialize_time, decompress_time); - - bfe->bytes_read = rlen; - bfe->io_time = io_time; - - return r; -} - -// Take a ftnode partition that is in the compressed state, and make it avail -int -toku_deserialize_bp_from_compressed(FTNODE node, int childnum, struct ftnode_fetch_extra *bfe) { - int r = 0; - assert(BP_STATE(node, childnum) == PT_COMPRESSED); - SUB_BLOCK curr_sb = BSB(node, childnum); - - toku::scoped_malloc uncompressed_buf(curr_sb->uncompressed_size); - assert(curr_sb->uncompressed_ptr == NULL); - curr_sb->uncompressed_ptr = uncompressed_buf.get(); - - setup_available_ftnode_partition(node, childnum); - BP_STATE(node,childnum) = PT_AVAIL; - - // decompress the sub_block - tokutime_t t0 = toku_time_now(); - - toku_decompress( - (Bytef *) curr_sb->uncompressed_ptr, - curr_sb->uncompressed_size, - (Bytef *) curr_sb->compressed_ptr, - curr_sb->compressed_size - ); - - tokutime_t t1 = toku_time_now(); - - r = deserialize_ftnode_partition(curr_sb, node, childnum, 
&bfe->h->cmp_descriptor, bfe->h->compare_fun); - - tokutime_t t2 = toku_time_now(); - - tokutime_t decompress_time = t1 - t0; - tokutime_t deserialize_time = t2 - t1; - bfe->deserialize_time += deserialize_time; - bfe->decompress_time += decompress_time; - toku_ft_status_update_deserialize_times(node, deserialize_time, decompress_time); - - toku_free(curr_sb->compressed_ptr); - toku_free(curr_sb); - return r; -} - -static int -deserialize_ftnode_from_fd(int fd, - BLOCKNUM blocknum, - uint32_t fullhash, - FTNODE *ftnode, - FTNODE_DISK_DATA *ndd, - struct ftnode_fetch_extra *bfe, - STAT64INFO info) -{ - struct rbuf rb = RBUF_INITIALIZER; - - tokutime_t t0 = toku_time_now(); - read_block_from_fd_into_rbuf(fd, blocknum, bfe->h, &rb); - tokutime_t t1 = toku_time_now(); - - // Decompress and deserialize the ftnode. Time statistics - // are taken inside this function. - int r = deserialize_ftnode_from_rbuf(ftnode, ndd, blocknum, fullhash, bfe, info, &rb, fd); - if (r != 0) { - dump_bad_block(rb.buf,rb.size); - } - - bfe->bytes_read = rb.size; - bfe->io_time = t1 - t0; - toku_free(rb.buf); - return r; -} - -// Read ftnode from file into struct. Perform version upgrade if necessary. -int -toku_deserialize_ftnode_from (int fd, - BLOCKNUM blocknum, - uint32_t fullhash, - FTNODE *ftnode, - FTNODE_DISK_DATA* ndd, - struct ftnode_fetch_extra* bfe - ) -// Effect: Read a node in. If possible, read just the header. -{ - int r = 0; - struct rbuf rb = RBUF_INITIALIZER; - - // each function below takes the appropriate io/decompression/deserialize statistics - - if (!bfe->read_all_partitions) { - read_ftnode_header_from_fd_into_rbuf_if_small_enough(fd, blocknum, bfe->h, &rb, bfe); - r = deserialize_ftnode_header_from_rbuf_if_small_enough(ftnode, ndd, blocknum, fullhash, bfe, &rb, fd); - } else { - // force us to do it the old way - r = -1; - } - if (r != 0) { - // Something went wrong, go back to doing it the old way. 
- r = deserialize_ftnode_from_fd(fd, blocknum, fullhash, ftnode, ndd, bfe, NULL); - } - - toku_free(rb.buf); - return r; -} - -void -toku_verify_or_set_counts(FTNODE UU(node)) { -} - -int -toku_db_badformat(void) { - return DB_BADFORMAT; -} - -static size_t -serialize_rollback_log_size(ROLLBACK_LOG_NODE log) { - size_t size = node_header_overhead //8 "tokuroll", 4 version, 4 version_original, 4 build_id - +16 //TXNID_PAIR - +8 //sequence - +8 //blocknum - +8 //previous (blocknum) - +8 //resident_bytecount - +8 //memarena size - +log->rollentry_resident_bytecount; - return size; -} - -static void -serialize_rollback_log_node_to_buf(ROLLBACK_LOG_NODE log, char *buf, size_t calculated_size, int UU(n_sub_blocks), struct sub_block UU(sub_block[])) { - struct wbuf wb; - wbuf_init(&wb, buf, calculated_size); - { //Serialize rollback log to local wbuf - wbuf_nocrc_literal_bytes(&wb, "tokuroll", 8); - lazy_assert(log->layout_version == FT_LAYOUT_VERSION); - wbuf_nocrc_int(&wb, log->layout_version); - wbuf_nocrc_int(&wb, log->layout_version_original); - wbuf_nocrc_uint(&wb, BUILD_ID); - wbuf_nocrc_TXNID_PAIR(&wb, log->txnid); - wbuf_nocrc_ulonglong(&wb, log->sequence); - wbuf_nocrc_BLOCKNUM(&wb, log->blocknum); - wbuf_nocrc_BLOCKNUM(&wb, log->previous); - wbuf_nocrc_ulonglong(&wb, log->rollentry_resident_bytecount); - //Write down memarena size needed to restore - wbuf_nocrc_ulonglong(&wb, toku_memarena_total_size_in_use(log->rollentry_arena)); - - { - //Store rollback logs - struct roll_entry *item; - size_t done_before = wb.ndone; - for (item = log->newest_logentry; item; item = item->prev) { - toku_logger_rollback_wbuf_nocrc_write(&wb, item); - } - lazy_assert(done_before + log->rollentry_resident_bytecount == wb.ndone); - } - } - lazy_assert(wb.ndone == wb.size); - lazy_assert(calculated_size==wb.ndone); -} - -static void -serialize_uncompressed_block_to_memory(char * uncompressed_buf, - int n_sub_blocks, - struct sub_block sub_block[/*n_sub_blocks*/], - enum 
toku_compression_method method, - /*out*/ size_t *n_bytes_to_write, - /*out*/ char **bytes_to_write) -// Guarantees that the malloc'd BYTES_TO_WRITE is 512-byte aligned (so that O_DIRECT will work) -{ - // allocate space for the compressed uncompressed_buf - size_t compressed_len = get_sum_compressed_size_bound(n_sub_blocks, sub_block, method); - size_t sub_block_header_len = sub_block_header_size(n_sub_blocks); - size_t header_len = node_header_overhead + sub_block_header_len + sizeof (uint32_t); // node + sub_block + checksum - char *XMALLOC_N_ALIGNED(512, roundup_to_multiple(512, header_len + compressed_len), compressed_buf); - - // copy the header - memcpy(compressed_buf, uncompressed_buf, node_header_overhead); - if (0) printf("First 4 bytes before compressing data are %02x%02x%02x%02x\n", - uncompressed_buf[node_header_overhead], uncompressed_buf[node_header_overhead+1], - uncompressed_buf[node_header_overhead+2], uncompressed_buf[node_header_overhead+3]); - - // compress all of the sub blocks - char *uncompressed_ptr = uncompressed_buf + node_header_overhead; - char *compressed_ptr = compressed_buf + header_len; - compressed_len = compress_all_sub_blocks(n_sub_blocks, sub_block, uncompressed_ptr, compressed_ptr, num_cores, ft_pool, method); - - //if (0) printf("Block %" PRId64 " Size before compressing %u, after compression %" PRIu64 "\n", blocknum.b, calculated_size-node_header_overhead, (uint64_t) compressed_len); - - // serialize the sub block header - uint32_t *ptr = (uint32_t *)(compressed_buf + node_header_overhead); - *ptr++ = toku_htod32(n_sub_blocks); - for (int i=0; ilen = calculated_size; - serialized->n_sub_blocks = 0; - // choose sub block parameters - int sub_block_size = 0; - size_t data_size = calculated_size - node_header_overhead; - choose_sub_block_size(data_size, max_sub_blocks, &sub_block_size, &serialized->n_sub_blocks); - lazy_assert(0 < serialized->n_sub_blocks && serialized->n_sub_blocks <= max_sub_blocks); - 
lazy_assert(sub_block_size > 0); - - // set the initial sub block size for all of the sub blocks - for (int i = 0; i < serialized->n_sub_blocks; i++) - sub_block_init(&serialized->sub_block[i]); - set_all_sub_block_sizes(data_size, sub_block_size, serialized->n_sub_blocks, serialized->sub_block); - - // allocate space for the serialized node - XMALLOC_N(calculated_size, serialized->data); - // serialize the node into buf - serialize_rollback_log_node_to_buf(log, serialized->data, calculated_size, serialized->n_sub_blocks, serialized->sub_block); - serialized->blocknum = log->blocknum; -} - -int -toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized, - FT h, bool for_checkpoint) { - size_t n_to_write; - char *compressed_buf; - struct serialized_rollback_log_node serialized_local; - - if (is_serialized) { - invariant_null(log); - } else { - invariant_null(serialized_log); - serialized_log = &serialized_local; - toku_serialize_rollback_log_to_memory_uncompressed(log, serialized_log); - } - BLOCKNUM blocknum = serialized_log->blocknum; - - //Compress and malloc buffer to write - serialize_uncompressed_block_to_memory(serialized_log->data, - serialized_log->n_sub_blocks, serialized_log->sub_block, - h->h->compression_method, &n_to_write, &compressed_buf); - - { - lazy_assert(blocknum.b>=0); - DISKOFF offset; - toku_blocknum_realloc_on_disk(h->blocktable, blocknum, n_to_write, &offset, - h, fd, for_checkpoint); //dirties h - toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); - } - toku_free(compressed_buf); - if (!is_serialized) { - toku_static_serialized_rollback_log_destroy(&serialized_local); - log->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction. 
- } - return 0; -} - -static int -deserialize_rollback_log_from_rbuf (BLOCKNUM blocknum, ROLLBACK_LOG_NODE *log_p, struct rbuf *rb) { - ROLLBACK_LOG_NODE MALLOC(result); - int r; - if (result==NULL) { - r=get_error_errno(); - if (0) { died0: toku_free(result); } - return r; - } - - //printf("Deserializing %lld datasize=%d\n", off, datasize); - bytevec magic; - rbuf_literal_bytes(rb, &magic, 8); - lazy_assert(!memcmp(magic, "tokuroll", 8)); - - result->layout_version = rbuf_int(rb); - lazy_assert(result->layout_version == FT_LAYOUT_VERSION); - result->layout_version_original = rbuf_int(rb); - result->layout_version_read_from_disk = result->layout_version; - result->build_id = rbuf_int(rb); - result->dirty = false; - //TODO: Maybe add descriptor (or just descriptor version) here eventually? - //TODO: This is hard.. everything is shared in a single dictionary. - rbuf_TXNID_PAIR(rb, &result->txnid); - result->sequence = rbuf_ulonglong(rb); - result->blocknum = rbuf_blocknum(rb); - if (result->blocknum.b != blocknum.b) { - r = toku_db_badformat(); - goto died0; - } - result->previous = rbuf_blocknum(rb); - result->rollentry_resident_bytecount = rbuf_ulonglong(rb); - - size_t arena_initial_size = rbuf_ulonglong(rb); - result->rollentry_arena = toku_memarena_create_presized(arena_initial_size); - if (0) { died1: toku_memarena_destroy(&result->rollentry_arena); goto died0; } - - //Load rollback entries - lazy_assert(rb->size > 4); - //Start with empty list - result->oldest_logentry = result->newest_logentry = NULL; - while (rb->ndone < rb->size) { - struct roll_entry *item; - uint32_t rollback_fsize = rbuf_int(rb); //Already read 4. 
Rest is 4 smaller - bytevec item_vec; - rbuf_literal_bytes(rb, &item_vec, rollback_fsize-4); - unsigned char* item_buf = (unsigned char*)item_vec; - r = toku_parse_rollback(item_buf, rollback_fsize-4, &item, result->rollentry_arena); - if (r!=0) { - r = toku_db_badformat(); - goto died1; - } - //Add to head of list - if (result->oldest_logentry) { - result->oldest_logentry->prev = item; - result->oldest_logentry = item; - item->prev = NULL; - } - else { - result->oldest_logentry = result->newest_logentry = item; - item->prev = NULL; - } - } - - toku_free(rb->buf); - rb->buf = NULL; - *log_p = result; - return 0; -} - -static int -deserialize_rollback_log_from_rbuf_versioned (uint32_t version, BLOCKNUM blocknum, - ROLLBACK_LOG_NODE *log, - struct rbuf *rb) { - int r = 0; - ROLLBACK_LOG_NODE rollback_log_node = NULL; - invariant(version==FT_LAYOUT_VERSION); //Rollback log nodes do not survive version changes. - r = deserialize_rollback_log_from_rbuf(blocknum, &rollback_log_node, rb); - if (r==0) { - *log = rollback_log_node; - } - return r; -} - -int -decompress_from_raw_block_into_rbuf(uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) { - int r = 0; - // get the number of compressed sub blocks - int n_sub_blocks; - n_sub_blocks = toku_dtoh32(*(uint32_t*)(&raw_block[node_header_overhead])); - - // verify the number of sub blocks - invariant(0 <= n_sub_blocks); - invariant(n_sub_blocks <= max_sub_blocks); - - { // verify the header checksum - uint32_t header_length = node_header_overhead + sub_block_header_size(n_sub_blocks); - invariant(header_length <= raw_block_size); - uint32_t xsum = toku_x1764_memory(raw_block, header_length); - uint32_t stored_xsum = toku_dtoh32(*(uint32_t *)(raw_block + header_length)); - if (xsum != stored_xsum) { - r = TOKUDB_BAD_CHECKSUM; - } - } - - // deserialize the sub block header - struct sub_block sub_block[n_sub_blocks]; - uint32_t *sub_block_header = (uint32_t *) &raw_block[node_header_overhead+4]; - 
for (int i = 0; i < n_sub_blocks; i++) { - sub_block_init(&sub_block[i]); - sub_block[i].compressed_size = toku_dtoh32(sub_block_header[0]); - sub_block[i].uncompressed_size = toku_dtoh32(sub_block_header[1]); - sub_block[i].xsum = toku_dtoh32(sub_block_header[2]); - sub_block_header += 3; - } - - // This predicate needs to be here and instead of where it is set - // for the compiler. - if (r == TOKUDB_BAD_CHECKSUM) { - goto exit; - } - - // verify sub block sizes - for (int i = 0; i < n_sub_blocks; i++) { - uint32_t compressed_size = sub_block[i].compressed_size; - if (compressed_size<=0 || compressed_size>(1<<30)) { - r = toku_db_badformat(); - goto exit; - } - - uint32_t uncompressed_size = sub_block[i].uncompressed_size; - if (0) printf("Block %" PRId64 " Compressed size = %u, uncompressed size=%u\n", blocknum.b, compressed_size, uncompressed_size); - if (uncompressed_size<=0 || uncompressed_size>(1<<30)) { - r = toku_db_badformat(); - goto exit; - } - } - - // sum up the uncompressed size of the sub blocks - size_t uncompressed_size; - uncompressed_size = get_sum_uncompressed_size(n_sub_blocks, sub_block); - - // allocate the uncompressed buffer - size_t size; - size = node_header_overhead + uncompressed_size; - unsigned char *buf; - XMALLOC_N(size, buf); - rbuf_init(rb, buf, size); - - // copy the uncompressed node header to the uncompressed buffer - memcpy(rb->buf, raw_block, node_header_overhead); - - // point at the start of the compressed data (past the node header, the sub block header, and the header checksum) - unsigned char *compressed_data; - compressed_data = raw_block + node_header_overhead + sub_block_header_size(n_sub_blocks) + sizeof (uint32_t); - - // point at the start of the uncompressed data - unsigned char *uncompressed_data; - uncompressed_data = rb->buf + node_header_overhead; - - // decompress all the compressed sub blocks into the uncompressed buffer - r = decompress_all_sub_blocks(n_sub_blocks, sub_block, compressed_data, 
uncompressed_data, num_cores, ft_pool); - if (r != 0) { - fprintf(stderr, "%s:%d block %" PRId64 " failed %d at %p size %lu\n", __FUNCTION__, __LINE__, blocknum.b, r, raw_block, raw_block_size); - dump_bad_block(raw_block, raw_block_size); - goto exit; - } - - rb->ndone=0; -exit: - return r; -} - -static int -decompress_from_raw_block_into_rbuf_versioned(uint32_t version, uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) { - // This function exists solely to accomodate future changes in compression. - int r = 0; - switch (version) { - case FT_LAYOUT_VERSION_13: - case FT_LAYOUT_VERSION_14: - case FT_LAYOUT_VERSION: - r = decompress_from_raw_block_into_rbuf(raw_block, raw_block_size, rb, blocknum); - break; - default: - abort(); - } - return r; -} - -static int -read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum, - DISKOFF offset, DISKOFF size, - FT h, - struct rbuf *rb, - /* out */ int *layout_version_p) { - int r = 0; - if (0) printf("Deserializing Block %" PRId64 "\n", blocknum.b); - - DISKOFF size_aligned = roundup_to_multiple(512, size); - uint8_t *XMALLOC_N_ALIGNED(512, size_aligned, raw_block); - { - // read the (partially compressed) block - ssize_t rlen = toku_os_pread(fd, raw_block, size_aligned, offset); - lazy_assert((DISKOFF)rlen >= size); - lazy_assert((DISKOFF)rlen <= size_aligned); - } - // get the layout_version - int layout_version; - { - uint8_t *magic = raw_block + uncompressed_magic_offset; - if (memcmp(magic, "tokuleaf", 8)!=0 && - memcmp(magic, "tokunode", 8)!=0 && - memcmp(magic, "tokuroll", 8)!=0) { - r = toku_db_badformat(); - goto cleanup; - } - uint8_t *version = raw_block + uncompressed_version_offset; - layout_version = toku_dtoh32(*(uint32_t*)version); - if (layout_version < FT_LAYOUT_MIN_SUPPORTED_VERSION || layout_version > FT_LAYOUT_VERSION) { - r = toku_db_badformat(); - goto cleanup; - } - } - - r = decompress_from_raw_block_into_rbuf_versioned(layout_version, raw_block, size, rb, 
blocknum); - if (r != 0) { - // We either failed the checksome, or there is a bad format in - // the buffer. - if (r == TOKUDB_BAD_CHECKSUM) { - fprintf(stderr, - "Checksum failure while reading raw block in file %s.\n", - toku_cachefile_fname_in_env(h->cf)); - abort(); - } else { - r = toku_db_badformat(); - goto cleanup; - } - } - - *layout_version_p = layout_version; -cleanup: - if (r!=0) { - if (rb->buf) toku_free(rb->buf); - rb->buf = NULL; - } - if (raw_block) { - toku_free(raw_block); - } - return r; -} - -// Read rollback log node from file into struct. Perform version upgrade if necessary. -int -toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT h) { - int layout_version = 0; - int r; - struct rbuf rb = {.buf = NULL, .size = 0, .ndone = 0}; - - // get the file offset and block size for the block - DISKOFF offset, size; - toku_translate_blocknum_to_offset_size(h->blocktable, blocknum, &offset, &size); - // if the size is 0, then the blocknum is unused - if (size == 0) { - // blocknum is unused, just create an empty one and get out - ROLLBACK_LOG_NODE XMALLOC(log); - rollback_empty_log_init(log); - log->blocknum.b = blocknum.b; - r = 0; - *logp = log; - goto cleanup; - } - - r = read_and_decompress_block_from_fd_into_rbuf(fd, blocknum, offset, size, h, &rb, &layout_version); - if (r!=0) goto cleanup; - - { - uint8_t *magic = rb.buf + uncompressed_magic_offset; - if (memcmp(magic, "tokuroll", 8)!=0) { - r = toku_db_badformat(); - goto cleanup; - } - } - - r = deserialize_rollback_log_from_rbuf_versioned(layout_version, blocknum, logp, &rb); - -cleanup: - if (rb.buf) toku_free(rb.buf); - return r; -} - -int -toku_upgrade_subtree_estimates_to_stat64info(int fd, FT h) -{ - int r = 0; - // 15 was the last version with subtree estimates - invariant(h->layout_version_read_from_disk <= FT_LAYOUT_VERSION_15); - - FTNODE unused_node = NULL; - FTNODE_DISK_DATA unused_ndd = NULL; - struct ftnode_fetch_extra bfe; - 
fill_bfe_for_min_read(&bfe, h); - r = deserialize_ftnode_from_fd(fd, h->h->root_blocknum, 0, &unused_node, &unused_ndd, - &bfe, &h->h->on_disk_stats); - h->in_memory_stats = h->h->on_disk_stats; - - if (unused_node) { - toku_ftnode_free(&unused_node); - } - if (unused_ndd) { - toku_free(unused_ndd); - } - return r; -} - -int -toku_upgrade_msn_from_root_to_header(int fd, FT h) -{ - int r; - // 21 was the first version with max_msn_in_ft in the header - invariant(h->layout_version_read_from_disk <= FT_LAYOUT_VERSION_20); - - FTNODE node; - FTNODE_DISK_DATA ndd; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, h); - r = deserialize_ftnode_from_fd(fd, h->h->root_blocknum, 0, &node, &ndd, &bfe, nullptr); - if (r != 0) { - goto exit; - } - - h->h->max_msn_in_ft = node->max_msn_applied_to_node_on_disk; - toku_ftnode_free(&node); - toku_free(ndd); - exit: - return r; -} - -#undef UPGRADE_STATUS_VALUE diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-ops.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-ops.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-ops.cc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-ops.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -168,7 +168,7 @@ return; If the child needs to be merged (it's a leaf with too little stuff (less than 1/4 full) or a nonleaf with too little fanout (less than 1/4) fetch node, the child and a sibling of the child into main memory. - move all messages from the node to the two children (so that the FIFOs are empty) + move all messages from the node to the two children (so that the message buffers are empty) If the two siblings together fit into one node then merge the two siblings. 
fixup the node to point at one child @@ -200,20 +200,26 @@ */ -#include "checkpoint.h" -#include "ft.h" -#include "ft-cachetable-wrappers.h" -#include "ft-flusher.h" -#include "ft-internal.h" -#include "ft_layout_version.h" -#include "key.h" -#include "log-internal.h" -#include "sub_block.h" -#include "txn_manager.h" -#include "leafentry.h" -#include "xids.h" -#include "ft_msg.h" -#include "ule.h" +#include + +#include "ft/cachetable/checkpoint.h" +#include "ft/cursor.h" +#include "ft/ft.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-flusher.h" +#include "ft/ft-internal.h" +#include "ft/msg.h" +#include "ft/leafentry.h" +#include "ft/logger/log-internal.h" +#include "ft/node.h" +#include "ft/serialize/block_table.h" +#include "ft/serialize/sub_block.h" +#include "ft/serialize/ft-serialize.h" +#include "ft/serialize/ft_layout_version.h" +#include "ft/serialize/ft_node-serialize.h" +#include "ft/txn/txn_manager.h" +#include "ft/ule.h" +#include "ft/txn/xids.h" #include @@ -233,7 +239,7 @@ */ static FT_STATUS_S ft_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ft_status, k, c, t, "ft: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ft_status, k, c, t, "ft: " l, inc) static toku_mutex_t ft_open_close_lock; @@ -422,6 +428,7 @@ } \ } while (0) + void toku_note_deserialized_basement_node(bool fixed_key_size) { if (fixed_key_size) { STATUS_INC(FT_BASEMENT_DESERIALIZE_FIXED_KEYSIZE, 1); @@ -430,172 +437,73 @@ } } -bool is_entire_node_in_memory(FTNODE node) { - for (int i = 0; i < node->n_children; i++) { - if(BP_STATE(node,i) != PT_AVAIL) { - return false; - } - } - return true; -} - -void -toku_assert_entire_node_in_memory(FTNODE UU() node) { - paranoid_invariant(is_entire_node_in_memory(node)); -} - -uint32_t -get_leaf_num_entries(FTNODE node) { - uint32_t result = 0; - int i; - toku_assert_entire_node_in_memory(node); - for ( i = 0; i < node->n_children; i++) { - result += BLB_DATA(node, i)->num_klpairs(); - } - return result; 
+static void ft_verify_flags(FT UU(ft), FTNODE UU(node)) { + paranoid_invariant(ft->h->flags == node->flags); } -static enum reactivity -get_leaf_reactivity (FTNODE node, uint32_t nodesize) { - enum reactivity re = RE_STABLE; - toku_assert_entire_node_in_memory(node); - paranoid_invariant(node->height==0); - unsigned int size = toku_serialize_ftnode_size(node); - if (size > nodesize && get_leaf_num_entries(node) > 1) { - re = RE_FISSIBLE; - } - else if ((size*4) < nodesize && !BLB_SEQINSERT(node, node->n_children-1)) { - re = RE_FUSIBLE; - } - return re; -} +int toku_ft_debug_mode = 0; -enum reactivity -get_nonleaf_reactivity(FTNODE node, unsigned int fanout) { +uint32_t compute_child_fullhash (CACHEFILE cf, FTNODE node, int childnum) { paranoid_invariant(node->height>0); - int n_children = node->n_children; - if (n_children > (int) fanout) return RE_FISSIBLE; - if (n_children*4 < (int) fanout) return RE_FUSIBLE; - return RE_STABLE; + paranoid_invariant(childnumn_children); + return toku_cachetable_hash(cf, BP_BLOCKNUM(node, childnum)); } -enum reactivity -get_node_reactivity(FT ft, FTNODE node) { - toku_assert_entire_node_in_memory(node); - if (node->height==0) - return get_leaf_reactivity(node, ft->h->nodesize); - else - return get_nonleaf_reactivity(node, ft->h->fanout); -} +// +// pivot bounds +// TODO: move me to ft/node.cc? +// -unsigned int -toku_bnc_nbytesinbuf(NONLEAF_CHILDINFO bnc) -{ - return toku_fifo_buffer_size_in_use(bnc->buffer); +pivot_bounds::pivot_bounds(const DBT &lbe_dbt, const DBT &ubi_dbt) : + _lower_bound_exclusive(lbe_dbt), _upper_bound_inclusive(ubi_dbt) { } -// return true if the size of the buffers plus the amount of work done is large enough. (But return false if there is nothing to be flushed (the buffers empty)). 
-bool -toku_ft_nonleaf_is_gorged (FTNODE node, uint32_t nodesize) { - uint64_t size = toku_serialize_ftnode_size(node); - - bool buffers_are_empty = true; - toku_assert_entire_node_in_memory(node); - // - // the nonleaf node is gorged if the following holds true: - // - the buffers are non-empty - // - the total workdone by the buffers PLUS the size of the buffers - // is greater than nodesize (which as of Maxwell should be - // 4MB) - // - paranoid_invariant(node->height > 0); - for (int child = 0; child < node->n_children; ++child) { - size += BP_WORKDONE(node, child); - } - for (int child = 0; child < node->n_children; ++child) { - if (toku_bnc_nbytesinbuf(BNC(node, child)) > 0) { - buffers_are_empty = false; - break; - } - } - return ((size > nodesize) - && - (!buffers_are_empty)); -} +pivot_bounds pivot_bounds::infinite_bounds() { + DBT dbt; + toku_init_dbt(&dbt); -static void ft_verify_flags(FT UU(ft), FTNODE UU(node)) { - paranoid_invariant(ft->h->flags == node->flags); + // infinity is represented by an empty dbt + invariant(toku_dbt_is_empty(&dbt)); + return pivot_bounds(dbt, dbt); } -int toku_ft_debug_mode = 0; - -uint32_t compute_child_fullhash (CACHEFILE cf, FTNODE node, int childnum) { - paranoid_invariant(node->height>0); - paranoid_invariant(childnumn_children); - return toku_cachetable_hash(cf, BP_BLOCKNUM(node, childnum)); +const DBT *pivot_bounds::lbe() const { + return &_lower_bound_exclusive; } -int -toku_bnc_n_entries(NONLEAF_CHILDINFO bnc) -{ - return toku_fifo_n_entries(bnc->buffer); +const DBT *pivot_bounds::ubi() const { + return &_upper_bound_inclusive; } -static const DBT *prepivotkey (FTNODE node, int childnum, const DBT * const lower_bound_exclusive) { - if (childnum==0) - return lower_bound_exclusive; - else { - return &node->childkeys[childnum-1]; +DBT pivot_bounds::_prepivotkey(FTNODE node, int childnum, const DBT &lbe_dbt) const { + if (childnum == 0) { + return lbe_dbt; + } else { + return node->pivotkeys.get_pivot(childnum - 1); } 
} -static const DBT *postpivotkey (FTNODE node, int childnum, const DBT * const upper_bound_inclusive) { - if (childnum+1 == node->n_children) - return upper_bound_inclusive; - else { - return &node->childkeys[childnum]; +DBT pivot_bounds::_postpivotkey(FTNODE node, int childnum, const DBT &ubi_dbt) const { + if (childnum + 1 == node->n_children) { + return ubi_dbt; + } else { + return node->pivotkeys.get_pivot(childnum); } } -static struct pivot_bounds next_pivot_keys (FTNODE node, int childnum, struct pivot_bounds const * const old_pb) { - struct pivot_bounds pb = {.lower_bound_exclusive = prepivotkey(node, childnum, old_pb->lower_bound_exclusive), - .upper_bound_inclusive = postpivotkey(node, childnum, old_pb->upper_bound_inclusive)}; - return pb; -} -// how much memory does this child buffer consume? -long -toku_bnc_memory_size(NONLEAF_CHILDINFO bnc) -{ - return (sizeof(*bnc) + - toku_fifo_memory_footprint(bnc->buffer) + - bnc->fresh_message_tree.memory_size() + - bnc->stale_message_tree.memory_size() + - bnc->broadcast_list.memory_size()); +pivot_bounds pivot_bounds::next_bounds(FTNODE node, int childnum) const { + return pivot_bounds(_prepivotkey(node, childnum, _lower_bound_exclusive), + _postpivotkey(node, childnum, _upper_bound_inclusive)); } -// how much memory in this child buffer holds useful data? -// originally created solely for use by test program(s). 
-long -toku_bnc_memory_used(NONLEAF_CHILDINFO bnc) -{ - return (sizeof(*bnc) + - toku_fifo_memory_size_in_use(bnc->buffer) + - bnc->fresh_message_tree.memory_size() + - bnc->stale_message_tree.memory_size() + - bnc->broadcast_list.memory_size()); -} +//////////////////////////////////////////////////////////////////////////////// -static long -get_avail_internal_node_partition_size(FTNODE node, int i) -{ +static long get_avail_internal_node_partition_size(FTNODE node, int i) { paranoid_invariant(node->height > 0); return toku_bnc_memory_size(BNC(node, i)); } - -static long -ftnode_cachepressure_size(FTNODE node) -{ +static long ftnode_cachepressure_size(FTNODE node) { long retval = 0; bool totally_empty = true; if (node->height == 0) { @@ -636,8 +544,7 @@ int n_children = node->n_children; retval += sizeof(*node); retval += (n_children)*(sizeof(node->bp[0])); - retval += (n_children > 0 ? n_children-1 : 0)*(sizeof(node->childkeys[0])); - retval += node->totalchildkeylens; + retval += node->pivotkeys.total_size(); // now calculate the sizes of the partitions for (int i = 0; i < n_children; i++) { @@ -703,42 +610,133 @@ return d; } -// -// Given a bfe and a childnum, returns whether the query that constructed the bfe -// wants the child available. 
-// Requires: bfe->child_to_read to have been set -// -bool -toku_bfe_wants_child_available (struct ftnode_fetch_extra* bfe, int childnum) -{ - return bfe->type == ftnode_fetch_all || - (bfe->child_to_read == childnum && - (bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_keymatch)); +// TODO: This isn't so pretty +void ftnode_fetch_extra::_create_internal(FT ft_) { + ft = ft_; + type = ftnode_fetch_none; + search = nullptr; + + toku_init_dbt(&range_lock_left_key); + toku_init_dbt(&range_lock_right_key); + left_is_neg_infty = false; + right_is_pos_infty = false; + + // -1 means 'unknown', which is the correct default state + child_to_read = -1; + disable_prefetching = false; + read_all_partitions = false; + + bytes_read = 0; + io_time = 0; + deserialize_time = 0; + decompress_time = 0; } -int -toku_bfe_leftmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node) -{ - paranoid_invariant(bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch || bfe->type == ftnode_fetch_keymatch); - if (bfe->left_is_neg_infty) { +void ftnode_fetch_extra::create_for_full_read(FT ft_) { + _create_internal(ft_); + + type = ftnode_fetch_all; +} + +void ftnode_fetch_extra::create_for_keymatch(FT ft_, const DBT *left, const DBT *right, + bool disable_prefetching_, bool read_all_partitions_) { + _create_internal(ft_); + invariant(ft->h->type == FT_CURRENT); + + type = ftnode_fetch_keymatch; + if (left != nullptr) { + toku_copyref_dbt(&range_lock_left_key, *left); + } + if (right != nullptr) { + toku_copyref_dbt(&range_lock_right_key, *right); + } + left_is_neg_infty = left == nullptr; + right_is_pos_infty = right == nullptr; + disable_prefetching = disable_prefetching_; + read_all_partitions = read_all_partitions_; +} + +void ftnode_fetch_extra::create_for_subset_read(FT ft_, ft_search *search_, + const DBT *left, const DBT *right, + bool left_is_neg_infty_, bool right_is_pos_infty_, + bool disable_prefetching_, bool read_all_partitions_) { + 
_create_internal(ft_); + invariant(ft->h->type == FT_CURRENT); + + type = ftnode_fetch_subset; + search = search_; + if (left != nullptr) { + toku_copyref_dbt(&range_lock_left_key, *left); + } + if (right != nullptr) { + toku_copyref_dbt(&range_lock_right_key, *right); + } + left_is_neg_infty = left_is_neg_infty_; + right_is_pos_infty = right_is_pos_infty_; + disable_prefetching = disable_prefetching_; + read_all_partitions = read_all_partitions_; +} + +void ftnode_fetch_extra::create_for_min_read(FT ft_) { + _create_internal(ft_); + invariant(ft->h->type == FT_CURRENT); + + type = ftnode_fetch_none; +} + +void ftnode_fetch_extra::create_for_prefetch(FT ft_, struct ft_cursor *cursor) { + _create_internal(ft_); + invariant(ft->h->type == FT_CURRENT); + + type = ftnode_fetch_prefetch; + const DBT *left = &cursor->range_lock_left_key; + if (left->data) { + toku_clone_dbt(&range_lock_left_key, *left); + } + const DBT *right = &cursor->range_lock_right_key; + if (right->data) { + toku_clone_dbt(&range_lock_right_key, *right); + } + left_is_neg_infty = cursor->left_is_neg_infty; + right_is_pos_infty = cursor->right_is_pos_infty; + disable_prefetching = cursor->disable_prefetching; +} + +void ftnode_fetch_extra::destroy(void) { + toku_destroy_dbt(&range_lock_left_key); + toku_destroy_dbt(&range_lock_right_key); +} + +// Requires: child_to_read to have been set +bool ftnode_fetch_extra::wants_child_available(int childnum) const { + return type == ftnode_fetch_all || + (child_to_read == childnum && + (type == ftnode_fetch_subset || type == ftnode_fetch_keymatch)); +} + +int ftnode_fetch_extra::leftmost_child_wanted(FTNODE node) const { + paranoid_invariant(type == ftnode_fetch_subset || + type == ftnode_fetch_prefetch || + type == ftnode_fetch_keymatch); + if (left_is_neg_infty) { return 0; - } else if (bfe->range_lock_left_key.data == nullptr) { + } else if (range_lock_left_key.data == nullptr) { return -1; } else { - return toku_ftnode_which_child(node, 
&bfe->range_lock_left_key, &bfe->h->cmp_descriptor, bfe->h->compare_fun); + return toku_ftnode_which_child(node, &range_lock_left_key, ft->cmp); } } -int -toku_bfe_rightmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node) -{ - paranoid_invariant(bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch || bfe->type == ftnode_fetch_keymatch); - if (bfe->right_is_pos_infty) { +int ftnode_fetch_extra::rightmost_child_wanted(FTNODE node) const { + paranoid_invariant(type == ftnode_fetch_subset || + type == ftnode_fetch_prefetch || + type == ftnode_fetch_keymatch); + if (right_is_pos_infty) { return node->n_children - 1; - } else if (bfe->range_lock_right_key.data == nullptr) { + } else if (range_lock_right_key.data == nullptr) { return -1; } else { - return toku_ftnode_which_child(node, &bfe->range_lock_right_key, &bfe->h->cmp_descriptor, bfe->h->compare_fun); + return toku_ftnode_which_child(node, &range_lock_right_key, ft->cmp); } } @@ -750,7 +748,7 @@ } else if (cursor->range_lock_right_key.data == nullptr) { return -1; } else { - return toku_ftnode_which_child(node, &cursor->range_lock_right_key, &ft_handle->ft->cmp_descriptor, ft_handle->ft->compare_fun); + return toku_ftnode_which_child(node, &cursor->range_lock_right_key, ft_handle->ft->cmp); } } @@ -801,45 +799,6 @@ } } -static void ftnode_update_disk_stats( - FTNODE ftnode, - FT ft, - bool for_checkpoint - ) -{ - STAT64INFO_S deltas = ZEROSTATS; - // capture deltas before rebalancing basements for serialization - deltas = toku_get_and_clear_basement_stats(ftnode); - // locking not necessary here with respect to checkpointing - // in Clayface (because of the pending lock and cachetable lock - // in toku_cachetable_begin_checkpoint) - // essentially, if we are dealing with a for_checkpoint - // parameter in a function that is called by the flush_callback, - // then the cachetable needs to ensure that this is called in a safe - // manner that does not interfere with the beginning - // of a 
checkpoint, which it does with the cachetable lock - // and pending lock - toku_ft_update_stats(&ft->h->on_disk_stats, deltas); - if (for_checkpoint) { - toku_ft_update_stats(&ft->checkpoint_header->on_disk_stats, deltas); - } -} - -static void ftnode_clone_partitions(FTNODE node, FTNODE cloned_node) { - for (int i = 0; i < node->n_children; i++) { - BP_BLOCKNUM(cloned_node,i) = BP_BLOCKNUM(node,i); - paranoid_invariant(BP_STATE(node,i) == PT_AVAIL); - BP_STATE(cloned_node,i) = PT_AVAIL; - BP_WORKDONE(cloned_node, i) = BP_WORKDONE(node, i); - if (node->height == 0) { - set_BLB(cloned_node, i, toku_clone_bn(BLB(node,i))); - } - else { - set_BNC(cloned_node, i, toku_clone_nl(BNC(node,i))); - } - } -} - void toku_ftnode_checkpoint_complete_callback(void *value_data) { FTNODE node = static_cast(value_data); if (node->height > 0) { @@ -863,20 +822,20 @@ ) { FTNODE node = static_cast(value_data); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); FT ft = static_cast(write_extraargs); FTNODE XCALLOC(cloned_node); if (node->height == 0) { // set header stats, must be done before rebalancing - ftnode_update_disk_stats(node, ft, for_checkpoint); + toku_ftnode_update_disk_stats(node, ft, for_checkpoint); // rebalance the leaf node - rebalance_ftnode_leaf(node, ft->h->basementnodesize); + toku_ftnode_leaf_rebalance(node, ft->h->basementnodesize); } cloned_node->oldest_referenced_xid_known = node->oldest_referenced_xid_known; cloned_node->max_msn_applied_to_node_on_disk = node->max_msn_applied_to_node_on_disk; cloned_node->flags = node->flags; - cloned_node->thisnodename = node->thisnodename; + cloned_node->blocknum = node->blocknum; cloned_node->layout_version = node->layout_version; cloned_node->layout_version_original = node->layout_version_original; cloned_node->layout_version_read_from_disk = node->layout_version_read_from_disk; @@ -885,21 +844,17 @@ cloned_node->dirty = node->dirty; cloned_node->fullhash = node->fullhash; 
cloned_node->n_children = node->n_children; - cloned_node->totalchildkeylens = node->totalchildkeylens; - XMALLOC_N(node->n_children-1, cloned_node->childkeys); XMALLOC_N(node->n_children, cloned_node->bp); // clone pivots - for (int i = 0; i < node->n_children-1; i++) { - toku_clone_dbt(&cloned_node->childkeys[i], node->childkeys[i]); - } + cloned_node->pivotkeys.create_from_pivot_keys(node->pivotkeys); if (node->height > 0) { // need to move messages here so that we don't serialize stale // messages to the fresh tree - ft verify code complains otherwise. toku_move_ftnode_messages_to_stale(ft, node); } // clone partition - ftnode_clone_partitions(node, cloned_node); + toku_ftnode_clone_partitions(node, cloned_node); // clear dirty bit node->dirty = 0; @@ -916,12 +871,10 @@ *cloned_value_data = cloned_node; } -static void ft_leaf_run_gc(FT ft, FTNODE node); - void toku_ftnode_flush_callback( CACHEFILE UU(cachefile), int fd, - BLOCKNUM nodename, + BLOCKNUM blocknum, void *ftnode_v, void** disk_data, void *extraargs, @@ -933,23 +886,23 @@ bool is_clone ) { - FT h = (FT) extraargs; + FT ft = (FT) extraargs; FTNODE ftnode = (FTNODE) ftnode_v; FTNODE_DISK_DATA* ndd = (FTNODE_DISK_DATA*)disk_data; - assert(ftnode->thisnodename.b==nodename.b); + assert(ftnode->blocknum.b == blocknum.b); int height = ftnode->height; if (write_me) { - toku_assert_entire_node_in_memory(ftnode); + toku_ftnode_assert_fully_in_memory(ftnode); if (height > 0 && !is_clone) { // cloned nodes already had their stale messages moved, see toku_ftnode_clone_callback() - toku_move_ftnode_messages_to_stale(h, ftnode); + toku_move_ftnode_messages_to_stale(ft, ftnode); } else if (height == 0) { - ft_leaf_run_gc(h, ftnode); + toku_ftnode_leaf_run_gc(ft, ftnode); if (!is_clone) { - ftnode_update_disk_stats(ftnode, h, for_checkpoint); + toku_ftnode_update_disk_stats(ftnode, ft, for_checkpoint); } } - int r = toku_serialize_ftnode_to(fd, ftnode->thisnodename, ftnode, ndd, !is_clone, h, for_checkpoint); + int r 
= toku_serialize_ftnode_to(fd, ftnode->blocknum, ftnode, ndd, !is_clone, ft, for_checkpoint); assert_zero(r); ftnode->layout_version_read_from_disk = FT_LAYOUT_VERSION; } @@ -970,7 +923,7 @@ for (int i = 0; i < ftnode->n_children; i++) { if (BP_STATE(ftnode,i) == PT_AVAIL) { BASEMENTNODE bn = BLB(ftnode, i); - toku_ft_decrease_stats(&h->in_memory_stats, bn->stat64_delta); + toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta); } } } @@ -983,7 +936,7 @@ } void -toku_ft_status_update_pivot_fetch_reason(struct ftnode_fetch_extra *bfe) +toku_ft_status_update_pivot_fetch_reason(ftnode_fetch_extra *bfe) { if (bfe->type == ftnode_fetch_prefetch) { STATUS_INC(FT_NUM_PIVOTS_FETCHED_PREFETCH, 1); @@ -1000,17 +953,17 @@ } } -int toku_ftnode_fetch_callback (CACHEFILE UU(cachefile), PAIR p, int fd, BLOCKNUM nodename, uint32_t fullhash, +int toku_ftnode_fetch_callback (CACHEFILE UU(cachefile), PAIR p, int fd, BLOCKNUM blocknum, uint32_t fullhash, void **ftnode_pv, void** disk_data, PAIR_ATTR *sizep, int *dirtyp, void *extraargs) { assert(extraargs); assert(*ftnode_pv == NULL); FTNODE_DISK_DATA* ndd = (FTNODE_DISK_DATA*)disk_data; - struct ftnode_fetch_extra *bfe = (struct ftnode_fetch_extra *)extraargs; + ftnode_fetch_extra *bfe = (ftnode_fetch_extra *)extraargs; FTNODE *node=(FTNODE*)ftnode_pv; // deserialize the node, must pass the bfe in because we cannot // evaluate what piece of the the node is necessary until we get it at // least partially into memory - int r = toku_deserialize_ftnode_from(fd, nodename, fullhash, node, ndd, bfe); + int r = toku_deserialize_ftnode_from(fd, blocknum, fullhash, node, ndd, bfe); if (r != 0) { if (r == TOKUDB_BAD_CHECKSUM) { fprintf(stderr, @@ -1090,12 +1043,8 @@ return; } -static void ft_bnc_move_messages_to_stale(FT ft, NONLEAF_CHILDINFO bnc); - // replace the child buffer with a compressed version of itself. 
-static void -compress_internal_node_partition(FTNODE node, int i, enum toku_compression_method compression_method) -{ +static void compress_internal_node_partition(FTNODE node, int i, enum toku_compression_method compression_method) { // if we should evict, compress the // message buffer into a sub_block assert(BP_STATE(node, i) == PT_AVAIL); @@ -1109,24 +1058,6 @@ BP_STATE(node,i) = PT_COMPRESSED; } -void toku_evict_bn_from_memory(FTNODE node, int childnum, FT h) { - // free the basement node - assert(!node->dirty); - BASEMENTNODE bn = BLB(node, childnum); - toku_ft_decrease_stats(&h->in_memory_stats, bn->stat64_delta); - destroy_basement_node(bn); - set_BNULL(node, childnum); - BP_STATE(node, childnum) = PT_ON_DISK; -} - -BASEMENTNODE toku_detach_bn(FTNODE node, int childnum) { - assert(BP_STATE(node, childnum) == PT_AVAIL); - BASEMENTNODE bn = BLB(node, childnum); - set_BNULL(node, childnum); - BP_STATE(node, childnum) = PT_ON_DISK; - return bn; -} - // callback for partially evicting a node int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_extraargs, void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra) { @@ -1170,7 +1101,7 @@ // this rule would cause upgrade code to upgrade this partition // again after we serialize it as the current version, which is bad. 
node->layout_version == node->layout_version_read_from_disk) { - ft_bnc_move_messages_to_stale(ft, bnc); + toku_ft_bnc_move_messages_to_stale(ft, bnc); compress_internal_node_partition( node, i, @@ -1287,7 +1218,7 @@ // placeholder for now bool retval = false; FTNODE node = (FTNODE) ftnode_pv; - struct ftnode_fetch_extra *bfe = (struct ftnode_fetch_extra *) read_extraargs; + ftnode_fetch_extra *bfe = (ftnode_fetch_extra *) read_extraargs; // // The three types of fetches that the ft layer may request are: // - ftnode_fetch_none: no partitions are necessary (example use: stat64) @@ -1317,11 +1248,9 @@ // we can possibly require is a single basement node // we find out what basement node the query cares about // and check if it is available - paranoid_invariant(bfe->h->compare_fun); paranoid_invariant(bfe->search); bfe->child_to_read = toku_ft_search_which_child( - &bfe->h->cmp_descriptor, - bfe->h->compare_fun, + bfe->ft->cmp, node, bfe->search ); @@ -1333,8 +1262,8 @@ // makes no sense to have prefetching disabled // and still call this function paranoid_invariant(!bfe->disable_prefetching); - int lc = toku_bfe_leftmost_child_wanted(bfe, node); - int rc = toku_bfe_rightmost_child_wanted(bfe, node); + int lc = bfe->leftmost_child_wanted(node); + int rc = bfe->rightmost_child_wanted(node); for (int i = lc; i <= rc; ++i) { if (BP_STATE(node, i) != PT_AVAIL) { retval = true; @@ -1346,10 +1275,9 @@ // we can possibly require is a single basement node // we find out what basement node the query cares about // and check if it is available - paranoid_invariant(bfe->h->compare_fun); if (node->height == 0) { - int left_child = toku_bfe_leftmost_child_wanted(bfe, node); - int right_child = toku_bfe_rightmost_child_wanted(bfe, node); + int left_child = bfe->leftmost_child_wanted(node); + int right_child = bfe->rightmost_child_wanted(node); if (left_child == right_child) { bfe->child_to_read = left_child; unsafe_touch_clock(node,bfe->child_to_read); @@ -1366,7 +1294,7 @@ static 
void ft_status_update_partial_fetch_reason( - struct ftnode_fetch_extra* bfe, + ftnode_fetch_extra *bfe, int childnum, enum pt_state state, bool is_leaf @@ -1465,13 +1393,41 @@ } } +void toku_ft_status_note_msn_discard(void) { + STATUS_INC(FT_MSN_DISCARDS, 1); +} + +void toku_ft_status_note_update(bool broadcast) { + if (broadcast) { + STATUS_INC(FT_UPDATES_BROADCAST, 1); + } else { + STATUS_INC(FT_UPDATES, 1); + } +} + +void toku_ft_status_note_msg_bytes_out(size_t buffsize) { + STATUS_INC(FT_MSG_BYTES_OUT, buffsize); + STATUS_INC(FT_MSG_BYTES_CURR, -buffsize); +} +void toku_ft_status_note_ftnode(int height, bool created) { + if (created) { + if (height == 0) { + STATUS_INC(FT_CREATE_LEAF, 1); + } else { + STATUS_INC(FT_CREATE_NONLEAF, 1); + } + } else { + // created = false means destroyed + } +} + // callback for partially reading a node // could have just used toku_ftnode_fetch_callback, but wanted to separate the two cases to separate functions int toku_ftnode_pf_callback(void* ftnode_pv, void* disk_data, void* read_extraargs, int fd, PAIR_ATTR* sizep) { int r = 0; FTNODE node = (FTNODE) ftnode_pv; FTNODE_DISK_DATA ndd = (FTNODE_DISK_DATA) disk_data; - struct ftnode_fetch_extra *bfe = (struct ftnode_fetch_extra *) read_extraargs; + ftnode_fetch_extra *bfe = (ftnode_fetch_extra *) read_extraargs; // there must be a reason this is being called. 
If we get a garbage type or the type is ftnode_fetch_none, // then something went wrong assert((bfe->type == ftnode_fetch_subset) || (bfe->type == ftnode_fetch_all) || (bfe->type == ftnode_fetch_prefetch) || (bfe->type == ftnode_fetch_keymatch)); @@ -1481,8 +1437,8 @@ (bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch) ) { - lc = toku_bfe_leftmost_child_wanted(bfe, node); - rc = toku_bfe_rightmost_child_wanted(bfe, node); + lc = bfe->leftmost_child_wanted(node); + rc = bfe->rightmost_child_wanted(node); } else { lc = -1; rc = -1; @@ -1491,7 +1447,7 @@ if (BP_STATE(node,i) == PT_AVAIL) { continue; } - if ((lc <= i && i <= rc) || toku_bfe_wants_child_available(bfe, i)) { + if ((lc <= i && i <= rc) || bfe->wants_child_available(i)) { enum pt_state state = BP_STATE(node, i); if (state == PT_COMPRESSED) { r = toku_deserialize_bp_from_compressed(node, i, bfe); @@ -1506,7 +1462,7 @@ if (r == TOKUDB_BAD_CHECKSUM) { fprintf(stderr, "Checksum failure while reading node partition in file %s.\n", - toku_cachefile_fname_in_env(bfe->h->cf)); + toku_cachefile_fname_in_env(bfe->ft->cf)); } else { fprintf(stderr, "Error while reading node partition %d\n", @@ -1521,118 +1477,8 @@ return 0; } -struct msg_leafval_heaviside_extra { - ft_compare_func compare_fun; - DESCRIPTOR desc; - DBT const * const key; -}; - -//TODO: #1125 optimize -static int -toku_msg_leafval_heaviside(DBT const &kdbt, const struct msg_leafval_heaviside_extra &be) { - FAKE_DB(db, be.desc); - DBT const * const key = be.key; - return be.compare_fun(&db, &kdbt, key); -} - -static int -ft_compare_pivot(DESCRIPTOR desc, ft_compare_func cmp, const DBT *key, const DBT *pivot) -{ - int r; - FAKE_DB(db, desc); - r = cmp(&db, key, pivot); - return r; -} - - -// destroys the internals of the ftnode, but it does not free the values -// that are stored -// this is common functionality for toku_ftnode_free and rebalance_ftnode_leaf -// MUST NOT do anything besides free the structures that have been 
allocated -void toku_destroy_ftnode_internals(FTNODE node) -{ - for (int i=0; in_children-1; i++) { - toku_destroy_dbt(&node->childkeys[i]); - } - toku_free(node->childkeys); - node->childkeys = NULL; - - for (int i=0; i < node->n_children; i++) { - if (BP_STATE(node,i) == PT_AVAIL) { - if (node->height > 0) { - destroy_nonleaf_childinfo(BNC(node,i)); - } else { - destroy_basement_node(BLB(node, i)); - } - } else if (BP_STATE(node,i) == PT_COMPRESSED) { - SUB_BLOCK sb = BSB(node,i); - toku_free(sb->compressed_ptr); - toku_free(sb); - } else { - paranoid_invariant(is_BNULL(node, i)); - } - set_BNULL(node, i); - } - toku_free(node->bp); - node->bp = NULL; -} - -/* Frees a node, including all the stuff in the hash table. */ -void toku_ftnode_free(FTNODE *nodep) { - FTNODE node = *nodep; - if (node->height == 0) { - STATUS_INC(FT_DESTROY_LEAF, 1); - } else { - STATUS_INC(FT_DESTROY_NONLEAF, 1); - } - toku_destroy_ftnode_internals(node); - toku_free(node); - *nodep = nullptr; -} - -void -toku_initialize_empty_ftnode (FTNODE n, BLOCKNUM nodename, int height, int num_children, int layout_version, unsigned int flags) -// Effect: Fill in N as an empty ftnode. 
-{ - paranoid_invariant(layout_version != 0); - paranoid_invariant(height >= 0); - - if (height == 0) { - STATUS_INC(FT_CREATE_LEAF, 1); - } else { - STATUS_INC(FT_CREATE_NONLEAF, 1); - } - - n->max_msn_applied_to_node_on_disk = ZERO_MSN; // correct value for root node, harmless for others - n->flags = flags; - n->thisnodename = nodename; - n->layout_version = layout_version; - n->layout_version_original = layout_version; - n->layout_version_read_from_disk = layout_version; - n->height = height; - n->totalchildkeylens = 0; - n->childkeys = 0; - n->bp = 0; - n->n_children = num_children; - n->oldest_referenced_xid_known = TXNID_NONE; - - if (num_children > 0) { - XMALLOC_N(num_children-1, n->childkeys); - XMALLOC_N(num_children, n->bp); - for (int i = 0; i < num_children; i++) { - BP_BLOCKNUM(n,i).b=0; - BP_STATE(n,i) = PT_INVALID; - BP_WORKDONE(n,i) = 0; - BP_INIT_TOUCHED_CLOCK(n, i); - set_BNULL(n,i); - if (height > 0) { - set_BNC(n, i, toku_create_empty_nl()); - } else { - set_BLB(n, i, toku_create_empty_bn()); - } - } - } - n->dirty = 1; // special case exception, it's okay to mark as dirty because the basements are empty +int toku_msg_leafval_heaviside(DBT const &kdbt, const struct toku_msg_leafval_heaviside_extra &be) { + return be.cmp(&kdbt, be.key); } static void @@ -1643,7 +1489,7 @@ { FTNODE newroot; - BLOCKNUM old_blocknum = oldroot->thisnodename; + BLOCKNUM old_blocknum = oldroot->blocknum; uint32_t old_fullhash = oldroot->fullhash; int new_height = oldroot->height+1; @@ -1693,8 +1539,8 @@ // ft_split_child released locks on newroot // and oldroot, so now we repin and // return to caller - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); toku_pin_ftnode( ft, old_blocknum, @@ -1706,3587 +1552,1796 @@ ); } -static void -init_childinfo(FTNODE node, int childnum, FTNODE child) { - BP_BLOCKNUM(node,childnum) = child->thisnodename; - BP_STATE(node,childnum) = PT_AVAIL; - 
BP_WORKDONE(node, childnum) = 0; - set_BNC(node, childnum, toku_create_empty_nl()); -} - -static void -init_childkey(FTNODE node, int childnum, const DBT *pivotkey) { - toku_clone_dbt(&node->childkeys[childnum], *pivotkey); - node->totalchildkeylens += pivotkey->size; -} +static void inject_message_in_locked_node( + FT ft, + FTNODE node, + int childnum, + const ft_msg &msg, + size_t flow_deltas[], + txn_gc_info *gc_info + ) +{ + // No guarantee that we're the writer, but oh well. + // TODO(leif): Implement "do I have the lock or is it someone else?" + // check in frwlock. Should be possible with TOKU_PTHREAD_DEBUG, nop + // otherwise. + invariant(toku_ctpair_is_write_locked(node->ct_pair)); + toku_ftnode_assert_fully_in_memory(node); -// Used only by test programs: append a child node to a parent node -void -toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey) { - int childnum = node->n_children; - node->n_children++; - XREALLOC_N(node->n_children, node->bp); - init_childinfo(node, childnum, child); - XREALLOC_N(node->n_children-1, node->childkeys); - if (pivotkey) { - invariant(childnum > 0); - init_childkey(node, childnum-1, pivotkey); + // Take the newer of the two oldest referenced xid values from the node and gc_info. + // The gc_info usually has a newer value, because we got it at the top of this call + // stack from the txn manager. But sometimes the node has a newer value, if some + // other thread sees a newer value and writes to this node before we got the lock. 
+ if (gc_info->oldest_referenced_xid_for_implicit_promotion > node->oldest_referenced_xid_known) { + node->oldest_referenced_xid_known = gc_info->oldest_referenced_xid_for_implicit_promotion; + } else if (gc_info->oldest_referenced_xid_for_implicit_promotion < node->oldest_referenced_xid_known) { + gc_info->oldest_referenced_xid_for_implicit_promotion = node->oldest_referenced_xid_known; } - node->dirty = 1; -} - -void -toku_ft_bn_apply_msg_once ( - BASEMENTNODE bn, - const FT_MSG msg, - uint32_t idx, - LEAFENTRY le, - txn_gc_info *gc_info, - uint64_t *workdone, - STAT64INFO stats_to_update - ) -// Effect: Apply msg to leafentry (msn is ignored) -// Calculate work done by message on leafentry and add it to caller's workdone counter. -// idx is the location where it goes -// le is old leafentry -{ - size_t newsize=0, oldsize=0, workdone_this_le=0; - LEAFENTRY new_le=0; - int64_t numbytes_delta = 0; // how many bytes of user data (not including overhead) were added or deleted from this row - int64_t numrows_delta = 0; // will be +1 or -1 or 0 (if row was added or deleted or not) - uint32_t key_storage_size = ft_msg_get_keylen(msg) + sizeof(uint32_t); - if (le) { - oldsize = leafentry_memsize(le) + key_storage_size; - } - - // toku_le_apply_msg() may call bn_data::mempool_malloc_and_update_dmt() to allocate more space. - // That means le is guaranteed to not cause a sigsegv but it may point to a mempool that is - // no longer in use. We'll have to release the old mempool later. - toku_le_apply_msg( - msg, - le, - &bn->data_buffer, - idx, - gc_info, - &new_le, - &numbytes_delta - ); - // at this point, we cannot trust cmd->u.id.key to be valid. - // The dmt may have realloced its mempool and freed the one containing key. - newsize = new_le ? (leafentry_memsize(new_le) + + key_storage_size) : 0; - if (le && new_le) { - workdone_this_le = (oldsize > newsize ? 
oldsize : newsize); // work done is max of le size before and after message application + // Get the MSN from the header. Now that we have a write lock on the + // node we're injecting into, we know no other thread will get an MSN + // after us and get that message into our subtree before us. + MSN msg_msn = { .msn = toku_sync_add_and_fetch(&ft->h->max_msn_in_ft.msn, 1) }; + ft_msg msg_with_msn(msg.kdbt(), msg.vdbt(), msg.type(), msg_msn, msg.xids()); + paranoid_invariant(msg_with_msn.msn().msn > node->max_msn_applied_to_node_on_disk.msn); - } else { // we did not just replace a row, so ... - if (le) { - // ... we just deleted a row ... - workdone_this_le = oldsize; - numrows_delta = -1; - } - if (new_le) { - // ... or we just added a row - workdone_this_le = newsize; - numrows_delta = 1; - } - } - if (workdone) { // test programs may call with NULL - *workdone += workdone_this_le; + STAT64INFO_S stats_delta = {0,0}; + toku_ftnode_put_msg( + ft->cmp, + ft->update_fun, + node, + childnum, + msg_with_msn, + true, + gc_info, + flow_deltas, + &stats_delta + ); + if (stats_delta.numbytes || stats_delta.numrows) { + toku_ft_update_stats(&ft->in_memory_stats, stats_delta); } + // + // assumption is that toku_ftnode_put_msg will + // mark the node as dirty. + // enforcing invariant here. 
+ // + paranoid_invariant(node->dirty != 0); - // now update stat64 statistics - bn->stat64_delta.numrows += numrows_delta; - bn->stat64_delta.numbytes += numbytes_delta; - // the only reason stats_to_update may be null is for tests - if (stats_to_update) { - stats_to_update->numrows += numrows_delta; - stats_to_update->numbytes += numbytes_delta; + // update some status variables + if (node->height != 0) { + size_t msgsize = msg.total_size(); + STATUS_INC(FT_MSG_BYTES_IN, msgsize); + STATUS_INC(FT_MSG_BYTES_CURR, msgsize); + STATUS_INC(FT_MSG_NUM, 1); + if (ft_msg_type_applies_all(msg.type())) { + STATUS_INC(FT_MSG_NUM_BROADCAST, 1); + } } -} - -static const uint32_t setval_tag = 0xee0ccb99; // this was gotten by doing "cat /dev/random|head -c4|od -x" to get a random number. We want to make sure that the user actually passes us the setval_extra_s that we passed in. -struct setval_extra_s { - uint32_t tag; - bool did_set_val; - int setval_r; // any error code that setval_fun wants to return goes here. - // need arguments for toku_ft_bn_apply_msg_once - BASEMENTNODE bn; - MSN msn; // captured from original message, not currently used - XIDS xids; - const DBT *key; - uint32_t idx; - LEAFENTRY le; - txn_gc_info *gc_info; - uint64_t * workdone; // set by toku_ft_bn_apply_msg_once() - STAT64INFO stats_to_update; -}; + // verify that msn of latest message was captured in root node + paranoid_invariant(msg_with_msn.msn().msn == node->max_msn_applied_to_node_on_disk.msn); -/* - * If new_val == NULL, we send a delete message instead of an insert. - * This happens here instead of in do_delete() for consistency. - * setval_fun() is called from handlerton, passing in svextra_v - * from setval_extra_s input arg to ft->update_fun(). 
- */ -static void setval_fun (const DBT *new_val, void *svextra_v) { - struct setval_extra_s *CAST_FROM_VOIDP(svextra, svextra_v); - paranoid_invariant(svextra->tag==setval_tag); - paranoid_invariant(!svextra->did_set_val); - svextra->did_set_val = true; + if (node->blocknum.b == ft->rightmost_blocknum.b) { + if (ft->seqinsert_score < FT_SEQINSERT_SCORE_THRESHOLD) { + // we promoted to the rightmost leaf node and the seqinsert score has not yet saturated. + toku_sync_fetch_and_add(&ft->seqinsert_score, 1); + } + } else if (ft->seqinsert_score != 0) { + // we promoted to something other than the rightmost leaf node and the score should reset + ft->seqinsert_score = 0; + } - { - // can't leave scope until toku_ft_bn_apply_msg_once if - // this is a delete - DBT val; - FT_MSG_S msg = { FT_NONE, svextra->msn, svextra->xids, - .u = { .id = {svextra->key, NULL} } }; - if (new_val) { - msg.type = FT_INSERT; - msg.u.id.val = new_val; - } else { - msg.type = FT_DELETE_ANY; - toku_init_dbt(&val); - msg.u.id.val = &val; - } - toku_ft_bn_apply_msg_once(svextra->bn, &msg, - svextra->idx, svextra->le, - svextra->gc_info, - svextra->workdone, svextra->stats_to_update); - svextra->setval_r = 0; + // if we call toku_ft_flush_some_child, then that function unpins the root + // otherwise, we unpin ourselves + if (node->height > 0 && toku_ftnode_nonleaf_is_gorged(node, ft->h->nodesize)) { + toku_ft_flush_node_on_background_thread(ft, node); + } + else { + toku_unpin_ftnode(ft, node); } } -// We are already past the msn filter (in toku_ft_bn_apply_msg(), which calls do_update()), -// so capturing the msn in the setval_extra_s is not strictly required. The alternative -// would be to put a dummy msn in the messages created by setval_fun(), but preserving -// the original msn seems cleaner and it preserves accountability at a lower layer. 
-static int do_update(ft_update_func update_fun, DESCRIPTOR desc, BASEMENTNODE bn, FT_MSG msg, uint32_t idx, - LEAFENTRY le, - void* keydata, - uint32_t keylen, - txn_gc_info *gc_info, - uint64_t * workdone, - STAT64INFO stats_to_update) { - LEAFENTRY le_for_update; - DBT key; - const DBT *keyp; - const DBT *update_function_extra; - DBT vdbt; - const DBT *vdbtp; - - // the location of data depends whether this is a regular or - // broadcast update - if (msg->type == FT_UPDATE) { - // key is passed in with command (should be same as from le) - // update function extra is passed in with command - STATUS_INC(FT_UPDATES, 1); - keyp = msg->u.id.key; - update_function_extra = msg->u.id.val; - } else if (msg->type == FT_UPDATE_BROADCAST_ALL) { - // key is not passed in with broadcast, it comes from le - // update function extra is passed in with command - paranoid_invariant(le); // for broadcast updates, we just hit all leafentries - // so this cannot be null - paranoid_invariant(keydata); - paranoid_invariant(keylen); - paranoid_invariant(msg->u.id.key->size == 0); - STATUS_INC(FT_UPDATES_BROADCAST, 1); - keyp = toku_fill_dbt(&key, keydata, keylen); - update_function_extra = msg->u.id.val; - } else { - abort(); - } - - if (le && !le_latest_is_del(le)) { - // if the latest val exists, use it, and we'll use the leafentry later - uint32_t vallen; - void *valp = le_latest_val_and_len(le, &vallen); - vdbtp = toku_fill_dbt(&vdbt, valp, vallen); - } else { - // otherwise, the val and leafentry are both going to be null - vdbtp = NULL; - } - le_for_update = le; - - struct setval_extra_s setval_extra = {setval_tag, false, 0, bn, msg->msn, msg->xids, - keyp, idx, le_for_update, gc_info, - workdone, stats_to_update}; - // call handlerton's ft->update_fun(), which passes setval_extra to setval_fun() - FAKE_DB(db, desc); - int r = update_fun( - &db, - keyp, - vdbtp, - update_function_extra, - setval_fun, &setval_extra - ); - - if (r == 0) { r = setval_extra.setval_r; } - return r; -} 
+// seqinsert_loc is a bitmask. +// The root counts as being both on the "left extreme" and on the "right extreme". +// Therefore, at the root, you're at LEFT_EXTREME | RIGHT_EXTREME. +typedef char seqinsert_loc; +static const seqinsert_loc NEITHER_EXTREME = 0; +static const seqinsert_loc LEFT_EXTREME = 1; +static const seqinsert_loc RIGHT_EXTREME = 2; -// Should be renamed as something like "apply_msg_to_basement()." -void -toku_ft_bn_apply_msg ( - ft_compare_func compare_fun, - ft_update_func update_fun, - DESCRIPTOR desc, - BASEMENTNODE bn, - FT_MSG msg, - txn_gc_info *gc_info, - uint64_t *workdone, - STAT64INFO stats_to_update - ) +static bool process_maybe_reactive_child(FT ft, FTNODE parent, FTNODE child, int childnum, seqinsert_loc loc) // Effect: -// Put a msg into a leaf. -// Calculate work done by message on leafnode and add it to caller's workdone counter. -// The leaf could end up "too big" or "too small". The caller must fix that up. -{ - LEAFENTRY storeddata; - void* key = NULL; - uint32_t keylen = 0; - - uint32_t num_klpairs; - int r; - struct msg_leafval_heaviside_extra be = {compare_fun, desc, msg->u.id.key}; - - unsigned int doing_seqinsert = bn->seqinsert; - bn->seqinsert = 0; - - switch (msg->type) { - case FT_INSERT_NO_OVERWRITE: - case FT_INSERT: { - uint32_t idx; - if (doing_seqinsert) { - idx = bn->data_buffer.num_klpairs(); - DBT kdbt; - r = bn->data_buffer.fetch_key_and_len(idx-1, &kdbt.size, &kdbt.data); - if (r != 0) goto fz; - int cmp = toku_msg_leafval_heaviside(kdbt, be); - if (cmp >= 0) goto fz; - r = DB_NOTFOUND; - } else { - fz: - r = bn->data_buffer.find_zero( - be, - &storeddata, - &key, - &keylen, - &idx - ); - } - if (r==DB_NOTFOUND) { - storeddata = 0; - } else { - assert_zero(r); - } - toku_ft_bn_apply_msg_once(bn, msg, idx, storeddata, gc_info, workdone, stats_to_update); - - // if the insertion point is within a window of the right edge of - // the leaf then it is sequential - // window = min(32, number of leaf entries/16) 
+// If child needs to be split or merged, do that. +// parent and child will be unlocked if this happens +// Requires: parent and child are read locked +// Returns: +// true if relocking is needed +// false otherwise +{ + enum reactivity re = toku_ftnode_get_reactivity(ft, child); + enum reactivity newre; + BLOCKNUM child_blocknum; + uint32_t child_fullhash; + switch (re) { + case RE_STABLE: + return false; + case RE_FISSIBLE: { - uint32_t s = bn->data_buffer.num_klpairs(); - uint32_t w = s / 16; - if (w == 0) w = 1; - if (w > 32) w = 32; - - // within the window? - if (s - idx <= w) - bn->seqinsert = doing_seqinsert + 1; - } - break; - } - case FT_DELETE_ANY: - case FT_ABORT_ANY: - case FT_COMMIT_ANY: { - uint32_t idx; - // Apply to all the matches - - r = bn->data_buffer.find_zero( - be, - &storeddata, - &key, - &keylen, - &idx - ); - if (r == DB_NOTFOUND) break; - assert_zero(r); - toku_ft_bn_apply_msg_once(bn, msg, idx, storeddata, gc_info, workdone, stats_to_update); - - break; - } - case FT_OPTIMIZE_FOR_UPGRADE: - // fall through so that optimize_for_upgrade performs rest of the optimize logic - case FT_COMMIT_BROADCAST_ALL: - case FT_OPTIMIZE: - // Apply to all leafentries - num_klpairs = bn->data_buffer.num_klpairs(); - for (uint32_t idx = 0; idx < num_klpairs; ) { - DBT curr_keydbt; - void* curr_keyp = NULL; - uint32_t curr_keylen = 0; - r = bn->data_buffer.fetch_klpair(idx, &storeddata, &curr_keylen, &curr_keyp); - assert_zero(r); - toku_fill_dbt(&curr_keydbt, curr_keyp, curr_keylen); - // because this is a broadcast message, we need - // to fill the key in the message that we pass into toku_ft_bn_apply_msg_once - msg->u.id.key = &curr_keydbt; - int deleted = 0; - if (!le_is_clean(storeddata)) { //If already clean, nothing to do. - toku_ft_bn_apply_msg_once(bn, msg, idx, storeddata, gc_info, workdone, stats_to_update); - // at this point, we cannot trust msg->u.id.key to be valid. 
- uint32_t new_dmt_size = bn->data_buffer.num_klpairs(); - if (new_dmt_size != num_klpairs) { - paranoid_invariant(new_dmt_size + 1 == num_klpairs); - //Item was deleted. - deleted = 1; + // We only have a read lock on the parent. We need to drop both locks, and get write locks. + BLOCKNUM parent_blocknum = parent->blocknum; + uint32_t parent_fullhash = toku_cachetable_hash(ft->cf, parent_blocknum); + int parent_height = parent->height; + int parent_n_children = parent->n_children; + toku_unpin_ftnode_read_only(ft, child); + toku_unpin_ftnode_read_only(ft, parent); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + FTNODE newparent, newchild; + toku_pin_ftnode(ft, parent_blocknum, parent_fullhash, &bfe, PL_WRITE_CHEAP, &newparent, true); + if (newparent->height != parent_height || newparent->n_children != parent_n_children || + childnum >= newparent->n_children || toku_bnc_n_entries(BNC(newparent, childnum))) { + // If the height changed or childnum is now off the end, something clearly got split or merged out from under us. + // If something got injected in this node, then it got split or merged and we shouldn't be splitting it. + // But we already unpinned the child so we need to have the caller re-try the pins. + toku_unpin_ftnode_read_only(ft, newparent); + return true; + } + // It's ok to reuse the same childnum because if we get something + // else we need to split, well, that's crazy, but let's go ahead + // and split it. 
+ child_blocknum = BP_BLOCKNUM(newparent, childnum); + child_fullhash = compute_child_fullhash(ft->cf, newparent, childnum); + toku_pin_ftnode_with_dep_nodes(ft, child_blocknum, child_fullhash, &bfe, PL_WRITE_CHEAP, 1, &newparent, &newchild, true); + newre = toku_ftnode_get_reactivity(ft, newchild); + if (newre == RE_FISSIBLE) { + enum split_mode split_mode; + if (newparent->height == 1 && (loc & LEFT_EXTREME) && childnum == 0) { + split_mode = SPLIT_RIGHT_HEAVY; + } else if (newparent->height == 1 && (loc & RIGHT_EXTREME) && childnum == newparent->n_children - 1) { + split_mode = SPLIT_LEFT_HEAVY; + } else { + split_mode = SPLIT_EVENLY; } + toku_ft_split_child(ft, newparent, childnum, newchild, split_mode); + } else { + // some other thread already got it, just unpin and tell the + // caller to retry + toku_unpin_ftnode_read_only(ft, newchild); + toku_unpin_ftnode_read_only(ft, newparent); } - if (deleted) - num_klpairs--; - else - idx++; + return true; } - paranoid_invariant(bn->data_buffer.num_klpairs() == num_klpairs); - - break; - case FT_COMMIT_BROADCAST_TXN: - case FT_ABORT_BROADCAST_TXN: - // Apply to all leafentries if txn is represented - num_klpairs = bn->data_buffer.num_klpairs(); - for (uint32_t idx = 0; idx < num_klpairs; ) { - DBT curr_keydbt; - void* curr_keyp = NULL; - uint32_t curr_keylen = 0; - r = bn->data_buffer.fetch_klpair(idx, &storeddata, &curr_keylen, &curr_keyp); - assert_zero(r); - toku_fill_dbt(&curr_keydbt, curr_keyp, curr_keylen); - // because this is a broadcast message, we need - // to fill the key in the message that we pass into toku_ft_bn_apply_msg_once - msg->u.id.key = &curr_keydbt; - int deleted = 0; - if (le_has_xids(storeddata, msg->xids)) { - toku_ft_bn_apply_msg_once(bn, msg, idx, storeddata, gc_info, workdone, stats_to_update); - uint32_t new_dmt_size = bn->data_buffer.num_klpairs(); - if (new_dmt_size != num_klpairs) { - paranoid_invariant(new_dmt_size + 1 == num_klpairs); - //Item was deleted. 
- deleted = 1; + case RE_FUSIBLE: + { + if (parent->height == 1) { + // prevent re-merging of recently unevenly-split nodes + if (((loc & LEFT_EXTREME) && childnum <= 1) || + ((loc & RIGHT_EXTREME) && childnum >= parent->n_children - 2)) { + return false; } } - if (deleted) - num_klpairs--; - else - idx++; - } - paranoid_invariant(bn->data_buffer.num_klpairs() == num_klpairs); - - break; - case FT_UPDATE: { - uint32_t idx; - r = bn->data_buffer.find_zero( - be, - &storeddata, - &key, - &keylen, - &idx - ); - if (r==DB_NOTFOUND) { - { - //Point to msg's copy of the key so we don't worry about le being freed - //TODO: 46 MAYBE Get rid of this when le_apply message memory is better handled - key = msg->u.id.key->data; - keylen = msg->u.id.key->size; - } - r = do_update(update_fun, desc, bn, msg, idx, NULL, NULL, 0, gc_info, workdone, stats_to_update); - } else if (r==0) { - r = do_update(update_fun, desc, bn, msg, idx, storeddata, key, keylen, gc_info, workdone, stats_to_update); - } // otherwise, a worse error, just return it - break; - } - case FT_UPDATE_BROADCAST_ALL: { - // apply to all leafentries. - uint32_t idx = 0; - uint32_t num_leafentries_before; - while (idx < (num_leafentries_before = bn->data_buffer.num_klpairs())) { - void* curr_key = nullptr; - uint32_t curr_keylen = 0; - r = bn->data_buffer.fetch_klpair(idx, &storeddata, &curr_keylen, &curr_key); - assert_zero(r); - //TODO: 46 replace this with something better than cloning key - // TODO: (Zardosht) This may be unnecessary now, due to how the key - // is handled in the bndata. Investigate and determine - char clone_mem[curr_keylen]; // only lasts one loop, alloca would overflow (end of function) - memcpy((void*)clone_mem, curr_key, curr_keylen); - curr_key = (void*)clone_mem; - - // This is broken below. 
Have a compilation error checked - // in as a reminder - r = do_update(update_fun, desc, bn, msg, idx, storeddata, curr_key, curr_keylen, gc_info, workdone, stats_to_update); - assert_zero(r); - - if (num_leafentries_before == bn->data_buffer.num_klpairs()) { - // we didn't delete something, so increment the index. - idx++; + int parent_height = parent->height; + BLOCKNUM parent_blocknum = parent->blocknum; + uint32_t parent_fullhash = toku_cachetable_hash(ft->cf, parent_blocknum); + toku_unpin_ftnode_read_only(ft, child); + toku_unpin_ftnode_read_only(ft, parent); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + FTNODE newparent, newchild; + toku_pin_ftnode(ft, parent_blocknum, parent_fullhash, &bfe, PL_WRITE_CHEAP, &newparent, true); + if (newparent->height != parent_height || childnum >= newparent->n_children) { + // looks like this is the root and it got merged, let's just start over (like in the split case above) + toku_unpin_ftnode_read_only(ft, newparent); + return true; + } + child_blocknum = BP_BLOCKNUM(newparent, childnum); + child_fullhash = compute_child_fullhash(ft->cf, newparent, childnum); + toku_pin_ftnode_with_dep_nodes(ft, child_blocknum, child_fullhash, &bfe, PL_READ, 1, &newparent, &newchild, true); + newre = toku_ftnode_get_reactivity(ft, newchild); + if (newre == RE_FUSIBLE && newparent->n_children >= 2) { + toku_unpin_ftnode_read_only(ft, newchild); + toku_ft_merge_child(ft, newparent, childnum); + } else { + // Could be a weird case where newparent has only one + // child. In this case, we want to inject here but we've + // already unpinned the caller's copy of parent so we have + // to ask them to re-pin, or they could (very rarely) + // dereferenced memory in a freed node. TODO: we could + // give them back the copy of the parent we pinned. 
+ // + // Otherwise, some other thread already got it, just unpin + // and tell the caller to retry + toku_unpin_ftnode_read_only(ft, newchild); + toku_unpin_ftnode_read_only(ft, newparent); } + return true; } - break; } - case FT_NONE: break; // don't do anything - } - - return; + abort(); } -static inline int -key_msn_cmp(const DBT *a, const DBT *b, const MSN amsn, const MSN bmsn, - DESCRIPTOR descriptor, ft_compare_func key_cmp) +static void inject_message_at_this_blocknum(FT ft, CACHEKEY cachekey, uint32_t fullhash, const ft_msg &msg, size_t flow_deltas[], txn_gc_info *gc_info) +// Effect: +// Inject message into the node at this blocknum (cachekey). +// Gets a write lock on the node for you. { - FAKE_DB(db, descriptor); - int r = key_cmp(&db, a, b); - if (r == 0) { - if (amsn.msn > bmsn.msn) { - r = +1; - } else if (amsn.msn < bmsn.msn) { - r = -1; - } else { - r = 0; - } - } - return r; + toku::context inject_ctx(CTX_MESSAGE_INJECTION); + FTNODE node; + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + toku_pin_ftnode(ft, cachekey, fullhash, &bfe, PL_WRITE_CHEAP, &node, true); + toku_ftnode_assert_fully_in_memory(node); + paranoid_invariant(node->fullhash==fullhash); + ft_verify_flags(ft, node); + inject_message_in_locked_node(ft, node, -1, msg, flow_deltas, gc_info); } -int -toku_fifo_entry_key_msn_heaviside(const int32_t &offset, const struct toku_fifo_entry_key_msn_heaviside_extra &extra) +__attribute__((const)) +static inline bool should_inject_in_node(seqinsert_loc loc, int height, int depth) +// We should inject directly in a node if: +// - it's a leaf, or +// - it's a height 1 node not at either extreme, or +// - it's a depth 2 node not at either extreme { - const struct fifo_entry *query = toku_fifo_get_entry(extra.fifo, offset); - DBT qdbt; - const DBT *query_key = fill_dbt_for_fifo_entry(&qdbt, query); - const DBT *target_key = extra.key; - return key_msn_cmp(query_key, target_key, query->msn, extra.msn, - extra.desc, extra.cmp); + return 
(height == 0 || (loc == NEITHER_EXTREME && (height <= 1 || depth >= 2))); } -int -toku_fifo_entry_key_msn_cmp(const struct toku_fifo_entry_key_msn_cmp_extra &extra, const int32_t &ao, const int32_t &bo) +static void ft_set_or_verify_rightmost_blocknum(FT ft, BLOCKNUM b) +// Given: 'b', the _definitive_ and constant rightmost blocknum of 'ft' { - const struct fifo_entry *a = toku_fifo_get_entry(extra.fifo, ao); - const struct fifo_entry *b = toku_fifo_get_entry(extra.fifo, bo); - DBT adbt, bdbt; - const DBT *akey = fill_dbt_for_fifo_entry(&adbt, a); - const DBT *bkey = fill_dbt_for_fifo_entry(&bdbt, b); - return key_msn_cmp(akey, bkey, a->msn, b->msn, - extra.desc, extra.cmp); + if (ft->rightmost_blocknum.b == RESERVED_BLOCKNUM_NULL) { + toku_ft_lock(ft); + if (ft->rightmost_blocknum.b == RESERVED_BLOCKNUM_NULL) { + ft->rightmost_blocknum = b; + } + toku_ft_unlock(ft); + } + // The rightmost blocknum only transitions from RESERVED_BLOCKNUM_NULL to non-null. + // If it's already set, verify that the stored value is consistent with 'b' + invariant(ft->rightmost_blocknum.b == b.b); +} + +bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) { + static const double factor = 0.125; + const uint64_t flow_threshold = ft->h->nodesize * factor; + return bnc->flow[0] >= flow_threshold || bnc->flow[1] >= flow_threshold; } -void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp) -// Effect: Enqueue the message represented by the parameters into the -// bnc's buffer, and put it in either the fresh or stale message tree, -// or the broadcast list. 
+static void push_something_in_subtree( + FT ft, + FTNODE subtree_root, + int target_childnum, + const ft_msg &msg, + size_t flow_deltas[], + txn_gc_info *gc_info, + int depth, + seqinsert_loc loc, + bool just_did_split_or_merge + ) +// Effects: +// Assign message an MSN from ft->h. +// Put message in the subtree rooted at node. Due to promotion the message may not be injected directly in this node. +// Unlock node or schedule it to be unlocked (after a background flush). +// Either way, the caller is not responsible for unlocking node. +// Requires: +// subtree_root is read locked and fully in memory. +// Notes: +// In Ming, the basic rules of promotion are as follows: +// Don't promote broadcast messages. +// Don't promote past non-empty buffers. +// Otherwise, promote at most to height 1 or depth 2 (whichever is highest), as far as the birdie asks you to promote. +// We don't promote to leaves because injecting into leaves is expensive, mostly because of #5605 and some of #5552. +// We don't promote past depth 2 because we found that gives us enough parallelism without costing us too much pinning work. +// +// This is true with the following caveats: +// We always promote all the way to the leaves on the rightmost and leftmost edges of the tree, for sequential insertions. +// (That means we can promote past depth 2 near the edges of the tree.) // -// This is only exported for tests. +// When the birdie is still saying we should promote, we use get_and_pin so that we wait to get the node. +// If the birdie doesn't say to promote, we try maybe_get_and_pin. If we get the node cheaply, and it's dirty, we promote anyway. 
{ - int32_t offset; - int r = toku_fifo_enq(bnc->buffer, key, keylen, data, datalen, type, msn, xids, is_fresh, &offset); - assert_zero(r); - if (ft_msg_type_applies_once(type)) { - DBT keydbt; - struct toku_fifo_entry_key_msn_heaviside_extra extra = { .desc = desc, .cmp = cmp, .fifo = bnc->buffer, .key = toku_fill_dbt(&keydbt, key, keylen), .msn = msn }; - if (is_fresh) { - r = bnc->fresh_message_tree.insert(offset, extra, nullptr); - assert_zero(r); - } else { - r = bnc->stale_message_tree.insert(offset, extra, nullptr); - assert_zero(r); + toku_ftnode_assert_fully_in_memory(subtree_root); + if (should_inject_in_node(loc, subtree_root->height, depth)) { + switch (depth) { + case 0: + STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_0, 1); break; + case 1: + STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_1, 1); break; + case 2: + STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_2, 1); break; + case 3: + STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_3, 1); break; + default: + STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_GT3, 1); break; + } + // If the target node is a non-root leaf node on the right extreme, + // set the rightmost blocknum. We know there are no messages above us + // because promotion would not chose to inject directly into this leaf + // otherwise. We explicitly skip the root node because then we don't have + // to worry about changing the rightmost blocknum when the root splits. 
+ if (subtree_root->height == 0 && loc == RIGHT_EXTREME && subtree_root->blocknum.b != ft->h->root_blocknum.b) { + ft_set_or_verify_rightmost_blocknum(ft, subtree_root->blocknum); } + inject_message_in_locked_node(ft, subtree_root, target_childnum, msg, flow_deltas, gc_info); } else { - invariant(ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)); - const uint32_t idx = bnc->broadcast_list.size(); - r = bnc->broadcast_list.insert_at(offset, idx); - assert_zero(r); - } -} + int r; + int childnum; + NONLEAF_CHILDINFO bnc; -// append a msg to a nonleaf node's child buffer -// should be static, but used by test programs -void toku_ft_append_to_child_buffer(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int childnum, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const DBT *key, const DBT *val) { - paranoid_invariant(BP_STATE(node,childnum) == PT_AVAIL); - toku_bnc_insert_msg(BNC(node, childnum), key->data, key->size, val->data, val->size, type, msn, xids, is_fresh, desc, compare_fun); - node->dirty = 1; -} - -static void ft_nonleaf_msg_once_to_child(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int target_childnum, FT_MSG msg, bool is_fresh, size_t flow_deltas[]) -// Previously we had passive aggressive promotion, but that causes a lot of I/O a the checkpoint. So now we are just putting it in the buffer here. -// Also we don't worry about the node getting overfull here. It's the caller's problem. -{ - unsigned int childnum = (target_childnum >= 0 - ? target_childnum - : toku_ftnode_which_child(node, msg->u.id.key, desc, compare_fun)); - toku_ft_append_to_child_buffer(compare_fun, desc, node, childnum, msg->type, msg->msn, msg->xids, is_fresh, msg->u.id.key, msg->u.id.val); - NONLEAF_CHILDINFO bnc = BNC(node, childnum); - bnc->flow[0] += flow_deltas[0]; - bnc->flow[1] += flow_deltas[1]; -} - -/* Find the leftmost child that may contain the key. 
- * If the key exists it will be in the child whose number - * is the return value of this function. - */ -int toku_ftnode_which_child(FTNODE node, const DBT *k, - DESCRIPTOR desc, ft_compare_func cmp) { - // a funny case of no pivots - if (node->n_children <= 1) return 0; + // toku_ft_root_put_msg should not have called us otherwise. + paranoid_invariant(ft_msg_type_applies_once(msg.type())); - // check the last key to optimize seq insertions - int n = node->n_children-1; - int c = ft_compare_pivot(desc, cmp, k, &node->childkeys[n-1]); - if (c > 0) return n; + childnum = (target_childnum >= 0 ? target_childnum + : toku_ftnode_which_child(subtree_root, msg.kdbt(), ft->cmp)); + bnc = BNC(subtree_root, childnum); - // binary search the pivots - int lo = 0; - int hi = n-1; // skip the last one, we checked it above - int mi; - while (lo < hi) { - mi = (lo + hi) / 2; - c = ft_compare_pivot(desc, cmp, k, &node->childkeys[mi]); - if (c > 0) { - lo = mi+1; - continue; - } - if (c < 0) { - hi = mi; - continue; + if (toku_bnc_n_entries(bnc) > 0) { + // The buffer is non-empty, give up on promoting. + STATUS_INC(FT_PRO_NUM_STOP_NONEMPTY_BUF, 1); + goto relock_and_push_here; } - return mi; - } - return lo; -} -// Used for HOT. -int -toku_ftnode_hot_next_child(FTNODE node, - const DBT *k, - DESCRIPTOR desc, - ft_compare_func cmp) { - int low = 0; - int hi = node->n_children - 1; - int mi; - while (low < hi) { - mi = (low + hi) / 2; - int r = ft_compare_pivot(desc, cmp, k, &node->childkeys[mi]); - if (r > 0) { - low = mi + 1; - } else if (r < 0) { - hi = mi; + seqinsert_loc next_loc; + if ((loc & LEFT_EXTREME) && childnum == 0) { + next_loc = LEFT_EXTREME; + } else if ((loc & RIGHT_EXTREME) && childnum == subtree_root->n_children - 1) { + next_loc = RIGHT_EXTREME; } else { - // if they were exactly equal, then we want the sub-tree under - // the next pivot. 
- return mi + 1; - } - } - invariant(low == hi); - return low; -} - -// TODO Use this function to clean up other places where bits of messages are passed around -// such as toku_bnc_insert_msg() and the call stack above it. -static uint64_t -ft_msg_size(FT_MSG msg) { - size_t keyval_size = msg->u.id.key->size + msg->u.id.val->size; - size_t xids_size = xids_get_serialize_size(msg->xids); - return keyval_size + KEY_VALUE_OVERHEAD + FT_MSG_OVERHEAD + xids_size; -} + next_loc = NEITHER_EXTREME; + } -static void -ft_nonleaf_msg_all(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, FT_MSG msg, bool is_fresh, size_t flow_deltas[]) -// Effect: Put the message into a nonleaf node. We put it into all children, possibly causing the children to become reactive. -// We don't do the splitting and merging. That's up to the caller after doing all the puts it wants to do. -// The re_array[i] gets set to the reactivity of any modified child i. (And there may be several such children.) -{ - for (int i = 0; i < node->n_children; i++) { - ft_nonleaf_msg_once_to_child(compare_fun, desc, node, i, msg, is_fresh, flow_deltas); - } -} + if (next_loc == NEITHER_EXTREME && subtree_root->height <= 1) { + // Never promote to leaf nodes except on the edges + STATUS_INC(FT_PRO_NUM_STOP_H1, 1); + goto relock_and_push_here; + } -static bool -ft_msg_applies_once(FT_MSG msg) -{ - return ft_msg_type_applies_once(msg->type); -} + { + const BLOCKNUM child_blocknum = BP_BLOCKNUM(subtree_root, childnum); + ft->blocktable.verify_blocknum_allocated(child_blocknum); + const uint32_t child_fullhash = toku_cachetable_hash(ft->cf, child_blocknum); -static bool -ft_msg_applies_all(FT_MSG msg) -{ - return ft_msg_type_applies_all(msg->type); -} + FTNODE child; + { + const int child_height = subtree_root->height - 1; + const int child_depth = depth + 1; + // If we're locking a leaf, or a height 1 node or depth 2 + // node in the middle, we know we won't promote further + // than that, so just get a write 
lock now. + const pair_lock_type lock_type = (should_inject_in_node(next_loc, child_height, child_depth) + ? PL_WRITE_CHEAP + : PL_READ); + if (next_loc != NEITHER_EXTREME || (toku_bnc_should_promote(ft, bnc) && depth <= 1)) { + // If we're on either extreme, or the birdie wants to + // promote and we're in the top two levels of the + // tree, don't stop just because someone else has the + // node locked. + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + if (lock_type == PL_WRITE_CHEAP) { + // We intend to take the write lock for message injection + toku::context inject_ctx(CTX_MESSAGE_INJECTION); + toku_pin_ftnode(ft, child_blocknum, child_fullhash, &bfe, lock_type, &child, true); + } else { + // We're going to keep promoting + toku::context promo_ctx(CTX_PROMO); + toku_pin_ftnode(ft, child_blocknum, child_fullhash, &bfe, lock_type, &child, true); + } + } else { + r = toku_maybe_pin_ftnode_clean(ft, child_blocknum, child_fullhash, lock_type, &child); + if (r != 0) { + // We couldn't get the child cheaply, so give up on promoting. + STATUS_INC(FT_PRO_NUM_STOP_LOCK_CHILD, 1); + goto relock_and_push_here; + } + if (toku_ftnode_fully_in_memory(child)) { + // toku_pin_ftnode... touches the clock but toku_maybe_pin_ftnode... doesn't. + // This prevents partial eviction. + for (int i = 0; i < child->n_children; ++i) { + BP_TOUCH_CLOCK(child, i); + } + } else { + // We got the child, but it's not fully in memory. Give up on promoting. 
+ STATUS_INC(FT_PRO_NUM_STOP_CHILD_INMEM, 1); + goto unlock_child_and_push_here; + } + } + } + paranoid_invariant_notnull(child); -static bool -ft_msg_does_nothing(FT_MSG msg) -{ - return ft_msg_type_does_nothing(msg->type); -} + if (!just_did_split_or_merge) { + BLOCKNUM subtree_root_blocknum = subtree_root->blocknum; + uint32_t subtree_root_fullhash = toku_cachetable_hash(ft->cf, subtree_root_blocknum); + const bool did_split_or_merge = process_maybe_reactive_child(ft, subtree_root, child, childnum, loc); + if (did_split_or_merge) { + // Need to re-pin this node and try at this level again. + FTNODE newparent; + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); // should be fully in memory, we just split it + toku_pin_ftnode(ft, subtree_root_blocknum, subtree_root_fullhash, &bfe, PL_READ, &newparent, true); + push_something_in_subtree(ft, newparent, -1, msg, flow_deltas, gc_info, depth, loc, true); + return; + } + } -static void -ft_nonleaf_put_msg(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int target_childnum, FT_MSG msg, bool is_fresh, size_t flow_deltas[]) -// Effect: Put the message into a nonleaf node. We may put it into a child, possibly causing the child to become reactive. -// We don't do the splitting and merging. That's up to the caller after doing all the puts it wants to do. -// The re_array[i] gets set to the reactivity of any modified child i. (And there may be several such children.) -// -{ + if (next_loc != NEITHER_EXTREME || child->dirty || toku_bnc_should_promote(ft, bnc)) { + push_something_in_subtree(ft, child, -1, msg, flow_deltas, gc_info, depth + 1, next_loc, false); + toku_sync_fetch_and_add(&bnc->flow[0], flow_deltas[0]); + // The recursive call unpinned the child, but + // we're responsible for unpinning subtree_root. 
+ toku_unpin_ftnode_read_only(ft, subtree_root); + return; + } - // - // see comments in toku_ft_leaf_apply_msg - // to understand why we handle setting - // node->max_msn_applied_to_node_on_disk here, - // and don't do it in toku_ft_node_put_msg - // - MSN msg_msn = msg->msn; - invariant(msg_msn.msn > node->max_msn_applied_to_node_on_disk.msn); - node->max_msn_applied_to_node_on_disk = msg_msn; - - if (ft_msg_applies_once(msg)) { - ft_nonleaf_msg_once_to_child(compare_fun, desc, node, target_childnum, msg, is_fresh, flow_deltas); - } else if (ft_msg_applies_all(msg)) { - ft_nonleaf_msg_all(compare_fun, desc, node, msg, is_fresh, flow_deltas); - } else { - paranoid_invariant(ft_msg_does_nothing(msg)); + STATUS_INC(FT_PRO_NUM_DIDNT_WANT_PROMOTE, 1); + unlock_child_and_push_here: + // We locked the child, but we decided not to promote. + // Unlock the child, and fall through to the next case. + toku_unpin_ftnode_read_only(ft, child); + } + relock_and_push_here: + // Give up on promoting. + // We have subtree_root read-locked and we don't have a child locked. + // Drop the read lock, grab a write lock, and inject here. + { + // Right now we have a read lock on subtree_root, but we want + // to inject into it so we get a write lock instead. + BLOCKNUM subtree_root_blocknum = subtree_root->blocknum; + uint32_t subtree_root_fullhash = toku_cachetable_hash(ft->cf, subtree_root_blocknum); + toku_unpin_ftnode_read_only(ft, subtree_root); + switch (depth) { + case 0: + STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_0, 1); break; + case 1: + STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_1, 1); break; + case 2: + STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_2, 1); break; + case 3: + STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_3, 1); break; + default: + STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_GT3, 1); break; + } + inject_message_at_this_blocknum(ft, subtree_root_blocknum, subtree_root_fullhash, msg, flow_deltas, gc_info); + } } } -// Garbage collect one leaf entry. 
-static void -ft_basement_node_gc_once(BASEMENTNODE bn, - uint32_t index, - void* keyp, - uint32_t keylen, - LEAFENTRY leaf_entry, - txn_gc_info *gc_info, - STAT64INFO_S * delta) -{ - paranoid_invariant(leaf_entry); +void toku_ft_root_put_msg( + FT ft, + const ft_msg &msg, + txn_gc_info *gc_info + ) +// Effect: +// - assign msn to message and update msn in the header +// - push the message into the ft - // Don't run garbage collection on non-mvcc leaf entries. - if (leaf_entry->type != LE_MVCC) { - goto exit; - } +// As of Clayface, the root blocknum is a constant, so preventing a +// race between message injection and the split of a root is the job +// of the cachetable's locking rules. +// +// We also hold the MO lock for a number of reasons, but an important +// one is to make sure that a begin_checkpoint may not start while +// this code is executing. A begin_checkpoint does (at least) two things +// that can interfere with the operations here: +// - Copies the header to a checkpoint header. Because we may change +// the max_msn_in_ft below, we don't want the header to be copied in +// the middle of these operations. +// - Takes note of the log's LSN. Because this put operation has +// already been logged, this message injection must be included +// in any checkpoint that contains this put's logentry. +// Holding the mo lock throughout this function ensures that fact. +{ + toku::context promo_ctx(CTX_PROMO); - // Don't run garbage collection if this leafentry decides it's not worth it. - if (!toku_le_worth_running_garbage_collection(leaf_entry, gc_info)) { - goto exit; + // blackhole fractal trees drop all messages, so do nothing. + if (ft->blackhole) { + return; } - LEAFENTRY new_leaf_entry; - new_leaf_entry = NULL; - - // The mempool doesn't free itself. When it allocates new memory, - // this pointer will be set to the older memory that must now be - // freed. 
- void * maybe_free; - maybe_free = NULL; - - // These will represent the number of bytes and rows changed as - // part of the garbage collection. - int64_t numbytes_delta; - int64_t numrows_delta; - toku_le_garbage_collect(leaf_entry, - &bn->data_buffer, - index, - keyp, - keylen, - gc_info, - &new_leaf_entry, - &numbytes_delta); - - numrows_delta = 0; - if (new_leaf_entry) { - numrows_delta = 0; - } else { - numrows_delta = -1; - } + FTNODE node; - // If we created a new mempool buffer we must free the - // old/original buffer. - if (maybe_free) { - toku_free(maybe_free); - } + uint32_t fullhash; + CACHEKEY root_key; + toku_calculate_root_offset_pointer(ft, &root_key, &fullhash); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); - // Update stats. - bn->stat64_delta.numrows += numrows_delta; - bn->stat64_delta.numbytes += numbytes_delta; - delta->numrows += numrows_delta; - delta->numbytes += numbytes_delta; + size_t flow_deltas[] = { message_buffer::msg_memsize_in_buffer(msg), 0 }; -exit: - return; -} + pair_lock_type lock_type; + lock_type = PL_READ; // try first for a read lock + // If we need to split the root, we'll have to change from a read lock + // to a write lock and check again. We change the variable lock_type + // and jump back to here. + change_lock_type: + // get the root node + toku_pin_ftnode(ft, root_key, fullhash, &bfe, lock_type, &node, true); + toku_ftnode_assert_fully_in_memory(node); + paranoid_invariant(node->fullhash==fullhash); + ft_verify_flags(ft, node); -// Garbage collect all leaf entries for a given basement node. 
-static void -basement_node_gc_all_les(BASEMENTNODE bn, - txn_gc_info *gc_info, - STAT64INFO_S * delta) -{ - int r = 0; - uint32_t index = 0; - uint32_t num_leafentries_before; - while (index < (num_leafentries_before = bn->data_buffer.num_klpairs())) { - void* keyp = NULL; - uint32_t keylen = 0; - LEAFENTRY leaf_entry; - r = bn->data_buffer.fetch_klpair(index, &leaf_entry, &keylen, &keyp); - assert_zero(r); - ft_basement_node_gc_once( - bn, - index, - keyp, - keylen, - leaf_entry, - gc_info, - delta - ); - // Check if the leaf entry was deleted or not. - if (num_leafentries_before == bn->data_buffer.num_klpairs()) { - ++index; + // First handle a reactive root. + // This relocking for split algorithm will cause every message + // injection thread to change lock type back and forth, when only one + // of them needs to in order to handle the split. That's not great, + // but root splits are incredibly rare. + enum reactivity re = toku_ftnode_get_reactivity(ft, node); + switch (re) { + case RE_STABLE: + case RE_FUSIBLE: // cannot merge anything at the root + if (lock_type != PL_READ) { + // We thought we needed to split, but someone else got to + // it before us. Downgrade to a read lock. + toku_unpin_ftnode_read_only(ft, node); + lock_type = PL_READ; + goto change_lock_type; + } + break; + case RE_FISSIBLE: + if (lock_type == PL_READ) { + // Here, we only have a read lock on the root. In order + // to split it, we need a write lock, but in the course of + // gaining the write lock, someone else may have gotten in + // before us and split it. So we upgrade to a write lock + // and check again. + toku_unpin_ftnode_read_only(ft, node); + lock_type = PL_WRITE_CHEAP; + goto change_lock_type; + } else { + // We have a write lock, now we can split. + ft_init_new_root(ft, node, &node); + // Then downgrade back to a read lock, and we can finally + // do the injection. 
+ toku_unpin_ftnode(ft, node); + lock_type = PL_READ; + STATUS_INC(FT_PRO_NUM_ROOT_SPLIT, 1); + goto change_lock_type; } + break; } -} + // If we get to here, we have a read lock and the root doesn't + // need to be split. It's safe to inject the message. + paranoid_invariant(lock_type == PL_READ); + // We cannot assert that we have the read lock because frwlock asserts + // that its mutex is locked when we check if there are any readers. + // That wouldn't give us a strong guarantee that we have the read lock + // anyway. -// Garbage collect all leaf entires in all basement nodes. -static void -ft_leaf_gc_all_les(FT ft, FTNODE node, txn_gc_info *gc_info) -{ - toku_assert_entire_node_in_memory(node); - paranoid_invariant_zero(node->height); - // Loop through each leaf entry, garbage collecting as we go. - for (int i = 0; i < node->n_children; ++i) { - // Perform the garbage collection. - BASEMENTNODE bn = BLB(node, i); - STAT64INFO_S delta; - delta.numrows = 0; - delta.numbytes = 0; - basement_node_gc_all_les(bn, gc_info, &delta); - toku_ft_update_stats(&ft->in_memory_stats, delta); - } -} - -static void -ft_leaf_run_gc(FT ft, FTNODE node) { - TOKULOGGER logger = toku_cachefile_logger(ft->cf); - if (logger) { - TXN_MANAGER txn_manager = toku_logger_get_txn_manager(logger); - txn_manager_state txn_state_for_gc(txn_manager); - txn_state_for_gc.init(); - TXNID oldest_referenced_xid_for_simple_gc = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager); - - // Perform full garbage collection. - // - // - txn_state_for_gc - // a fresh snapshot of the transaction system. - // - oldest_referenced_xid_for_simple_gc - // the oldest xid in any live list as of right now - suitible for simple gc - // - node->oldest_referenced_xid_known - // the last known oldest referenced xid for this node and any unapplied messages. 
- // it is a lower bound on the actual oldest referenced xid - but becasue there - // may be abort messages above us, we need to be careful to only use this value - // for implicit promotion (as opposed to the oldest referenced xid for simple gc) - // - // The node has its own oldest referenced xid because it must be careful not to implicitly promote - // provisional entries for transactions that are no longer live, but may have abort messages - // somewhere above us in the tree. - txn_gc_info gc_info(&txn_state_for_gc, - oldest_referenced_xid_for_simple_gc, - node->oldest_referenced_xid_known, - true); - ft_leaf_gc_all_les(ft, node, &gc_info); + // Now, either inject here or promote. We decide based on a heuristic: + if (node->height == 0 || !ft_msg_type_applies_once(msg.type())) { + // If the root's a leaf or we're injecting a broadcast, drop the read lock and inject here. + toku_unpin_ftnode_read_only(ft, node); + STATUS_INC(FT_PRO_NUM_ROOT_H0_INJECT, 1); + inject_message_at_this_blocknum(ft, root_key, fullhash, msg, flow_deltas, gc_info); + } else if (node->height > 1) { + // If the root's above height 1, we are definitely eligible for promotion. + push_something_in_subtree(ft, node, -1, msg, flow_deltas, gc_info, 0, LEFT_EXTREME | RIGHT_EXTREME, false); + } else { + // The root's height 1. We may be eligible for promotion here. + // On the extremes, we want to promote, in the middle, we don't. + int childnum = toku_ftnode_which_child(node, msg.kdbt(), ft->cmp); + if (childnum == 0 || childnum == node->n_children - 1) { + // On the extremes, promote. We know which childnum we're going to, so pass that down too. + push_something_in_subtree(ft, node, childnum, msg, flow_deltas, gc_info, 0, LEFT_EXTREME | RIGHT_EXTREME, false); + } else { + // At height 1 in the middle, don't promote, drop the read lock and inject here. 
+ toku_unpin_ftnode_read_only(ft, node); + STATUS_INC(FT_PRO_NUM_ROOT_H1_INJECT, 1); + inject_message_at_this_blocknum(ft, root_key, fullhash, msg, flow_deltas, gc_info); + } } } -void toku_bnc_flush_to_child( - FT ft, - NONLEAF_CHILDINFO bnc, - FTNODE child, - TXNID parent_oldest_referenced_xid_known - ) +// TODO: Remove me, I'm boring. +static int ft_compare_keys(FT ft, const DBT *a, const DBT *b) +// Effect: Compare two keys using the given fractal tree's comparator/descriptor { - paranoid_invariant(bnc); - STAT64INFO_S stats_delta = {0,0}; - size_t remaining_memsize = toku_fifo_buffer_size_in_use(bnc->buffer); - - TOKULOGGER logger = toku_cachefile_logger(ft->cf); - TXN_MANAGER txn_manager = logger != nullptr ? toku_logger_get_txn_manager(logger) : nullptr; - TXNID oldest_referenced_xid_for_simple_gc = TXNID_NONE; - - txn_manager_state txn_state_for_gc(txn_manager); - bool do_garbage_collection = child->height == 0 && txn_manager != nullptr; - if (do_garbage_collection) { - txn_state_for_gc.init(); - oldest_referenced_xid_for_simple_gc = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager); - } - txn_gc_info gc_info(&txn_state_for_gc, - oldest_referenced_xid_for_simple_gc, - child->oldest_referenced_xid_known, - true); - FIFO_ITERATE( - bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, - ({ - DBT hk,hv; - FT_MSG_S ftmsg = { type, msn, xids, .u = { .id = { toku_fill_dbt(&hk, key, keylen), - toku_fill_dbt(&hv, val, vallen) } } }; - size_t flow_deltas[] = { 0, 0 }; - if (remaining_memsize <= bnc->flow[0]) { - // this message is in the current checkpoint's worth of - // the end of the fifo - flow_deltas[0] = FIFO_CURRENT_ENTRY_MEMSIZE; - } else if (remaining_memsize <= bnc->flow[0] + bnc->flow[1]) { - // this message is in the last checkpoint's worth of the - // end of the fifo - flow_deltas[1] = FIFO_CURRENT_ENTRY_MEMSIZE; - } - toku_ft_node_put_msg( - ft->compare_fun, - ft->update_fun, - &ft->cmp_descriptor, - child, - -1, - &ftmsg, - 
is_fresh, - &gc_info, - flow_deltas, - &stats_delta - ); - remaining_memsize -= FIFO_CURRENT_ENTRY_MEMSIZE; - })); - child->oldest_referenced_xid_known = parent_oldest_referenced_xid_known; - - invariant(remaining_memsize == 0); - if (stats_delta.numbytes || stats_delta.numrows) { - toku_ft_update_stats(&ft->in_memory_stats, stats_delta); - } - if (do_garbage_collection) { - size_t buffsize = toku_fifo_buffer_size_in_use(bnc->buffer); - STATUS_INC(FT_MSG_BYTES_OUT, buffsize); - // may be misleading if there's a broadcast message in there - STATUS_INC(FT_MSG_BYTES_CURR, -buffsize); - } + return ft->cmp(a, b); } -bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) { - static const double factor = 0.125; - const uint64_t flow_threshold = ft->h->nodesize * factor; - return bnc->flow[0] >= flow_threshold || bnc->flow[1] >= flow_threshold; +static LEAFENTRY bn_get_le_and_key(BASEMENTNODE bn, int idx, DBT *key) +// Effect: Gets the i'th leafentry from the given basement node and +// fill its key in *key +// Requires: The i'th leafentry exists. +{ + LEAFENTRY le; + uint32_t le_len; + void *le_key; + int r = bn->data_buffer.fetch_klpair(idx, &le, &le_len, &le_key); + invariant_zero(r); + toku_fill_dbt(key, le_key, le_len); + return le; } -void -toku_ft_node_put_msg ( - ft_compare_func compare_fun, - ft_update_func update_fun, - DESCRIPTOR desc, - FTNODE node, - int target_childnum, - FT_MSG msg, - bool is_fresh, - txn_gc_info *gc_info, - size_t flow_deltas[], - STAT64INFO stats_to_update - ) -// Effect: Push message into the subtree rooted at NODE. -// If NODE is a leaf, then -// put message into leaf, applying it to the leafentries -// If NODE is a nonleaf, then push the message into the FIFO(s) of the relevent child(ren). -// The node may become overfull. That's not our problem. 
+static LEAFENTRY ft_leaf_leftmost_le_and_key(FTNODE leaf, DBT *leftmost_key) +// Effect: If a leftmost key exists in the given leaf, toku_fill_dbt() +// the key into *leftmost_key +// Requires: Leaf is fully in memory and pinned for read or write. +// Return: leafentry if it exists, nullptr otherwise { - toku_assert_entire_node_in_memory(node); - // - // see comments in toku_ft_leaf_apply_msg - // to understand why we don't handle setting - // node->max_msn_applied_to_node_on_disk here, - // and instead defer to these functions - // - if (node->height==0) { - toku_ft_leaf_apply_msg(compare_fun, update_fun, desc, node, target_childnum, msg, gc_info, nullptr, stats_to_update); - } else { - ft_nonleaf_put_msg(compare_fun, desc, node, target_childnum, msg, is_fresh, flow_deltas); + for (int i = 0; i < leaf->n_children; i++) { + BASEMENTNODE bn = BLB(leaf, i); + if (bn->data_buffer.num_klpairs() > 0) { + // Get the first (leftmost) leafentry and its key + return bn_get_le_and_key(bn, 0, leftmost_key); + } } + return nullptr; } -static const struct pivot_bounds infinite_bounds = {.lower_bound_exclusive=NULL, - .upper_bound_inclusive=NULL}; - - -// Effect: applies the message to the leaf if the appropriate basement node is in memory. -// This function is called during message injection and/or flushing, so the entire -// node MUST be in memory. -void toku_ft_leaf_apply_msg( - ft_compare_func compare_fun, - ft_update_func update_fun, - DESCRIPTOR desc, - FTNODE node, - int target_childnum, // which child to inject to, or -1 if unknown - FT_MSG msg, - txn_gc_info *gc_info, - uint64_t *workdone, - STAT64INFO stats_to_update - ) +static LEAFENTRY ft_leaf_rightmost_le_and_key(FTNODE leaf, DBT *rightmost_key) +// Effect: If a rightmost key exists in the given leaf, toku_fill_dbt() +// the key into *rightmost_key +// Requires: Leaf is fully in memory and pinned for read or write. 
+// Return: leafentry if it exists, nullptr otherwise { - VERIFY_NODE(t, node); - toku_assert_entire_node_in_memory(node); + for (int i = leaf->n_children - 1; i >= 0; i--) { + BASEMENTNODE bn = BLB(leaf, i); + size_t num_les = bn->data_buffer.num_klpairs(); + if (num_les > 0) { + // Get the last (rightmost) leafentry and its key + return bn_get_le_and_key(bn, num_les - 1, rightmost_key); + } + } + return nullptr; +} - // - // Because toku_ft_leaf_apply_msg is called with the intent of permanently - // applying a message to a leaf node (meaning the message is permanently applied - // and will be purged from the system after this call, as opposed to - // toku_apply_ancestors_messages_to_node, which applies a message - // for a query, but the message may still reside in the system and - // be reapplied later), we mark the node as dirty and - // take the opportunity to update node->max_msn_applied_to_node_on_disk. - // - node->dirty = 1; +static int ft_leaf_get_relative_key_pos(FT ft, FTNODE leaf, const DBT *key, bool *nondeleted_key_found, int *target_childnum) +// Effect: Determines what the relative position of the given key is with +// respect to a leaf node, and if it exists. +// Requires: Leaf is fully in memory and pinned for read or write. +// Requires: target_childnum is non-null +// Return: < 0 if key is less than the leftmost key in the leaf OR the relative position is unknown, for any reason. 
+// 0 if key is in the bounds [leftmost_key, rightmost_key] for this leaf or the leaf is empty +// > 0 if key is greater than the rightmost key in the leaf +// *nondeleted_key_found is set (if non-null) if the target key was found and is not deleted, unmodified otherwise +// *target_childnum is set to the child that (does or would) contain the key, if calculated, unmodified otherwise +{ + DBT rightmost_key; + LEAFENTRY rightmost_le = ft_leaf_rightmost_le_and_key(leaf, &rightmost_key); + if (rightmost_le == nullptr) { + // If we can't get a rightmost key then the leaf is empty. + // In such a case, we don't have any information about what keys would be in this leaf. + // We have to assume the leaf node that would contain this key is to the left. + return -1; + } + // We have a rightmost leafentry, so it must exist in some child node + invariant(leaf->n_children > 0); - // - // we cannot blindly update node->max_msn_applied_to_node_on_disk, - // we must check to see if the msn is greater that the one already stored, - // because the message may have already been applied earlier (via - // toku_apply_ancestors_messages_to_node) to answer a query - // - // This is why we handle node->max_msn_applied_to_node_on_disk both here - // and in ft_nonleaf_put_msg, as opposed to in one location, toku_ft_node_put_msg. - // - MSN msg_msn = msg->msn; - if (msg_msn.msn > node->max_msn_applied_to_node_on_disk.msn) { - node->max_msn_applied_to_node_on_disk = msg_msn; - } - - if (ft_msg_applies_once(msg)) { - unsigned int childnum = (target_childnum >= 0 - ? 
target_childnum - : toku_ftnode_which_child(node, msg->u.id.key, desc, compare_fun)); - BASEMENTNODE bn = BLB(node, childnum); - if (msg->msn.msn > bn->max_msn_applied.msn) { - bn->max_msn_applied = msg->msn; - toku_ft_bn_apply_msg(compare_fun, - update_fun, - desc, - bn, - msg, - gc_info, - workdone, - stats_to_update); - } else { - STATUS_INC(FT_MSN_DISCARDS, 1); + int relative_pos = 0; + int c = ft_compare_keys(ft, key, &rightmost_key); + if (c > 0) { + relative_pos = 1; + *target_childnum = leaf->n_children - 1; + } else if (c == 0) { + if (nondeleted_key_found != nullptr && !le_latest_is_del(rightmost_le)) { + *nondeleted_key_found = true; } - } - else if (ft_msg_applies_all(msg)) { - for (int childnum=0; childnumn_children; childnum++) { - if (msg->msn.msn > BLB(node, childnum)->max_msn_applied.msn) { - BLB(node, childnum)->max_msn_applied = msg->msn; - toku_ft_bn_apply_msg(compare_fun, - update_fun, - desc, - BLB(node, childnum), - msg, - gc_info, - workdone, - stats_to_update); - } else { - STATUS_INC(FT_MSN_DISCARDS, 1); + relative_pos = 0; + *target_childnum = leaf->n_children - 1; + } else { + // The key is less than the rightmost. It may still be in bounds if it's >= the leftmost. + DBT leftmost_key; + LEAFENTRY leftmost_le = ft_leaf_leftmost_le_and_key(leaf, &leftmost_key); + invariant_notnull(leftmost_le); // Must exist because a rightmost exists + c = ft_compare_keys(ft, key, &leftmost_key); + if (c > 0) { + if (nondeleted_key_found != nullptr) { + // The caller wants to know if a nondeleted key can be found. 
+ LEAFENTRY target_le; + int childnum = toku_ftnode_which_child(leaf, key, ft->cmp); + BASEMENTNODE bn = BLB(leaf, childnum); + struct toku_msg_leafval_heaviside_extra extra(ft->cmp, key); + int r = bn->data_buffer.find_zero( + extra, + &target_le, + nullptr, nullptr, nullptr + ); + *target_childnum = childnum; + if (r == 0 && !le_latest_is_del(leftmost_le)) { + *nondeleted_key_found = true; + } + } + relative_pos = 0; + } else if (c == 0) { + if (nondeleted_key_found != nullptr && !le_latest_is_del(leftmost_le)) { + *nondeleted_key_found = true; } + relative_pos = 0; + *target_childnum = 0; + } else { + relative_pos = -1; } } - else if (!ft_msg_does_nothing(msg)) { - abort(); - } - VERIFY_NODE(t, node); + + return relative_pos; } -static void inject_message_in_locked_node( - FT ft, - FTNODE node, - int childnum, - FT_MSG_S *msg, - size_t flow_deltas[], - txn_gc_info *gc_info - ) +static void ft_insert_directly_into_leaf(FT ft, FTNODE leaf, int target_childnum, DBT *key, DBT *val, + XIDS message_xids, enum ft_msg_type type, txn_gc_info *gc_info); +static int getf_nothing(uint32_t, const void *, uint32_t, const void *, void *, bool); + +static int ft_maybe_insert_into_rightmost_leaf(FT ft, DBT *key, DBT *val, XIDS message_xids, enum ft_msg_type type, + txn_gc_info *gc_info, bool unique) +// Effect: Pins the rightmost leaf node and attempts to do an insert. +// There are three reasons why we may not succeed. +// - The rightmost leaf is too full and needs a split. +// - The key to insert is not within the provable bounds of this leaf node. +// - The key is within bounds, but it already exists. +// Return: 0 if this function did insert, DB_KEYEXIST if a unique key constraint exists and +// some nondeleted leafentry with the same key exists +// < 0 if this function did not insert, for a reason other than DB_KEYEXIST. +// Note: Treat this function as a possible, but not necessary, optimization for insert. 
+// Rationale: We want O(1) insertions down the rightmost path of the tree. { - // No guarantee that we're the writer, but oh well. - // TODO(leif): Implement "do I have the lock or is it someone else?" - // check in frwlock. Should be possible with TOKU_PTHREAD_DEBUG, nop - // otherwise. - invariant(toku_ctpair_is_write_locked(node->ct_pair)); - toku_assert_entire_node_in_memory(node); + int r = -1; - // Take the newer of the two oldest referenced xid values from the node and gc_info. - // The gc_info usually has a newer value, because we got it at the top of this call - // stack from the txn manager. But sometimes the node has a newer value, if some - // other thread sees a newer value and writes to this node before we got the lock. - if (gc_info->oldest_referenced_xid_for_implicit_promotion > node->oldest_referenced_xid_known) { - node->oldest_referenced_xid_known = gc_info->oldest_referenced_xid_for_implicit_promotion; - } else if (gc_info->oldest_referenced_xid_for_implicit_promotion < node->oldest_referenced_xid_known) { - gc_info->oldest_referenced_xid_for_implicit_promotion = node->oldest_referenced_xid_known; - } + uint32_t rightmost_fullhash; + BLOCKNUM rightmost_blocknum = ft->rightmost_blocknum; + FTNODE rightmost_leaf = nullptr; - // Get the MSN from the header. Now that we have a write lock on the - // node we're injecting into, we know no other thread will get an MSN - // after us and get that message into our subtree before us. 
- msg->msn.msn = toku_sync_add_and_fetch(&ft->h->max_msn_in_ft.msn, 1); - paranoid_invariant(msg->msn.msn > node->max_msn_applied_to_node_on_disk.msn); - STAT64INFO_S stats_delta = {0,0}; - toku_ft_node_put_msg( - ft->compare_fun, - ft->update_fun, - &ft->cmp_descriptor, - node, - childnum, - msg, - true, - gc_info, - flow_deltas, - &stats_delta - ); - if (stats_delta.numbytes || stats_delta.numrows) { - toku_ft_update_stats(&ft->in_memory_stats, stats_delta); + // Don't do the optimization if our heurstic suggests that + // insertion pattern is not sequential. + if (ft->seqinsert_score < FT_SEQINSERT_SCORE_THRESHOLD) { + goto cleanup; } - // - // assumption is that toku_ft_node_put_msg will - // mark the node as dirty. - // enforcing invariant here. - // - paranoid_invariant(node->dirty != 0); - // TODO: Why not at height 0? - // update some status variables - if (node->height != 0) { - uint64_t msgsize = ft_msg_size(msg); - STATUS_INC(FT_MSG_BYTES_IN, msgsize); - STATUS_INC(FT_MSG_BYTES_CURR, msgsize); - STATUS_INC(FT_MSG_NUM, 1); - if (ft_msg_applies_all(msg)) { - STATUS_INC(FT_MSG_NUM_BROADCAST, 1); - } - } + // We know the seqinsert score is high enough that we should + // attemp to directly insert into the right most leaf. Because + // the score is non-zero, the rightmost blocknum must have been + // set. See inject_message_in_locked_node(), which only increases + // the score if the target node blocknum == rightmost_blocknum + invariant(rightmost_blocknum.b != RESERVED_BLOCKNUM_NULL); - // verify that msn of latest message was captured in root node - paranoid_invariant(msg->msn.msn == node->max_msn_applied_to_node_on_disk.msn); + // Pin the rightmost leaf with a write lock. 
+ rightmost_fullhash = toku_cachetable_hash(ft->cf, rightmost_blocknum); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + toku_pin_ftnode(ft, rightmost_blocknum, rightmost_fullhash, &bfe, PL_WRITE_CHEAP, &rightmost_leaf, true); - if (node->thisnodename.b == ft->rightmost_blocknum.b) { - if (ft->seqinsert_score < FT_SEQINSERT_SCORE_THRESHOLD) { - // we promoted to the rightmost leaf node and the seqinsert score has not yet saturated. - toku_sync_fetch_and_add(&ft->seqinsert_score, 1); - } - } else if (ft->seqinsert_score != 0) { - // we promoted to something other than the rightmost leaf node and the score should reset - ft->seqinsert_score = 0; - } - - // if we call toku_ft_flush_some_child, then that function unpins the root - // otherwise, we unpin ourselves - if (node->height > 0 && toku_ft_nonleaf_is_gorged(node, ft->h->nodesize)) { - toku_ft_flush_node_on_background_thread(ft, node); - } - else { - toku_unpin_ftnode(ft, node); - } -} - -// seqinsert_loc is a bitmask. -// The root counts as being both on the "left extreme" and on the "right extreme". -// Therefore, at the root, you're at LEFT_EXTREME | RIGHT_EXTREME. -typedef char seqinsert_loc; -static const seqinsert_loc NEITHER_EXTREME = 0; -static const seqinsert_loc LEFT_EXTREME = 1; -static const seqinsert_loc RIGHT_EXTREME = 2; - -static bool process_maybe_reactive_child(FT ft, FTNODE parent, FTNODE child, int childnum, seqinsert_loc loc) -// Effect: -// If child needs to be split or merged, do that. -// parent and child will be unlocked if this happens -// Requires: parent and child are read locked -// Returns: -// true if relocking is needed -// false otherwise -{ - enum reactivity re = get_node_reactivity(ft, child); - enum reactivity newre; - BLOCKNUM child_blocknum; - uint32_t child_fullhash; - switch (re) { - case RE_STABLE: - return false; - case RE_FISSIBLE: - { - // We only have a read lock on the parent. We need to drop both locks, and get write locks. 
- BLOCKNUM parent_blocknum = parent->thisnodename; - uint32_t parent_fullhash = toku_cachetable_hash(ft->cf, parent_blocknum); - int parent_height = parent->height; - int parent_n_children = parent->n_children; - toku_unpin_ftnode_read_only(ft, child); - toku_unpin_ftnode_read_only(ft, parent); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); - FTNODE newparent, newchild; - toku_pin_ftnode(ft, parent_blocknum, parent_fullhash, &bfe, PL_WRITE_CHEAP, &newparent, true); - if (newparent->height != parent_height || newparent->n_children != parent_n_children || - childnum >= newparent->n_children || toku_bnc_n_entries(BNC(newparent, childnum))) { - // If the height changed or childnum is now off the end, something clearly got split or merged out from under us. - // If something got injected in this node, then it got split or merged and we shouldn't be splitting it. - // But we already unpinned the child so we need to have the caller re-try the pins. - toku_unpin_ftnode_read_only(ft, newparent); - return true; - } - // It's ok to reuse the same childnum because if we get something - // else we need to split, well, that's crazy, but let's go ahead - // and split it. 
- child_blocknum = BP_BLOCKNUM(newparent, childnum); - child_fullhash = compute_child_fullhash(ft->cf, newparent, childnum); - toku_pin_ftnode_with_dep_nodes(ft, child_blocknum, child_fullhash, &bfe, PL_WRITE_CHEAP, 1, &newparent, &newchild, true); - newre = get_node_reactivity(ft, newchild); - if (newre == RE_FISSIBLE) { - enum split_mode split_mode; - if (newparent->height == 1 && (loc & LEFT_EXTREME) && childnum == 0) { - split_mode = SPLIT_RIGHT_HEAVY; - } else if (newparent->height == 1 && (loc & RIGHT_EXTREME) && childnum == newparent->n_children - 1) { - split_mode = SPLIT_LEFT_HEAVY; - } else { - split_mode = SPLIT_EVENLY; - } - toku_ft_split_child(ft, newparent, childnum, newchild, split_mode); - } else { - // some other thread already got it, just unpin and tell the - // caller to retry - toku_unpin_ftnode_read_only(ft, newchild); - toku_unpin_ftnode_read_only(ft, newparent); - } - return true; - } - case RE_FUSIBLE: - { - if (parent->height == 1) { - // prevent re-merging of recently unevenly-split nodes - if (((loc & LEFT_EXTREME) && childnum <= 1) || - ((loc & RIGHT_EXTREME) && childnum >= parent->n_children - 2)) { - return false; - } - } - - int parent_height = parent->height; - BLOCKNUM parent_blocknum = parent->thisnodename; - uint32_t parent_fullhash = toku_cachetable_hash(ft->cf, parent_blocknum); - toku_unpin_ftnode_read_only(ft, child); - toku_unpin_ftnode_read_only(ft, parent); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); - FTNODE newparent, newchild; - toku_pin_ftnode(ft, parent_blocknum, parent_fullhash, &bfe, PL_WRITE_CHEAP, &newparent, true); - if (newparent->height != parent_height || childnum >= newparent->n_children) { - // looks like this is the root and it got merged, let's just start over (like in the split case above) - toku_unpin_ftnode_read_only(ft, newparent); - return true; - } - child_blocknum = BP_BLOCKNUM(newparent, childnum); - child_fullhash = compute_child_fullhash(ft->cf, newparent, childnum); - 
toku_pin_ftnode_with_dep_nodes(ft, child_blocknum, child_fullhash, &bfe, PL_READ, 1, &newparent, &newchild, true); - newre = get_node_reactivity(ft, newchild); - if (newre == RE_FUSIBLE && newparent->n_children >= 2) { - toku_unpin_ftnode_read_only(ft, newchild); - toku_ft_merge_child(ft, newparent, childnum); - } else { - // Could be a weird case where newparent has only one - // child. In this case, we want to inject here but we've - // already unpinned the caller's copy of parent so we have - // to ask them to re-pin, or they could (very rarely) - // dereferenced memory in a freed node. TODO: we could - // give them back the copy of the parent we pinned. - // - // Otherwise, some other thread already got it, just unpin - // and tell the caller to retry - toku_unpin_ftnode_read_only(ft, newchild); - toku_unpin_ftnode_read_only(ft, newparent); - } - return true; - } - } - abort(); -} - -static void inject_message_at_this_blocknum(FT ft, CACHEKEY cachekey, uint32_t fullhash, FT_MSG_S *msg, size_t flow_deltas[], txn_gc_info *gc_info) -// Effect: -// Inject message into the node at this blocknum (cachekey). -// Gets a write lock on the node for you. 
-{ - toku::context inject_ctx(CTX_MESSAGE_INJECTION); - FTNODE node; - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); - toku_pin_ftnode(ft, cachekey, fullhash, &bfe, PL_WRITE_CHEAP, &node, true); - toku_assert_entire_node_in_memory(node); - paranoid_invariant(node->fullhash==fullhash); - ft_verify_flags(ft, node); - inject_message_in_locked_node(ft, node, -1, msg, flow_deltas, gc_info); -} - -__attribute__((const)) -static inline bool should_inject_in_node(seqinsert_loc loc, int height, int depth) -// We should inject directly in a node if: -// - it's a leaf, or -// - it's a height 1 node not at either extreme, or -// - it's a depth 2 node not at either extreme -{ - return (height == 0 || (loc == NEITHER_EXTREME && (height <= 1 || depth >= 2))); -} - -static void ft_set_or_verify_rightmost_blocknum(FT ft, BLOCKNUM b) -// Given: 'b', the _definitive_ and constant rightmost blocknum of 'ft' -{ - if (ft->rightmost_blocknum.b == RESERVED_BLOCKNUM_NULL) { - toku_ft_lock(ft); - if (ft->rightmost_blocknum.b == RESERVED_BLOCKNUM_NULL) { - ft->rightmost_blocknum = b; - } - toku_ft_unlock(ft); - } - // The rightmost blocknum only transitions from RESERVED_BLOCKNUM_NULL to non-null. - // If it's already set, verify that the stored value is consistent with 'b' - invariant(ft->rightmost_blocknum.b == b.b); -} - -static void push_something_in_subtree( - FT ft, - FTNODE subtree_root, - int target_childnum, - FT_MSG_S *msg, - size_t flow_deltas[], - txn_gc_info *gc_info, - int depth, - seqinsert_loc loc, - bool just_did_split_or_merge - ) -// Effects: -// Assign message an MSN from ft->h. -// Put message in the subtree rooted at node. Due to promotion the message may not be injected directly in this node. -// Unlock node or schedule it to be unlocked (after a background flush). -// Either way, the caller is not responsible for unlocking node. -// Requires: -// subtree_root is read locked and fully in memory. 
-// Notes: -// In Ming, the basic rules of promotion are as follows: -// Don't promote broadcast messages. -// Don't promote past non-empty buffers. -// Otherwise, promote at most to height 1 or depth 2 (whichever is highest), as far as the birdie asks you to promote. -// We don't promote to leaves because injecting into leaves is expensive, mostly because of #5605 and some of #5552. -// We don't promote past depth 2 because we found that gives us enough parallelism without costing us too much pinning work. -// -// This is true with the following caveats: -// We always promote all the way to the leaves on the rightmost and leftmost edges of the tree, for sequential insertions. -// (That means we can promote past depth 2 near the edges of the tree.) -// -// When the birdie is still saying we should promote, we use get_and_pin so that we wait to get the node. -// If the birdie doesn't say to promote, we try maybe_get_and_pin. If we get the node cheaply, and it's dirty, we promote anyway. -{ - toku_assert_entire_node_in_memory(subtree_root); - if (should_inject_in_node(loc, subtree_root->height, depth)) { - switch (depth) { - case 0: - STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_0, 1); break; - case 1: - STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_1, 1); break; - case 2: - STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_2, 1); break; - case 3: - STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_3, 1); break; - default: - STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_GT3, 1); break; - } - // If the target node is a non-root leaf node on the right extreme, - // set the rightmost blocknum. We know there are no messages above us - // because promotion would not chose to inject directly into this leaf - // otherwise. We explicitly skip the root node because then we don't have - // to worry about changing the rightmost blocknum when the root splits. 
- if (subtree_root->height == 0 && loc == RIGHT_EXTREME && subtree_root->thisnodename.b != ft->h->root_blocknum.b) { - ft_set_or_verify_rightmost_blocknum(ft, subtree_root->thisnodename); - } - inject_message_in_locked_node(ft, subtree_root, target_childnum, msg, flow_deltas, gc_info); - } else { - int r; - int childnum; - NONLEAF_CHILDINFO bnc; - - // toku_ft_root_put_msg should not have called us otherwise. - paranoid_invariant(ft_msg_applies_once(msg)); - - childnum = (target_childnum >= 0 ? target_childnum - : toku_ftnode_which_child(subtree_root, msg->u.id.key, &ft->cmp_descriptor, ft->compare_fun)); - bnc = BNC(subtree_root, childnum); - - if (toku_bnc_n_entries(bnc) > 0) { - // The buffer is non-empty, give up on promoting. - STATUS_INC(FT_PRO_NUM_STOP_NONEMPTY_BUF, 1); - goto relock_and_push_here; - } - - seqinsert_loc next_loc; - if ((loc & LEFT_EXTREME) && childnum == 0) { - next_loc = LEFT_EXTREME; - } else if ((loc & RIGHT_EXTREME) && childnum == subtree_root->n_children - 1) { - next_loc = RIGHT_EXTREME; - } else { - next_loc = NEITHER_EXTREME; - } - - if (next_loc == NEITHER_EXTREME && subtree_root->height <= 1) { - // Never promote to leaf nodes except on the edges - STATUS_INC(FT_PRO_NUM_STOP_H1, 1); - goto relock_and_push_here; - } - - { - const BLOCKNUM child_blocknum = BP_BLOCKNUM(subtree_root, childnum); - toku_verify_blocknum_allocated(ft->blocktable, child_blocknum); - const uint32_t child_fullhash = toku_cachetable_hash(ft->cf, child_blocknum); - - FTNODE child; - { - const int child_height = subtree_root->height - 1; - const int child_depth = depth + 1; - // If we're locking a leaf, or a height 1 node or depth 2 - // node in the middle, we know we won't promote further - // than that, so just get a write lock now. - const pair_lock_type lock_type = (should_inject_in_node(next_loc, child_height, child_depth) - ? 
PL_WRITE_CHEAP - : PL_READ); - if (next_loc != NEITHER_EXTREME || (toku_bnc_should_promote(ft, bnc) && depth <= 1)) { - // If we're on either extreme, or the birdie wants to - // promote and we're in the top two levels of the - // tree, don't stop just because someone else has the - // node locked. - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); - if (lock_type == PL_WRITE_CHEAP) { - // We intend to take the write lock for message injection - toku::context inject_ctx(CTX_MESSAGE_INJECTION); - toku_pin_ftnode(ft, child_blocknum, child_fullhash, &bfe, lock_type, &child, true); - } else { - // We're going to keep promoting - toku::context promo_ctx(CTX_PROMO); - toku_pin_ftnode(ft, child_blocknum, child_fullhash, &bfe, lock_type, &child, true); - } - } else { - r = toku_maybe_pin_ftnode_clean(ft, child_blocknum, child_fullhash, lock_type, &child); - if (r != 0) { - // We couldn't get the child cheaply, so give up on promoting. - STATUS_INC(FT_PRO_NUM_STOP_LOCK_CHILD, 1); - goto relock_and_push_here; - } - if (is_entire_node_in_memory(child)) { - // toku_pin_ftnode... touches the clock but toku_maybe_pin_ftnode... doesn't. - // This prevents partial eviction. - for (int i = 0; i < child->n_children; ++i) { - BP_TOUCH_CLOCK(child, i); - } - } else { - // We got the child, but it's not fully in memory. Give up on promoting. - STATUS_INC(FT_PRO_NUM_STOP_CHILD_INMEM, 1); - goto unlock_child_and_push_here; - } - } - } - paranoid_invariant_notnull(child); - - if (!just_did_split_or_merge) { - BLOCKNUM subtree_root_blocknum = subtree_root->thisnodename; - uint32_t subtree_root_fullhash = toku_cachetable_hash(ft->cf, subtree_root_blocknum); - const bool did_split_or_merge = process_maybe_reactive_child(ft, subtree_root, child, childnum, loc); - if (did_split_or_merge) { - // Need to re-pin this node and try at this level again. 
- FTNODE newparent; - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); // should be fully in memory, we just split it - toku_pin_ftnode(ft, subtree_root_blocknum, subtree_root_fullhash, &bfe, PL_READ, &newparent, true); - push_something_in_subtree(ft, newparent, -1, msg, flow_deltas, gc_info, depth, loc, true); - return; - } - } - - if (next_loc != NEITHER_EXTREME || child->dirty || toku_bnc_should_promote(ft, bnc)) { - push_something_in_subtree(ft, child, -1, msg, flow_deltas, gc_info, depth + 1, next_loc, false); - toku_sync_fetch_and_add(&bnc->flow[0], flow_deltas[0]); - // The recursive call unpinned the child, but - // we're responsible for unpinning subtree_root. - toku_unpin_ftnode_read_only(ft, subtree_root); - return; - } - - STATUS_INC(FT_PRO_NUM_DIDNT_WANT_PROMOTE, 1); - unlock_child_and_push_here: - // We locked the child, but we decided not to promote. - // Unlock the child, and fall through to the next case. - toku_unpin_ftnode_read_only(ft, child); - } - relock_and_push_here: - // Give up on promoting. - // We have subtree_root read-locked and we don't have a child locked. - // Drop the read lock, grab a write lock, and inject here. - { - // Right now we have a read lock on subtree_root, but we want - // to inject into it so we get a write lock instead. 
- BLOCKNUM subtree_root_blocknum = subtree_root->thisnodename; - uint32_t subtree_root_fullhash = toku_cachetable_hash(ft->cf, subtree_root_blocknum); - toku_unpin_ftnode_read_only(ft, subtree_root); - switch (depth) { - case 0: - STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_0, 1); break; - case 1: - STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_1, 1); break; - case 2: - STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_2, 1); break; - case 3: - STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_3, 1); break; - default: - STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_GT3, 1); break; - } - inject_message_at_this_blocknum(ft, subtree_root_blocknum, subtree_root_fullhash, msg, flow_deltas, gc_info); - } - } -} - -void toku_ft_root_put_msg( - FT ft, - FT_MSG_S *msg, - txn_gc_info *gc_info - ) -// Effect: -// - assign msn to message and update msn in the header -// - push the message into the ft - -// As of Clayface, the root blocknum is a constant, so preventing a -// race between message injection and the split of a root is the job -// of the cachetable's locking rules. -// -// We also hold the MO lock for a number of reasons, but an important -// one is to make sure that a begin_checkpoint may not start while -// this code is executing. A begin_checkpoint does (at least) two things -// that can interfere with the operations here: -// - Copies the header to a checkpoint header. Because we may change -// the max_msn_in_ft below, we don't want the header to be copied in -// the middle of these operations. -// - Takes note of the log's LSN. Because this put operation has -// already been logged, this message injection must be included -// in any checkpoint that contains this put's logentry. -// Holding the mo lock throughout this function ensures that fact. -{ - toku::context promo_ctx(CTX_PROMO); - - // blackhole fractal trees drop all messages, so do nothing. 
- if (ft->blackhole) { - return; - } - - FTNODE node; - - uint32_t fullhash; - CACHEKEY root_key; - toku_calculate_root_offset_pointer(ft, &root_key, &fullhash); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); - - size_t flow_deltas[] = { toku_ft_msg_memsize_in_fifo(msg), 0 }; - - pair_lock_type lock_type; - lock_type = PL_READ; // try first for a read lock - // If we need to split the root, we'll have to change from a read lock - // to a write lock and check again. We change the variable lock_type - // and jump back to here. - change_lock_type: - // get the root node - toku_pin_ftnode(ft, root_key, fullhash, &bfe, lock_type, &node, true); - toku_assert_entire_node_in_memory(node); - paranoid_invariant(node->fullhash==fullhash); - ft_verify_flags(ft, node); - - // First handle a reactive root. - // This relocking for split algorithm will cause every message - // injection thread to change lock type back and forth, when only one - // of them needs to in order to handle the split. That's not great, - // but root splits are incredibly rare. - enum reactivity re = get_node_reactivity(ft, node); - switch (re) { - case RE_STABLE: - case RE_FUSIBLE: // cannot merge anything at the root - if (lock_type != PL_READ) { - // We thought we needed to split, but someone else got to - // it before us. Downgrade to a read lock. - toku_unpin_ftnode_read_only(ft, node); - lock_type = PL_READ; - goto change_lock_type; - } - break; - case RE_FISSIBLE: - if (lock_type == PL_READ) { - // Here, we only have a read lock on the root. In order - // to split it, we need a write lock, but in the course of - // gaining the write lock, someone else may have gotten in - // before us and split it. So we upgrade to a write lock - // and check again. - toku_unpin_ftnode_read_only(ft, node); - lock_type = PL_WRITE_CHEAP; - goto change_lock_type; - } else { - // We have a write lock, now we can split. 
- ft_init_new_root(ft, node, &node); - // Then downgrade back to a read lock, and we can finally - // do the injection. - toku_unpin_ftnode(ft, node); - lock_type = PL_READ; - STATUS_INC(FT_PRO_NUM_ROOT_SPLIT, 1); - goto change_lock_type; - } - break; - } - // If we get to here, we have a read lock and the root doesn't - // need to be split. It's safe to inject the message. - paranoid_invariant(lock_type == PL_READ); - // We cannot assert that we have the read lock because frwlock asserts - // that its mutex is locked when we check if there are any readers. - // That wouldn't give us a strong guarantee that we have the read lock - // anyway. - - // Now, either inject here or promote. We decide based on a heuristic: - if (node->height == 0 || !ft_msg_applies_once(msg)) { - // If the root's a leaf or we're injecting a broadcast, drop the read lock and inject here. - toku_unpin_ftnode_read_only(ft, node); - STATUS_INC(FT_PRO_NUM_ROOT_H0_INJECT, 1); - inject_message_at_this_blocknum(ft, root_key, fullhash, msg, flow_deltas, gc_info); - } else if (node->height > 1) { - // If the root's above height 1, we are definitely eligible for promotion. - push_something_in_subtree(ft, node, -1, msg, flow_deltas, gc_info, 0, LEFT_EXTREME | RIGHT_EXTREME, false); - } else { - // The root's height 1. We may be eligible for promotion here. - // On the extremes, we want to promote, in the middle, we don't. - int childnum = toku_ftnode_which_child(node, msg->u.id.key, &ft->cmp_descriptor, ft->compare_fun); - if (childnum == 0 || childnum == node->n_children - 1) { - // On the extremes, promote. We know which childnum we're going to, so pass that down too. - push_something_in_subtree(ft, node, childnum, msg, flow_deltas, gc_info, 0, LEFT_EXTREME | RIGHT_EXTREME, false); - } else { - // At height 1 in the middle, don't promote, drop the read lock and inject here. 
- toku_unpin_ftnode_read_only(ft, node); - STATUS_INC(FT_PRO_NUM_ROOT_H1_INJECT, 1); - inject_message_at_this_blocknum(ft, root_key, fullhash, msg, flow_deltas, gc_info); - } - } -} - -static int ft_compare_keys(FT ft, const DBT *a, const DBT *b) -// Effect: Compare two keys using the given fractal tree's comparator/descriptor -{ - FAKE_DB(db, &ft->cmp_descriptor); - return ft->compare_fun(&db, a, b); -} - -static LEAFENTRY bn_get_le_and_key(BASEMENTNODE bn, int idx, DBT *key) -// Effect: Gets the i'th leafentry from the given basement node and -// fill its key in *key -// Requires: The i'th leafentry exists. -{ - LEAFENTRY le; - uint32_t le_len; - void *le_key; - int r = bn->data_buffer.fetch_klpair(idx, &le, &le_len, &le_key); - invariant_zero(r); - toku_fill_dbt(key, le_key, le_len); - return le; -} - -static LEAFENTRY ft_leaf_leftmost_le_and_key(FTNODE leaf, DBT *leftmost_key) -// Effect: If a leftmost key exists in the given leaf, toku_fill_dbt() -// the key into *leftmost_key -// Requires: Leaf is fully in memory and pinned for read or write. -// Return: leafentry if it exists, nullptr otherwise -{ - for (int i = 0; i < leaf->n_children; i++) { - BASEMENTNODE bn = BLB(leaf, i); - if (bn->data_buffer.num_klpairs() > 0) { - // Get the first (leftmost) leafentry and its key - return bn_get_le_and_key(bn, 0, leftmost_key); - } - } - return nullptr; -} - -static LEAFENTRY ft_leaf_rightmost_le_and_key(FTNODE leaf, DBT *rightmost_key) -// Effect: If a rightmost key exists in the given leaf, toku_fill_dbt() -// the key into *rightmost_key -// Requires: Leaf is fully in memory and pinned for read or write. 
-// Return: leafentry if it exists, nullptr otherwise -{ - for (int i = leaf->n_children - 1; i >= 0; i--) { - BASEMENTNODE bn = BLB(leaf, i); - size_t num_les = bn->data_buffer.num_klpairs(); - if (num_les > 0) { - // Get the last (rightmost) leafentry and its key - return bn_get_le_and_key(bn, num_les - 1, rightmost_key); - } - } - return nullptr; -} - -static int ft_leaf_get_relative_key_pos(FT ft, FTNODE leaf, const DBT *key, bool *nondeleted_key_found, int *target_childnum) -// Effect: Determines what the relative position of the given key is with -// respect to a leaf node, and if it exists. -// Requires: Leaf is fully in memory and pinned for read or write. -// Requires: target_childnum is non-null -// Return: < 0 if key is less than the leftmost key in the leaf OR the relative position is unknown, for any reason. -// 0 if key is in the bounds [leftmost_key, rightmost_key] for this leaf or the leaf is empty -// > 0 if key is greater than the rightmost key in the leaf -// *nondeleted_key_found is set (if non-null) if the target key was found and is not deleted, unmodified otherwise -// *target_childnum is set to the child that (does or would) contain the key, if calculated, unmodified otherwise -{ - DBT rightmost_key; - LEAFENTRY rightmost_le = ft_leaf_rightmost_le_and_key(leaf, &rightmost_key); - if (rightmost_le == nullptr) { - // If we can't get a rightmost key then the leaf is empty. - // In such a case, we don't have any information about what keys would be in this leaf. - // We have to assume the leaf node that would contain this key is to the left. 
- return -1; - } - // We have a rightmost leafentry, so it must exist in some child node - invariant(leaf->n_children > 0); - - int relative_pos = 0; - int c = ft_compare_keys(ft, key, &rightmost_key); - if (c > 0) { - relative_pos = 1; - *target_childnum = leaf->n_children - 1; - } else if (c == 0) { - if (nondeleted_key_found != nullptr && !le_latest_is_del(rightmost_le)) { - *nondeleted_key_found = true; - } - relative_pos = 0; - *target_childnum = leaf->n_children - 1; - } else { - // The key is less than the rightmost. It may still be in bounds if it's >= the leftmost. - DBT leftmost_key; - LEAFENTRY leftmost_le = ft_leaf_leftmost_le_and_key(leaf, &leftmost_key); - invariant_notnull(leftmost_le); // Must exist because a rightmost exists - c = ft_compare_keys(ft, key, &leftmost_key); - if (c > 0) { - if (nondeleted_key_found != nullptr) { - // The caller wants to know if a nondeleted key can be found. - LEAFENTRY target_le; - int childnum = toku_ftnode_which_child(leaf, key, &ft->cmp_descriptor, ft->compare_fun); - BASEMENTNODE bn = BLB(leaf, childnum); - struct msg_leafval_heaviside_extra extra = { ft->compare_fun, &ft->cmp_descriptor, key }; - int r = bn->data_buffer.find_zero( - extra, - &target_le, - nullptr, nullptr, nullptr - ); - *target_childnum = childnum; - if (r == 0 && !le_latest_is_del(leftmost_le)) { - *nondeleted_key_found = true; - } - } - relative_pos = 0; - } else if (c == 0) { - if (nondeleted_key_found != nullptr && !le_latest_is_del(leftmost_le)) { - *nondeleted_key_found = true; - } - relative_pos = 0; - *target_childnum = 0; - } else { - relative_pos = -1; - } - } - - return relative_pos; -} - -static void ft_insert_directly_into_leaf(FT ft, FTNODE leaf, int target_childnum, DBT *key, DBT *val, - XIDS message_xids, enum ft_msg_type type, txn_gc_info *gc_info); -static int getf_nothing(ITEMLEN, bytevec, ITEMLEN, bytevec, void *, bool); - -static int ft_maybe_insert_into_rightmost_leaf(FT ft, DBT *key, DBT *val, XIDS message_xids, enum 
ft_msg_type type, - txn_gc_info *gc_info, bool unique) -// Effect: Pins the rightmost leaf node and attempts to do an insert. -// There are three reasons why we may not succeed. -// - The rightmost leaf is too full and needs a split. -// - The key to insert is not within the provable bounds of this leaf node. -// - The key is within bounds, but it already exists. -// Return: 0 if this function did insert, DB_KEYEXIST if a unique key constraint exists and -// some nondeleted leafentry with the same key exists -// < 0 if this function did not insert, for a reason other than DB_KEYEXIST. -// Note: Treat this function as a possible, but not necessary, optimization for insert. -// Rationale: We want O(1) insertions down the rightmost path of the tree. -{ - int r = -1; - - uint32_t rightmost_fullhash; - BLOCKNUM rightmost_blocknum = ft->rightmost_blocknum; - FTNODE rightmost_leaf = nullptr; - - // Don't do the optimization if our heurstic suggests that - // insertion pattern is not sequential. - if (ft->seqinsert_score < FT_SEQINSERT_SCORE_THRESHOLD) { - goto cleanup; - } - - // We know the seqinsert score is high enough that we should - // attemp to directly insert into the right most leaf. Because - // the score is non-zero, the rightmost blocknum must have been - // set. See inject_message_in_locked_node(), which only increases - // the score if the target node blocknum == rightmost_blocknum - invariant(rightmost_blocknum.b != RESERVED_BLOCKNUM_NULL); - - // Pin the rightmost leaf with a write lock. - rightmost_fullhash = toku_cachetable_hash(ft->cf, rightmost_blocknum); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); - toku_pin_ftnode(ft, rightmost_blocknum, rightmost_fullhash, &bfe, PL_WRITE_CHEAP, &rightmost_leaf, true); - - // The rightmost blocknum never chances once it is initialized to something - // other than null. Verify that the pinned node has the correct blocknum. 
- invariant(rightmost_leaf->thisnodename.b == rightmost_blocknum.b); - - // If the rightmost leaf is reactive, bail out out and let the normal promotion pass - // take care of it. This also ensures that if any of our ancestors are reactive, - // they'll be taken care of too. - if (get_leaf_reactivity(rightmost_leaf, ft->h->nodesize) != RE_STABLE) { - STATUS_INC(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_REACTIVE, 1); - goto cleanup; - } - - // The groundwork has been laid for an insertion directly into the rightmost - // leaf node. We know that it is pinned for write, fully in memory, has - // no messages above it, and is not reactive. - // - // Now, two more things must be true for this insertion to actually happen: - // 1. The key to insert is within the bounds of this leafnode, or to the right. - // 2. If there is a uniqueness constraint, it passes. - bool nondeleted_key_found; - int relative_pos; - int target_childnum; - - nondeleted_key_found = false; - target_childnum = -1; - relative_pos = ft_leaf_get_relative_key_pos(ft, rightmost_leaf, key, - unique ? &nondeleted_key_found : nullptr, - &target_childnum); - if (relative_pos >= 0) { - STATUS_INC(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_SUCCESS, 1); - if (unique && nondeleted_key_found) { - r = DB_KEYEXIST; - } else { - ft_insert_directly_into_leaf(ft, rightmost_leaf, target_childnum, - key, val, message_xids, type, gc_info); - r = 0; - } - } else { - STATUS_INC(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_POS, 1); - r = -1; - } - -cleanup: - // If we did the insert, the rightmost leaf was unpinned for us. - if (r != 0 && rightmost_leaf != nullptr) { - toku_unpin_ftnode(ft, rightmost_leaf); - } - - return r; -} - -static void ft_txn_log_insert(FT ft, DBT *key, DBT *val, TOKUTXN txn, bool do_logging, enum ft_msg_type type); - -int toku_ft_insert_unique(FT_HANDLE ft_h, DBT *key, DBT *val, TOKUTXN txn, bool do_logging) { -// Effect: Insert a unique key-val pair into the fractal tree. 
-// Return: 0 on success, DB_KEYEXIST if the overwrite constraint failed - XIDS message_xids = txn != nullptr ? toku_txn_get_xids(txn) : xids_get_root_xids(); - - TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); - txn_manager_state txn_state_for_gc(txn_manager); - - TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h); - txn_gc_info gc_info(&txn_state_for_gc, - oldest_referenced_xid_estimate, - // no messages above us, we can implicitly promote uxrs based on this xid - oldest_referenced_xid_estimate, - true); - int r = ft_maybe_insert_into_rightmost_leaf(ft_h->ft, key, val, message_xids, FT_INSERT, &gc_info, true); - if (r != 0 && r != DB_KEYEXIST) { - // Default to a regular unique check + insert algorithm if we couldn't - // do it based on the rightmost leaf alone. - int lookup_r = toku_ft_lookup(ft_h, key, getf_nothing, nullptr); - if (lookup_r == DB_NOTFOUND) { - toku_ft_send_insert(ft_h, key, val, message_xids, FT_INSERT, &gc_info); - r = 0; - } else { - r = DB_KEYEXIST; - } - } - - if (r == 0) { - ft_txn_log_insert(ft_h->ft, key, val, txn, do_logging, FT_INSERT); - } - return r; -} - -// Effect: Insert the key-val pair into an ft. 
-void toku_ft_insert (FT_HANDLE ft_handle, DBT *key, DBT *val, TOKUTXN txn) { - toku_ft_maybe_insert(ft_handle, key, val, txn, false, ZERO_LSN, true, FT_INSERT); -} - -void toku_ft_load_recovery(TOKUTXN txn, FILENUM old_filenum, char const * new_iname, int do_fsync, int do_log, LSN *load_lsn) { - paranoid_invariant(txn); - toku_txn_force_fsync_on_commit(txn); //If the txn commits, the commit MUST be in the log - //before the (old) file is actually unlinked - TOKULOGGER logger = toku_txn_logger(txn); - - BYTESTRING new_iname_bs = {.len=(uint32_t) strlen(new_iname), .data=(char*)new_iname}; - toku_logger_save_rollback_load(txn, old_filenum, &new_iname_bs); - if (do_log && logger) { - TXNID_PAIR xid = toku_txn_get_txnid(txn); - toku_log_load(logger, load_lsn, do_fsync, txn, xid, old_filenum, new_iname_bs); - } -} - -// 2954 -// this function handles the tasks needed to be recoverable -// - write to rollback log -// - write to recovery log -void toku_ft_hot_index_recovery(TOKUTXN txn, FILENUMS filenums, int do_fsync, int do_log, LSN *hot_index_lsn) -{ - paranoid_invariant(txn); - TOKULOGGER logger = toku_txn_logger(txn); - - // write to the rollback log - toku_logger_save_rollback_hot_index(txn, &filenums); - if (do_log && logger) { - TXNID_PAIR xid = toku_txn_get_txnid(txn); - // write to the recovery log - toku_log_hot_index(logger, hot_index_lsn, do_fsync, txn, xid, filenums); - } -} - -// Effect: Optimize the ft. 
-void toku_ft_optimize (FT_HANDLE ft_h) { - TOKULOGGER logger = toku_cachefile_logger(ft_h->ft->cf); - if (logger) { - TXNID oldest = toku_txn_manager_get_oldest_living_xid(logger->txn_manager); - - XIDS root_xids = xids_get_root_xids(); - XIDS message_xids; - if (oldest == TXNID_NONE_LIVING) { - message_xids = root_xids; - } - else { - int r = xids_create_child(root_xids, &message_xids, oldest); - invariant(r == 0); - } - - DBT key; - DBT val; - toku_init_dbt(&key); - toku_init_dbt(&val); - FT_MSG_S ftmsg = { FT_OPTIMIZE, ZERO_MSN, message_xids, .u = { .id = {&key,&val} } }; - - TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); - txn_manager_state txn_state_for_gc(txn_manager); - - TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h); - txn_gc_info gc_info(&txn_state_for_gc, - oldest_referenced_xid_estimate, - // no messages above us, we can implicitly promote uxrs based on this xid - oldest_referenced_xid_estimate, - true); - toku_ft_root_put_msg(ft_h->ft, &ftmsg, &gc_info); - xids_destroy(&message_xids); - } -} - -void toku_ft_load(FT_HANDLE ft_handle, TOKUTXN txn, char const * new_iname, int do_fsync, LSN *load_lsn) { - FILENUM old_filenum = toku_cachefile_filenum(ft_handle->ft->cf); - int do_log = 1; - toku_ft_load_recovery(txn, old_filenum, new_iname, do_fsync, do_log, load_lsn); -} - -// ft actions for logging hot index filenums -void toku_ft_hot_index(FT_HANDLE ft_handle __attribute__ ((unused)), TOKUTXN txn, FILENUMS filenums, int do_fsync, LSN *lsn) { - int do_log = 1; - toku_ft_hot_index_recovery(txn, filenums, do_fsync, do_log, lsn); -} - -void -toku_ft_log_put (TOKUTXN txn, FT_HANDLE ft_handle, const DBT *key, const DBT *val) { - TOKULOGGER logger = toku_txn_logger(txn); - if (logger) { - BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; - BYTESTRING valbs = {.len=val->size, .data=(char *) val->data}; - TXNID_PAIR xid = toku_txn_get_txnid(txn); - toku_log_enq_insert(logger, (LSN*)0, 0, txn, 
toku_cachefile_filenum(ft_handle->ft->cf), xid, keybs, valbs); - } -} - -void -toku_ft_log_put_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *fts, uint32_t num_fts, const DBT *key, const DBT *val) { - assert(txn); - assert(num_fts > 0); - TOKULOGGER logger = toku_txn_logger(txn); - if (logger) { - FILENUM fnums[num_fts]; - uint32_t i; - for (i = 0; i < num_fts; i++) { - fnums[i] = toku_cachefile_filenum(fts[i]->ft->cf); - } - FILENUMS filenums = {.num = num_fts, .filenums = fnums}; - BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; - BYTESTRING valbs = {.len=val->size, .data=(char *) val->data}; - TXNID_PAIR xid = toku_txn_get_txnid(txn); - FILENUM src_filenum = src_ft ? toku_cachefile_filenum(src_ft->ft->cf) : FILENUM_NONE; - toku_log_enq_insert_multiple(logger, (LSN*)0, 0, txn, src_filenum, filenums, xid, keybs, valbs); - } -} - -TXN_MANAGER toku_ft_get_txn_manager(FT_HANDLE ft_h) { - TOKULOGGER logger = toku_cachefile_logger(ft_h->ft->cf); - return logger != nullptr ? toku_logger_get_txn_manager(logger) : nullptr; -} - -TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h) { - TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); - return txn_manager != nullptr ? 
toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager) : TXNID_NONE; -} - -static void ft_txn_log_insert(FT ft, DBT *key, DBT *val, TOKUTXN txn, bool do_logging, enum ft_msg_type type) { - paranoid_invariant(type == FT_INSERT || type == FT_INSERT_NO_OVERWRITE); - - //By default use committed messages - TXNID_PAIR xid = toku_txn_get_txnid(txn); - if (txn) { - BYTESTRING keybs = {key->size, (char *) key->data}; - toku_logger_save_rollback_cmdinsert(txn, toku_cachefile_filenum(ft->cf), &keybs); - toku_txn_maybe_note_ft(txn, ft); - } - TOKULOGGER logger = toku_txn_logger(txn); - if (do_logging && logger) { - BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; - BYTESTRING valbs = {.len=val->size, .data=(char *) val->data}; - if (type == FT_INSERT) { - toku_log_enq_insert(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(ft->cf), xid, keybs, valbs); - } - else { - toku_log_enq_insert_no_overwrite(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(ft->cf), xid, keybs, valbs); - } - } -} - -void toku_ft_maybe_insert (FT_HANDLE ft_h, DBT *key, DBT *val, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging, enum ft_msg_type type) { - ft_txn_log_insert(ft_h->ft, key, val, txn, do_logging, type); - - LSN treelsn; - if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { - // do nothing - } else { - XIDS message_xids = txn ? toku_txn_get_xids(txn) : xids_get_root_xids(); - - TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); - txn_manager_state txn_state_for_gc(txn_manager); - - TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h); - txn_gc_info gc_info(&txn_state_for_gc, - oldest_referenced_xid_estimate, - // no messages above us, we can implicitly promote uxrs based on this xid - oldest_referenced_xid_estimate, - txn != nullptr ? 
!txn->for_recovery : false); - int r = ft_maybe_insert_into_rightmost_leaf(ft_h->ft, key, val, message_xids, FT_INSERT, &gc_info, false); - if (r != 0) { - toku_ft_send_insert(ft_h, key, val, message_xids, type, &gc_info); - } - } -} - -static void ft_insert_directly_into_leaf(FT ft, FTNODE leaf, int target_childnum, DBT *key, DBT *val, - XIDS message_xids, enum ft_msg_type type, txn_gc_info *gc_info) -// Effect: Insert directly into a leaf node a fractal tree. Does not do any logging. -// Requires: Leaf is fully in memory and pinned for write. -// Requires: If this insertion were to happen through the root node, the promotion -// algorithm would have selected the given leaf node as the point of injection. -// That means this function relies on the current implementation of promotion. -{ - FT_MSG_S ftcmd = { type, ZERO_MSN, message_xids, .u = { .id = { key, val } } }; - size_t flow_deltas[] = { 0, 0 }; - inject_message_in_locked_node(ft, leaf, target_childnum, &ftcmd, flow_deltas, gc_info); -} - -static void -ft_send_update_msg(FT_HANDLE ft_h, FT_MSG_S *msg, TOKUTXN txn) { - msg->xids = (txn - ? toku_txn_get_xids(txn) - : xids_get_root_xids()); - - TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); - txn_manager_state txn_state_for_gc(txn_manager); - - TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h); - txn_gc_info gc_info(&txn_state_for_gc, - oldest_referenced_xid_estimate, - // no messages above us, we can implicitly promote uxrs based on this xid - oldest_referenced_xid_estimate, - txn != nullptr ? 
!txn->for_recovery : false); - toku_ft_root_put_msg(ft_h->ft, msg, &gc_info); -} - -void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_function_extra, - TOKUTXN txn, bool oplsn_valid, LSN oplsn, - bool do_logging) { - TXNID_PAIR xid = toku_txn_get_txnid(txn); - if (txn) { - BYTESTRING keybs = { key->size, (char *) key->data }; - toku_logger_save_rollback_cmdupdate( - txn, toku_cachefile_filenum(ft_h->ft->cf), &keybs); - toku_txn_maybe_note_ft(txn, ft_h->ft); - } - - TOKULOGGER logger; - logger = toku_txn_logger(txn); - if (do_logging && logger) { - BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; - BYTESTRING extrabs = {.len=update_function_extra->size, - .data = (char *) update_function_extra->data}; - toku_log_enq_update(logger, NULL, 0, txn, - toku_cachefile_filenum(ft_h->ft->cf), - xid, keybs, extrabs); - } - - LSN treelsn; - if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { - // do nothing - } else { - FT_MSG_S msg = { FT_UPDATE, ZERO_MSN, NULL, - .u = { .id = { key, update_function_extra } } }; - ft_send_update_msg(ft_h, &msg, txn); - } -} - -void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_extra, - TOKUTXN txn, bool oplsn_valid, LSN oplsn, - bool do_logging, bool is_resetting_op) { - TXNID_PAIR xid = toku_txn_get_txnid(txn); - uint8_t resetting = is_resetting_op ? 
1 : 0; - if (txn) { - toku_logger_save_rollback_cmdupdatebroadcast(txn, toku_cachefile_filenum(ft_h->ft->cf), resetting); - toku_txn_maybe_note_ft(txn, ft_h->ft); - } - - TOKULOGGER logger; - logger = toku_txn_logger(txn); - if (do_logging && logger) { - BYTESTRING extrabs = {.len=update_function_extra->size, - .data = (char *) update_function_extra->data}; - toku_log_enq_updatebroadcast(logger, NULL, 0, txn, - toku_cachefile_filenum(ft_h->ft->cf), - xid, extrabs, resetting); - } - - //TODO(yoni): remove treelsn here and similar calls (no longer being used) - LSN treelsn; - if (oplsn_valid && - oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { - - } else { - DBT nullkey; - const DBT *nullkeyp = toku_init_dbt(&nullkey); - FT_MSG_S msg = { FT_UPDATE_BROADCAST_ALL, ZERO_MSN, NULL, - .u = { .id = { nullkeyp, update_function_extra } } }; - ft_send_update_msg(ft_h, &msg, txn); - } -} - -void toku_ft_send_insert(FT_HANDLE ft_handle, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info) { - FT_MSG_S ftmsg = { type, ZERO_MSN, xids, .u = { .id = { key, val } } }; - toku_ft_root_put_msg(ft_handle->ft, &ftmsg, gc_info); -} - -void toku_ft_send_commit_any(FT_HANDLE ft_handle, DBT *key, XIDS xids, txn_gc_info *gc_info) { - DBT val; - FT_MSG_S ftmsg = { FT_COMMIT_ANY, ZERO_MSN, xids, .u = { .id = { key, toku_init_dbt(&val) } } }; - toku_ft_root_put_msg(ft_handle->ft, &ftmsg, gc_info); -} - -void toku_ft_delete(FT_HANDLE ft_handle, DBT *key, TOKUTXN txn) { - toku_ft_maybe_delete(ft_handle, key, txn, false, ZERO_LSN, true); -} - -void -toku_ft_log_del(TOKUTXN txn, FT_HANDLE ft_handle, const DBT *key) { - TOKULOGGER logger = toku_txn_logger(txn); - if (logger) { - BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; - TXNID_PAIR xid = toku_txn_get_txnid(txn); - toku_log_enq_delete_any(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(ft_handle->ft->cf), xid, keybs); - } -} - -void -toku_ft_log_del_multiple (TOKUTXN txn, FT_HANDLE src_ft, 
FT_HANDLE *fts, uint32_t num_fts, const DBT *key, const DBT *val) { - assert(txn); - assert(num_fts > 0); - TOKULOGGER logger = toku_txn_logger(txn); - if (logger) { - FILENUM fnums[num_fts]; - uint32_t i; - for (i = 0; i < num_fts; i++) { - fnums[i] = toku_cachefile_filenum(fts[i]->ft->cf); - } - FILENUMS filenums = {.num = num_fts, .filenums = fnums}; - BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; - BYTESTRING valbs = {.len=val->size, .data=(char *) val->data}; - TXNID_PAIR xid = toku_txn_get_txnid(txn); - FILENUM src_filenum = src_ft ? toku_cachefile_filenum(src_ft->ft->cf) : FILENUM_NONE; - toku_log_enq_delete_multiple(logger, (LSN*)0, 0, txn, src_filenum, filenums, xid, keybs, valbs); - } -} - -void toku_ft_maybe_delete(FT_HANDLE ft_h, DBT *key, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging) { - XIDS message_xids = xids_get_root_xids(); //By default use committed messages - TXNID_PAIR xid = toku_txn_get_txnid(txn); - if (txn) { - BYTESTRING keybs = {key->size, (char *) key->data}; - toku_logger_save_rollback_cmddelete(txn, toku_cachefile_filenum(ft_h->ft->cf), &keybs); - toku_txn_maybe_note_ft(txn, ft_h->ft); - message_xids = toku_txn_get_xids(txn); - } - TOKULOGGER logger = toku_txn_logger(txn); - if (do_logging && logger) { - BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; - toku_log_enq_delete_any(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(ft_h->ft->cf), xid, keybs); - } - - LSN treelsn; - if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { - // do nothing - } else { - TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); - txn_manager_state txn_state_for_gc(txn_manager); - - TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h); - txn_gc_info gc_info(&txn_state_for_gc, - oldest_referenced_xid_estimate, - // no messages above us, we can implicitly promote uxrs based on this xid - oldest_referenced_xid_estimate, - txn != nullptr ? 
!txn->for_recovery : false); - toku_ft_send_delete(ft_h, key, message_xids, &gc_info); - } -} - -void toku_ft_send_delete(FT_HANDLE ft_handle, DBT *key, XIDS xids, txn_gc_info *gc_info) { - DBT val; toku_init_dbt(&val); - FT_MSG_S ftmsg = { FT_DELETE_ANY, ZERO_MSN, xids, .u = { .id = { key, &val } } }; - toku_ft_root_put_msg(ft_handle->ft, &ftmsg, gc_info); -} - -/* ******************** open,close and create ********************** */ - -// Test only function (not used in running system). This one has no env -int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *ft_handle_p, int nodesize, - int basementnodesize, - enum toku_compression_method compression_method, - CACHETABLE cachetable, TOKUTXN txn, - int (*compare_fun)(DB *, const DBT*,const DBT*)) { - FT_HANDLE ft_handle; - const int only_create = 0; - - toku_ft_handle_create(&ft_handle); - toku_ft_handle_set_nodesize(ft_handle, nodesize); - toku_ft_handle_set_basementnodesize(ft_handle, basementnodesize); - toku_ft_handle_set_compression_method(ft_handle, compression_method); - toku_ft_handle_set_fanout(ft_handle, 16); - toku_ft_set_bt_compare(ft_handle, compare_fun); - - int r = toku_ft_handle_open(ft_handle, fname, is_create, only_create, cachetable, txn); - if (r != 0) { - return r; - } - - *ft_handle_p = ft_handle; - return r; -} - -static bool use_direct_io = true; - -void toku_ft_set_direct_io (bool direct_io_on) { - use_direct_io = direct_io_on; -} - -static inline int ft_open_maybe_direct(const char *filename, int oflag, int mode) { - if (use_direct_io) { - return toku_os_open_direct(filename, oflag, mode); - } else { - return toku_os_open(filename, oflag, mode); - } -} - -// open a file for use by the ft -// Requires: File does not exist. 
-static int ft_create_file(FT_HANDLE UU(ft_handle), const char *fname, int *fdp) { - mode_t mode = S_IRWXU|S_IRWXG|S_IRWXO; - int r; - int fd; - int er; - fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, mode); - assert(fd==-1); - if ((er = get_maybe_error_errno()) != ENOENT) { - return er; - } - fd = ft_open_maybe_direct(fname, O_RDWR | O_CREAT | O_BINARY, mode); - if (fd==-1) { - r = get_error_errno(); - return r; - } - - r = toku_fsync_directory(fname); - if (r == 0) { - *fdp = fd; - } else { - int rr = close(fd); - assert_zero(rr); - } - return r; -} - -// open a file for use by the ft. if the file does not exist, error -static int ft_open_file(const char *fname, int *fdp) { - mode_t mode = S_IRWXU|S_IRWXG|S_IRWXO; - int fd; - fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, mode); - if (fd==-1) { - return get_error_errno(); - } - *fdp = fd; - return 0; -} - -void -toku_ft_handle_set_compression_method(FT_HANDLE t, enum toku_compression_method method) -{ - if (t->ft) { - toku_ft_set_compression_method(t->ft, method); - } - else { - t->options.compression_method = method; - } -} - -void -toku_ft_handle_get_compression_method(FT_HANDLE t, enum toku_compression_method *methodp) -{ - if (t->ft) { - toku_ft_get_compression_method(t->ft, methodp); - } - else { - *methodp = t->options.compression_method; - } -} - -void -toku_ft_handle_set_fanout(FT_HANDLE ft_handle, unsigned int fanout) -{ - if (ft_handle->ft) { - toku_ft_set_fanout(ft_handle->ft, fanout); - } - else { - ft_handle->options.fanout = fanout; - } -} + // The rightmost blocknum never chances once it is initialized to something + // other than null. Verify that the pinned node has the correct blocknum. 
+ invariant(rightmost_leaf->blocknum.b == rightmost_blocknum.b); -void -toku_ft_handle_get_fanout(FT_HANDLE ft_handle, unsigned int *fanout) -{ - if (ft_handle->ft) { - toku_ft_get_fanout(ft_handle->ft, fanout); - } - else { - *fanout = ft_handle->options.fanout; + // If the rightmost leaf is reactive, bail out out and let the normal promotion pass + // take care of it. This also ensures that if any of our ancestors are reactive, + // they'll be taken care of too. + if (toku_ftnode_get_leaf_reactivity(rightmost_leaf, ft->h->nodesize) != RE_STABLE) { + STATUS_INC(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_REACTIVE, 1); + goto cleanup; } -} -static int -verify_builtin_comparisons_consistent(FT_HANDLE t, uint32_t flags) { - if ((flags & TOKU_DB_KEYCMP_BUILTIN) && (t->options.compare_fun != toku_builtin_compare_fun)) - return EINVAL; - return 0; -} - -// -// See comments in toku_db_change_descriptor to understand invariants -// in the system when this function is called -// -void toku_ft_change_descriptor( - FT_HANDLE ft_h, - const DBT* old_descriptor, - const DBT* new_descriptor, - bool do_log, - TOKUTXN txn, - bool update_cmp_descriptor - ) -{ - DESCRIPTOR_S new_d; - // if running with txns, save to rollback + write to recovery log - if (txn) { - // put information into rollback file - BYTESTRING old_desc_bs = { old_descriptor->size, (char *) old_descriptor->data }; - BYTESTRING new_desc_bs = { new_descriptor->size, (char *) new_descriptor->data }; - toku_logger_save_rollback_change_fdescriptor( - txn, - toku_cachefile_filenum(ft_h->ft->cf), - &old_desc_bs - ); - toku_txn_maybe_note_ft(txn, ft_h->ft); + // The groundwork has been laid for an insertion directly into the rightmost + // leaf node. We know that it is pinned for write, fully in memory, has + // no messages above it, and is not reactive. + // + // Now, two more things must be true for this insertion to actually happen: + // 1. The key to insert is within the bounds of this leafnode, or to the right. + // 2. 
If there is a uniqueness constraint, it passes. + bool nondeleted_key_found; + int relative_pos; + int target_childnum; - if (do_log) { - TOKULOGGER logger = toku_txn_logger(txn); - TXNID_PAIR xid = toku_txn_get_txnid(txn); - toku_log_change_fdescriptor( - logger, NULL, 0, - txn, - toku_cachefile_filenum(ft_h->ft->cf), - xid, - old_desc_bs, - new_desc_bs, - update_cmp_descriptor - ); + nondeleted_key_found = false; + target_childnum = -1; + relative_pos = ft_leaf_get_relative_key_pos(ft, rightmost_leaf, key, + unique ? &nondeleted_key_found : nullptr, + &target_childnum); + if (relative_pos >= 0) { + STATUS_INC(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_SUCCESS, 1); + if (unique && nondeleted_key_found) { + r = DB_KEYEXIST; + } else { + ft_insert_directly_into_leaf(ft, rightmost_leaf, target_childnum, + key, val, message_xids, type, gc_info); + r = 0; } + } else { + STATUS_INC(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_POS, 1); + r = -1; } - // write new_descriptor to header - new_d.dbt = *new_descriptor; - toku_ft_update_descriptor(ft_h->ft, &new_d); - // very infrequent operation, worth precise threadsafe count - STATUS_INC(FT_DESCRIPTOR_SET, 1); - - if (update_cmp_descriptor) { - toku_ft_update_cmp_descriptor(ft_h->ft); +cleanup: + // If we did the insert, the rightmost leaf was unpinned for us. + if (r != 0 && rightmost_leaf != nullptr) { + toku_unpin_ftnode(ft, rightmost_leaf); } -} -static void -toku_ft_handle_inherit_options(FT_HANDLE t, FT ft) { - struct ft_options options = { - .nodesize = ft->h->nodesize, - .basementnodesize = ft->h->basementnodesize, - .compression_method = ft->h->compression_method, - .fanout = ft->h->fanout, - .flags = ft->h->flags, - .compare_fun = ft->compare_fun, - .update_fun = ft->update_fun - }; - t->options = options; - t->did_set_flags = true; + return r; } - -// This is the actual open, used for various purposes, such as normal use, recovery, and redirect. 
-// fname_in_env is the iname, relative to the env_dir (data_dir is already in iname as prefix). -// The checkpointed version (checkpoint_lsn) of the dictionary must be no later than max_acceptable_lsn . -// Requires: The multi-operation client lock must be held to prevent a checkpoint from occuring. -static int -ft_handle_open(FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, FILENUM use_filenum, DICTIONARY_ID use_dictionary_id, LSN max_acceptable_lsn) { - int r; - bool txn_created = false; - char *fname_in_cwd = NULL; - CACHEFILE cf = NULL; - FT ft = NULL; - bool did_create = false; - toku_ft_open_close_lock(); - - if (ft_h->did_set_flags) { - r = verify_builtin_comparisons_consistent(ft_h, ft_h->options.flags); - if (r!=0) { goto exit; } - } - - assert(is_create || !only_create); - FILENUM reserved_filenum; - reserved_filenum = use_filenum; - fname_in_cwd = toku_cachetable_get_fname_in_cwd(cachetable, fname_in_env); - bool was_already_open; - { - int fd = -1; - r = ft_open_file(fname_in_cwd, &fd); - if (reserved_filenum.fileid == FILENUM_NONE.fileid) { - reserved_filenum = toku_cachetable_reserve_filenum(cachetable); - } - if (r==ENOENT && is_create) { - did_create = true; - mode_t mode = S_IRWXU|S_IRWXG|S_IRWXO; - if (txn) { - BYTESTRING bs = { .len=(uint32_t) strlen(fname_in_env), .data = (char*)fname_in_env }; - toku_logger_save_rollback_fcreate(txn, reserved_filenum, &bs); // bs is a copy of the fname relative to the environment - } - txn_created = (bool)(txn!=NULL); - toku_logger_log_fcreate(txn, fname_in_env, reserved_filenum, mode, ft_h->options.flags, ft_h->options.nodesize, ft_h->options.basementnodesize, ft_h->options.compression_method); - r = ft_create_file(ft_h, fname_in_cwd, &fd); - if (r) { goto exit; } - } - if (r) { goto exit; } - r=toku_cachetable_openfd_with_filenum(&cf, cachetable, fd, fname_in_env, reserved_filenum, &was_already_open); - if (r) { goto exit; } - } - 
assert(ft_h->options.nodesize>0); - if (is_create) { - r = toku_read_ft_and_store_in_cachefile(ft_h, cf, max_acceptable_lsn, &ft); - if (r==TOKUDB_DICTIONARY_NO_HEADER) { - toku_ft_create(&ft, &ft_h->options, cf, txn); - } - else if (r!=0) { - goto exit; - } - else if (only_create) { - assert_zero(r); - r = EEXIST; - goto exit; - } - // if we get here, then is_create was true but only_create was false, - // so it is ok for toku_read_ft_and_store_in_cachefile to have read - // the header via toku_read_ft_and_store_in_cachefile - } else { - r = toku_read_ft_and_store_in_cachefile(ft_h, cf, max_acceptable_lsn, &ft); - if (r) { goto exit; } - } - if (!ft_h->did_set_flags) { - r = verify_builtin_comparisons_consistent(ft_h, ft_h->options.flags); - if (r) { goto exit; } - } else if (ft_h->options.flags != ft->h->flags) { /* if flags have been set then flags must match */ - r = EINVAL; - goto exit; - } - toku_ft_handle_inherit_options(ft_h, ft); - - if (!was_already_open) { - if (!did_create) { //Only log the fopen that OPENs the file. If it was already open, don't log. - toku_logger_log_fopen(txn, fname_in_env, toku_cachefile_filenum(cf), ft_h->options.flags); - } - } - int use_reserved_dict_id; - use_reserved_dict_id = use_dictionary_id.dictid != DICTIONARY_ID_NONE.dictid; - if (!was_already_open) { - DICTIONARY_ID dict_id; - if (use_reserved_dict_id) { - dict_id = use_dictionary_id; - } - else { - dict_id = next_dict_id(); - } - ft->dict_id = dict_id; - } - else { - // dict_id is already in header - if (use_reserved_dict_id) { - assert(ft->dict_id.dictid == use_dictionary_id.dictid); + +static void ft_txn_log_insert(FT ft, DBT *key, DBT *val, TOKUTXN txn, bool do_logging, enum ft_msg_type type); + +int toku_ft_insert_unique(FT_HANDLE ft_h, DBT *key, DBT *val, TOKUTXN txn, bool do_logging) { +// Effect: Insert a unique key-val pair into the fractal tree. +// Return: 0 on success, DB_KEYEXIST if the overwrite constraint failed + XIDS message_xids = txn != nullptr ? 
toku_txn_get_xids(txn) : toku_xids_get_root_xids(); + + TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); + txn_manager_state txn_state_for_gc(txn_manager); + + TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h); + txn_gc_info gc_info(&txn_state_for_gc, + oldest_referenced_xid_estimate, + // no messages above us, we can implicitly promote uxrs based on this xid + oldest_referenced_xid_estimate, + true); + int r = ft_maybe_insert_into_rightmost_leaf(ft_h->ft, key, val, message_xids, FT_INSERT, &gc_info, true); + if (r != 0 && r != DB_KEYEXIST) { + // Default to a regular unique check + insert algorithm if we couldn't + // do it based on the rightmost leaf alone. + int lookup_r = toku_ft_lookup(ft_h, key, getf_nothing, nullptr); + if (lookup_r == DB_NOTFOUND) { + toku_ft_send_insert(ft_h, key, val, message_xids, FT_INSERT, &gc_info); + r = 0; + } else { + r = DB_KEYEXIST; } } - assert(ft); - assert(ft->dict_id.dictid != DICTIONARY_ID_NONE.dictid); - assert(ft->dict_id.dictid < dict_id_serial); - // important note here, - // after this point, where we associate the header - // with the ft_handle, the function is not allowed to fail - // Code that handles failure (located below "exit"), - // depends on this - toku_ft_note_ft_handle_open(ft, ft_h); - if (txn_created) { - assert(txn); - toku_txn_maybe_note_ft(txn, ft); + if (r == 0) { + ft_txn_log_insert(ft_h->ft, key, val, txn, do_logging, FT_INSERT); } + return r; +} - //Opening an ft may restore to previous checkpoint. Truncate if necessary. - { - int fd = toku_cachefile_get_fd (ft->cf); - toku_maybe_truncate_file_on_open(ft->blocktable, fd); +// Effect: Insert the key-val pair into an ft. 
+void toku_ft_insert (FT_HANDLE ft_handle, DBT *key, DBT *val, TOKUTXN txn) { + toku_ft_maybe_insert(ft_handle, key, val, txn, false, ZERO_LSN, true, FT_INSERT); +} + +void toku_ft_load_recovery(TOKUTXN txn, FILENUM old_filenum, char const * new_iname, int do_fsync, int do_log, LSN *load_lsn) { + paranoid_invariant(txn); + toku_txn_force_fsync_on_commit(txn); //If the txn commits, the commit MUST be in the log + //before the (old) file is actually unlinked + TOKULOGGER logger = toku_txn_logger(txn); + + BYTESTRING new_iname_bs = {.len=(uint32_t) strlen(new_iname), .data=(char*)new_iname}; + toku_logger_save_rollback_load(txn, old_filenum, &new_iname_bs); + if (do_log && logger) { + TXNID_PAIR xid = toku_txn_get_txnid(txn); + toku_log_load(logger, load_lsn, do_fsync, txn, xid, old_filenum, new_iname_bs); } +} - r = 0; -exit: - if (fname_in_cwd) { - toku_free(fname_in_cwd); +// 2954 +// this function handles the tasks needed to be recoverable +// - write to rollback log +// - write to recovery log +void toku_ft_hot_index_recovery(TOKUTXN txn, FILENUMS filenums, int do_fsync, int do_log, LSN *hot_index_lsn) +{ + paranoid_invariant(txn); + TOKULOGGER logger = toku_txn_logger(txn); + + // write to the rollback log + toku_logger_save_rollback_hot_index(txn, &filenums); + if (do_log && logger) { + TXNID_PAIR xid = toku_txn_get_txnid(txn); + // write to the recovery log + toku_log_hot_index(logger, hot_index_lsn, do_fsync, txn, xid, filenums); } - if (r != 0 && cf) { - if (ft) { - // we only call toku_ft_note_ft_handle_open - // when the function succeeds, so if we are here, - // then that means we have a reference to the header - // but we have not linked it to this ft. So, - // we can simply try to remove the header. - // We don't need to unlink this ft from the header - toku_ft_grab_reflock(ft); - bool needed = toku_ft_needed_unlocked(ft); - toku_ft_release_reflock(ft); - if (!needed) { - // close immediately. 
- toku_ft_evict_from_memory(ft, false, ZERO_LSN); - } +} + +// Effect: Optimize the ft. +void toku_ft_optimize (FT_HANDLE ft_h) { + TOKULOGGER logger = toku_cachefile_logger(ft_h->ft->cf); + if (logger) { + TXNID oldest = toku_txn_manager_get_oldest_living_xid(logger->txn_manager); + + XIDS root_xids = toku_xids_get_root_xids(); + XIDS message_xids; + if (oldest == TXNID_NONE_LIVING) { + message_xids = root_xids; } else { - toku_cachefile_close(&cf, false, ZERO_LSN); + int r = toku_xids_create_child(root_xids, &message_xids, oldest); + invariant(r == 0); } + + DBT key; + DBT val; + toku_init_dbt(&key); + toku_init_dbt(&val); + ft_msg msg(&key, &val, FT_OPTIMIZE, ZERO_MSN, message_xids); + + TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); + txn_manager_state txn_state_for_gc(txn_manager); + + TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h); + txn_gc_info gc_info(&txn_state_for_gc, + oldest_referenced_xid_estimate, + // no messages above us, we can implicitly promote uxrs based on this xid + oldest_referenced_xid_estimate, + true); + toku_ft_root_put_msg(ft_h->ft, msg, &gc_info); + toku_xids_destroy(&message_xids); } - toku_ft_open_close_unlock(); - return r; } -// Open an ft for the purpose of recovery, which requires that the ft be open to a pre-determined FILENUM -// and may require a specific checkpointed version of the file. -// (dict_id is assigned by the ft_handle_open() function.) 
-int -toku_ft_handle_open_recovery(FT_HANDLE t, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, FILENUM use_filenum, LSN max_acceptable_lsn) { - int r; - assert(use_filenum.fileid != FILENUM_NONE.fileid); - r = ft_handle_open(t, fname_in_env, is_create, only_create, cachetable, - txn, use_filenum, DICTIONARY_ID_NONE, max_acceptable_lsn); - return r; +void toku_ft_load(FT_HANDLE ft_handle, TOKUTXN txn, char const * new_iname, int do_fsync, LSN *load_lsn) { + FILENUM old_filenum = toku_cachefile_filenum(ft_handle->ft->cf); + int do_log = 1; + toku_ft_load_recovery(txn, old_filenum, new_iname, do_fsync, do_log, load_lsn); } -// Open an ft in normal use. The FILENUM and dict_id are assigned by the ft_handle_open() function. -// Requires: The multi-operation client lock must be held to prevent a checkpoint from occuring. -int -toku_ft_handle_open(FT_HANDLE t, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn) { - int r; - r = ft_handle_open(t, fname_in_env, is_create, only_create, cachetable, txn, FILENUM_NONE, DICTIONARY_ID_NONE, MAX_LSN); - return r; +// ft actions for logging hot index filenums +void toku_ft_hot_index(FT_HANDLE ft_handle __attribute__ ((unused)), TOKUTXN txn, FILENUMS filenums, int do_fsync, LSN *lsn) { + int do_log = 1; + toku_ft_hot_index_recovery(txn, filenums, do_fsync, do_log, lsn); } -// clone an ft handle. 
the cloned handle has a new dict_id but refers to the same fractal tree -int -toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN txn) { - FT_HANDLE result_ft_handle; - toku_ft_handle_create(&result_ft_handle); +void +toku_ft_log_put (TOKUTXN txn, FT_HANDLE ft_handle, const DBT *key, const DBT *val) { + TOKULOGGER logger = toku_txn_logger(txn); + if (logger) { + BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; + BYTESTRING valbs = {.len=val->size, .data=(char *) val->data}; + TXNID_PAIR xid = toku_txn_get_txnid(txn); + toku_log_enq_insert(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(ft_handle->ft->cf), xid, keybs, valbs); + } +} - // we're cloning, so the handle better have an open ft and open cf - invariant(ft_handle->ft); - invariant(ft_handle->ft->cf); +void +toku_ft_log_put_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *fts, uint32_t num_fts, const DBT *key, const DBT *val) { + assert(txn); + assert(num_fts > 0); + TOKULOGGER logger = toku_txn_logger(txn); + if (logger) { + FILENUM fnums[num_fts]; + uint32_t i; + for (i = 0; i < num_fts; i++) { + fnums[i] = toku_cachefile_filenum(fts[i]->ft->cf); + } + FILENUMS filenums = {.num = num_fts, .filenums = fnums}; + BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; + BYTESTRING valbs = {.len=val->size, .data=(char *) val->data}; + TXNID_PAIR xid = toku_txn_get_txnid(txn); + FILENUM src_filenum = src_ft ? toku_cachefile_filenum(src_ft->ft->cf) : FILENUM_NONE; + toku_log_enq_insert_multiple(logger, (LSN*)0, 0, txn, src_filenum, filenums, xid, keybs, valbs); + } +} - // inherit the options of the ft whose handle is being cloned. - toku_ft_handle_inherit_options(result_ft_handle, ft_handle->ft); +TXN_MANAGER toku_ft_get_txn_manager(FT_HANDLE ft_h) { + TOKULOGGER logger = toku_cachefile_logger(ft_h->ft->cf); + return logger != nullptr ? 
toku_logger_get_txn_manager(logger) : nullptr; +} - // we can clone the handle by creating a new handle with the same fname - CACHEFILE cf = ft_handle->ft->cf; - CACHETABLE ct = toku_cachefile_get_cachetable(cf); - const char *fname_in_env = toku_cachefile_fname_in_env(cf); - int r = toku_ft_handle_open(result_ft_handle, fname_in_env, false, false, ct, txn); - if (r != 0) { - toku_ft_handle_close(result_ft_handle); - result_ft_handle = NULL; - } - *cloned_ft_handle = result_ft_handle; - return r; +TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h) { + TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); + return txn_manager != nullptr ? toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager) : TXNID_NONE; } -// Open an ft in normal use. The FILENUM and dict_id are assigned by the ft_handle_open() function. -int -toku_ft_handle_open_with_dict_id( - FT_HANDLE t, - const char *fname_in_env, - int is_create, - int only_create, - CACHETABLE cachetable, - TOKUTXN txn, - DICTIONARY_ID use_dictionary_id - ) -{ - int r; - r = ft_handle_open( - t, - fname_in_env, - is_create, - only_create, - cachetable, - txn, - FILENUM_NONE, - use_dictionary_id, - MAX_LSN - ); - return r; +static void ft_txn_log_insert(FT ft, DBT *key, DBT *val, TOKUTXN txn, bool do_logging, enum ft_msg_type type) { + paranoid_invariant(type == FT_INSERT || type == FT_INSERT_NO_OVERWRITE); + + //By default use committed messages + TXNID_PAIR xid = toku_txn_get_txnid(txn); + if (txn) { + BYTESTRING keybs = {key->size, (char *) key->data}; + toku_logger_save_rollback_cmdinsert(txn, toku_cachefile_filenum(ft->cf), &keybs); + toku_txn_maybe_note_ft(txn, ft); + } + TOKULOGGER logger = toku_txn_logger(txn); + if (do_logging && logger) { + BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; + BYTESTRING valbs = {.len=val->size, .data=(char *) val->data}; + if (type == FT_INSERT) { + toku_log_enq_insert(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(ft->cf), xid, keybs, 
valbs); + } + else { + toku_log_enq_insert_no_overwrite(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(ft->cf), xid, keybs, valbs); + } + } } -DICTIONARY_ID -toku_ft_get_dictionary_id(FT_HANDLE ft_handle) { - FT h = ft_handle->ft; - DICTIONARY_ID dict_id = h->dict_id; - return dict_id; -} +void toku_ft_maybe_insert (FT_HANDLE ft_h, DBT *key, DBT *val, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging, enum ft_msg_type type) { + ft_txn_log_insert(ft_h->ft, key, val, txn, do_logging, type); + + LSN treelsn; + if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { + // do nothing + } else { + XIDS message_xids = txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids(); -void toku_ft_set_flags(FT_HANDLE ft_handle, unsigned int flags) { - ft_handle->did_set_flags = true; - ft_handle->options.flags = flags; -} + TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); + txn_manager_state txn_state_for_gc(txn_manager); -void toku_ft_get_flags(FT_HANDLE ft_handle, unsigned int *flags) { - *flags = ft_handle->options.flags; + TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h); + txn_gc_info gc_info(&txn_state_for_gc, + oldest_referenced_xid_estimate, + // no messages above us, we can implicitly promote uxrs based on this xid + oldest_referenced_xid_estimate, + txn != nullptr ? !txn->for_recovery : false); + int r = ft_maybe_insert_into_rightmost_leaf(ft_h->ft, key, val, message_xids, FT_INSERT, &gc_info, false); + if (r != 0) { + toku_ft_send_insert(ft_h, key, val, message_xids, type, &gc_info); + } + } } -void toku_ft_get_maximum_advised_key_value_lengths (unsigned int *max_key_len, unsigned int *max_val_len) -// return the maximum advisable key value lengths. The ft doesn't enforce these. 
+static void ft_insert_directly_into_leaf(FT ft, FTNODE leaf, int target_childnum, DBT *key, DBT *val, + XIDS message_xids, enum ft_msg_type type, txn_gc_info *gc_info) +// Effect: Insert directly into a leaf node a fractal tree. Does not do any logging. +// Requires: Leaf is fully in memory and pinned for write. +// Requires: If this insertion were to happen through the root node, the promotion +// algorithm would have selected the given leaf node as the point of injection. +// That means this function relies on the current implementation of promotion. { - *max_key_len = 32*1024; - *max_val_len = 32*1024*1024; + ft_msg msg(key, val, type, ZERO_MSN, message_xids); + size_t flow_deltas[] = { 0, 0 }; + inject_message_in_locked_node(ft, leaf, target_childnum, msg, flow_deltas, gc_info); } +static void +ft_send_update_msg(FT_HANDLE ft_h, const ft_msg &msg, TOKUTXN txn) { + TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); + txn_manager_state txn_state_for_gc(txn_manager); -void toku_ft_handle_set_nodesize(FT_HANDLE ft_handle, unsigned int nodesize) { - if (ft_handle->ft) { - toku_ft_set_nodesize(ft_handle->ft, nodesize); - } - else { - ft_handle->options.nodesize = nodesize; - } + TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h); + txn_gc_info gc_info(&txn_state_for_gc, + oldest_referenced_xid_estimate, + // no messages above us, we can implicitly promote uxrs based on this xid + oldest_referenced_xid_estimate, + txn != nullptr ? 
!txn->for_recovery : false); + toku_ft_root_put_msg(ft_h->ft, msg, &gc_info); } -void toku_ft_handle_get_nodesize(FT_HANDLE ft_handle, unsigned int *nodesize) { - if (ft_handle->ft) { - toku_ft_get_nodesize(ft_handle->ft, nodesize); - } - else { - *nodesize = ft_handle->options.nodesize; +void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_function_extra, + TOKUTXN txn, bool oplsn_valid, LSN oplsn, + bool do_logging) { + TXNID_PAIR xid = toku_txn_get_txnid(txn); + if (txn) { + BYTESTRING keybs = { key->size, (char *) key->data }; + toku_logger_save_rollback_cmdupdate( + txn, toku_cachefile_filenum(ft_h->ft->cf), &keybs); + toku_txn_maybe_note_ft(txn, ft_h->ft); } -} -void toku_ft_handle_set_basementnodesize(FT_HANDLE ft_handle, unsigned int basementnodesize) { - if (ft_handle->ft) { - toku_ft_set_basementnodesize(ft_handle->ft, basementnodesize); + TOKULOGGER logger; + logger = toku_txn_logger(txn); + if (do_logging && logger) { + BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; + BYTESTRING extrabs = {.len=update_function_extra->size, + .data = (char *) update_function_extra->data}; + toku_log_enq_update(logger, NULL, 0, txn, + toku_cachefile_filenum(ft_h->ft->cf), + xid, keybs, extrabs); } - else { - ft_handle->options.basementnodesize = basementnodesize; + + LSN treelsn; + if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { + // do nothing + } else { + XIDS message_xids = txn ? 
toku_txn_get_xids(txn) : toku_xids_get_root_xids(); + ft_msg msg(key, update_function_extra, FT_UPDATE, ZERO_MSN, message_xids); + ft_send_update_msg(ft_h, msg, txn); } } -void toku_ft_handle_get_basementnodesize(FT_HANDLE ft_handle, unsigned int *basementnodesize) { - if (ft_handle->ft) { - toku_ft_get_basementnodesize(ft_handle->ft, basementnodesize); +void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_extra, + TOKUTXN txn, bool oplsn_valid, LSN oplsn, + bool do_logging, bool is_resetting_op) { + TXNID_PAIR xid = toku_txn_get_txnid(txn); + uint8_t resetting = is_resetting_op ? 1 : 0; + if (txn) { + toku_logger_save_rollback_cmdupdatebroadcast(txn, toku_cachefile_filenum(ft_h->ft->cf), resetting); + toku_txn_maybe_note_ft(txn, ft_h->ft); } - else { - *basementnodesize = ft_handle->options.basementnodesize; + + TOKULOGGER logger; + logger = toku_txn_logger(txn); + if (do_logging && logger) { + BYTESTRING extrabs = {.len=update_function_extra->size, + .data = (char *) update_function_extra->data}; + toku_log_enq_updatebroadcast(logger, NULL, 0, txn, + toku_cachefile_filenum(ft_h->ft->cf), + xid, extrabs, resetting); } -} -void toku_ft_set_bt_compare(FT_HANDLE ft_handle, int (*bt_compare)(DB*, const DBT*, const DBT*)) { - ft_handle->options.compare_fun = bt_compare; + //TODO(yoni): remove treelsn here and similar calls (no longer being used) + LSN treelsn; + if (oplsn_valid && + oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { + + } else { + DBT empty_dbt; + XIDS message_xids = txn ? 
toku_txn_get_xids(txn) : toku_xids_get_root_xids(); + ft_msg msg(toku_init_dbt(&empty_dbt), update_function_extra, FT_UPDATE_BROADCAST_ALL, ZERO_MSN, message_xids); + ft_send_update_msg(ft_h, msg, txn); + } } -void toku_ft_set_redirect_callback(FT_HANDLE ft_handle, on_redirect_callback redir_cb, void* extra) { - ft_handle->redirect_callback = redir_cb; - ft_handle->redirect_callback_extra = extra; +void toku_ft_send_insert(FT_HANDLE ft_handle, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info) { + ft_msg msg(key, val, type, ZERO_MSN, xids); + toku_ft_root_put_msg(ft_handle->ft, msg, gc_info); } -void toku_ft_set_update(FT_HANDLE ft_handle, ft_update_func update_fun) { - ft_handle->options.update_fun = update_fun; +void toku_ft_send_commit_any(FT_HANDLE ft_handle, DBT *key, XIDS xids, txn_gc_info *gc_info) { + DBT val; + ft_msg msg(key, toku_init_dbt(&val), FT_COMMIT_ANY, ZERO_MSN, xids); + toku_ft_root_put_msg(ft_handle->ft, msg, gc_info); } -ft_compare_func toku_ft_get_bt_compare (FT_HANDLE ft_handle) { - return ft_handle->options.compare_fun; +void toku_ft_delete(FT_HANDLE ft_handle, DBT *key, TOKUTXN txn) { + toku_ft_maybe_delete(ft_handle, key, txn, false, ZERO_LSN, true); } -static void -ft_remove_handle_ref_callback(FT UU(ft), void *extra) { - FT_HANDLE CAST_FROM_VOIDP(handle, extra); - toku_list_remove(&handle->live_ft_handle_link); +void +toku_ft_log_del(TOKUTXN txn, FT_HANDLE ft_handle, const DBT *key) { + TOKULOGGER logger = toku_txn_logger(txn); + if (logger) { + BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; + TXNID_PAIR xid = toku_txn_get_txnid(txn); + toku_log_enq_delete_any(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(ft_handle->ft->cf), xid, keybs); + } } -// close an ft handle during normal operation. the underlying ft may or may not close, -// depending if there are still references. an lsn for this close will come from the logger. 
void -toku_ft_handle_close(FT_HANDLE ft_handle) { - // There are error paths in the ft_handle_open that end with ft_handle->ft==NULL. - FT ft = ft_handle->ft; - if (ft) { - const bool oplsn_valid = false; - toku_ft_remove_reference(ft, oplsn_valid, ZERO_LSN, ft_remove_handle_ref_callback, ft_handle); +toku_ft_log_del_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *fts, uint32_t num_fts, const DBT *key, const DBT *val) { + assert(txn); + assert(num_fts > 0); + TOKULOGGER logger = toku_txn_logger(txn); + if (logger) { + FILENUM fnums[num_fts]; + uint32_t i; + for (i = 0; i < num_fts; i++) { + fnums[i] = toku_cachefile_filenum(fts[i]->ft->cf); + } + FILENUMS filenums = {.num = num_fts, .filenums = fnums}; + BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; + BYTESTRING valbs = {.len=val->size, .data=(char *) val->data}; + TXNID_PAIR xid = toku_txn_get_txnid(txn); + FILENUM src_filenum = src_ft ? toku_cachefile_filenum(src_ft->ft->cf) : FILENUM_NONE; + toku_log_enq_delete_multiple(logger, (LSN*)0, 0, txn, src_filenum, filenums, xid, keybs, valbs); } - toku_free(ft_handle); } -// close an ft handle during recovery. the underlying ft must close, and will use the given lsn. -void -toku_ft_handle_close_recovery(FT_HANDLE ft_handle, LSN oplsn) { - FT ft = ft_handle->ft; - // the ft must exist if closing during recovery. 
error paths during - // open for recovery should close handles using toku_ft_handle_close() - assert(ft); - const bool oplsn_valid = true; - toku_ft_remove_reference(ft, oplsn_valid, oplsn, ft_remove_handle_ref_callback, ft_handle); - toku_free(ft_handle); +void toku_ft_maybe_delete(FT_HANDLE ft_h, DBT *key, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging) { + XIDS message_xids = toku_xids_get_root_xids(); //By default use committed messages + TXNID_PAIR xid = toku_txn_get_txnid(txn); + if (txn) { + BYTESTRING keybs = {key->size, (char *) key->data}; + toku_logger_save_rollback_cmddelete(txn, toku_cachefile_filenum(ft_h->ft->cf), &keybs); + toku_txn_maybe_note_ft(txn, ft_h->ft); + message_xids = toku_txn_get_xids(txn); + } + TOKULOGGER logger = toku_txn_logger(txn); + if (do_logging && logger) { + BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; + toku_log_enq_delete_any(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(ft_h->ft->cf), xid, keybs); + } + + LSN treelsn; + if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { + // do nothing + } else { + TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); + txn_manager_state txn_state_for_gc(txn_manager); + + TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h); + txn_gc_info gc_info(&txn_state_for_gc, + oldest_referenced_xid_estimate, + // no messages above us, we can implicitly promote uxrs based on this xid + oldest_referenced_xid_estimate, + txn != nullptr ? 
!txn->for_recovery : false); + toku_ft_send_delete(ft_h, key, message_xids, &gc_info); + } } -// TODO: remove this, callers should instead just use toku_ft_handle_close() -int -toku_close_ft_handle_nolsn (FT_HANDLE ft_handle, char** UU(error_string)) { - toku_ft_handle_close(ft_handle); - return 0; +void toku_ft_send_delete(FT_HANDLE ft_handle, DBT *key, XIDS xids, txn_gc_info *gc_info) { + DBT val; toku_init_dbt(&val); + ft_msg msg(key, toku_init_dbt(&val), FT_DELETE_ANY, ZERO_MSN, xids); + toku_ft_root_put_msg(ft_handle->ft, msg, gc_info); } -void toku_ft_handle_create(FT_HANDLE *ft_handle_ptr) { - FT_HANDLE XMALLOC(ft_handle); - memset(ft_handle, 0, sizeof *ft_handle); - toku_list_init(&ft_handle->live_ft_handle_link); - ft_handle->options.flags = 0; - ft_handle->did_set_flags = false; - ft_handle->options.nodesize = FT_DEFAULT_NODE_SIZE; - ft_handle->options.basementnodesize = FT_DEFAULT_BASEMENT_NODE_SIZE; - ft_handle->options.compression_method = TOKU_DEFAULT_COMPRESSION_METHOD; - ft_handle->options.fanout = FT_DEFAULT_FANOUT; - ft_handle->options.compare_fun = toku_builtin_compare_fun; - ft_handle->options.update_fun = NULL; - *ft_handle_ptr = ft_handle; +/* ******************** open,close and create ********************** */ + +// Test only function (not used in running system). 
This one has no env +int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *ft_handle_p, int nodesize, + int basementnodesize, + enum toku_compression_method compression_method, + CACHETABLE cachetable, TOKUTXN txn, + int (*compare_fun)(DB *, const DBT*,const DBT*)) { + FT_HANDLE ft_handle; + const int only_create = 0; + + toku_ft_handle_create(&ft_handle); + toku_ft_handle_set_nodesize(ft_handle, nodesize); + toku_ft_handle_set_basementnodesize(ft_handle, basementnodesize); + toku_ft_handle_set_compression_method(ft_handle, compression_method); + toku_ft_handle_set_fanout(ft_handle, 16); + toku_ft_set_bt_compare(ft_handle, compare_fun); + + int r = toku_ft_handle_open(ft_handle, fname, is_create, only_create, cachetable, txn); + if (r != 0) { + return r; + } + + *ft_handle_p = ft_handle; + return r; +} + +static bool use_direct_io = true; + +void toku_ft_set_direct_io (bool direct_io_on) { + use_direct_io = direct_io_on; +} + +static inline int ft_open_maybe_direct(const char *filename, int oflag, int mode) { + if (use_direct_io) { + return toku_os_open_direct(filename, oflag, mode); + } else { + return toku_os_open(filename, oflag, mode); + } } -/* ************* CURSORS ********************* */ - -static inline void -ft_cursor_cleanup_dbts(FT_CURSOR c) { - toku_destroy_dbt(&c->key); - toku_destroy_dbt(&c->val); -} +static const mode_t file_mode = S_IRUSR+S_IWUSR+S_IRGRP+S_IWGRP+S_IROTH+S_IWOTH; -// -// This function is used by the leafentry iterators. -// returns TOKUDB_ACCEPT if live transaction context is allowed to read a value -// that is written by transaction with LSN of id -// live transaction context may read value if either id is the root ancestor of context, or if -// id was committed before context's snapshot was taken. 
-// For id to be committed before context's snapshot was taken, the following must be true: -// - id < context->snapshot_txnid64 AND id is not in context's live root transaction list -// For the above to NOT be true: -// - id > context->snapshot_txnid64 OR id is in context's live root transaction list -// -static int -does_txn_read_entry(TXNID id, TOKUTXN context) { - int rval; - TXNID oldest_live_in_snapshot = toku_get_oldest_in_live_root_txn_list(context); - if (oldest_live_in_snapshot == TXNID_NONE && id < context->snapshot_txnid64) { - rval = TOKUDB_ACCEPT; - } - else if (id < oldest_live_in_snapshot || id == context->txnid.parent_id64) { - rval = TOKUDB_ACCEPT; - } - else if (id > context->snapshot_txnid64 || toku_is_txn_in_live_root_txn_list(*context->live_root_txn_list, id)) { - rval = 0; +// open a file for use by the ft +// Requires: File does not exist. +static int ft_create_file(FT_HANDLE UU(ft_handle), const char *fname, int *fdp) { + int r; + int fd; + int er; + fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, file_mode); + assert(fd==-1); + if ((er = get_maybe_error_errno()) != ENOENT) { + return er; } - else { - rval = TOKUDB_ACCEPT; + fd = ft_open_maybe_direct(fname, O_RDWR | O_CREAT | O_BINARY, file_mode); + if (fd==-1) { + r = get_error_errno(); + return r; } - return rval; -} -static inline void -ft_cursor_extract_val(LEAFENTRY le, - FT_CURSOR cursor, - uint32_t *vallen, - void **val) { - if (toku_ft_cursor_is_leaf_mode(cursor)) { - *val = le; - *vallen = leafentry_memsize(le); - } else if (cursor->is_snapshot_read) { - int r = le_iterate_val( - le, - does_txn_read_entry, - val, - vallen, - cursor->ttxn - ); - lazy_assert_zero(r); + r = toku_fsync_directory(fname); + if (r == 0) { + *fdp = fd; } else { - *val = le_latest_val_and_len(le, vallen); + int rr = close(fd); + assert_zero(rr); } + return r; } -int toku_ft_cursor ( - FT_HANDLE ft_handle, - FT_CURSOR *cursorptr, - TOKUTXN ttxn, - bool is_snapshot_read, - bool disable_prefetching - ) -{ 
- if (is_snapshot_read) { - invariant(ttxn != NULL); - int accepted = does_txn_read_entry(ft_handle->ft->h->root_xid_that_created, ttxn); - if (accepted!=TOKUDB_ACCEPT) { - invariant(accepted==0); - return TOKUDB_MVCC_DICTIONARY_TOO_NEW; - } - } - FT_CURSOR XCALLOC(cursor); - cursor->ft_handle = ft_handle; - cursor->prefetching = false; - toku_init_dbt(&cursor->range_lock_left_key); - toku_init_dbt(&cursor->range_lock_right_key); - cursor->left_is_neg_infty = false; - cursor->right_is_pos_infty = false; - cursor->is_snapshot_read = is_snapshot_read; - cursor->is_leaf_mode = false; - cursor->ttxn = ttxn; - cursor->disable_prefetching = disable_prefetching; - cursor->is_temporary = false; - *cursorptr = cursor; +// open a file for use by the ft. if the file does not exist, error +static int ft_open_file(const char *fname, int *fdp) { + int fd; + fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, file_mode); + if (fd==-1) { + return get_error_errno(); + } + *fdp = fd; return 0; } -void toku_ft_cursor_remove_restriction(FT_CURSOR ftcursor) { - ftcursor->out_of_range_error = 0; - ftcursor->direction = 0; -} - -void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR ftcursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *extra) { - ftcursor->interrupt_cb = cb; - ftcursor->interrupt_cb_extra = extra; -} - - void -toku_ft_cursor_set_temporary(FT_CURSOR ftcursor) { - ftcursor->is_temporary = true; +toku_ft_handle_set_compression_method(FT_HANDLE t, enum toku_compression_method method) +{ + if (t->ft) { + toku_ft_set_compression_method(t->ft, method); + } + else { + t->options.compression_method = method; + } } void -toku_ft_cursor_set_leaf_mode(FT_CURSOR ftcursor) { - ftcursor->is_leaf_mode = true; -} - -int -toku_ft_cursor_is_leaf_mode(FT_CURSOR ftcursor) { - return ftcursor->is_leaf_mode; +toku_ft_handle_get_compression_method(FT_HANDLE t, enum toku_compression_method *methodp) +{ + if (t->ft) { + toku_ft_get_compression_method(t->ft, methodp); + } + else { + *methodp = 
t->options.compression_method; + } } void -toku_ft_cursor_set_range_lock(FT_CURSOR cursor, const DBT *left, const DBT *right, - bool left_is_neg_infty, bool right_is_pos_infty, - int out_of_range_error) +toku_ft_handle_set_fanout(FT_HANDLE ft_handle, unsigned int fanout) { - // Destroy any existing keys and then clone the given left, right keys - toku_destroy_dbt(&cursor->range_lock_left_key); - if (left_is_neg_infty) { - cursor->left_is_neg_infty = true; - } else { - toku_clone_dbt(&cursor->range_lock_left_key, *left); + if (ft_handle->ft) { + toku_ft_set_fanout(ft_handle->ft, fanout); } - - toku_destroy_dbt(&cursor->range_lock_right_key); - if (right_is_pos_infty) { - cursor->right_is_pos_infty = true; - } else { - toku_clone_dbt(&cursor->range_lock_right_key, *right); + else { + ft_handle->options.fanout = fanout; } - - // TOKUDB_FOUND_BUT_REJECTED is a DB_NOTFOUND with instructions to stop looking. (Faster) - cursor->out_of_range_error = out_of_range_error == DB_NOTFOUND ? TOKUDB_FOUND_BUT_REJECTED : out_of_range_error; - cursor->direction = 0; -} - -void toku_ft_cursor_close(FT_CURSOR cursor) { - ft_cursor_cleanup_dbts(cursor); - toku_destroy_dbt(&cursor->range_lock_left_key); - toku_destroy_dbt(&cursor->range_lock_right_key); - toku_free(cursor); } -static inline void ft_cursor_set_prefetching(FT_CURSOR cursor) { - cursor->prefetching = true; -} - -static inline bool ft_cursor_prefetching(FT_CURSOR cursor) { - return cursor->prefetching; +void +toku_ft_handle_get_fanout(FT_HANDLE ft_handle, unsigned int *fanout) +{ + if (ft_handle->ft) { + toku_ft_get_fanout(ft_handle->ft, fanout); + } + else { + *fanout = ft_handle->options.fanout; + } } -//Return true if cursor is uninitialized. false otherwise. 
-static bool -ft_cursor_not_set(FT_CURSOR cursor) { - assert((cursor->key.data==NULL) == (cursor->val.data==NULL)); - return (bool)(cursor->key.data == NULL); +// The memcmp magic byte may be set on a per fractal tree basis to communicate +// that if two keys begin with this byte, they may be compared with the builtin +// key comparison function. This greatly optimizes certain in-memory workloads, +// such as lookups by OID primary key in TokuMX. +int toku_ft_handle_set_memcmp_magic(FT_HANDLE ft_handle, uint8_t magic) { + if (magic == comparator::MEMCMP_MAGIC_NONE) { + return EINVAL; + } + if (ft_handle->ft != nullptr) { + // if the handle is already open, then we cannot set the memcmp magic + // (because it may or may not have been set by someone else already) + return EINVAL; + } + ft_handle->options.memcmp_magic = magic; + return 0; } -// -// -// -// -// -// -// -// -// -// TODO: ask Yoni why second parameter here is not const -// -// -// -// -// -// -// -// -// static int -heaviside_from_search_t(const DBT &kdbt, ft_search_t &search) { - int cmp = search.compare(search, - search.k ? &kdbt : 0); - // The search->compare function returns only 0 or 1 - switch (search.direction) { - case FT_SEARCH_LEFT: return cmp==0 ? -1 : +1; - case FT_SEARCH_RIGHT: return cmp==0 ? +1 : -1; // Because the comparison runs backwards for right searches. +verify_builtin_comparisons_consistent(FT_HANDLE t, uint32_t flags) { + if ((flags & TOKU_DB_KEYCMP_BUILTIN) && (t->options.compare_fun != toku_builtin_compare_fun)) { + return EINVAL; } - abort(); return 0; + return 0; } - // -// Returns true if the value that is to be read is empty. 
+// See comments in toku_db_change_descriptor to understand invariants +// in the system when this function is called // -static inline int -is_le_val_del(LEAFENTRY le, FT_CURSOR ftcursor) { - int rval; - if (ftcursor->is_snapshot_read) { - bool is_del; - le_iterate_is_del( - le, - does_txn_read_entry, - &is_del, - ftcursor->ttxn +void toku_ft_change_descriptor( + FT_HANDLE ft_h, + const DBT* old_descriptor, + const DBT* new_descriptor, + bool do_log, + TOKUTXN txn, + bool update_cmp_descriptor + ) +{ + DESCRIPTOR_S new_d; + + // if running with txns, save to rollback + write to recovery log + if (txn) { + // put information into rollback file + BYTESTRING old_desc_bs = { old_descriptor->size, (char *) old_descriptor->data }; + BYTESTRING new_desc_bs = { new_descriptor->size, (char *) new_descriptor->data }; + toku_logger_save_rollback_change_fdescriptor( + txn, + toku_cachefile_filenum(ft_h->ft->cf), + &old_desc_bs ); - rval = is_del; + toku_txn_maybe_note_ft(txn, ft_h->ft); + + if (do_log) { + TOKULOGGER logger = toku_txn_logger(txn); + TXNID_PAIR xid = toku_txn_get_txnid(txn); + toku_log_change_fdescriptor( + logger, NULL, 0, + txn, + toku_cachefile_filenum(ft_h->ft->cf), + xid, + old_desc_bs, + new_desc_bs, + update_cmp_descriptor + ); + } } - else { - rval = le_latest_is_del(le); + + // write new_descriptor to header + new_d.dbt = *new_descriptor; + toku_ft_update_descriptor(ft_h->ft, &new_d); + // very infrequent operation, worth precise threadsafe count + STATUS_INC(FT_DESCRIPTOR_SET, 1); + + if (update_cmp_descriptor) { + toku_ft_update_cmp_descriptor(ft_h->ft); } - return rval; } -struct store_fifo_offset_extra { - int32_t *offsets; - int i; -}; +static void +toku_ft_handle_inherit_options(FT_HANDLE t, FT ft) { + struct ft_options options = { + .nodesize = ft->h->nodesize, + .basementnodesize = ft->h->basementnodesize, + .compression_method = ft->h->compression_method, + .fanout = ft->h->fanout, + .flags = ft->h->flags, + .memcmp_magic = 
ft->cmp.get_memcmp_magic(), + .compare_fun = ft->cmp.get_compare_func(), + .update_fun = ft->update_fun + }; + t->options = options; + t->did_set_flags = true; +} + +// This is the actual open, used for various purposes, such as normal use, recovery, and redirect. +// fname_in_env is the iname, relative to the env_dir (data_dir is already in iname as prefix). +// The checkpointed version (checkpoint_lsn) of the dictionary must be no later than max_acceptable_lsn . +// Requires: The multi-operation client lock must be held to prevent a checkpoint from occuring. +static int +ft_handle_open(FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, FILENUM use_filenum, DICTIONARY_ID use_dictionary_id, LSN max_acceptable_lsn) { + int r; + bool txn_created = false; + char *fname_in_cwd = NULL; + CACHEFILE cf = NULL; + FT ft = NULL; + bool did_create = false; + toku_ft_open_close_lock(); + + if (ft_h->did_set_flags) { + r = verify_builtin_comparisons_consistent(ft_h, ft_h->options.flags); + if (r!=0) { goto exit; } + } + + assert(is_create || !only_create); + FILENUM reserved_filenum; + reserved_filenum = use_filenum; + fname_in_cwd = toku_cachetable_get_fname_in_cwd(cachetable, fname_in_env); + bool was_already_open; + { + int fd = -1; + r = ft_open_file(fname_in_cwd, &fd); + if (reserved_filenum.fileid == FILENUM_NONE.fileid) { + reserved_filenum = toku_cachetable_reserve_filenum(cachetable); + } + if (r==ENOENT && is_create) { + did_create = true; + if (txn) { + BYTESTRING bs = { .len=(uint32_t) strlen(fname_in_env), .data = (char*)fname_in_env }; + toku_logger_save_rollback_fcreate(txn, reserved_filenum, &bs); // bs is a copy of the fname relative to the environment + } + txn_created = (bool)(txn!=NULL); + toku_logger_log_fcreate(txn, fname_in_env, reserved_filenum, file_mode, ft_h->options.flags, ft_h->options.nodesize, ft_h->options.basementnodesize, ft_h->options.compression_method); + r = ft_create_file(ft_h, 
fname_in_cwd, &fd); + if (r) { goto exit; } + } + if (r) { goto exit; } + r=toku_cachetable_openfd_with_filenum(&cf, cachetable, fd, fname_in_env, reserved_filenum, &was_already_open); + if (r) { goto exit; } + } + assert(ft_h->options.nodesize>0); + if (is_create) { + r = toku_read_ft_and_store_in_cachefile(ft_h, cf, max_acceptable_lsn, &ft); + if (r==TOKUDB_DICTIONARY_NO_HEADER) { + toku_ft_create(&ft, &ft_h->options, cf, txn); + } + else if (r!=0) { + goto exit; + } + else if (only_create) { + assert_zero(r); + r = EEXIST; + goto exit; + } + // if we get here, then is_create was true but only_create was false, + // so it is ok for toku_read_ft_and_store_in_cachefile to have read + // the header via toku_read_ft_and_store_in_cachefile + } else { + r = toku_read_ft_and_store_in_cachefile(ft_h, cf, max_acceptable_lsn, &ft); + if (r) { goto exit; } + } + if (!ft_h->did_set_flags) { + r = verify_builtin_comparisons_consistent(ft_h, ft_h->options.flags); + if (r) { goto exit; } + } else if (ft_h->options.flags != ft->h->flags) { /* if flags have been set then flags must match */ + r = EINVAL; + goto exit; + } -int store_fifo_offset(const int32_t &offset, const uint32_t UU(idx), struct store_fifo_offset_extra *const extra) __attribute__((nonnull(3))); -int store_fifo_offset(const int32_t &offset, const uint32_t UU(idx), struct store_fifo_offset_extra *const extra) -{ - extra->offsets[extra->i] = offset; - extra->i++; - return 0; -} + // Ensure that the memcmp magic bits are consistent, if set. + if (ft->cmp.get_memcmp_magic() != toku::comparator::MEMCMP_MAGIC_NONE && + ft_h->options.memcmp_magic != toku::comparator::MEMCMP_MAGIC_NONE && + ft_h->options.memcmp_magic != ft->cmp.get_memcmp_magic()) { + r = EINVAL; + goto exit; + } + toku_ft_handle_inherit_options(ft_h, ft); -/** - * Given pointers to offsets within a FIFO where we can find messages, - * figure out the MSN of each message, and compare those MSNs. 
Returns 1, - * 0, or -1 if a is larger than, equal to, or smaller than b. - */ -int fifo_offset_msn_cmp(FIFO &fifo, const int32_t &ao, const int32_t &bo); -int fifo_offset_msn_cmp(FIFO &fifo, const int32_t &ao, const int32_t &bo) -{ - const struct fifo_entry *a = toku_fifo_get_entry(fifo, ao); - const struct fifo_entry *b = toku_fifo_get_entry(fifo, bo); - if (a->msn.msn > b->msn.msn) { - return +1; + if (!was_already_open) { + if (!did_create) { //Only log the fopen that OPENs the file. If it was already open, don't log. + toku_logger_log_fopen(txn, fname_in_env, toku_cachefile_filenum(cf), ft_h->options.flags); + } } - if (a->msn.msn < b->msn.msn) { - return -1; + int use_reserved_dict_id; + use_reserved_dict_id = use_dictionary_id.dictid != DICTIONARY_ID_NONE.dictid; + if (!was_already_open) { + DICTIONARY_ID dict_id; + if (use_reserved_dict_id) { + dict_id = use_dictionary_id; + } + else { + dict_id = next_dict_id(); + } + ft->dict_id = dict_id; } - return 0; -} - -/** - * Given a fifo_entry, either decompose it into its parameters and call - * toku_ft_bn_apply_msg, or discard it, based on its MSN and the MSN of the - * basement node. - */ -static void -do_bn_apply_msg(FT_HANDLE t, BASEMENTNODE bn, struct fifo_entry *entry, txn_gc_info *gc_info, uint64_t *workdone, STAT64INFO stats_to_update) -{ - // The messages are being iterated over in (key,msn) order or just in - // msn order, so all the messages for one key, from one buffer, are in - // ascending msn order. So it's ok that we don't update the basement - // node's msn until the end. 
- if (entry->msn.msn > bn->max_msn_applied.msn) { - ITEMLEN keylen = entry->keylen; - ITEMLEN vallen = entry->vallen; - enum ft_msg_type type = fifo_entry_get_msg_type(entry); - MSN msn = entry->msn; - const XIDS xids = (XIDS) &entry->xids_s; - bytevec key = xids_get_end_of_array(xids); - bytevec val = (uint8_t*)key + entry->keylen; - - DBT hk; - toku_fill_dbt(&hk, key, keylen); - DBT hv; - FT_MSG_S ftmsg = { type, msn, xids, .u = { .id = { &hk, toku_fill_dbt(&hv, val, vallen) } } }; - toku_ft_bn_apply_msg( - t->ft->compare_fun, - t->ft->update_fun, - &t->ft->cmp_descriptor, - bn, - &ftmsg, - gc_info, - workdone, - stats_to_update - ); - } else { - STATUS_INC(FT_MSN_DISCARDS, 1); + else { + // dict_id is already in header + if (use_reserved_dict_id) { + assert(ft->dict_id.dictid == use_dictionary_id.dictid); + } } - // We must always mark entry as stale since it has been marked - // (using omt::iterate_and_mark_range) - // It is possible to call do_bn_apply_msg even when it won't apply the message because - // the node containing it could have been evicted and brought back in. 
- entry->is_fresh = false; -} - -struct iterate_do_bn_apply_msg_extra { - FT_HANDLE t; - BASEMENTNODE bn; - NONLEAF_CHILDINFO bnc; - txn_gc_info *gc_info; - uint64_t *workdone; - STAT64INFO stats_to_update; -}; + assert(ft); + assert(ft->dict_id.dictid != DICTIONARY_ID_NONE.dictid); + assert(ft->dict_id.dictid < dict_id_serial); -int iterate_do_bn_apply_msg(const int32_t &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_msg_extra *const e) __attribute__((nonnull(3))); -int iterate_do_bn_apply_msg(const int32_t &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_msg_extra *const e) -{ - struct fifo_entry *entry = toku_fifo_get_entry(e->bnc->buffer, offset); - do_bn_apply_msg(e->t, e->bn, entry, e->gc_info, e->workdone, e->stats_to_update); - return 0; -} + // important note here, + // after this point, where we associate the header + // with the ft_handle, the function is not allowed to fail + // Code that handles failure (located below "exit"), + // depends on this + toku_ft_note_ft_handle_open(ft, ft_h); + if (txn_created) { + assert(txn); + toku_txn_maybe_note_ft(txn, ft); + } -/** - * Given the bounds of the basement node to which we will apply messages, - * find the indexes within message_tree which contain the range of - * relevant messages. - * - * The message tree contains offsets into the buffer, where messages are - * found. The pivot_bounds are the lower bound exclusive and upper bound - * inclusive, because they come from pivot keys in the tree. We want OMT - * indices, which must have the lower bound be inclusive and the upper - * bound exclusive. We will get these by telling omt::find to look - * for something strictly bigger than each of our pivot bounds. - * - * Outputs the OMT indices in lbi (lower bound inclusive) and ube (upper - * bound exclusive). 
- */ -template -static void -find_bounds_within_message_tree( - DESCRIPTOR desc, /// used for cmp - ft_compare_func cmp, /// used to compare keys - const find_bounds_omt_t &message_tree, /// tree holding FIFO offsets, in which we want to look for indices - FIFO buffer, /// buffer in which messages are found - struct pivot_bounds const * const bounds, /// key bounds within the basement node we're applying messages to - uint32_t *lbi, /// (output) "lower bound inclusive" (index into message_tree) - uint32_t *ube /// (output) "upper bound exclusive" (index into message_tree) - ) -{ - int r = 0; + // Opening an ft may restore to previous checkpoint. + // Truncate if necessary. + { + int fd = toku_cachefile_get_fd (ft->cf); + ft->blocktable.maybe_truncate_file_on_open(fd); + } - if (bounds->lower_bound_exclusive) { - // By setting msn to MAX_MSN and by using direction of +1, we will - // get the first message greater than (in (key, msn) order) any - // message (with any msn) with the key lower_bound_exclusive. - // This will be a message we want to try applying, so it is the - // "lower bound inclusive" within the message_tree. - struct toku_fifo_entry_key_msn_heaviside_extra lbi_extra; - ZERO_STRUCT(lbi_extra); - lbi_extra.desc = desc; - lbi_extra.cmp = cmp; - lbi_extra.fifo = buffer; - lbi_extra.key = bounds->lower_bound_exclusive; - lbi_extra.msn = MAX_MSN; - int32_t found_lb; - r = message_tree.template find(lbi_extra, +1, &found_lb, lbi); - if (r == DB_NOTFOUND) { - // There is no relevant data (the lower bound is bigger than - // any message in this tree), so we have no range and we're - // done. - *lbi = 0; - *ube = 0; - return; - } - if (bounds->upper_bound_inclusive) { - // Check if what we found for lbi is greater than the upper - // bound inclusive that we have. If so, there are no relevant - // messages between these bounds. 
- const DBT *ubi = bounds->upper_bound_inclusive; - const int32_t offset = found_lb; - DBT found_lbidbt; - fill_dbt_for_fifo_entry(&found_lbidbt, toku_fifo_get_entry(buffer, offset)); - FAKE_DB(db, desc); - int c = cmp(&db, &found_lbidbt, ubi); - // These DBTs really are both inclusive bounds, so we need - // strict inequality in order to determine that there's - // nothing between them. If they're equal, then we actually - // need to apply the message pointed to by lbi, and also - // anything with the same key but a bigger msn. - if (c > 0) { - *lbi = 0; - *ube = 0; - return; + r = 0; +exit: + if (fname_in_cwd) { + toku_free(fname_in_cwd); + } + if (r != 0 && cf) { + if (ft) { + // we only call toku_ft_note_ft_handle_open + // when the function succeeds, so if we are here, + // then that means we have a reference to the header + // but we have not linked it to this ft. So, + // we can simply try to remove the header. + // We don't need to unlink this ft from the header + toku_ft_grab_reflock(ft); + bool needed = toku_ft_needed_unlocked(ft); + toku_ft_release_reflock(ft); + if (!needed) { + // close immediately. + toku_ft_evict_from_memory(ft, false, ZERO_LSN); } } - } else { - // No lower bound given, it's negative infinity, so we start at - // the first message in the OMT. - *lbi = 0; - } - if (bounds->upper_bound_inclusive) { - // Again, we use an msn of MAX_MSN and a direction of +1 to get - // the first thing bigger than the upper_bound_inclusive key. - // This is therefore the smallest thing we don't want to apply, - // and omt::iterate_on_range will not examine it. 
- struct toku_fifo_entry_key_msn_heaviside_extra ube_extra; - ZERO_STRUCT(ube_extra); - ube_extra.desc = desc; - ube_extra.cmp = cmp; - ube_extra.fifo = buffer; - ube_extra.key = bounds->upper_bound_inclusive; - ube_extra.msn = MAX_MSN; - r = message_tree.template find(ube_extra, +1, nullptr, ube); - if (r == DB_NOTFOUND) { - // Couldn't find anything in the buffer bigger than our key, - // so we need to look at everything up to the end of - // message_tree. - *ube = message_tree.size(); + else { + toku_cachefile_close(&cf, false, ZERO_LSN); } - } else { - // No upper bound given, it's positive infinity, so we need to go - // through the end of the OMT. - *ube = message_tree.size(); } + toku_ft_open_close_unlock(); + return r; } -/** - * For each message in the ancestor's buffer (determined by childnum) that - * is key-wise between lower_bound_exclusive and upper_bound_inclusive, - * apply the message to the basement node. We treat the bounds as minus - * or plus infinity respectively if they are NULL. Do not mark the node - * as dirty (preserve previous state of 'dirty' bit). - */ -static void -bnc_apply_messages_to_basement_node( - FT_HANDLE t, // used for comparison function - BASEMENTNODE bn, // where to apply messages - FTNODE ancestor, // the ancestor node where we can find messages to apply - int childnum, // which child buffer of ancestor contains messages we want - struct pivot_bounds const * const bounds, // contains pivot key bounds of this basement node - txn_gc_info *gc_info, - bool* msgs_applied - ) -{ +// Open an ft for the purpose of recovery, which requires that the ft be open to a pre-determined FILENUM +// and may require a specific checkpointed version of the file. +// (dict_id is assigned by the ft_handle_open() function.) 
+int +toku_ft_handle_open_recovery(FT_HANDLE t, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, FILENUM use_filenum, LSN max_acceptable_lsn) { int r; - NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum); - - // Determine the offsets in the message trees between which we need to - // apply messages from this buffer - STAT64INFO_S stats_delta = {0,0}; - uint64_t workdone_this_ancestor = 0; - - uint32_t stale_lbi, stale_ube; - if (!bn->stale_ancestor_messages_applied) { - find_bounds_within_message_tree(&t->ft->cmp_descriptor, t->ft->compare_fun, bnc->stale_message_tree, bnc->buffer, bounds, &stale_lbi, &stale_ube); - } else { - stale_lbi = 0; - stale_ube = 0; - } - uint32_t fresh_lbi, fresh_ube; - find_bounds_within_message_tree(&t->ft->cmp_descriptor, t->ft->compare_fun, bnc->fresh_message_tree, bnc->buffer, bounds, &fresh_lbi, &fresh_ube); - - // We now know where all the messages we must apply are, so one of the - // following 4 cases will do the application, depending on which of - // the lists contains relevant messages: - // - // 1. broadcast messages and anything else, or a mix of fresh and stale - // 2. only fresh messages - // 3. only stale messages - if (bnc->broadcast_list.size() > 0 || - (stale_lbi != stale_ube && fresh_lbi != fresh_ube)) { - // We have messages in multiple trees, so we grab all - // the relevant messages' offsets and sort them by MSN, then apply - // them in MSN order. 
- const int buffer_size = ((stale_ube - stale_lbi) + (fresh_ube - fresh_lbi) + bnc->broadcast_list.size()); - toku::scoped_malloc offsets_buf(buffer_size * sizeof(int32_t)); - int32_t *offsets = reinterpret_cast(offsets_buf.get()); - struct store_fifo_offset_extra sfo_extra = { .offsets = offsets, .i = 0 }; - - // Populate offsets array with offsets to stale messages - r = bnc->stale_message_tree.iterate_on_range(stale_lbi, stale_ube, &sfo_extra); - assert_zero(r); - - // Then store fresh offsets, and mark them to be moved to stale later. - r = bnc->fresh_message_tree.iterate_and_mark_range(fresh_lbi, fresh_ube, &sfo_extra); - assert_zero(r); + assert(use_filenum.fileid != FILENUM_NONE.fileid); + r = ft_handle_open(t, fname_in_env, is_create, only_create, cachetable, + txn, use_filenum, DICTIONARY_ID_NONE, max_acceptable_lsn); + return r; +} - // Store offsets of all broadcast messages. - r = bnc->broadcast_list.iterate(&sfo_extra); - assert_zero(r); - invariant(sfo_extra.i == buffer_size); +// Open an ft in normal use. The FILENUM and dict_id are assigned by the ft_handle_open() function. +// Requires: The multi-operation client lock must be held to prevent a checkpoint from occuring. +int +toku_ft_handle_open(FT_HANDLE t, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn) { + int r; + r = ft_handle_open(t, fname_in_env, is_create, only_create, cachetable, txn, FILENUM_NONE, DICTIONARY_ID_NONE, MAX_LSN); + return r; +} - // Sort by MSN. - r = toku::sort::mergesort_r(offsets, buffer_size, bnc->buffer); - assert_zero(r); +// clone an ft handle. the cloned handle has a new dict_id but refers to the same fractal tree +int +toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN txn) { + FT_HANDLE result_ft_handle; + toku_ft_handle_create(&result_ft_handle); - // Apply the messages in MSN order. 
- for (int i = 0; i < buffer_size; ++i) { - *msgs_applied = true; - struct fifo_entry *entry = toku_fifo_get_entry(bnc->buffer, offsets[i]); - do_bn_apply_msg(t, bn, entry, gc_info, &workdone_this_ancestor, &stats_delta); - } - } else if (stale_lbi == stale_ube) { - // No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later. - struct iterate_do_bn_apply_msg_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta }; - if (fresh_ube - fresh_lbi > 0) *msgs_applied = true; - r = bnc->fresh_message_tree.iterate_and_mark_range(fresh_lbi, fresh_ube, &iter_extra); - assert_zero(r); - } else { - invariant(fresh_lbi == fresh_ube); - // No fresh messages to apply, we just apply stale messages. + // we're cloning, so the handle better have an open ft and open cf + invariant(ft_handle->ft); + invariant(ft_handle->ft->cf); - if (stale_ube - stale_lbi > 0) *msgs_applied = true; - struct iterate_do_bn_apply_msg_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta }; + // inherit the options of the ft whose handle is being cloned. 
+ toku_ft_handle_inherit_options(result_ft_handle, ft_handle->ft); - r = bnc->stale_message_tree.iterate_on_range(stale_lbi, stale_ube, &iter_extra); - assert_zero(r); - } - // - // update stats - // - if (workdone_this_ancestor > 0) { - (void) toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum), workdone_this_ancestor); - } - if (stats_delta.numbytes || stats_delta.numrows) { - toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta); + // we can clone the handle by creating a new handle with the same fname + CACHEFILE cf = ft_handle->ft->cf; + CACHETABLE ct = toku_cachefile_get_cachetable(cf); + const char *fname_in_env = toku_cachefile_fname_in_env(cf); + int r = toku_ft_handle_open(result_ft_handle, fname_in_env, false, false, ct, txn); + if (r != 0) { + toku_ft_handle_close(result_ft_handle); + result_ft_handle = NULL; } + *cloned_ft_handle = result_ft_handle; + return r; } -static void -apply_ancestors_messages_to_bn( +// Open an ft in normal use. The FILENUM and dict_id are assigned by the ft_handle_open() function. 
+int +toku_ft_handle_open_with_dict_id( FT_HANDLE t, - FTNODE node, - int childnum, - ANCESTORS ancestors, - struct pivot_bounds const * const bounds, - txn_gc_info *gc_info, - bool* msgs_applied + const char *fname_in_env, + int is_create, + int only_create, + CACHETABLE cachetable, + TOKUTXN txn, + DICTIONARY_ID use_dictionary_id ) { - BASEMENTNODE curr_bn = BLB(node, childnum); - struct pivot_bounds curr_bounds = next_pivot_keys(node, childnum, bounds); - for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) { - if (curr_ancestors->node->max_msn_applied_to_node_on_disk.msn > curr_bn->max_msn_applied.msn) { - paranoid_invariant(BP_STATE(curr_ancestors->node, curr_ancestors->childnum) == PT_AVAIL); - bnc_apply_messages_to_basement_node( - t, - curr_bn, - curr_ancestors->node, - curr_ancestors->childnum, - &curr_bounds, - gc_info, - msgs_applied - ); - // We don't want to check this ancestor node again if the - // next time we query it, the msn hasn't changed. - curr_bn->max_msn_applied = curr_ancestors->node->max_msn_applied_to_node_on_disk; - } - } - // At this point, we know all the stale messages above this - // basement node have been applied, and any new messages will be - // fresh, so we don't need to look at stale messages for this - // basement node, unless it gets evicted (and this field becomes - // false when it's read in again). - curr_bn->stale_ancestor_messages_applied = true; + int r; + r = ft_handle_open( + t, + fname_in_env, + is_create, + only_create, + cachetable, + txn, + FILENUM_NONE, + use_dictionary_id, + MAX_LSN + ); + return r; } -void -toku_apply_ancestors_messages_to_node ( - FT_HANDLE t, - FTNODE node, - ANCESTORS ancestors, - struct pivot_bounds const * const bounds, - bool* msgs_applied, - int child_to_read - ) -// Effect: -// Bring a leaf node up-to-date according to all the messages in the ancestors. -// If the leaf node is already up-to-date then do nothing. 
-// If the leaf node is not already up-to-date, then record the work done -// for that leaf in each ancestor. -// Requires: -// This is being called when pinning a leaf node for the query path. -// The entire root-to-leaf path is pinned and appears in the ancestors list. +DICTIONARY_ID +toku_ft_get_dictionary_id(FT_HANDLE ft_handle) { + FT ft = ft_handle->ft; + return ft->dict_id; +} + +void toku_ft_set_flags(FT_HANDLE ft_handle, unsigned int flags) { + ft_handle->did_set_flags = true; + ft_handle->options.flags = flags; +} + +void toku_ft_get_flags(FT_HANDLE ft_handle, unsigned int *flags) { + *flags = ft_handle->options.flags; +} + +void toku_ft_get_maximum_advised_key_value_lengths (unsigned int *max_key_len, unsigned int *max_val_len) +// return the maximum advisable key value lengths. The ft doesn't enforce these. { - VERIFY_NODE(t, node); - paranoid_invariant(node->height == 0); + *max_key_len = 32*1024; + *max_val_len = 32*1024*1024; +} - TXN_MANAGER txn_manager = toku_ft_get_txn_manager(t); - txn_manager_state txn_state_for_gc(txn_manager); - TXNID oldest_referenced_xid_for_simple_gc = toku_ft_get_oldest_referenced_xid_estimate(t); - txn_gc_info gc_info(&txn_state_for_gc, - oldest_referenced_xid_for_simple_gc, - node->oldest_referenced_xid_known, - true); - if (!node->dirty && child_to_read >= 0) { - paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); - apply_ancestors_messages_to_bn( - t, - node, - child_to_read, - ancestors, - bounds, - &gc_info, - msgs_applied - ); +void toku_ft_handle_set_nodesize(FT_HANDLE ft_handle, unsigned int nodesize) { + if (ft_handle->ft) { + toku_ft_set_nodesize(ft_handle->ft, nodesize); } else { - // know we are a leaf node - // An important invariant: - // We MUST bring every available basement node for a dirty node up to date. - // flushing on the cleaner thread depends on this. 
This invariant - // allows the cleaner thread to just pick an internal node and flush it - // as opposed to being forced to start from the root. - for (int i = 0; i < node->n_children; i++) { - if (BP_STATE(node, i) != PT_AVAIL) { continue; } - apply_ancestors_messages_to_bn( - t, - node, - i, - ancestors, - bounds, - &gc_info, - msgs_applied - ); - } + ft_handle->options.nodesize = nodesize; } - VERIFY_NODE(t, node); } -static bool bn_needs_ancestors_messages( - FT ft, - FTNODE node, - int childnum, - struct pivot_bounds const * const bounds, - ANCESTORS ancestors, - MSN* max_msn_applied - ) -{ - BASEMENTNODE bn = BLB(node, childnum); - struct pivot_bounds curr_bounds = next_pivot_keys(node, childnum, bounds); - bool needs_ancestors_messages = false; - for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) { - if (curr_ancestors->node->max_msn_applied_to_node_on_disk.msn > bn->max_msn_applied.msn) { - paranoid_invariant(BP_STATE(curr_ancestors->node, curr_ancestors->childnum) == PT_AVAIL); - NONLEAF_CHILDINFO bnc = BNC(curr_ancestors->node, curr_ancestors->childnum); - if (bnc->broadcast_list.size() > 0) { - needs_ancestors_messages = true; - goto cleanup; - } - if (!bn->stale_ancestor_messages_applied) { - uint32_t stale_lbi, stale_ube; - find_bounds_within_message_tree(&ft->cmp_descriptor, - ft->compare_fun, - bnc->stale_message_tree, - bnc->buffer, - &curr_bounds, - &stale_lbi, - &stale_ube); - if (stale_lbi < stale_ube) { - needs_ancestors_messages = true; - goto cleanup; - } - } - uint32_t fresh_lbi, fresh_ube; - find_bounds_within_message_tree(&ft->cmp_descriptor, - ft->compare_fun, - bnc->fresh_message_tree, - bnc->buffer, - &curr_bounds, - &fresh_lbi, - &fresh_ube); - if (fresh_lbi < fresh_ube) { - needs_ancestors_messages = true; - goto cleanup; - } - if (curr_ancestors->node->max_msn_applied_to_node_on_disk.msn > max_msn_applied->msn) { - max_msn_applied->msn = 
curr_ancestors->node->max_msn_applied_to_node_on_disk.msn; - } - } +void toku_ft_handle_get_nodesize(FT_HANDLE ft_handle, unsigned int *nodesize) { + if (ft_handle->ft) { + toku_ft_get_nodesize(ft_handle->ft, nodesize); + } + else { + *nodesize = ft_handle->options.nodesize; } -cleanup: - return needs_ancestors_messages; } -bool toku_ft_leaf_needs_ancestors_messages( - FT ft, - FTNODE node, - ANCESTORS ancestors, - struct pivot_bounds const * const bounds, - MSN *const max_msn_in_path, - int child_to_read - ) -// Effect: Determine whether there are messages in a node's ancestors -// which must be applied to it. These messages are in the correct -// keyrange for any available basement nodes, and are in nodes with the -// correct max_msn_applied_to_node_on_disk. -// Notes: -// This is an approximate query. -// Output: -// max_msn_in_path: max of "max_msn_applied_to_node_on_disk" over -// ancestors. This is used later to update basement nodes' -// max_msn_applied values in case we don't do the full algorithm. -// Returns: -// true if there may be some such messages -// false only if there are definitely no such messages -// Rationale: -// When we pin a node with a read lock, we want to quickly determine if -// we should exchange it for a write lock in preparation for applying -// messages. If there are no messages, we don't need the write lock. 
-{ - paranoid_invariant(node->height == 0); - bool needs_ancestors_messages = false; - // child_to_read may be -1 in test cases - if (!node->dirty && child_to_read >= 0) { - paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); - needs_ancestors_messages = bn_needs_ancestors_messages( - ft, - node, - child_to_read, - bounds, - ancestors, - max_msn_in_path - ); +void toku_ft_handle_set_basementnodesize(FT_HANDLE ft_handle, unsigned int basementnodesize) { + if (ft_handle->ft) { + toku_ft_set_basementnodesize(ft_handle->ft, basementnodesize); } else { - for (int i = 0; i < node->n_children; ++i) { - if (BP_STATE(node, i) != PT_AVAIL) { continue; } - needs_ancestors_messages = bn_needs_ancestors_messages( - ft, - node, - i, - bounds, - ancestors, - max_msn_in_path - ); - if (needs_ancestors_messages) { - goto cleanup; - } - } + ft_handle->options.basementnodesize = basementnodesize; } -cleanup: - return needs_ancestors_messages; } -void toku_ft_bn_update_max_msn(FTNODE node, MSN max_msn_applied, int child_to_read) { - invariant(node->height == 0); - if (!node->dirty && child_to_read >= 0) { - paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); - BASEMENTNODE bn = BLB(node, child_to_read); - if (max_msn_applied.msn > bn->max_msn_applied.msn) { - // see comment below - (void) toku_sync_val_compare_and_swap(&bn->max_msn_applied.msn, bn->max_msn_applied.msn, max_msn_applied.msn); - } +void toku_ft_handle_get_basementnodesize(FT_HANDLE ft_handle, unsigned int *basementnodesize) { + if (ft_handle->ft) { + toku_ft_get_basementnodesize(ft_handle->ft, basementnodesize); } else { - for (int i = 0; i < node->n_children; ++i) { - if (BP_STATE(node, i) != PT_AVAIL) { continue; } - BASEMENTNODE bn = BLB(node, i); - if (max_msn_applied.msn > bn->max_msn_applied.msn) { - // This function runs in a shared access context, so to silence tools - // like DRD, we use a CAS and ignore the result. 
- // Any threads trying to update these basement nodes should be - // updating them to the same thing (since they all have a read lock on - // the same root-to-leaf path) so this is safe. - (void) toku_sync_val_compare_and_swap(&bn->max_msn_applied.msn, bn->max_msn_applied.msn, max_msn_applied.msn); - } - } + *basementnodesize = ft_handle->options.basementnodesize; } } -struct copy_to_stale_extra { - FT ft; - NONLEAF_CHILDINFO bnc; -}; +void toku_ft_set_bt_compare(FT_HANDLE ft_handle, int (*bt_compare)(DB*, const DBT*, const DBT*)) { + ft_handle->options.compare_fun = bt_compare; +} -int copy_to_stale(const int32_t &offset, const uint32_t UU(idx), struct copy_to_stale_extra *const extra) __attribute__((nonnull(3))); -int copy_to_stale(const int32_t &offset, const uint32_t UU(idx), struct copy_to_stale_extra *const extra) -{ - struct fifo_entry *entry = toku_fifo_get_entry(extra->bnc->buffer, offset); - DBT keydbt; - DBT *key = fill_dbt_for_fifo_entry(&keydbt, entry); - struct toku_fifo_entry_key_msn_heaviside_extra heaviside_extra = { .desc = &extra->ft->cmp_descriptor, .cmp = extra->ft->compare_fun, .fifo = extra->bnc->buffer, .key = key, .msn = entry->msn }; - int r = extra->bnc->stale_message_tree.insert(offset, heaviside_extra, nullptr); - invariant_zero(r); - return 0; +void toku_ft_set_redirect_callback(FT_HANDLE ft_handle, on_redirect_callback redir_cb, void* extra) { + ft_handle->redirect_callback = redir_cb; + ft_handle->redirect_callback_extra = extra; } -static void ft_bnc_move_messages_to_stale(FT ft, NONLEAF_CHILDINFO bnc) { - struct copy_to_stale_extra cts_extra = { .ft = ft, .bnc = bnc }; - int r = bnc->fresh_message_tree.iterate_over_marked(&cts_extra); - invariant_zero(r); - bnc->fresh_message_tree.delete_all_marked(); +void toku_ft_set_update(FT_HANDLE ft_handle, ft_update_func update_fun) { + ft_handle->options.update_fun = update_fun; } -__attribute__((nonnull)) -void -toku_move_ftnode_messages_to_stale(FT ft, FTNODE node) { - 
invariant(node->height > 0); - for (int i = 0; i < node->n_children; ++i) { - if (BP_STATE(node, i) != PT_AVAIL) { - continue; - } - NONLEAF_CHILDINFO bnc = BNC(node, i); - // We can't delete things out of the fresh tree inside the above - // procedures because we're still looking at the fresh tree. Instead - // we have to move messages after we're done looking at it. - ft_bnc_move_messages_to_stale(ft, bnc); +const toku::comparator &toku_ft_get_comparator(FT_HANDLE ft_handle) { + invariant_notnull(ft_handle->ft); + return ft_handle->ft->cmp; +} + +static void +ft_remove_handle_ref_callback(FT UU(ft), void *extra) { + FT_HANDLE CAST_FROM_VOIDP(handle, extra); + toku_list_remove(&handle->live_ft_handle_link); +} + +static void ft_handle_close(FT_HANDLE ft_handle, bool oplsn_valid, LSN oplsn) { + FT ft = ft_handle->ft; + // There are error paths in the ft_handle_open that end with ft_handle->ft == nullptr. + if (ft != nullptr) { + toku_ft_remove_reference(ft, oplsn_valid, oplsn, ft_remove_handle_ref_callback, ft_handle); } + toku_free(ft_handle); +} + +// close an ft handle during normal operation. the underlying ft may or may not close, +// depending if there are still references. an lsn for this close will come from the logger. +void toku_ft_handle_close(FT_HANDLE ft_handle) { + ft_handle_close(ft_handle, false, ZERO_LSN); +} + +// close an ft handle during recovery. the underlying ft must close, and will use the given lsn. +void toku_ft_handle_close_recovery(FT_HANDLE ft_handle, LSN oplsn) { + // the ft must exist if closing during recovery. 
error paths during + // open for recovery should close handles using toku_ft_handle_close() + invariant_notnull(ft_handle->ft); + ft_handle_close(ft_handle, true, oplsn); } -static int cursor_check_restricted_range(FT_CURSOR c, bytevec key, ITEMLEN keylen) { - if (c->out_of_range_error) { - FT ft = c->ft_handle->ft; - FAKE_DB(db, &ft->cmp_descriptor); - DBT found_key; - toku_fill_dbt(&found_key, key, keylen); - if ((!c->left_is_neg_infty && c->direction <= 0 && ft->compare_fun(&db, &found_key, &c->range_lock_left_key) < 0) || - (!c->right_is_pos_infty && c->direction >= 0 && ft->compare_fun(&db, &found_key, &c->range_lock_right_key) > 0)) { - invariant(c->out_of_range_error); - return c->out_of_range_error; - } - } - // Reset cursor direction to mitigate risk if some query type doesn't set the direction. - // It is always correct to check both bounds (which happens when direction==0) but it can be slower. - c->direction = 0; +// TODO: remove this, callers should instead just use toku_ft_handle_close() +int toku_close_ft_handle_nolsn(FT_HANDLE ft_handle, char **UU(error_string)) { + toku_ft_handle_close(ft_handle); return 0; } -static int -ft_cursor_shortcut ( - FT_CURSOR cursor, - int direction, - uint32_t index, - bn_data* bd, - FT_GET_CALLBACK_FUNCTION getf, - void *getf_v, - uint32_t *keylen, - void **key, - uint32_t *vallen, - void **val - ); +void toku_ft_handle_create(FT_HANDLE *ft_handle_ptr) { + FT_HANDLE XMALLOC(ft_handle); + memset(ft_handle, 0, sizeof *ft_handle); + toku_list_init(&ft_handle->live_ft_handle_link); + ft_handle->options.flags = 0; + ft_handle->did_set_flags = false; + ft_handle->options.nodesize = FT_DEFAULT_NODE_SIZE; + ft_handle->options.basementnodesize = FT_DEFAULT_BASEMENT_NODE_SIZE; + ft_handle->options.compression_method = TOKU_DEFAULT_COMPRESSION_METHOD; + ft_handle->options.fanout = FT_DEFAULT_FANOUT; + ft_handle->options.compare_fun = toku_builtin_compare_fun; + ft_handle->options.update_fun = NULL; + *ft_handle_ptr = ft_handle; 
+} + +/******************************* search ***************************************/ // Return true if this key is within the search bound. If there is no search bound then the tree search continues. static bool search_continue(ft_search *search, void *key, uint32_t key_len) { bool result = true; if (search->direction == FT_SEARCH_LEFT && search->k_bound) { FT_HANDLE CAST_FROM_VOIDP(ft_handle, search->context); - FAKE_DB(db, &ft_handle->ft->cmp_descriptor); DBT this_key = { .data = key, .size = key_len }; // search continues if this key <= key bound - result = (ft_handle->ft->compare_fun(&db, &this_key, search->k_bound) <= 0); + result = (ft_handle->ft->cmp(&this_key, search->k_bound) <= 0); } return result; } +static int heaviside_from_search_t(const DBT &kdbt, ft_search &search) { + int cmp = search.compare(search, + search.k ? &kdbt : 0); + // The search->compare function returns only 0 or 1 + switch (search.direction) { + case FT_SEARCH_LEFT: return cmp==0 ? -1 : +1; + case FT_SEARCH_RIGHT: return cmp==0 ? +1 : -1; // Because the comparison runs backwards for right searches. + } + abort(); return 0; +} + // This is a bottom layer of the search functions. static int ft_search_basement_node( BASEMENTNODE bn, - ft_search_t *search, + ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, bool *doprefetch, @@ -5294,7 +3349,7 @@ bool can_bulk_fetch ) { - // Now we have to convert from ft_search_t to the heaviside function with a direction. What a pain... + // Now we have to convert from ft_search to the heaviside function with a direction. What a pain... int direction; switch (search->direction) { @@ -5319,7 +3374,7 @@ if (toku_ft_cursor_is_leaf_mode(ftcursor)) goto got_a_good_value; // leaf mode cursors see all leaf entries - if (is_le_val_del(le,ftcursor)) { + if (le_val_is_del(le, ftcursor->is_snapshot_read, ftcursor->ttxn)) { // Provisionally deleted stuff is gone. // So we need to scan in the direction to see if we can find something. 
// Every 100 deleted leaf entries check if the leaf's key is within the search bounds. @@ -5349,7 +3404,9 @@ } r = bn->data_buffer.fetch_klpair(idx, &le, &keylen, &key); assert_zero(r); // we just validated the index - if (!is_le_val_del(le,ftcursor)) goto got_a_good_value; + if (!le_val_is_del(le, ftcursor->is_snapshot_read, ftcursor->ttxn)) { + goto got_a_good_value; + } } } got_a_good_value: @@ -5357,42 +3414,31 @@ uint32_t vallen; void *val; - ft_cursor_extract_val(le, - ftcursor, - &vallen, - &val - ); - r = cursor_check_restricted_range(ftcursor, key, keylen); - if (r==0) { + le_extract_val(le, toku_ft_cursor_is_leaf_mode(ftcursor), + ftcursor->is_snapshot_read, ftcursor->ttxn, + &vallen, &val); + r = toku_ft_cursor_check_restricted_range(ftcursor, key, keylen); + if (r == 0) { r = getf(keylen, key, vallen, val, getf_v, false); } - if (r==0 || r == TOKUDB_CURSOR_CONTINUE) { + if (r == 0 || r == TOKUDB_CURSOR_CONTINUE) { // // IMPORTANT: bulk fetch CANNOT go past the current basement node, // because there is no guarantee that messages have been applied // to other basement nodes, as part of #5770 // if (r == TOKUDB_CURSOR_CONTINUE && can_bulk_fetch) { - r = ft_cursor_shortcut( - ftcursor, - direction, - idx, - &bn->data_buffer, - getf, - getf_v, - &keylen, - &key, - &vallen, - &val - ); + r = toku_ft_cursor_shortcut(ftcursor, direction, idx, &bn->data_buffer, + getf, getf_v, &keylen, &key, &vallen, &val); } - ft_cursor_cleanup_dbts(ftcursor); + toku_destroy_dbt(&ftcursor->key); + toku_destroy_dbt(&ftcursor->val); if (!ftcursor->is_temporary) { toku_memdup_dbt(&ftcursor->key, key, keylen); toku_memdup_dbt(&ftcursor->val, val, vallen); } - //The search was successful. Prefetching can continue. + // The search was successful. Prefetching can continue. 
*doprefetch = true; } } @@ -5404,7 +3450,7 @@ ft_search_node ( FT_HANDLE ft_handle, FTNODE node, - ft_search_t *search, + ft_search *search, int child_to_search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, @@ -5412,17 +3458,17 @@ FT_CURSOR ftcursor, UNLOCKERS unlockers, ANCESTORS, - struct pivot_bounds const * const bounds, + const pivot_bounds &bounds, bool can_bulk_fetch ); static int -ftnode_fetch_callback_and_free_bfe(CACHEFILE cf, PAIR p, int fd, BLOCKNUM nodename, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int *dirtyp, void *extraargs) +ftnode_fetch_callback_and_free_bfe(CACHEFILE cf, PAIR p, int fd, BLOCKNUM blocknum, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int *dirtyp, void *extraargs) { - int r = toku_ftnode_fetch_callback(cf, p, fd, nodename, fullhash, ftnode_pv, disk_data, sizep, dirtyp, extraargs); - struct ftnode_fetch_extra *CAST_FROM_VOIDP(ffe, extraargs); - destroy_bfe_for_prefetch(ffe); - toku_free(ffe); + int r = toku_ftnode_fetch_callback(cf, p, fd, blocknum, fullhash, ftnode_pv, disk_data, sizep, dirtyp, extraargs); + ftnode_fetch_extra *CAST_FROM_VOIDP(bfe, extraargs); + bfe->destroy(); + toku_free(bfe); return r; } @@ -5430,12 +3476,24 @@ ftnode_pf_callback_and_free_bfe(void *ftnode_pv, void* disk_data, void *read_extraargs, int fd, PAIR_ATTR *sizep) { int r = toku_ftnode_pf_callback(ftnode_pv, disk_data, read_extraargs, fd, sizep); - struct ftnode_fetch_extra *CAST_FROM_VOIDP(ffe, read_extraargs); - destroy_bfe_for_prefetch(ffe); - toku_free(ffe); + ftnode_fetch_extra *CAST_FROM_VOIDP(bfe, read_extraargs); + bfe->destroy(); + toku_free(bfe); return r; } +CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_node(FT ft) { + CACHETABLE_WRITE_CALLBACK wc; + wc.flush_callback = toku_ftnode_flush_callback; + wc.pe_est_callback = toku_ftnode_pe_est_callback; + wc.pe_callback = toku_ftnode_pe_callback; + wc.cleaner_callback = toku_ftnode_cleaner_callback; + wc.clone_callback = 
toku_ftnode_clone_callback; + wc.checkpoint_complete_callback = toku_ftnode_checkpoint_complete_callback; + wc.write_extraargs = ft; + return wc; +} + static void ft_node_maybe_prefetch(FT_HANDLE ft_handle, FTNODE node, int childnum, FT_CURSOR ftcursor, bool *doprefetch) { // the number of nodes to prefetch @@ -5443,13 +3501,13 @@ // if we want to prefetch in the tree // then prefetch the next children if there are any - if (*doprefetch && ft_cursor_prefetching(ftcursor) && !ftcursor->disable_prefetching) { + if (*doprefetch && toku_ft_cursor_prefetching(ftcursor) && !ftcursor->disable_prefetching) { int rc = ft_cursor_rightmost_child_wanted(ftcursor, ft_handle, node); for (int i = childnum + 1; (i <= childnum + num_nodes_to_prefetch) && (i <= rc); i++) { BLOCKNUM nextchildblocknum = BP_BLOCKNUM(node, i); uint32_t nextfullhash = compute_child_fullhash(ft_handle->ft->cf, node, i); - struct ftnode_fetch_extra *MALLOC(bfe); - fill_bfe_for_prefetch(bfe, ft_handle->ft, ftcursor); + ftnode_fetch_extra *XCALLOC(bfe); + bfe->create_for_prefetch(ft_handle->ft, ftcursor); bool doing_prefetch = false; toku_cachefile_prefetch( ft_handle->ft->cf, @@ -5463,7 +3521,7 @@ &doing_prefetch ); if (!doing_prefetch) { - destroy_bfe_for_prefetch(bfe); + bfe->destroy(); toku_free(bfe); } *doprefetch = false; @@ -5476,6 +3534,7 @@ FTNODE node; bool msgs_applied; }; + // When this is called, the cachetable lock is held static void unlock_ftnode_fun (void *v) { @@ -5495,8 +3554,8 @@ /* search in a node's child */ static int -ft_search_child(FT_HANDLE ft_handle, FTNODE node, int childnum, ft_search_t *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, bool *doprefetch, FT_CURSOR ftcursor, UNLOCKERS unlockers, - ANCESTORS ancestors, struct pivot_bounds const * const bounds, bool can_bulk_fetch) +ft_search_child(FT_HANDLE ft_handle, FTNODE node, int childnum, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, bool *doprefetch, FT_CURSOR ftcursor, UNLOCKERS unlockers, + ANCESTORS 
ancestors, const pivot_bounds &bounds, bool can_bulk_fetch) // Effect: Search in a node's child. Searches are read-only now (at least as far as the hardcopy is concerned). { struct ancestors next_ancestors = {node, childnum, ancestors}; @@ -5508,9 +3567,8 @@ // If the current node's height is greater than 1, then its child is an internal node. // Therefore, to warm the cache better (#5798), we want to read all the partitions off disk in one shot. bool read_all_partitions = node->height > 1; - struct ftnode_fetch_extra bfe; - fill_bfe_for_subset_read( - &bfe, + ftnode_fetch_extra bfe; + bfe.create_for_subset_read( ft_handle->ft, search, &ftcursor->range_lock_left_key, @@ -5574,19 +3632,13 @@ } static inline int -search_which_child_cmp_with_bound(DB *db, ft_compare_func cmp, FTNODE node, int childnum, ft_search_t *search, DBT *dbt) -{ - return cmp(db, toku_copy_dbt(dbt, node->childkeys[childnum]), &search->pivot_bound); +search_which_child_cmp_with_bound(const toku::comparator &cmp, FTNODE node, int childnum, + ft_search *search, DBT *dbt) { + return cmp(toku_copyref_dbt(dbt, node->pivotkeys.get_pivot(childnum)), &search->pivot_bound); } int -toku_ft_search_which_child( - DESCRIPTOR desc, - ft_compare_func cmp, - FTNODE node, - ft_search_t *search - ) -{ +toku_ft_search_which_child(const toku::comparator &cmp, FTNODE node, ft_search *search) { if (node->n_children <= 1) return 0; DBT pivotkey; @@ -5596,7 +3648,7 @@ int mi; while (lo < hi) { mi = (lo + hi) / 2; - toku_copy_dbt(&pivotkey, node->childkeys[mi]); + node->pivotkeys.fill_pivot(mi, &pivotkey); // search->compare is really strange, and only works well with a // linear search, it makes binary search a pita. 
// @@ -5621,10 +3673,9 @@ // ready to return something, if the pivot is bounded, we have to move // over a bit to get away from what we've already searched if (search->pivot_bound.data != nullptr) { - FAKE_DB(db, desc); if (search->direction == FT_SEARCH_LEFT) { while (lo < node->n_children - 1 && - search_which_child_cmp_with_bound(&db, cmp, node, lo, search, &pivotkey) <= 0) { + search_which_child_cmp_with_bound(cmp, node, lo, search, &pivotkey) <= 0) { // searching left to right, if the comparison says the // current pivot (lo) is left of or equal to our bound, // don't search that child again @@ -5632,11 +3683,11 @@ } } else { while (lo > 0 && - search_which_child_cmp_with_bound(&db, cmp, node, lo - 1, search, &pivotkey) >= 0) { + search_which_child_cmp_with_bound(cmp, node, lo - 1, search, &pivotkey) >= 0) { // searching right to left, same argument as just above // (but we had to pass lo - 1 because the pivot between lo // and the thing just less than it is at that position in - // the childkeys array) + // the pivot keys array) lo--; } } @@ -5648,17 +3699,17 @@ maybe_search_save_bound( FTNODE node, int child_searched, - ft_search_t *search) + ft_search *search) { int p = (search->direction == FT_SEARCH_LEFT) ? child_searched : child_searched - 1; if (p >= 0 && p < node->n_children-1) { toku_destroy_dbt(&search->pivot_bound); - toku_clone_dbt(&search->pivot_bound, node->childkeys[p]); + toku_clone_dbt(&search->pivot_bound, node->pivotkeys.get_pivot(p)); } } // Returns true if there are still children left to search in this node within the search bound (if any). 
-static bool search_try_again(FTNODE node, int child_to_search, ft_search_t *search) { +static bool search_try_again(FTNODE node, int child_to_search, ft_search *search) { bool try_again = false; if (search->direction == FT_SEARCH_LEFT) { if (child_to_search < node->n_children-1) { @@ -5666,8 +3717,7 @@ // if there is a search bound and the bound is within the search pivot then continue the search if (search->k_bound) { FT_HANDLE CAST_FROM_VOIDP(ft_handle, search->context); - FAKE_DB(db, &ft_handle->ft->cmp_descriptor); - try_again = (ft_handle->ft->compare_fun(&db, search->k_bound, &search->pivot_bound) > 0); + try_again = (ft_handle->ft->cmp(search->k_bound, &search->pivot_bound) > 0); } } } else if (search->direction == FT_SEARCH_RIGHT) { @@ -5681,7 +3731,7 @@ ft_search_node( FT_HANDLE ft_handle, FTNODE node, - ft_search_t *search, + ft_search *search, int child_to_search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, @@ -5689,7 +3739,7 @@ FT_CURSOR ftcursor, UNLOCKERS unlockers, ANCESTORS ancestors, - struct pivot_bounds const * const bounds, + const pivot_bounds &bounds, bool can_bulk_fetch ) { @@ -5701,7 +3751,7 @@ // At this point, we must have the necessary partition available to continue the search // assert(BP_STATE(node,child_to_search) == PT_AVAIL); - const struct pivot_bounds next_bounds = next_pivot_keys(node, child_to_search, bounds); + const pivot_bounds next_bounds = bounds.next_bounds(node, child_to_search); if (node->height > 0) { r = ft_search_child( ft_handle, @@ -5714,7 +3764,7 @@ ftcursor, unlockers, ancestors, - &next_bounds, + next_bounds, can_bulk_fetch ); } @@ -5743,12 +3793,8 @@ // we have a new pivotkey if (node->height == 0) { // when we run off the end of a basement, try to lock the range up to the pivot. 
solves #3529 - const DBT *pivot = nullptr; - if (search->direction == FT_SEARCH_LEFT) { - pivot = next_bounds.upper_bound_inclusive; // left -> right - } else { - pivot = next_bounds.lower_bound_exclusive; // right -> left - } + const DBT *pivot = search->direction == FT_SEARCH_LEFT ? next_bounds.ubi() : // left -> right + next_bounds.lbe(); // right -> left if (pivot != nullptr) { int rr = getf(pivot->size, pivot->data, 0, nullptr, getf_v, true); if (rr != 0) { @@ -5776,8 +3822,7 @@ return r; } -static int -toku_ft_search (FT_HANDLE ft_handle, ft_search_t *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, FT_CURSOR ftcursor, bool can_bulk_fetch) +int toku_ft_search(FT_HANDLE ft_handle, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, FT_CURSOR ftcursor, bool can_bulk_fetch) // Effect: Perform a search. Associate cursor with a leaf if possible. // All searches are performed through this function. { @@ -5807,7 +3852,7 @@ // and the partial fetch callback (in case the node is perhaps partially in memory) to the fetch the node // - This eventually calls either toku_ftnode_fetch_callback or toku_ftnode_pf_req_callback depending on whether the node is in // memory at all or not. - // - Within these functions, the "ft_search_t search" parameter is used to evaluate which child the search is interested in. + // - Within these functions, the "ft_search search" parameter is used to evaluate which child the search is interested in. // If the node is not in memory at all, toku_ftnode_fetch_callback will read the node and decompress only the partition for the // relevant child, be it a message buffer or basement node. If the node is in memory, then toku_ftnode_pf_req_callback // will tell the cachetable that a partial fetch is required if and only if the relevant child is not in memory. 
If the relevant child @@ -5817,9 +3862,8 @@ // - At this point, toku_ftnode_pin_holding_lock has returned, with bfe.child_to_read set, // - ft_search_node is called, assuming that the node and its relevant partition are in memory. // - struct ftnode_fetch_extra bfe; - fill_bfe_for_subset_read( - &bfe, + ftnode_fetch_extra bfe; + bfe.create_for_subset_read( ft, search, &ftcursor->range_lock_left_key, @@ -5854,7 +3898,7 @@ { bool doprefetch = false; //static int counter = 0; counter++; - r = ft_search_node(ft_handle, node, search, bfe.child_to_read, getf, getf_v, &doprefetch, ftcursor, &unlockers, (ANCESTORS)NULL, &infinite_bounds, can_bulk_fetch); + r = ft_search_node(ft_handle, node, search, bfe.child_to_read, getf, getf_v, &doprefetch, ftcursor, &unlockers, (ANCESTORS)NULL, pivot_bounds::infinite_bounds(), can_bulk_fetch); if (r==TOKUDB_TRY_AGAIN) { // there are two cases where we get TOKUDB_TRY_AGAIN // case 1 is when some later call to toku_pin_ftnode returned @@ -5908,355 +3952,20 @@ return r; } -struct ft_cursor_search_struct { - FT_GET_CALLBACK_FUNCTION getf; - void *getf_v; - FT_CURSOR cursor; - ft_search_t *search; -}; - -/* search for the first kv pair that matches the search object */ -static int -ft_cursor_search(FT_CURSOR cursor, ft_search_t *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, bool can_bulk_fetch) -{ - int r = toku_ft_search(cursor->ft_handle, search, getf, getf_v, cursor, can_bulk_fetch); - return r; -} - -static inline int compare_k_x(FT_HANDLE ft_handle, const DBT *k, const DBT *x) { - FAKE_DB(db, &ft_handle->ft->cmp_descriptor); - return ft_handle->ft->compare_fun(&db, k, x); -} - -static int -ft_cursor_compare_one(const ft_search_t &search __attribute__((__unused__)), const DBT *x __attribute__((__unused__))) -{ - return 1; -} - -static int ft_cursor_compare_set(const ft_search_t &search, const DBT *x) { - FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); - return compare_k_x(ft_handle, search.k, x) <= 0; /* return min xy: kv 
<= xy */ -} - -static int -ft_cursor_current_getf(ITEMLEN keylen, bytevec key, - ITEMLEN vallen, bytevec val, - void *v, bool lock_only) { - struct ft_cursor_search_struct *CAST_FROM_VOIDP(bcss, v); - int r; - if (key==NULL) { - r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); - } else { - FT_CURSOR cursor = bcss->cursor; - DBT newkey; - toku_fill_dbt(&newkey, key, keylen); - if (compare_k_x(cursor->ft_handle, &cursor->key, &newkey) != 0) { - r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); // This was once DB_KEYEMPTY - if (r==0) r = TOKUDB_FOUND_BUT_REJECTED; - } - else - r = bcss->getf(keylen, key, vallen, val, bcss->getf_v, lock_only); - } - return r; -} - -int -toku_ft_cursor_current(FT_CURSOR cursor, int op, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - if (ft_cursor_not_set(cursor)) - return EINVAL; - cursor->direction = 0; - if (op == DB_CURRENT) { - struct ft_cursor_search_struct bcss = {getf, getf_v, cursor, 0}; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_set, FT_SEARCH_LEFT, &cursor->key, nullptr, cursor->ft_handle); - int r = toku_ft_search(cursor->ft_handle, &search, ft_cursor_current_getf, &bcss, cursor, false); - ft_search_finish(&search); - return r; - } - return getf(cursor->key.size, cursor->key.data, cursor->val.size, cursor->val.data, getf_v, false); // ft_cursor_copyout(cursor, outkey, outval); -} - -int -toku_ft_cursor_first(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = 0; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_one, FT_SEARCH_LEFT, nullptr, nullptr, cursor->ft_handle); - int r = ft_cursor_search(cursor, &search, getf, getf_v, false); - ft_search_finish(&search); - return r; -} - -int -toku_ft_cursor_last(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = 0; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_one, FT_SEARCH_RIGHT, nullptr, nullptr, cursor->ft_handle); - int r = 
ft_cursor_search(cursor, &search, getf, getf_v, false); - ft_search_finish(&search); - return r; -} - -static int ft_cursor_compare_next(const ft_search_t &search, const DBT *x) { - FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); - return compare_k_x(ft_handle, search.k, x) < 0; /* return min xy: kv < xy */ -} - -static int -ft_cursor_shortcut ( - FT_CURSOR cursor, - int direction, - uint32_t index, - bn_data* bd, - FT_GET_CALLBACK_FUNCTION getf, - void *getf_v, - uint32_t *keylen, - void **key, - uint32_t *vallen, - void **val - ) -{ - int r = 0; - // if we are searching towards the end, limit is last element - // if we are searching towards the beginning, limit is the first element - uint32_t limit = (direction > 0) ? (bd->num_klpairs() - 1) : 0; - - //Starting with the prev, find the first real (non-provdel) leafentry. - while (index != limit) { - index += direction; - LEAFENTRY le; - void* foundkey = NULL; - uint32_t foundkeylen = 0; - - r = bd->fetch_klpair(index, &le, &foundkeylen, &foundkey); - invariant_zero(r); - - if (toku_ft_cursor_is_leaf_mode(cursor) || !is_le_val_del(le, cursor)) { - ft_cursor_extract_val( - le, - cursor, - vallen, - val - ); - *key = foundkey; - *keylen = foundkeylen; - - cursor->direction = direction; - r = cursor_check_restricted_range(cursor, *key, *keylen); - if (r!=0) { - paranoid_invariant(r == cursor->out_of_range_error); - // We already got at least one entry from the bulk fetch. - // Return 0 (instead of out of range error). 
- r = 0; - break; - } - r = getf(*keylen, *key, *vallen, *val, getf_v, false); - if (r == TOKUDB_CURSOR_CONTINUE) { - continue; - } - else { - break; - } - } - } - - return r; -} - -int -toku_ft_cursor_next(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = +1; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_next, FT_SEARCH_LEFT, &cursor->key, nullptr, cursor->ft_handle); - int r = ft_cursor_search(cursor, &search, getf, getf_v, true); - ft_search_finish(&search); - if (r == 0) ft_cursor_set_prefetching(cursor); - return r; -} - -static int -ft_cursor_search_eq_k_x_getf(ITEMLEN keylen, bytevec key, - ITEMLEN vallen, bytevec val, - void *v, bool lock_only) { - struct ft_cursor_search_struct *CAST_FROM_VOIDP(bcss, v); - int r; - if (key==NULL) { - r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, false); - } else { - FT_CURSOR cursor = bcss->cursor; - DBT newkey; - toku_fill_dbt(&newkey, key, keylen); - if (compare_k_x(cursor->ft_handle, bcss->search->k, &newkey) == 0) { - r = bcss->getf(keylen, key, vallen, val, bcss->getf_v, lock_only); - } else { - r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); - if (r==0) r = TOKUDB_FOUND_BUT_REJECTED; - } - } - return r; -} - -/* search for the kv pair that matches the search object and is equal to k */ -static int -ft_cursor_search_eq_k_x(FT_CURSOR cursor, ft_search_t *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - struct ft_cursor_search_struct bcss = {getf, getf_v, cursor, search}; - int r = toku_ft_search(cursor->ft_handle, search, ft_cursor_search_eq_k_x_getf, &bcss, cursor, false); - return r; -} - -static int ft_cursor_compare_prev(const ft_search_t &search, const DBT *x) { - FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); - return compare_k_x(ft_handle, search.k, x) > 0; /* return max xy: kv > xy */ -} - -int -toku_ft_cursor_prev(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = -1; - ft_search_t search; 
- ft_search_init(&search, ft_cursor_compare_prev, FT_SEARCH_RIGHT, &cursor->key, nullptr, cursor->ft_handle); - int r = ft_cursor_search(cursor, &search, getf, getf_v, true); - ft_search_finish(&search); - return r; -} - -static int ft_cursor_compare_set_range(const ft_search_t &search, const DBT *x) { - FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); - return compare_k_x(ft_handle, search.k, x) <= 0; /* return kv <= xy */ -} - -int -toku_ft_cursor_set(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = 0; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_set_range, FT_SEARCH_LEFT, key, nullptr, cursor->ft_handle); - int r = ft_cursor_search_eq_k_x(cursor, &search, getf, getf_v); - ft_search_finish(&search); - return r; -} - -int -toku_ft_cursor_set_range(FT_CURSOR cursor, DBT *key, DBT *key_bound, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = 0; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_set_range, FT_SEARCH_LEFT, key, key_bound, cursor->ft_handle); - int r = ft_cursor_search(cursor, &search, getf, getf_v, false); - ft_search_finish(&search); - return r; -} - -static int ft_cursor_compare_set_range_reverse(const ft_search_t &search, const DBT *x) { - FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); - return compare_k_x(ft_handle, search.k, x) >= 0; /* return kv >= xy */ -} - -int -toku_ft_cursor_set_range_reverse(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = 0; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_set_range_reverse, FT_SEARCH_RIGHT, key, nullptr, cursor->ft_handle); - int r = ft_cursor_search(cursor, &search, getf, getf_v, false); - ft_search_finish(&search); - return r; -} - - -//TODO: When tests have been rewritten, get rid of this function. -//Only used by tests. 
-int -toku_ft_cursor_get (FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags) -{ - int op = get_flags & DB_OPFLAGS_MASK; - if (get_flags & ~DB_OPFLAGS_MASK) - return EINVAL; - - switch (op) { - case DB_CURRENT: - case DB_CURRENT_BINDING: - return toku_ft_cursor_current(cursor, op, getf, getf_v); - case DB_FIRST: - return toku_ft_cursor_first(cursor, getf, getf_v); - case DB_LAST: - return toku_ft_cursor_last(cursor, getf, getf_v); - case DB_NEXT: - if (ft_cursor_not_set(cursor)) { - return toku_ft_cursor_first(cursor, getf, getf_v); - } else { - return toku_ft_cursor_next(cursor, getf, getf_v); - } - case DB_PREV: - if (ft_cursor_not_set(cursor)) { - return toku_ft_cursor_last(cursor, getf, getf_v); - } else { - return toku_ft_cursor_prev(cursor, getf, getf_v); - } - case DB_SET: - return toku_ft_cursor_set(cursor, key, getf, getf_v); - case DB_SET_RANGE: - return toku_ft_cursor_set_range(cursor, key, nullptr, getf, getf_v); - default: ;// Fall through - } - return EINVAL; -} - -void -toku_ft_cursor_peek(FT_CURSOR cursor, const DBT **pkey, const DBT **pval) -// Effect: Retrieves a pointer to the DBTs for the current key and value. -// Requires: The caller may not modify the DBTs or the memory at which they points. 
-// Requires: The caller must be in the context of a -// FT_GET_(STRADDLE_)CALLBACK_FUNCTION -{ - *pkey = &cursor->key; - *pval = &cursor->val; -} - -bool toku_ft_cursor_uninitialized(FT_CURSOR c) { - return ft_cursor_not_set(c); -} - - -/* ********************************* lookup **************************************/ - -int -toku_ft_lookup (FT_HANDLE ft_handle, DBT *k, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - int r, rr; - FT_CURSOR cursor; - - rr = toku_ft_cursor(ft_handle, &cursor, NULL, false, false); - if (rr != 0) return rr; - - int op = DB_SET; - r = toku_ft_cursor_get(cursor, k, getf, getf_v, op); - - toku_ft_cursor_close(cursor); - - return r; -} - /* ********************************* delete **************************************/ static int -getf_nothing (ITEMLEN UU(keylen), bytevec UU(key), ITEMLEN UU(vallen), bytevec UU(val), void *UU(pair_v), bool UU(lock_only)) { +getf_nothing (uint32_t UU(keylen), const void *UU(key), uint32_t UU(vallen), const void *UU(val), void *UU(pair_v), bool UU(lock_only)) { return 0; } -int -toku_ft_cursor_delete(FT_CURSOR cursor, int flags, TOKUTXN txn) { +int toku_ft_cursor_delete(FT_CURSOR cursor, int flags, TOKUTXN txn) { int r; int unchecked_flags = flags; bool error_if_missing = (bool) !(flags&DB_DELETE_ANY); unchecked_flags &= ~DB_DELETE_ANY; if (unchecked_flags!=0) r = EINVAL; - else if (ft_cursor_not_set(cursor)) r = EINVAL; + else if (toku_ft_cursor_not_set(cursor)) r = EINVAL; else { r = 0; if (error_if_missing) { @@ -6271,17 +3980,14 @@ /* ********************* keyrange ************************ */ - struct keyrange_compare_s { FT ft; const DBT *key; }; -static int -keyrange_compare (DBT const &kdbt, const struct keyrange_compare_s &s) { - // TODO: maybe put a const fake_db in the header - FAKE_DB(db, &s.ft->cmp_descriptor); - return s.ft->compare_fun(&db, &kdbt, s.key); +// TODO: Remove me, I'm boring +static int keyrange_compare(DBT const &kdbt, const struct keyrange_compare_s &s) { + return 
s.ft->cmp(&kdbt, s.key); } static void @@ -6344,17 +4050,17 @@ uint64_t* less, uint64_t* equal_left, uint64_t* middle, uint64_t* equal_right, uint64_t* greater, bool* single_basement_node, uint64_t estimated_num_rows, - struct ftnode_fetch_extra *min_bfe, // set up to read a minimal read. - struct ftnode_fetch_extra *match_bfe, // set up to read a basement node iff both keys in it - struct unlockers *unlockers, ANCESTORS ancestors, struct pivot_bounds const * const bounds) + ftnode_fetch_extra *min_bfe, // set up to read a minimal read. + ftnode_fetch_extra *match_bfe, // set up to read a basement node iff both keys in it + struct unlockers *unlockers, ANCESTORS ancestors, const pivot_bounds &bounds) // Implementation note: Assign values to less, equal, and greater, and then on the way out (returning up the stack) we add more values in. { int r = 0; // if KEY is NULL then use the leftmost key. - int left_child_number = key_left ? toku_ftnode_which_child (node, key_left, &ft_handle->ft->cmp_descriptor, ft_handle->ft->compare_fun) : 0; + int left_child_number = key_left ? toku_ftnode_which_child (node, key_left, ft_handle->ft->cmp) : 0; int right_child_number = node->n_children; // Sentinel that does not equal left_child_number. if (may_find_right) { - right_child_number = key_right ? toku_ftnode_which_child (node, key_right, &ft_handle->ft->cmp_descriptor, ft_handle->ft->compare_fun) : node->n_children - 1; + right_child_number = key_right ? 
toku_ftnode_which_child (node, key_right, ft_handle->ft->cmp) : node->n_children - 1; } uint64_t rows_per_child = estimated_num_rows / node->n_children; @@ -6394,11 +4100,11 @@ struct unlock_ftnode_extra unlock_extra = {ft_handle,childnode,false}; struct unlockers next_unlockers = {true, unlock_ftnode_fun, (void*)&unlock_extra, unlockers}; - const struct pivot_bounds next_bounds = next_pivot_keys(node, left_child_number, bounds); + const pivot_bounds next_bounds = bounds.next_bounds(node, left_child_number); r = toku_ft_keysrange_internal(ft_handle, childnode, key_left, key_right, child_may_find_right, less, equal_left, middle, equal_right, greater, single_basement_node, - rows_per_child, min_bfe, match_bfe, &next_unlockers, &next_ancestors, &next_bounds); + rows_per_child, min_bfe, match_bfe, &next_unlockers, &next_ancestors, next_bounds); if (r != TOKUDB_TRY_AGAIN) { assert_zero(r); @@ -6442,10 +4148,10 @@ return; } paranoid_invariant(!(!key_left && key_right)); - struct ftnode_fetch_extra min_bfe; - struct ftnode_fetch_extra match_bfe; - fill_bfe_for_min_read(&min_bfe, ft_handle->ft); // read pivot keys but not message buffers - fill_bfe_for_keymatch(&match_bfe, ft_handle->ft, key_left, key_right, false, false); // read basement node only if both keys in it. + ftnode_fetch_extra min_bfe; + ftnode_fetch_extra match_bfe; + min_bfe.create_for_min_read(ft_handle->ft); // read pivot keys but not message buffers + match_bfe.create_for_keymatch(ft_handle->ft, key_left, key_right, false, false); // read basement node only if both keys in it. 
try_again: { uint64_t less = 0, equal_left = 0, middle = 0, equal_right = 0, greater = 0; @@ -6477,7 +4183,7 @@ r = toku_ft_keysrange_internal (ft_handle, node, key_left, key_right, true, &less, &equal_left, &middle, &equal_right, &greater, &single_basement_node, numrows, - &min_bfe, &match_bfe, &unlockers, (ANCESTORS)NULL, &infinite_bounds); + &min_bfe, &match_bfe, &unlockers, (ANCESTORS)NULL, pivot_bounds::infinite_bounds()); assert(r == 0 || r == TOKUDB_TRY_AGAIN); if (r == TOKUDB_TRY_AGAIN) { assert(!unlockers.locked); @@ -6493,7 +4199,7 @@ r = toku_ft_keysrange_internal (ft_handle, node, key_right, nullptr, false, &less2, &equal_left2, &middle2, &equal_right2, &greater2, &ignore, numrows, - &min_bfe, &match_bfe, &unlockers, (ANCESTORS)nullptr, &infinite_bounds); + &min_bfe, &match_bfe, &unlockers, (ANCESTORS)nullptr, pivot_bounds::infinite_bounds()); assert(r == 0 || r == TOKUDB_TRY_AGAIN); if (r == TOKUDB_TRY_AGAIN) { assert(!unlockers.locked); @@ -6580,9 +4286,9 @@ return r; } -static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, PIVOT_BOUNDS bounds, FTNODE_FETCH_EXTRA bfe, ft_search_t *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped); +static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, ftnode_fetch_extra *bfe, ft_search *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped); -static int get_key_after_bytes_in_child(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, PIVOT_BOUNDS bounds, FTNODE_FETCH_EXTRA bfe, ft_search_t *search, int childnum, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void 
*cb_extra, uint64_t *skipped) { +static int get_key_after_bytes_in_child(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, ftnode_fetch_extra *bfe, ft_search *search, int childnum, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) { int r; struct ancestors next_ancestors = {node, childnum, ancestors}; BLOCKNUM childblocknum = BP_BLOCKNUM(node, childnum); @@ -6597,13 +4303,13 @@ assert_zero(r); struct unlock_ftnode_extra unlock_extra = {ft_h, child, false}; struct unlockers next_unlockers = {true, unlock_ftnode_fun, (void *) &unlock_extra, unlockers}; - const struct pivot_bounds next_bounds = next_pivot_keys(node, childnum, bounds); - return get_key_after_bytes_in_subtree(ft_h, ft, child, &next_unlockers, &next_ancestors, &next_bounds, bfe, search, subtree_bytes, start_key, skip_len, callback, cb_extra, skipped); + const pivot_bounds next_bounds = bounds.next_bounds(node, childnum); + return get_key_after_bytes_in_subtree(ft_h, ft, child, &next_unlockers, &next_ancestors, next_bounds, bfe, search, subtree_bytes, start_key, skip_len, callback, cb_extra, skipped); } -static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, PIVOT_BOUNDS bounds, FTNODE_FETCH_EXTRA bfe, ft_search_t *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) { +static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, ftnode_fetch_extra *bfe, ft_search *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) { int r; - int childnum = toku_ft_search_which_child(&ft->cmp_descriptor, 
ft->compare_fun, node, search); + int childnum = toku_ft_search_which_child(ft->cmp, node, search); const uint64_t child_subtree_bytes = subtree_bytes / node->n_children; if (node->height == 0) { r = DB_NOTFOUND; @@ -6619,7 +4325,8 @@ } else { *skipped += child_subtree_bytes; if (*skipped >= skip_len && i < node->n_children - 1) { - callback(&node->childkeys[i], *skipped, cb_extra); + DBT pivot; + callback(node->pivotkeys.fill_pivot(i, &pivot), *skipped, cb_extra); r = 0; } // Otherwise, r is still DB_NOTFOUND. If this is the last @@ -6664,8 +4371,8 @@ // an error code otherwise { FT ft = ft_h->ft; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft); while (true) { FTNODE root; { @@ -6676,8 +4383,8 @@ } struct unlock_ftnode_extra unlock_extra = {ft_h, root, false}; struct unlockers unlockers = {true, unlock_ftnode_fun, (void*)&unlock_extra, (UNLOCKERS) nullptr}; - ft_search_t search; - ft_search_init(&search, (start_key == nullptr ? ft_cursor_compare_one : ft_cursor_compare_set_range), FT_SEARCH_LEFT, start_key, nullptr, ft_h); + ft_search search; + ft_search_init(&search, (start_key == nullptr ? toku_ft_cursor_compare_one : toku_ft_cursor_compare_set_range), FT_SEARCH_LEFT, start_key, nullptr, ft_h); int r; // We can't do this because of #5768, there may be dictionaries in the wild that have negative stats. 
This won't affect mongo so it's ok: @@ -6687,7 +4394,7 @@ numbytes = 0; } uint64_t skipped = 0; - r = get_key_after_bytes_in_subtree(ft_h, ft, root, &unlockers, nullptr, &infinite_bounds, &bfe, &search, (uint64_t) numbytes, start_key, skip_len, callback, cb_extra, &skipped); + r = get_key_after_bytes_in_subtree(ft_h, ft, root, &unlockers, nullptr, pivot_bounds::infinite_bounds(), &bfe, &search, (uint64_t) numbytes, start_key, skip_len, callback, cb_extra, &skipped); assert(!unlockers.locked); if (r != TOKUDB_TRY_AGAIN) { if (r == DB_NOTFOUND) { @@ -6728,8 +4435,8 @@ toku_get_node_for_verify(blocknum, ft_handle, &node); result=toku_verify_ftnode(ft_handle, ft_handle->ft->h->max_msn_in_ft, ft_handle->ft->h->max_msn_in_ft, false, node, -1, lorange, hirange, NULL, NULL, 0, 1, 0); uint32_t fullhash = toku_cachetable_hash(ft_handle->ft->cf, blocknum); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft_handle->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); toku_pin_ftnode( ft_handle->ft, blocknum, @@ -6748,20 +4455,27 @@ int i; for (i=0; i+1< node->n_children; i++) { fprintf(file, "%*spivotkey %d =", depth+1, "", i); - toku_print_BYTESTRING(file, node->childkeys[i].size, (char *) node->childkeys[i].data); + toku_print_BYTESTRING(file, node->pivotkeys.get_pivot(i).size, (char *) node->pivotkeys.get_pivot(i).data); fprintf(file, "\n"); } for (i=0; i< node->n_children; i++) { if (node->height > 0) { NONLEAF_CHILDINFO bnc = BNC(node, i); fprintf(file, "%*schild %d buffered (%d entries):", depth+1, "", i, toku_bnc_n_entries(bnc)); - FIFO_ITERATE(bnc->buffer, key, keylen, data, datalen, type, msn, xids, UU(is_fresh), - { - data=data; datalen=datalen; keylen=keylen; - fprintf(file, "%*s xid=%" PRIu64 " %u (type=%d) msn=0x%" PRIu64 "\n", depth+2, "", xids_get_innermost_xid(xids), (unsigned)toku_dtoh32(*(int*)key), type, msn.msn); - //assert(strlen((char*)key)+1==keylen); - //assert(strlen((char*)data)+1==datalen); - }); + struct 
print_msg_fn { + FILE *file; + int depth; + print_msg_fn(FILE *f, int d) : file(f), depth(d) { } + int operator()(const ft_msg &msg, bool UU(is_fresh)) { + fprintf(file, "%*s xid=%" PRIu64 " %u (type=%d) msn=0x%" PRIu64 "\n", + depth+2, "", + toku_xids_get_innermost_xid(msg.xids()), + static_cast(toku_dtoh32(*(int*)msg.kdbt()->data)), + msg.type(), msg.msn().msn); + return 0; + } + } print_fn(file, depth); + bnc->msg_buffer.iterate(print_fn); } else { int size = BLB_DATA(node, i)->num_klpairs(); @@ -6783,12 +4497,13 @@ for (i=0; in_children; i++) { fprintf(file, "%*schild %d\n", depth, "", i); if (i>0) { - char *CAST_FROM_VOIDP(key, node->childkeys[i-1].data); - fprintf(file, "%*spivot %d len=%u %u\n", depth+1, "", i-1, node->childkeys[i-1].size, (unsigned)toku_dtoh32(*(int*)key)); + char *CAST_FROM_VOIDP(key, node->pivotkeys.get_pivot(i - 1).data); + fprintf(file, "%*spivot %d len=%u %u\n", depth+1, "", i-1, node->pivotkeys.get_pivot(i - 1).size, (unsigned)toku_dtoh32(*(int*)key)); } + DBT x, y; toku_dump_ftnode(file, ft_handle, BP_BLOCKNUM(node, i), depth+4, - (i==0) ? lorange : &node->childkeys[i-1], - (i==node->n_children-1) ? hirange : &node->childkeys[i]); + (i==0) ? lorange : node->pivotkeys.fill_pivot(i - 1, &x), + (i==node->n_children-1) ? 
hirange : node->pivotkeys.fill_pivot(i, &y)); } } } @@ -6796,17 +4511,15 @@ return result; } -int toku_dump_ft (FILE *f, FT_HANDLE ft_handle) { - int r; - assert(ft_handle->ft); - toku_dump_translation_table(f, ft_handle->ft->blocktable); - { - uint32_t fullhash = 0; - CACHEKEY root_key; - toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &fullhash); - r = toku_dump_ftnode(f, ft_handle, root_key, 0, 0, 0); - } - return r; +int toku_dump_ft(FILE *f, FT_HANDLE ft_handle) { + FT ft = ft_handle->ft; + invariant_notnull(ft); + ft->blocktable.dump_translation_table(f); + + uint32_t fullhash = 0; + CACHEKEY root_key; + toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &fullhash); + return toku_dump_ftnode(f, ft_handle, root_key, 0, 0, 0); } int toku_ft_layer_init(void) { @@ -6898,18 +4611,15 @@ toku_cachefile_unlink_on_close(cf); } -int -toku_ft_get_fragmentation(FT_HANDLE ft_handle, TOKU_DB_FRAGMENTATION report) { - int r; - +int toku_ft_get_fragmentation(FT_HANDLE ft_handle, TOKU_DB_FRAGMENTATION report) { int fd = toku_cachefile_get_fd(ft_handle->ft->cf); toku_ft_lock(ft_handle->ft); int64_t file_size; - r = toku_os_get_file_size(fd, &file_size); - if (r==0) { + int r = toku_os_get_file_size(fd, &file_size); + if (r == 0) { report->file_size_bytes = file_size; - toku_block_table_get_fragmentation_unlocked(ft_handle->ft->blocktable, report); + ft_handle->ft->blocktable.get_fragmentation_unlocked(report); } toku_ft_unlock(ft_handle->ft); return r; @@ -6925,8 +4635,8 @@ { BLOCKNUM childblocknum = BP_BLOCKNUM(node,childnum); uint32_t fullhash = compute_child_fullhash(ft_handle->ft->cf, node, childnum); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft_handle->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); // don't need to pass in dependent nodes as we are not // modifying nodes we are pinning toku_pin_ftnode( @@ -6964,8 +4674,8 @@ { CACHEKEY root_key; toku_calculate_root_offset_pointer(ft_handle->ft, 
&root_key, &fullhash); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft_handle->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); toku_pin_ftnode( ft_handle->ft, root_key, @@ -7001,6 +4711,26 @@ } } +int toku_keycompare(const void *key1, uint32_t key1len, const void *key2, uint32_t key2len) { + int comparelen = key1len < key2len ? key1len : key2len; + int c = memcmp(key1, key2, comparelen); + if (__builtin_expect(c != 0, 1)) { + return c; + } else { + if (key1len < key2len) { + return -1; + } else if (key1len > key2len) { + return 1; + } else { + return 0; + } + } +} + +int toku_builtin_compare_fun(DB *db __attribute__((__unused__)), const DBT *a, const DBT*b) { + return toku_keycompare(a->data, a->size, b->data, b->size); +} + #include void __attribute__((__constructor__)) toku_ft_helgrind_ignore(void); void diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-ops.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-ops.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-ops.h 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-ops.h 2014-10-08 13:19:51.000000000 +0000 @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_OPS_H -#define FT_OPS_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,33 +86,22 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." // This must be first to make the 64-bit file mode work right in Linux #define _FILE_OFFSET_BITS 64 -#include "fttypes.h" -#include "ybt.h" + #include -#include "cachetable.h" -#include "log.h" -#include "ft-search.h" -#include "compress.h" - -// A callback function is invoked with the key, and the data. -// The pointers (to the bytevecs) must not be modified. The data must be copied out before the callback function returns. -// Note: In the thread-safe version, the ftnode remains locked while the callback function runs. So return soon, and don't call the ft code from the callback function. -// If the callback function returns a nonzero value (an error code), then that error code is returned from the get function itself. -// The cursor object will have been updated (so that if result==0 the current value is the value being passed) -// (If r!=0 then the cursor won't have been updated.) -// If r!=0, it's up to the callback function to return that value of r. -// A 'key' bytevec of NULL means that element is not found (effectively infinity or -// -infinity depending on direction) -// When lock_only is false, the callback does optional lock tree locking and then processes the key and val. -// When lock_only is true, the callback only does optional lock tree locking. -typedef int(*FT_GET_CALLBACK_FUNCTION)(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only); -typedef bool(*FT_CHECK_INTERRUPT_CALLBACK)(void* extra); +#include "ft/cachetable/cachetable.h" +#include "ft/comparator.h" +#include "ft/msg.h" +#include "util/dbt.h" + +typedef struct ft_handle *FT_HANDLE; int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *, int nodesize, int basementnodesize, enum toku_compression_method compression_method, CACHETABLE, TOKUTXN, int(*)(DB *,const DBT*,const DBT*)) __attribute__ ((warn_unused_result)); @@ -125,7 +112,7 @@ // ANY operations. 
to update the cmp descriptor after any operations have already happened, all handles // and transactions must close and reopen before the change, then you can update the cmp descriptor void toku_ft_change_descriptor(FT_HANDLE t, const DBT* old_descriptor, const DBT* new_descriptor, bool do_log, TOKUTXN txn, bool update_cmp_descriptor); -uint32_t toku_serialize_descriptor_size(const DESCRIPTOR desc); +uint32_t toku_serialize_descriptor_size(DESCRIPTOR desc); void toku_ft_handle_create(FT_HANDLE *ft); void toku_ft_set_flags(FT_HANDLE, unsigned int flags); @@ -139,11 +126,13 @@ void toku_ft_handle_get_compression_method(FT_HANDLE, enum toku_compression_method *); void toku_ft_handle_set_fanout(FT_HANDLE, unsigned int fanout); void toku_ft_handle_get_fanout(FT_HANDLE, unsigned int *fanout); +int toku_ft_handle_set_memcmp_magic(FT_HANDLE, uint8_t magic); -void toku_ft_set_bt_compare(FT_HANDLE, ft_compare_func); -ft_compare_func toku_ft_get_bt_compare (FT_HANDLE ft_h); +void toku_ft_set_bt_compare(FT_HANDLE ft_handle, ft_compare_func cmp_func); +const toku::comparator &toku_ft_get_comparator(FT_HANDLE ft_handle); -void toku_ft_set_redirect_callback(FT_HANDLE ft_h, on_redirect_callback redir_cb, void* extra); +typedef void (*on_redirect_callback)(FT_HANDLE ft_handle, void *extra); +void toku_ft_set_redirect_callback(FT_HANDLE ft_handle, on_redirect_callback cb, void *extra); // How updates (update/insert/deletes) work: // There are two flavers of upsertdels: Singleton and broadcast. @@ -181,6 +170,9 @@ // Implementation note: Acquires a write lock on the entire database. // This function works by sending an BROADCAST-UPDATE message containing // the key and the extra. 
+typedef int (*ft_update_func)(DB *db, const DBT *key, const DBT *old_val, const DBT *extra, + void (*set_val)(const DBT *new_val, void *set_extra), + void *set_extra); void toku_ft_set_update(FT_HANDLE ft_h, ft_update_func update_fun); int toku_ft_handle_open(FT_HANDLE, const char *fname_in_env, @@ -197,9 +189,17 @@ // close an ft handle during recovery. the underlying ft must close, and will use the given lsn. void toku_ft_handle_close_recovery(FT_HANDLE ft_handle, LSN oplsn); +// At the ydb layer, a DICTIONARY_ID uniquely identifies an open dictionary. +// With the introduction of the loader (ticket 2216), it is possible for the file that holds +// an open dictionary to change, so these are now separate and independent unique identifiers (see FILENUM) +struct DICTIONARY_ID { + uint64_t dictid; +}; +static const DICTIONARY_ID DICTIONARY_ID_NONE = { .dictid = 0 }; + int toku_ft_handle_open_with_dict_id( - FT_HANDLE t, + FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only_create, @@ -208,8 +208,6 @@ DICTIONARY_ID use_dictionary_id ) __attribute__ ((warn_unused_result)); -int toku_ft_lookup (FT_HANDLE ft_h, DBT *k, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); - // Effect: Insert a key and data pair into an ft void toku_ft_insert (FT_HANDLE ft_h, DBT *k, DBT *v, TOKUTXN txn); @@ -247,8 +245,9 @@ void toku_ft_maybe_delete (FT_HANDLE ft_h, DBT *k, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging); TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h); -TXN_MANAGER toku_ft_get_txn_manager(FT_HANDLE ft_h); +struct txn_manager *toku_ft_get_txn_manager(FT_HANDLE ft_h); +struct txn_gc_info; void toku_ft_send_insert(FT_HANDLE ft_h, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info); void toku_ft_send_delete(FT_HANDLE ft_h, DBT *key, XIDS xids, txn_gc_info *gc_info); void toku_ft_send_commit_any(FT_HANDLE ft_h, DBT *key, XIDS xids, txn_gc_info *gc_info); @@ -261,37 +260,6 @@ int 
toku_verify_ft (FT_HANDLE ft_h) __attribute__ ((warn_unused_result)); int toku_verify_ft_with_progress (FT_HANDLE ft_h, int (*progress_callback)(void *extra, float progress), void *extra, int verbose, int keep_going) __attribute__ ((warn_unused_result)); -typedef struct ft_cursor *FT_CURSOR; -int toku_ft_cursor (FT_HANDLE, FT_CURSOR*, TOKUTXN, bool, bool) __attribute__ ((warn_unused_result)); -void toku_ft_cursor_set_leaf_mode(FT_CURSOR); -// Sets a boolean on the ft cursor that prevents uncessary copying of -// the cursor duing a one query. -void toku_ft_cursor_set_temporary(FT_CURSOR); -void toku_ft_cursor_remove_restriction(FT_CURSOR); -void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR ftcursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *extra); -int toku_ft_cursor_is_leaf_mode(FT_CURSOR); -void toku_ft_cursor_set_range_lock(FT_CURSOR, const DBT *, const DBT *, bool, bool, int); - -// get is deprecated in favor of the individual functions below -int toku_ft_cursor_get (FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags) __attribute__ ((warn_unused_result)); - -int toku_ft_cursor_first(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_last(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_next(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_prev(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_current(FT_CURSOR cursor, int op, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_set(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_set_range(FT_CURSOR cursor, DBT *key, DBT *key_bound, FT_GET_CALLBACK_FUNCTION getf, void 
*getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_set_range_reverse(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_get_both_range(FT_CURSOR cursor, DBT *key, DBT *val, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_get_both_range_reverse(FT_CURSOR cursor, DBT *key, DBT *val, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); - -int toku_ft_cursor_delete(FT_CURSOR cursor, int flags, TOKUTXN) __attribute__ ((warn_unused_result)); -void toku_ft_cursor_close (FT_CURSOR curs); -bool toku_ft_cursor_uninitialized(FT_CURSOR c) __attribute__ ((warn_unused_result)); - -void toku_ft_cursor_peek(FT_CURSOR cursor, const DBT **pkey, const DBT **pval); - DICTIONARY_ID toku_ft_get_dictionary_id(FT_HANDLE); enum ft_flags { @@ -353,7 +321,7 @@ int toku_ft_strerror_r(int error, char *buf, size_t buflen); // Effect: LIke the XSI-compliant strerorr_r, extended to db_strerror(). // If error>=0 then the result is to do strerror_r(error, buf, buflen), that is fill buf with a descriptive error message. -// If error<0 then return a TokuDB-specific error code. For unknown cases, we return -1 and set errno=EINVAL, even for cases that *should* be known. (Not all DB errors are known by this function which is a bug.) +// If error<0 then return a TokuFT-specific error code. For unknown cases, we return -1 and set errno=EINVAL, even for cases that *should* be known. (Not all DB errors are known by this function which is a bug.) 
extern bool garbage_collection_debug; @@ -362,5 +330,3 @@ void toku_ft_set_compress_buffers_before_eviction(bool compress_buffers); void toku_note_deserialized_basement_node(bool fixed_key_size); - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-search.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-search.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-search.h 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-search.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,158 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#ifndef FT_SEARCH_H -#define FT_SEARCH_H - - -enum ft_search_direction_e { - FT_SEARCH_LEFT = 1, /* search left -> right, finds min xy as defined by the compare function */ - FT_SEARCH_RIGHT = 2, /* search right -> left, finds max xy as defined by the compare function */ -}; - -struct ft_search; - -/* the search compare function should return 0 for all xy < kv and 1 for all xy >= kv - the compare function should be a step function from 0 to 1 for a left to right search - and 1 to 0 for a right to left search */ - -typedef int (*ft_search_compare_func_t)(const struct ft_search &, const DBT *); - -/* the search object contains the compare function, search direction, and the kv pair that - is used in the compare function. 
the context is the user's private data */ - -typedef struct ft_search { - ft_search_compare_func_t compare; - enum ft_search_direction_e direction; - const DBT *k; - void *context; - - // To fix #3522, we need to remember the pivots that we have searched unsuccessfully. - // For example, when searching right (left), we call search->compare() on the ith pivot key. If search->compare(0 returns - // nonzero, then we search the ith subtree. If that subsearch returns DB_NOTFOUND then maybe the key isn't present in the - // tree. But maybe we are doing a DB_NEXT (DB_PREV), and everything was deleted. So we remember the pivot, and later we - // will only search subtrees which contain keys that are bigger than (less than) the pivot. - // The code is a kludge (even before this fix), and interacts strangely with the TOKUDB_FOUND_BUT_REJECTED (which is there - // because a failed DB_GET we would keep searching the rest of the tree). We probably should write the various lookup - // codes (NEXT, PREV, CURRENT, etc) more directly, and we should probably use a binary search within a node to search the - // pivots so that we can support a larger fanout. - // These changes (3312+3522) also (probably) introduce an isolation error (#3529). - // We must make sure we lock the right range for proper isolation level. - // There's probably a bug in which the following could happen. - // Thread A: Searches through deleted keys A,B,D,E and finds nothing, so searches the next leaf, releasing the YDB lock. - // Thread B: Inserts key C, and acquires the write lock, then commits. - // Thread A: Resumes, searching F,G,H and return success. Thread A then read-locks the range A-H, and doesn't notice - // the value C inserted by thread B. Thus a failure of serialization. - // See #3529. - // There also remains a potential thrashing problem. When we get a TOKUDB_TRY_AGAIN, we unpin everything. There's - // no guarantee that we will get everything pinned again. 
We ought to keep nodes pinned when we retry, except that on the - // way out with a DB_NOTFOUND we ought to unpin those nodes. See #3528. - DBT pivot_bound; - const DBT *k_bound; -} ft_search_t; - -/* initialize the search compare object */ -static inline ft_search_t *ft_search_init(ft_search_t *so, ft_search_compare_func_t compare, enum ft_search_direction_e direction, - const DBT *k, const DBT *k_bound, void *context) { - so->compare = compare; - so->direction = direction; - so->k = k; - so->context = context; - toku_init_dbt(&so->pivot_bound); - so->k_bound = k_bound; - return so; -} - -static inline void ft_search_finish(ft_search_t *so) { - toku_destroy_dbt(&so->pivot_bound); -} - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-serialize.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-serialize.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-serialize.cc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-serialize.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,861 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "compress.h" -#include "ft.h" -#include "ft-internal.h" - -// not version-sensitive because we only serialize a descriptor using the current layout_version -uint32_t -toku_serialize_descriptor_size(const DESCRIPTOR desc) { - //Checksum NOT included in this. Checksum only exists in header's version. 
- uint32_t size = 4; // four bytes for size of descriptor - size += desc->dbt.size; - return size; -} - -static uint32_t -deserialize_descriptor_size(const DESCRIPTOR desc, int layout_version) { - //Checksum NOT included in this. Checksum only exists in header's version. - uint32_t size = 4; // four bytes for size of descriptor - if (layout_version == FT_LAYOUT_VERSION_13) - size += 4; // for version 13, include four bytes of "version" - size += desc->dbt.size; - return size; -} - -void -toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, const DESCRIPTOR desc) { - wbuf_bytes(wb, desc->dbt.data, desc->dbt.size); -} - -//Descriptor is written to disk during toku_ft_handle_open iff we have a new (or changed) -//descriptor. -//Descriptors are NOT written during the header checkpoint process. -void -toku_serialize_descriptor_contents_to_fd(int fd, const DESCRIPTOR desc, DISKOFF offset) { - // make the checksum - int64_t size = toku_serialize_descriptor_size(desc)+4; //4 for checksum - int64_t size_aligned = roundup_to_multiple(512, size); - struct wbuf w; - char *XMALLOC_N_ALIGNED(512, size_aligned, aligned_buf); - for (int64_t i=size; i 0) { - data_copy = toku_memdup(data, size); //Cannot keep the reference from rbuf. Must copy. 
- lazy_assert(data_copy); - } else { - lazy_assert(size==0); - data_copy = NULL; - } - toku_fill_dbt(&desc->dbt, data_copy, size); -} - -static int -deserialize_descriptor_from(int fd, BLOCK_TABLE bt, DESCRIPTOR desc, int layout_version) { - int r = 0; - DISKOFF offset; - DISKOFF size; - unsigned char *dbuf = NULL; - toku_get_descriptor_offset_size(bt, &offset, &size); - memset(desc, 0, sizeof(*desc)); - if (size > 0) { - lazy_assert(size>=4); //4 for checksum - { - ssize_t size_to_malloc = roundup_to_multiple(512, size); - XMALLOC_N_ALIGNED(512, size_to_malloc, dbuf); - { - - ssize_t sz_read = toku_os_pread(fd, dbuf, size_to_malloc, offset); - lazy_assert(sz_read==size_to_malloc); - } - { - // check the checksum - uint32_t x1764 = toku_x1764_memory(dbuf, size-4); - //printf("%s:%d read from %ld (x1764 offset=%ld) size=%ld\n", __FILE__, __LINE__, block_translation_address_on_disk, offset, block_translation_size_on_disk); - uint32_t stored_x1764 = toku_dtoh32(*(int*)(dbuf + size-4)); - if (x1764 != stored_x1764) { - fprintf(stderr, "Descriptor checksum failure: calc=0x%08x read=0x%08x\n", x1764, stored_x1764); - r = TOKUDB_BAD_CHECKSUM; - toku_free(dbuf); - goto exit; - } - } - { - struct rbuf rb = {.buf = dbuf, .size = (unsigned int) size, .ndone = 0}; - //Not temporary; must have a toku_memdup'd copy. - deserialize_descriptor_from_rbuf(&rb, desc, layout_version); - } - lazy_assert(deserialize_descriptor_size(desc, layout_version)+4 == size); - toku_free(dbuf); - } - } -exit: - return r; -} - -int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) -// Effect: Deserialize the ft header. -// We deserialize ft_header only once and then share everything with all the FTs. -{ - int r; - FT ft = NULL; - paranoid_invariant(version >= FT_LAYOUT_MIN_SUPPORTED_VERSION); - paranoid_invariant(version <= FT_LAYOUT_VERSION); - // We already know: - // we have an rbuf representing the header. 
- // The checksum has been validated - - //Verification of initial elements. - //Check magic number - bytevec magic; - rbuf_literal_bytes(rb, &magic, 8); - lazy_assert(memcmp(magic,"tokudata",8)==0); - - XCALLOC(ft); - ft->checkpoint_header = NULL; - toku_list_init(&ft->live_ft_handles); - - //version MUST be in network order on disk regardless of disk order - ft->layout_version_read_from_disk = rbuf_network_int(rb); - invariant(ft->layout_version_read_from_disk >= FT_LAYOUT_MIN_SUPPORTED_VERSION); - invariant(ft->layout_version_read_from_disk <= FT_LAYOUT_VERSION); - - //build_id MUST be in network order on disk regardless of disk order - uint32_t build_id; - build_id = rbuf_network_int(rb); - - //Size MUST be in network order regardless of disk order. - uint32_t size; - size = rbuf_network_int(rb); - lazy_assert(size == rb->size); - - bytevec tmp_byte_order_check; - lazy_assert((sizeof tmp_byte_order_check) >= 8); - rbuf_literal_bytes(rb, &tmp_byte_order_check, 8); //Must not translate byte order - int64_t byte_order_stored; - byte_order_stored = *(int64_t*)tmp_byte_order_check; - lazy_assert(byte_order_stored == toku_byte_order_host); - - uint64_t checkpoint_count; - checkpoint_count = rbuf_ulonglong(rb); - LSN checkpoint_lsn; - checkpoint_lsn = rbuf_lsn(rb); - unsigned nodesize; - nodesize = rbuf_int(rb); - DISKOFF translation_address_on_disk; - translation_address_on_disk = rbuf_diskoff(rb); - DISKOFF translation_size_on_disk; - translation_size_on_disk = rbuf_diskoff(rb); - lazy_assert(translation_address_on_disk > 0); - lazy_assert(translation_size_on_disk > 0); - - // initialize the tree lock - toku_ft_init_reflock(ft); - - //Load translation table - { - size_t size_to_read = roundup_to_multiple(512, translation_size_on_disk); - unsigned char *XMALLOC_N_ALIGNED(512, size_to_read, tbuf); - { - // This cast is messed up in 32-bits if the block translation - // table is ever more than 4GB. 
But in that case, the - // translation table itself won't fit in main memory. - ssize_t readsz = toku_os_pread(fd, tbuf, size_to_read, - translation_address_on_disk); - assert(readsz >= translation_size_on_disk); - assert(readsz <= (ssize_t)size_to_read); - } - // Create table and read in data. - r = toku_blocktable_create_from_buffer(fd, - &ft->blocktable, - translation_address_on_disk, - translation_size_on_disk, - tbuf); - toku_free(tbuf); - if (r != 0) { - goto exit; - } - } - - BLOCKNUM root_blocknum; - root_blocknum = rbuf_blocknum(rb); - unsigned flags; - flags = rbuf_int(rb); - if (ft->layout_version_read_from_disk <= FT_LAYOUT_VERSION_13) { - // deprecate 'TOKU_DB_VALCMP_BUILTIN'. just remove the flag - flags &= ~TOKU_DB_VALCMP_BUILTIN_13; - } - int layout_version_original; - layout_version_original = rbuf_int(rb); - uint32_t build_id_original; - build_id_original = rbuf_int(rb); - uint64_t time_of_creation; - time_of_creation = rbuf_ulonglong(rb); - uint64_t time_of_last_modification; - time_of_last_modification = rbuf_ulonglong(rb); - - if (ft->layout_version_read_from_disk <= FT_LAYOUT_VERSION_18) { - // 17 was the last version with these fields, we no longer store - // them, so read and discard them - (void) rbuf_ulonglong(rb); // num_blocks_to_upgrade_13 - if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_15) { - (void) rbuf_ulonglong(rb); // num_blocks_to_upgrade_14 - } - } - - // fake creation during the last checkpoint - TXNID root_xid_that_created; - root_xid_that_created = checkpoint_lsn.lsn; - if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_14) { - rbuf_TXNID(rb, &root_xid_that_created); - } - - // TODO(leif): get this to default to what's specified, not the - // hard-coded default - unsigned basementnodesize; - basementnodesize = FT_DEFAULT_BASEMENT_NODE_SIZE; - uint64_t time_of_last_verification; - time_of_last_verification = 0; - if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_15) { - basementnodesize = 
rbuf_int(rb); - time_of_last_verification = rbuf_ulonglong(rb); - } - - STAT64INFO_S on_disk_stats; - on_disk_stats = ZEROSTATS; - uint64_t time_of_last_optimize_begin; - time_of_last_optimize_begin = 0; - uint64_t time_of_last_optimize_end; - time_of_last_optimize_end = 0; - uint32_t count_of_optimize_in_progress; - count_of_optimize_in_progress = 0; - MSN msn_at_start_of_last_completed_optimize; - msn_at_start_of_last_completed_optimize = ZERO_MSN; - if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_18) { - on_disk_stats.numrows = rbuf_ulonglong(rb); - on_disk_stats.numbytes = rbuf_ulonglong(rb); - ft->in_memory_stats = on_disk_stats; - time_of_last_optimize_begin = rbuf_ulonglong(rb); - time_of_last_optimize_end = rbuf_ulonglong(rb); - count_of_optimize_in_progress = rbuf_int(rb); - msn_at_start_of_last_completed_optimize = rbuf_msn(rb); - } - - enum toku_compression_method compression_method; - MSN highest_unused_msn_for_upgrade; - highest_unused_msn_for_upgrade.msn = (MIN_MSN.msn - 1); - if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_19) { - unsigned char method = rbuf_char(rb); - compression_method = (enum toku_compression_method) method; - highest_unused_msn_for_upgrade = rbuf_msn(rb); - } else { - // we hard coded zlib until 5.2, then quicklz in 5.2 - if (ft->layout_version_read_from_disk < FT_LAYOUT_VERSION_18) { - compression_method = TOKU_ZLIB_METHOD; - } else { - compression_method = TOKU_QUICKLZ_METHOD; - } - } - - MSN max_msn_in_ft; - max_msn_in_ft = ZERO_MSN; // We'll upgrade it from the root node later if necessary - if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_21) { - max_msn_in_ft = rbuf_msn(rb); - } - - (void) rbuf_int(rb); //Read in checksum and ignore (already verified). 
- if (rb->ndone != rb->size) { - fprintf(stderr, "Header size did not match contents.\n"); - r = EINVAL; - goto exit; - } - - { - struct ft_header h = { - .type = FT_CURRENT, - .dirty = 0, - .checkpoint_count = checkpoint_count, - .checkpoint_lsn = checkpoint_lsn, - .layout_version = FT_LAYOUT_VERSION, - .layout_version_original = layout_version_original, - .build_id = build_id, - .build_id_original = build_id_original, - .time_of_creation = time_of_creation, - .root_xid_that_created = root_xid_that_created, - .time_of_last_modification = time_of_last_modification, - .time_of_last_verification = time_of_last_verification, - .root_blocknum = root_blocknum, - .flags = flags, - .nodesize = nodesize, - .basementnodesize = basementnodesize, - .compression_method = compression_method, - .fanout = FT_DEFAULT_FANOUT, // fanout is not serialized, must be set at startup - .highest_unused_msn_for_upgrade = highest_unused_msn_for_upgrade, - .max_msn_in_ft = max_msn_in_ft, - .time_of_last_optimize_begin = time_of_last_optimize_begin, - .time_of_last_optimize_end = time_of_last_optimize_end, - .count_of_optimize_in_progress = count_of_optimize_in_progress, - .count_of_optimize_in_progress_read_from_disk = count_of_optimize_in_progress, - .msn_at_start_of_last_completed_optimize = msn_at_start_of_last_completed_optimize, - .on_disk_stats = on_disk_stats - }; - XMEMDUP(ft->h, &h); - } - - if (ft->layout_version_read_from_disk < FT_LAYOUT_VERSION_18) { - // This needs ft->h to be non-null, so we have to do it after we - // read everything else. 
- r = toku_upgrade_subtree_estimates_to_stat64info(fd, ft); - if (r != 0) { - goto exit; - } - } - if (ft->layout_version_read_from_disk < FT_LAYOUT_VERSION_21) { - r = toku_upgrade_msn_from_root_to_header(fd, ft); - if (r != 0) { - goto exit; - } - } - - invariant((uint32_t) ft->layout_version_read_from_disk == version); - r = deserialize_descriptor_from(fd, ft->blocktable, &ft->descriptor, version); - if (r != 0) { - goto exit; - } - // copy descriptor to cmp_descriptor for #4541 - ft->cmp_descriptor.dbt.size = ft->descriptor.dbt.size; - ft->cmp_descriptor.dbt.data = toku_xmemdup(ft->descriptor.dbt.data, ft->descriptor.dbt.size); - // Version 13 descriptors had an extra 4 bytes that we don't read - // anymore. Since the header is going to think it's the current - // version if it gets written out, we need to write the descriptor in - // the new format (without those bytes) before that happens. - if (version <= FT_LAYOUT_VERSION_13) { - toku_ft_update_descriptor_with_fd(ft, &ft->cmp_descriptor, fd); - } - r = 0; -exit: - if (r != 0 && ft != NULL) { - toku_free(ft); - ft = NULL; - } - *ftp = ft; - return r; -} - -static size_t -serialize_ft_min_size (uint32_t version) { - size_t size = 0; - - switch(version) { - case FT_LAYOUT_VERSION_27: - case FT_LAYOUT_VERSION_26: - case FT_LAYOUT_VERSION_25: - case FT_LAYOUT_VERSION_24: - case FT_LAYOUT_VERSION_23: - case FT_LAYOUT_VERSION_22: - case FT_LAYOUT_VERSION_21: - size += sizeof(MSN); // max_msn_in_ft - case FT_LAYOUT_VERSION_20: - case FT_LAYOUT_VERSION_19: - size += 1; // compression method - size += sizeof(MSN); // highest_unused_msn_for_upgrade - case FT_LAYOUT_VERSION_18: - size += sizeof(uint64_t); // time_of_last_optimize_begin - size += sizeof(uint64_t); // time_of_last_optimize_end - size += sizeof(uint32_t); // count_of_optimize_in_progress - size += sizeof(MSN); // msn_at_start_of_last_completed_optimize - size -= 8; // removed num_blocks_to_upgrade_14 - size -= 8; // removed num_blocks_to_upgrade_13 - case 
FT_LAYOUT_VERSION_17: - size += 16; - invariant(sizeof(STAT64INFO_S) == 16); - case FT_LAYOUT_VERSION_16: - case FT_LAYOUT_VERSION_15: - size += 4; // basement node size - size += 8; // num_blocks_to_upgrade_14 (previously num_blocks_to_upgrade, now one int each for upgrade from 13, 14 - size += 8; // time of last verification - case FT_LAYOUT_VERSION_14: - size += 8; //TXNID that created - case FT_LAYOUT_VERSION_13: - size += ( 4 // build_id - +4 // build_id_original - +8 // time_of_creation - +8 // time_of_last_modification - ); - // fall through - case FT_LAYOUT_VERSION_12: - size += (+8 // "tokudata" - +4 // version - +4 // original_version - +4 // size - +8 // byte order verification - +8 // checkpoint_count - +8 // checkpoint_lsn - +4 // tree's nodesize - +8 // translation_size_on_disk - +8 // translation_address_on_disk - +4 // checksum - +8 // Number of blocks in old version. - +8 // diskoff - +4 // flags - ); - break; - default: - abort(); - } - - lazy_assert(size <= BLOCK_ALLOCATOR_HEADER_RESERVE); - return size; -} - -int deserialize_ft_from_fd_into_rbuf(int fd, - toku_off_t offset_of_header, - struct rbuf *rb, - uint64_t *checkpoint_count, - LSN *checkpoint_lsn, - uint32_t * version_p) -// Effect: Read and parse the header of a fractalal tree -// -// Simply reading the raw bytes of the header into an rbuf is insensitive -// to disk format version. If that ever changes, then modify this. 
-// -// TOKUDB_DICTIONARY_NO_HEADER means we can overwrite everything in the -// file AND the header is useless -{ - int r = 0; - const int64_t prefix_size = 8 + // magic ("tokudata") - 4 + // version - 4 + // build_id - 4; // size - const int64_t read_size = roundup_to_multiple(512, prefix_size); - unsigned char *XMALLOC_N_ALIGNED(512, read_size, prefix); - rb->buf = NULL; - int64_t n = toku_os_pread(fd, prefix, read_size, offset_of_header); - if (n != read_size) { - if (n==0) { - r = TOKUDB_DICTIONARY_NO_HEADER; - } else if (n<0) { - r = get_error_errno(); - } else { - r = EINVAL; - } - toku_free(prefix); - goto exit; - } - - rbuf_init(rb, prefix, prefix_size); - - //Check magic number - bytevec magic; - rbuf_literal_bytes(rb, &magic, 8); - if (memcmp(magic,"tokudata",8)!=0) { - if ((*(uint64_t*)magic) == 0) { - r = TOKUDB_DICTIONARY_NO_HEADER; - } else { - r = EINVAL; //Not a tokudb file! Do not use. - } - goto exit; - } - - //Version MUST be in network order regardless of disk order. - uint32_t version; - version = rbuf_network_int(rb); - *version_p = version; - if (version < FT_LAYOUT_MIN_SUPPORTED_VERSION) { - r = TOKUDB_DICTIONARY_TOO_OLD; //Cannot use - goto exit; - } else if (version > FT_LAYOUT_VERSION) { - r = TOKUDB_DICTIONARY_TOO_NEW; //Cannot use - goto exit; - } - - //build_id MUST be in network order regardless of disk order. - uint32_t build_id __attribute__((__unused__)); - build_id = rbuf_network_int(rb); - int64_t min_header_size; - min_header_size = serialize_ft_min_size(version); - - //Size MUST be in network order regardless of disk order. - uint32_t size; - size = rbuf_network_int(rb); - //If too big, it is corrupt. We would probably notice during checksum - //but may have to do a multi-gigabyte malloc+read to find out. - //If its too small reading rbuf would crash, so verify. 
- if (size > BLOCK_ALLOCATOR_HEADER_RESERVE || size < min_header_size) { - r = TOKUDB_DICTIONARY_NO_HEADER; - goto exit; - } - - lazy_assert(rb->ndone==prefix_size); - rb->size = size; - { - toku_free(rb->buf); - uint32_t size_to_read = roundup_to_multiple(512, size); - XMALLOC_N_ALIGNED(512, size_to_read, rb->buf); - - assert(offset_of_header%512==0); - n = toku_os_pread(fd, rb->buf, size_to_read, offset_of_header); - if (n != size_to_read) { - if (n < 0) { - r = get_error_errno(); - } else { - r = EINVAL; //Header might be useless (wrong size) or could be a disk read error. - } - goto exit; - } - } - //It's version 14 or later. Magic looks OK. - //We have an rbuf that represents the header. - //Size is within acceptable bounds. - - //Verify checksum (FT_LAYOUT_VERSION_13 or later, when checksum function changed) - uint32_t calculated_x1764; - calculated_x1764 = toku_x1764_memory(rb->buf, rb->size-4); - uint32_t stored_x1764; - stored_x1764 = toku_dtoh32(*(int*)(rb->buf+rb->size-4)); - if (calculated_x1764 != stored_x1764) { - r = TOKUDB_BAD_CHECKSUM; //Header useless - fprintf(stderr, "Header checksum failure: calc=0x%08x read=0x%08x\n", calculated_x1764, stored_x1764); - goto exit; - } - - //Verify byte order - bytevec tmp_byte_order_check; - lazy_assert((sizeof toku_byte_order_host) == 8); - rbuf_literal_bytes(rb, &tmp_byte_order_check, 8); //Must not translate byte order - int64_t byte_order_stored; - byte_order_stored = *(int64_t*)tmp_byte_order_check; - if (byte_order_stored != toku_byte_order_host) { - r = TOKUDB_DICTIONARY_NO_HEADER; //Cannot use dictionary - goto exit; - } - - //Load checkpoint count - *checkpoint_count = rbuf_ulonglong(rb); - *checkpoint_lsn = rbuf_lsn(rb); - //Restart at beginning during regular deserialization - rb->ndone = 0; - -exit: - if (r != 0 && rb->buf != NULL) { - toku_free(rb->buf); - rb->buf = NULL; - } - return r; -} - -// Read ft from file into struct. Read both headers and use one. 
-// We want the latest acceptable header whose checkpoint_lsn is no later -// than max_acceptable_lsn. -int -toku_deserialize_ft_from(int fd, - LSN max_acceptable_lsn, - FT *ft) -{ - struct rbuf rb_0; - struct rbuf rb_1; - uint64_t checkpoint_count_0; - uint64_t checkpoint_count_1; - LSN checkpoint_lsn_0; - LSN checkpoint_lsn_1; - uint32_t version_0, version_1, version = 0; - bool h0_acceptable = false; - bool h1_acceptable = false; - struct rbuf *rb = NULL; - int r0, r1, r; - - toku_off_t header_0_off = 0; - r0 = deserialize_ft_from_fd_into_rbuf(fd, header_0_off, &rb_0, &checkpoint_count_0, &checkpoint_lsn_0, &version_0); - if (r0 == 0 && checkpoint_lsn_0.lsn <= max_acceptable_lsn.lsn) { - h0_acceptable = true; - } - - toku_off_t header_1_off = BLOCK_ALLOCATOR_HEADER_RESERVE; - r1 = deserialize_ft_from_fd_into_rbuf(fd, header_1_off, &rb_1, &checkpoint_count_1, &checkpoint_lsn_1, &version_1); - if (r1 == 0 && checkpoint_lsn_1.lsn <= max_acceptable_lsn.lsn) { - h1_acceptable = true; - } - - // if either header is too new, the dictionary is unreadable - if (r0 == TOKUDB_DICTIONARY_TOO_NEW || r1 == TOKUDB_DICTIONARY_TOO_NEW || - !(h0_acceptable || h1_acceptable)) { - // We were unable to read either header or at least one is too - // new. Certain errors are higher priority than others. Order of - // these if/else if is important. - if (r0 == TOKUDB_DICTIONARY_TOO_NEW || r1 == TOKUDB_DICTIONARY_TOO_NEW) { - r = TOKUDB_DICTIONARY_TOO_NEW; - } else if (r0 == TOKUDB_DICTIONARY_TOO_OLD || r1 == TOKUDB_DICTIONARY_TOO_OLD) { - r = TOKUDB_DICTIONARY_TOO_OLD; - } else if (r0 == TOKUDB_BAD_CHECKSUM && r1 == TOKUDB_BAD_CHECKSUM) { - fprintf(stderr, "Both header checksums failed.\n"); - r = TOKUDB_BAD_CHECKSUM; - } else if (r0 == TOKUDB_DICTIONARY_NO_HEADER || r1 == TOKUDB_DICTIONARY_NO_HEADER) { - r = TOKUDB_DICTIONARY_NO_HEADER; - } else { - r = r0 ? 
r0 : r1; //Arbitrarily report the error from the - //first header, unless it's readable - } - - // it should not be possible for both headers to be later than the max_acceptable_lsn - invariant(!((r0==0 && checkpoint_lsn_0.lsn > max_acceptable_lsn.lsn) && - (r1==0 && checkpoint_lsn_1.lsn > max_acceptable_lsn.lsn))); - invariant(r!=0); - goto exit; - } - - if (h0_acceptable && h1_acceptable) { - if (checkpoint_count_0 > checkpoint_count_1) { - invariant(checkpoint_count_0 == checkpoint_count_1 + 1); - invariant(version_0 >= version_1); - rb = &rb_0; - version = version_0; - } - else { - invariant(checkpoint_count_1 == checkpoint_count_0 + 1); - invariant(version_1 >= version_0); - rb = &rb_1; - version = version_1; - } - } else if (h0_acceptable) { - if (r1 == TOKUDB_BAD_CHECKSUM) { - // print something reassuring - fprintf(stderr, "Header 2 checksum failed, but header 1 ok. Proceeding.\n"); - } - rb = &rb_0; - version = version_0; - } else if (h1_acceptable) { - if (r0 == TOKUDB_BAD_CHECKSUM) { - // print something reassuring - fprintf(stderr, "Header 1 checksum failed, but header 2 ok. Proceeding.\n"); - } - rb = &rb_1; - version = version_1; - } - - paranoid_invariant(rb); - r = deserialize_ft_versioned(fd, rb, ft, version); - -exit: - if (rb_0.buf) { - toku_free(rb_0.buf); - } - if (rb_1.buf) { - toku_free(rb_1.buf); - } - return r; -} - - -size_t toku_serialize_ft_size (FT_HEADER h) { - size_t size = serialize_ft_min_size(h->layout_version); - //There is no dynamic data. 
- lazy_assert(size <= BLOCK_ALLOCATOR_HEADER_RESERVE); - return size; -} - - -void toku_serialize_ft_to_wbuf ( - struct wbuf *wbuf, - FT_HEADER h, - DISKOFF translation_location_on_disk, - DISKOFF translation_size_on_disk - ) -{ - wbuf_literal_bytes(wbuf, "tokudata", 8); - wbuf_network_int (wbuf, h->layout_version); //MUST be in network order regardless of disk order - wbuf_network_int (wbuf, BUILD_ID); //MUST be in network order regardless of disk order - wbuf_network_int (wbuf, wbuf->size); //MUST be in network order regardless of disk order - wbuf_literal_bytes(wbuf, &toku_byte_order_host, 8); //Must not translate byte order - wbuf_ulonglong(wbuf, h->checkpoint_count); - wbuf_LSN (wbuf, h->checkpoint_lsn); - wbuf_int (wbuf, h->nodesize); - - wbuf_DISKOFF(wbuf, translation_location_on_disk); - wbuf_DISKOFF(wbuf, translation_size_on_disk); - wbuf_BLOCKNUM(wbuf, h->root_blocknum); - wbuf_int(wbuf, h->flags); - wbuf_int(wbuf, h->layout_version_original); - wbuf_int(wbuf, h->build_id_original); - wbuf_ulonglong(wbuf, h->time_of_creation); - wbuf_ulonglong(wbuf, h->time_of_last_modification); - wbuf_TXNID(wbuf, h->root_xid_that_created); - wbuf_int(wbuf, h->basementnodesize); - wbuf_ulonglong(wbuf, h->time_of_last_verification); - wbuf_ulonglong(wbuf, h->on_disk_stats.numrows); - wbuf_ulonglong(wbuf, h->on_disk_stats.numbytes); - wbuf_ulonglong(wbuf, h->time_of_last_optimize_begin); - wbuf_ulonglong(wbuf, h->time_of_last_optimize_end); - wbuf_int(wbuf, h->count_of_optimize_in_progress); - wbuf_MSN(wbuf, h->msn_at_start_of_last_completed_optimize); - wbuf_char(wbuf, (unsigned char) h->compression_method); - wbuf_MSN(wbuf, h->highest_unused_msn_for_upgrade); - wbuf_MSN(wbuf, h->max_msn_in_ft); - uint32_t checksum = toku_x1764_finish(&wbuf->checksum); - wbuf_int(wbuf, checksum); - lazy_assert(wbuf->ndone == wbuf->size); -} - -void toku_serialize_ft_to (int fd, FT_HEADER h, BLOCK_TABLE blocktable, CACHEFILE cf) { - lazy_assert(h->type==FT_CHECKPOINT_INPROGRESS); - struct 
wbuf w_translation; - int64_t size_translation; - int64_t address_translation; - - //Must serialize translation first, to get address,size for header. - toku_serialize_translation_to_wbuf(blocktable, fd, &w_translation, - &address_translation, - &size_translation); - assert(size_translation == w_translation.ndone); // the bytes written are the size - assert(w_translation.size % 512 == 0); // the number of bytes available in the buffer is 0 mod 512, and those last bytes are all initialized. - - struct wbuf w_main; - size_t size_main = toku_serialize_ft_size(h); - size_t size_main_aligned = roundup_to_multiple(512, size_main); - assert(size_main_alignedcheckpoint_count & 0x1) ? 0 : BLOCK_ALLOCATOR_HEADER_RESERVE; - toku_os_full_pwrite(fd, w_main.buf, size_main_aligned, main_offset); - toku_free(w_main.buf); - toku_free(w_translation.buf); -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-test-helpers.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-test-helpers.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-test-helpers.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-test-helpers.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,12 +89,15 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "ft-cachetable-wrappers.h" -#include "ft-flusher.h" -#include "ft-internal.h" -#include "ft.h" -#include "fttypes.h" -#include "ule.h" +#include + +#include "ft/ft.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-internal.h" +#include "ft/ft-flusher.h" +#include "ft/serialize/ft_node-serialize.h" +#include "ft/node.h" +#include "ft/ule.h" // dummymsn needed to simulate msn because messages are injected at a lower level than toku_ft_root_put_msg() #define MIN_DUMMYMSN ((MSN) {(uint64_t)1 << 62}) @@ -123,17 +126,21 @@ FTNODE node; assert(testsetup_initialized); toku_create_new_ftnode(ft_handle, &node, 0, n_children); - int i; - for (i=0; ichildkeys[i], keys[i], keylens[i]); - node->totalchildkeylens += keylens[i]; + DBT *XMALLOC_N(n_children - 1, pivotkeys); + for (int i = 0; i + 1 < n_children; i++) { + toku_memdup_dbt(&pivotkeys[i], keys[i], keylens[i]); + } + node->pivotkeys.create_from_dbts(pivotkeys, n_children - 1); + for (int i = 0; i + 1 < n_children; i++) { + toku_destroy_dbt(&pivotkeys[i]); } + toku_free(pivotkeys); - *blocknum = node->thisnodename; + *blocknum = node->blocknum; toku_unpin_ftnode(ft_handle->ft, node); return 0; } @@ -143,16 +150,21 @@ FTNODE node; assert(testsetup_initialized); toku_create_new_ftnode(ft_handle, &node, height, n_children); - int i; - for (i=0; ichildkeys[i], keys[i], keylens[i]); - node->totalchildkeylens += keylens[i]; + DBT *XMALLOC_N(n_children - 1, pivotkeys); + for (int i = 0; i + 1 < n_children; i++) { + toku_memdup_dbt(&pivotkeys[i], keys[i], keylens[i]); } - *blocknum = node->thisnodename; + node->pivotkeys.create_from_dbts(pivotkeys, n_children - 1); + for (int i = 0; i + 1 < n_children; i++) { + toku_destroy_dbt(&pivotkeys[i]); + } + toku_free(pivotkeys); + + *blocknum = node->blocknum; toku_unpin_ftnode(ft_handle->ft, node); return 0; } @@ -167,8 +179,8 @@ { assert(testsetup_initialized); void *node_v; - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft_handle->ft); + 
ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); int r = toku_cachetable_get_and_pin( ft_handle->ft->cf, diskoff, toku_cachetable_hash(ft_handle->ft->cf, diskoff), @@ -194,8 +206,8 @@ assert(testsetup_initialized); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft_handle->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); r = toku_cachetable_get_and_pin( ft_handle->ft->cf, blocknum, @@ -214,26 +226,22 @@ toku_verify_or_set_counts(node); assert(node->height==0); - DBT keydbt,valdbt; - MSN msn = next_dummymsn(); - FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), - .u = { .id = { toku_fill_dbt(&keydbt, key, keylen), - toku_fill_dbt(&valdbt, val, vallen) } } }; + DBT kdbt, vdbt; + ft_msg msg(toku_fill_dbt(&kdbt, key, keylen), toku_fill_dbt(&vdbt, val, vallen), + FT_INSERT, next_dummymsn(), toku_xids_get_root_xids()); static size_t zero_flow_deltas[] = { 0, 0 }; txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true); - toku_ft_node_put_msg ( - ft_handle->ft->compare_fun, - ft_handle->ft->update_fun, - &ft_handle->ft->cmp_descriptor, - node, - -1, - &msg, - true, - &gc_info, - zero_flow_deltas, - NULL - ); + toku_ftnode_put_msg(ft_handle->ft->cmp, + ft_handle->ft->update_fun, + node, + -1, + msg, + true, + &gc_info, + zero_flow_deltas, + NULL + ); toku_verify_or_set_counts(node); @@ -252,8 +260,8 @@ void toku_pin_node_with_min_bfe(FTNODE* node, BLOCKNUM b, FT_HANDLE t) { - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); toku_pin_ftnode( t->ft, b, @@ -271,8 +279,8 @@ assert(testsetup_initialized); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft_handle->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); r = toku_cachetable_get_and_pin( ft_handle->ft->cf, blocknum, @@ -291,13 +299,14 @@ assert(node->height>0); DBT k; - int childnum = toku_ftnode_which_child(node, - toku_fill_dbt(&k, 
key, keylen), - &ft_handle->ft->cmp_descriptor, ft_handle->ft->compare_fun); + int childnum = toku_ftnode_which_child(node, toku_fill_dbt(&k, key, keylen), ft_handle->ft->cmp); - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); MSN msn = next_dummymsn(); - toku_bnc_insert_msg(BNC(node, childnum), key, keylen, val, vallen, msgtype, msn, xids_0, true, NULL, testhelper_string_key_cmp); + toku::comparator cmp; + cmp.create(testhelper_string_key_cmp, nullptr); + toku_bnc_insert_msg(BNC(node, childnum), key, keylen, val, vallen, msgtype, msn, xids_0, true, cmp); + cmp.destroy(); // Hack to get the test working. The problem is that this test // is directly queueing something in a FIFO instead of // using ft APIs. diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/fttypes.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/fttypes.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/fttypes.h 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/fttypes.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,382 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FTTYPES_H -#define FTTYPES_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include -#ifndef _XOPEN_SOURCE -#define _XOPEN_SOURCE 500 -#endif -#define _FILE_OFFSET_BITS 64 - -#include "toku_assert.h" -#include -#include - - -// Use the C++ bool and constants (true false), rather than BOOL, TRUE, and FALSE. 
- -typedef struct ft_handle *FT_HANDLE; -typedef struct ftnode *FTNODE; -typedef struct ftnode_disk_data *FTNODE_DISK_DATA; -typedef struct ftnode_leaf_basement_node *BASEMENTNODE; -typedef struct ftnode_nonleaf_childinfo *NONLEAF_CHILDINFO; -typedef struct sub_block *SUB_BLOCK; -typedef struct ft *FT; -typedef struct ft_header *FT_HEADER; -typedef struct ft_options *FT_OPTIONS; - -struct wbuf; -struct dbuf; - -typedef unsigned int ITEMLEN; -typedef const void *bytevec; - -typedef int64_t DISKOFF; /* Offset in a disk. -1 is the NULL pointer. */ -typedef uint64_t TXNID; - -typedef struct txnid_pair_s { - TXNID parent_id64; - TXNID child_id64; -} TXNID_PAIR; - - -#define TXNID_NONE_LIVING ((TXNID)0) -#define TXNID_NONE ((TXNID)0) -#define TXNID_MAX ((TXNID)-1) - -static const TXNID_PAIR TXNID_PAIR_NONE = { .parent_id64 = TXNID_NONE, .child_id64 = TXNID_NONE }; - -typedef struct blocknum_s { int64_t b; } BLOCKNUM; // make a struct so that we will notice type problems. -typedef struct gid_s { uint8_t *gid; } GID; // the gid is of size [DB_GID_SIZE] -typedef TOKU_XA_XID *XIDP; // this is the type that's passed to the logger code (so that we don't have to copy all 152 bytes when only a subset are even valid.) -#define ROLLBACK_NONE ((BLOCKNUM){0}) - -static inline BLOCKNUM make_blocknum(int64_t b) { BLOCKNUM result={b}; return result; } - -// This struct hold information about values stored in the cachetable. -// As one can tell from the names, we are probably violating an -// abstraction layer by placing names. -// -// The purpose of having this struct is to have a way for the -// cachetable to accumulate the some totals we are interested in. -// Breaking this abstraction layer by having these names was the -// easiest way. 
-// -typedef struct pair_attr_s { - long size; // size PAIR's value takes in memory - long nonleaf_size; // size if PAIR is a nonleaf node, 0 otherwise, used only for engine status - long leaf_size; // size if PAIR is a leaf node, 0 otherwise, used only for engine status - long rollback_size; // size of PAIR is a rollback node, 0 otherwise, used only for engine status - long cache_pressure_size; // amount PAIR contributes to cache pressure, is sum of buffer sizes and workdone counts - bool is_valid; -} PAIR_ATTR; - -static inline PAIR_ATTR make_pair_attr(long size) { - PAIR_ATTR result={ - .size = size, - .nonleaf_size = 0, - .leaf_size = 0, - .rollback_size = 0, - .cache_pressure_size = 0, - .is_valid = true - }; - return result; -} - -typedef struct { - uint32_t len; - char *data; -} BYTESTRING; - -/* Log Sequence Number (LSN) - * Make the LSN be a struct instead of an integer so that we get better type checking. */ -typedef struct __toku_lsn { uint64_t lsn; } LSN; -#define ZERO_LSN ((LSN){0}) -#define MAX_LSN ((LSN){UINT64_MAX}) - -/* Message Sequence Number (MSN) - * Make the MSN be a struct instead of an integer so that we get better type checking. */ -typedef struct __toku_msn { uint64_t msn; } MSN; -#define ZERO_MSN ((MSN){0}) // dummy used for message construction, to be filled in when msg is applied to tree -#define MIN_MSN ((MSN){(uint64_t)1 << 62}) // first 2^62 values reserved for messages created before Dr. No (for upgrade) -#define MAX_MSN ((MSN){UINT64_MAX}) - -typedef struct { - int64_t numrows; // delta versions in basements could be negative - int64_t numbytes; -} STAT64INFO_S, *STAT64INFO; - -static const STAT64INFO_S ZEROSTATS = {0,0}; - -/* At the ft layer, a FILENUM uniquely identifies an open file. - * At the ydb layer, a DICTIONARY_ID uniquely identifies an open dictionary. 
- * With the introduction of the loader (ticket 2216), it is possible for the file that holds - * an open dictionary to change, so these are now separate and independent unique identifiers. - */ -typedef struct {uint32_t fileid;} FILENUM; -#define FILENUM_NONE ((FILENUM){UINT32_MAX}) - -typedef struct {uint64_t dictid;} DICTIONARY_ID; -#define DICTIONARY_ID_NONE ((DICTIONARY_ID){0}) - -typedef struct { - uint32_t num; - FILENUM *filenums; -} FILENUMS; - -typedef struct tokulogger *TOKULOGGER; -typedef struct txn_manager *TXN_MANAGER; -#define NULL_LOGGER ((TOKULOGGER)0) -typedef struct tokutxn *TOKUTXN; -typedef struct txninfo *TXNINFO; -#define NULL_TXN ((TOKUTXN)0) - -struct logged_btt_pair { - DISKOFF off; - int32_t size; -}; - -typedef struct cachetable *CACHETABLE; -typedef struct cachefile *CACHEFILE; -typedef struct ctpair *PAIR; -typedef class checkpointer *CHECKPOINTER; -class bn_data; - -/* tree command types */ -enum ft_msg_type { - FT_NONE = 0, - FT_INSERT = 1, - FT_DELETE_ANY = 2, // Delete any matching key. This used to be called FT_DELETE. - //FT_DELETE_BOTH = 3, - FT_ABORT_ANY = 4, // Abort any commands on any matching key. - //FT_ABORT_BOTH = 5, // Abort commands that match both the key and the value - FT_COMMIT_ANY = 6, - //FT_COMMIT_BOTH = 7, - FT_COMMIT_BROADCAST_ALL = 8, // Broadcast to all leafentries, (commit all transactions). - FT_COMMIT_BROADCAST_TXN = 9, // Broadcast to all leafentries, (commit specific transaction). - FT_ABORT_BROADCAST_TXN = 10, // Broadcast to all leafentries, (commit specific transaction). 
- FT_INSERT_NO_OVERWRITE = 11, - FT_OPTIMIZE = 12, // Broadcast - FT_OPTIMIZE_FOR_UPGRADE = 13, // same as FT_OPTIMIZE, but record version number in leafnode - FT_UPDATE = 14, - FT_UPDATE_BROADCAST_ALL = 15 -}; - -static inline bool -ft_msg_type_applies_once(enum ft_msg_type type) -{ - bool ret_val; - switch (type) { - case FT_INSERT_NO_OVERWRITE: - case FT_INSERT: - case FT_DELETE_ANY: - case FT_ABORT_ANY: - case FT_COMMIT_ANY: - case FT_UPDATE: - ret_val = true; - break; - case FT_COMMIT_BROADCAST_ALL: - case FT_COMMIT_BROADCAST_TXN: - case FT_ABORT_BROADCAST_TXN: - case FT_OPTIMIZE: - case FT_OPTIMIZE_FOR_UPGRADE: - case FT_UPDATE_BROADCAST_ALL: - case FT_NONE: - ret_val = false; - break; - default: - assert(false); - } - return ret_val; -} - -static inline bool -ft_msg_type_applies_all(enum ft_msg_type type) -{ - bool ret_val; - switch (type) { - case FT_NONE: - case FT_INSERT_NO_OVERWRITE: - case FT_INSERT: - case FT_DELETE_ANY: - case FT_ABORT_ANY: - case FT_COMMIT_ANY: - case FT_UPDATE: - ret_val = false; - break; - case FT_COMMIT_BROADCAST_ALL: - case FT_COMMIT_BROADCAST_TXN: - case FT_ABORT_BROADCAST_TXN: - case FT_OPTIMIZE: - case FT_OPTIMIZE_FOR_UPGRADE: - case FT_UPDATE_BROADCAST_ALL: - ret_val = true; - break; - default: - assert(false); - } - return ret_val; -} - -static inline bool -ft_msg_type_does_nothing(enum ft_msg_type type) -{ - return (type == FT_NONE); -} - -typedef struct xids_t *XIDS; -typedef struct fifo_msg_t *FIFO_MSG; -/* tree commands */ -struct ft_msg { - enum ft_msg_type type; - MSN msn; // message sequence number - XIDS xids; - union { - /* insert or delete */ - struct ft_msg_insert_delete { - const DBT *key; // for insert, delete, upsertdel - const DBT *val; // for insert, delete, (and it is the "extra" for upsertdel, upsertdel_broadcast_all) - } id; - } u; -}; - -// Message sent into the ft to implement insert, delete, update, etc -typedef struct ft_msg FT_MSG_S; -typedef struct ft_msg *FT_MSG; - -typedef int (*ft_compare_func)(DB 
*, const DBT *, const DBT *); -typedef void (*setval_func)(const DBT *, void *); -typedef int (*ft_update_func)(DB *, const DBT *, const DBT *, const DBT *, setval_func, void *); -typedef void (*on_redirect_callback)(FT_HANDLE, void*); -typedef void (*remove_ft_ref_callback)(FT, void*); - -#define UU(x) x __attribute__((__unused__)) - -typedef struct memarena *MEMARENA; -typedef struct rollback_log_node *ROLLBACK_LOG_NODE; -typedef struct serialized_rollback_log_node *SERIALIZED_ROLLBACK_LOG_NODE; - -// -// Types of snapshots that can be taken by a tokutxn -// - TXN_SNAPSHOT_NONE: means that there is no snapshot. Reads do not use snapshot reads. -// used for SERIALIZABLE and READ UNCOMMITTED -// - TXN_SNAPSHOT_ROOT: means that all tokutxns use their root transaction's snapshot -// used for REPEATABLE READ -// - TXN_SNAPSHOT_CHILD: means that each child tokutxn creates its own snapshot -// used for READ COMMITTED -// - -typedef enum __TXN_SNAPSHOT_TYPE { - TXN_SNAPSHOT_NONE=0, - TXN_SNAPSHOT_ROOT=1, - TXN_SNAPSHOT_CHILD=2 -} TXN_SNAPSHOT_TYPE; - -typedef struct ancestors *ANCESTORS; -typedef struct pivot_bounds const * const PIVOT_BOUNDS; -typedef struct ftnode_fetch_extra *FTNODE_FETCH_EXTRA; -typedef struct unlockers *UNLOCKERS; - -enum reactivity { - RE_STABLE, - RE_FUSIBLE, - RE_FISSIBLE -}; - -enum split_mode { - SPLIT_EVENLY, - SPLIT_LEFT_HEAVY, - SPLIT_RIGHT_HEAVY -}; - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-verify.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-verify.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ft-verify.cc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ft-verify.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -97,31 +97,32 @@ * For each nonleaf node: All the messages have keys that are between the associated pivot keys ( left_pivot_key < message <= right_pivot_key) */ -#include "ft-cachetable-wrappers.h" -#include "ft-internal.h" -#include "ft.h" +#include + +#include "ft/serialize/block_table.h" +#include "ft/ft.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-internal.h" +#include "ft/node.h" static int compare_pairs (FT_HANDLE ft_handle, const DBT *a, const DBT *b) { - FAKE_DB(db, &ft_handle->ft->cmp_descriptor); - int cmp = ft_handle->ft->compare_fun(&db, a, b); - return cmp; + return ft_handle->ft->cmp(a, b); } static int -compare_pair_to_key (FT_HANDLE ft_handle, const DBT *a, bytevec key, ITEMLEN keylen) { +compare_pair_to_key (FT_HANDLE ft_handle, const DBT *a, const void *key, uint32_t keylen) { DBT y; - FAKE_DB(db, &ft_handle->ft->cmp_descriptor); - int cmp = ft_handle->ft->compare_fun(&db, a, toku_fill_dbt(&y, key, keylen)); - return cmp; + return ft_handle->ft->cmp(a, toku_fill_dbt(&y, key, keylen)); } static int -verify_msg_in_child_buffer(FT_HANDLE ft_handle, enum ft_msg_type type, MSN msn, bytevec key, ITEMLEN keylen, bytevec UU(data), ITEMLEN UU(datalen), XIDS UU(xids), const DBT *lesser_pivot, const DBT *greatereq_pivot) +verify_msg_in_child_buffer(FT_HANDLE ft_handle, enum ft_msg_type type, MSN msn, const void *key, uint32_t keylen, const void *UU(data), uint32_t UU(datalen), XIDS UU(xids), const DBT *lesser_pivot, const DBT *greatereq_pivot) __attribute__((warn_unused_result)); +UU() static int -verify_msg_in_child_buffer(FT_HANDLE ft_handle, enum ft_msg_type type, MSN msn, bytevec key, ITEMLEN keylen, bytevec UU(data), ITEMLEN UU(datalen), XIDS UU(xids), const DBT *lesser_pivot, const DBT *greatereq_pivot) { +verify_msg_in_child_buffer(FT_HANDLE ft_handle, enum ft_msg_type type, MSN msn, const void *key, uint32_t keylen, const void *UU(data), uint32_t UU(datalen), XIDS UU(xids), const DBT *lesser_pivot, const DBT 
*greatereq_pivot) { int result = 0; if (msn.msn == ZERO_MSN.msn) result = EINVAL; @@ -159,7 +160,8 @@ #define VERIFY_ASSERTION(predicate, i, string) ({ \ if(!(predicate)) { \ - if (verbose) { \ + (void) verbose; \ + if (true) { \ fprintf(stderr, "%s:%d: Looking at child %d of block %" PRId64 ": %s\n", __FILE__, __LINE__, i, blocknum.b, string); \ } \ result = TOKUDB_NEEDS_REPAIR; \ @@ -169,7 +171,7 @@ struct count_msgs_extra { int count; MSN msn; - FIFO fifo; + message_buffer *msg_buffer; }; // template-only function, but must be extern @@ -177,15 +179,16 @@ __attribute__((nonnull(3))); int count_msgs(const int32_t &offset, const uint32_t UU(idx), struct count_msgs_extra *const e) { - const struct fifo_entry *entry = toku_fifo_get_entry(e->fifo, offset); - if (entry->msn.msn == e->msn.msn) { + MSN msn; + e->msg_buffer->get_message_key_msn(offset, nullptr, &msn); + if (msn.msn == e->msn.msn) { e->count++; } return 0; } struct verify_message_tree_extra { - FIFO fifo; + message_buffer *msg_buffer; bool broadcast; bool is_fresh; int i; @@ -202,20 +205,22 @@ BLOCKNUM blocknum = e->blocknum; int keep_going_on_failure = e->keep_going_on_failure; int result = 0; - const struct fifo_entry *entry = toku_fifo_get_entry(e->fifo, offset); + DBT k, v; + ft_msg msg = e->msg_buffer->get_message(offset, &k, &v); + bool is_fresh = e->msg_buffer->get_freshness(offset); if (e->broadcast) { - VERIFY_ASSERTION(ft_msg_type_applies_all((enum ft_msg_type) entry->type) || ft_msg_type_does_nothing((enum ft_msg_type) entry->type), + VERIFY_ASSERTION(ft_msg_type_applies_all((enum ft_msg_type) msg.type()) || ft_msg_type_does_nothing((enum ft_msg_type) msg.type()), e->i, "message found in broadcast list that is not a broadcast"); } else { - VERIFY_ASSERTION(ft_msg_type_applies_once((enum ft_msg_type) entry->type), + VERIFY_ASSERTION(ft_msg_type_applies_once((enum ft_msg_type) msg.type()), e->i, "message found in fresh or stale message tree that does not apply once"); if (e->is_fresh) { if 
(e->messages_have_been_moved) { - VERIFY_ASSERTION(entry->is_fresh, + VERIFY_ASSERTION(is_fresh, e->i, "message found in fresh message tree that is not fresh"); } } else { - VERIFY_ASSERTION(!entry->is_fresh, + VERIFY_ASSERTION(!is_fresh, e->i, "message found in stale message tree that is fresh"); } } @@ -235,15 +240,15 @@ BLOCKNUM blocknum = e->blocknum; int keep_going_on_failure = e->keep_going_on_failure; int result = 0; - const struct fifo_entry *entry = toku_fifo_get_entry(e->fifo, offset); - VERIFY_ASSERTION(!entry->is_fresh, e->i, "marked message found in the fresh message tree that is fresh"); + bool is_fresh = e->msg_buffer->get_freshness(offset); + VERIFY_ASSERTION(!is_fresh, e->i, "marked message found in the fresh message tree that is fresh"); done: return result; } template static int -verify_sorted_by_key_msn(FT_HANDLE ft_handle, FIFO fifo, const verify_omt_t &mt) { +verify_sorted_by_key_msn(FT_HANDLE ft_handle, message_buffer *msg_buffer, const verify_omt_t &mt) { int result = 0; size_t last_offset = 0; for (uint32_t i = 0; i < mt.size(); i++) { @@ -251,12 +256,8 @@ int r = mt.fetch(i, &offset); assert_zero(r); if (i > 0) { - struct toku_fifo_entry_key_msn_cmp_extra extra; - ZERO_STRUCT(extra); - extra.desc = &ft_handle->ft->cmp_descriptor; - extra.cmp = ft_handle->ft->compare_fun; - extra.fifo = fifo; - if (toku_fifo_entry_key_msn_cmp(extra, last_offset, offset) >= 0) { + struct toku_msg_buffer_key_msn_cmp_extra extra(ft_handle->ft->cmp, msg_buffer); + if (toku_msg_buffer_key_msn_cmp(extra, last_offset, offset) >= 0) { result = TOKUDB_NEEDS_REPAIR; break; } @@ -268,15 +269,9 @@ template static int -count_eq_key_msn(FT_HANDLE ft_handle, FIFO fifo, const count_omt_t &mt, const DBT *key, MSN msn) { - struct toku_fifo_entry_key_msn_heaviside_extra extra; - ZERO_STRUCT(extra); - extra.desc = &ft_handle->ft->cmp_descriptor; - extra.cmp = ft_handle->ft->compare_fun; - extra.fifo = fifo; - extra.key = key; - extra.msn = msn; - int r = mt.template 
find_zero(extra, nullptr, nullptr); +count_eq_key_msn(FT_HANDLE ft_handle, message_buffer *msg_buffer, const count_omt_t &mt, const DBT *key, MSN msn) { + struct toku_msg_buffer_key_msn_heaviside_extra extra(ft_handle->ft->cmp, msg_buffer, key, msn); + int r = mt.template find_zero(extra, nullptr, nullptr); int count; if (r == 0) { count = 1; @@ -295,8 +290,8 @@ ) { uint32_t fullhash = toku_cachetable_hash(ft_handle->ft->cf, blocknum); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft_handle->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); toku_pin_ftnode( ft_handle->ft, blocknum, @@ -308,6 +303,80 @@ ); } +struct verify_msg_fn { + FT_HANDLE ft_handle; + NONLEAF_CHILDINFO bnc; + const DBT *curr_less_pivot; + const DBT *curr_geq_pivot; + BLOCKNUM blocknum; + MSN this_msn; + int verbose; + int keep_going_on_failure; + bool messages_have_been_moved; + + MSN last_msn; + int msg_i; + int result = 0; // needed by VERIFY_ASSERTION + + verify_msg_fn(FT_HANDLE handle, NONLEAF_CHILDINFO nl, const DBT *less, const DBT *geq, + BLOCKNUM b, MSN tmsn, int v, int k, bool m) : + ft_handle(handle), bnc(nl), curr_less_pivot(less), curr_geq_pivot(geq), + blocknum(b), this_msn(tmsn), verbose(v), keep_going_on_failure(k), messages_have_been_moved(m), last_msn(ZERO_MSN), msg_i(0) { + } + + int operator()(const ft_msg &msg, bool is_fresh) { + enum ft_msg_type type = (enum ft_msg_type) msg.type(); + MSN msn = msg.msn(); + XIDS xid = msg.xids(); + const void *key = msg.kdbt()->data; + const void *data = msg.vdbt()->data; + uint32_t keylen = msg.kdbt()->size; + uint32_t datalen = msg.vdbt()->size; + + int r = verify_msg_in_child_buffer(ft_handle, type, msn, key, keylen, data, datalen, xid, + curr_less_pivot, + curr_geq_pivot); + VERIFY_ASSERTION(r == 0, msg_i, "A message in the buffer is out of place"); + VERIFY_ASSERTION((msn.msn > last_msn.msn), msg_i, "msn per msg must be monotonically increasing toward newer messages in buffer"); + 
VERIFY_ASSERTION((msn.msn <= this_msn.msn), msg_i, "all messages must have msn within limit of this node's max_msn_applied_to_node_in_memory"); + if (ft_msg_type_applies_once(type)) { + int count; + DBT keydbt; + toku_fill_dbt(&keydbt, key, keylen); + int total_count = 0; + count = count_eq_key_msn(ft_handle, &bnc->msg_buffer, bnc->fresh_message_tree, toku_fill_dbt(&keydbt, key, keylen), msn); + total_count += count; + if (is_fresh) { + VERIFY_ASSERTION(count == 1, msg_i, "a fresh message was not found in the fresh message tree"); + } else if (messages_have_been_moved) { + VERIFY_ASSERTION(count == 0, msg_i, "a stale message was found in the fresh message tree"); + } + VERIFY_ASSERTION(count <= 1, msg_i, "a message was found multiple times in the fresh message tree"); + count = count_eq_key_msn(ft_handle, &bnc->msg_buffer, bnc->stale_message_tree, &keydbt, msn); + + total_count += count; + if (is_fresh) { + VERIFY_ASSERTION(count == 0, msg_i, "a fresh message was found in the stale message tree"); + } else if (messages_have_been_moved) { + VERIFY_ASSERTION(count == 1, msg_i, "a stale message was not found in the stale message tree"); + } + VERIFY_ASSERTION(count <= 1, msg_i, "a message was found multiple times in the stale message tree"); + + VERIFY_ASSERTION(total_count <= 1, msg_i, "a message was found in both message trees (or more than once in a single tree)"); + VERIFY_ASSERTION(total_count >= 1, msg_i, "a message was not found in either message tree"); + } else { + VERIFY_ASSERTION(ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type), msg_i, "a message was found that does not apply either to all or to only one key"); + struct count_msgs_extra extra = { .count = 0, .msn = msn, .msg_buffer = &bnc->msg_buffer }; + bnc->broadcast_list.iterate(&extra); + VERIFY_ASSERTION(extra.count == 1, msg_i, "a broadcast message was not found in the broadcast list"); + } + last_msn = msn; + msg_i++; +done: + return result; + } +}; + static int 
toku_verify_ftnode_internal(FT_HANDLE ft_handle, MSN rootmsn, MSN parentmsn_with_messages, bool messages_exist_above, @@ -318,10 +387,10 @@ { int result=0; MSN this_msn; - BLOCKNUM blocknum = node->thisnodename; + BLOCKNUM blocknum = node->blocknum; //printf("%s:%d pin %p\n", __FILE__, __LINE__, node_v); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); this_msn = node->max_msn_applied_to_node_on_disk; if (height >= 0) { @@ -332,74 +401,40 @@ } // Verify that all the pivot keys are in order. for (int i = 0; i < node->n_children-2; i++) { - int compare = compare_pairs(ft_handle, &node->childkeys[i], &node->childkeys[i+1]); + DBT x, y; + int compare = compare_pairs(ft_handle, node->pivotkeys.fill_pivot(i, &x), node->pivotkeys.fill_pivot(i + 1, &y)); VERIFY_ASSERTION(compare < 0, i, "Value is >= the next value"); } // Verify that all the pivot keys are lesser_pivot < pivot <= greatereq_pivot for (int i = 0; i < node->n_children-1; i++) { + DBT x; if (lesser_pivot) { - int compare = compare_pairs(ft_handle, lesser_pivot, &node->childkeys[i]); + int compare = compare_pairs(ft_handle, lesser_pivot, node->pivotkeys.fill_pivot(i, &x)); VERIFY_ASSERTION(compare < 0, i, "Pivot is >= the lower-bound pivot"); } if (greatereq_pivot) { - int compare = compare_pairs(ft_handle, greatereq_pivot, &node->childkeys[i]); + int compare = compare_pairs(ft_handle, greatereq_pivot, node->pivotkeys.fill_pivot(i, &x)); VERIFY_ASSERTION(compare >= 0, i, "Pivot is < the upper-bound pivot"); } } for (int i = 0; i < node->n_children; i++) { - const DBT *curr_less_pivot = (i==0) ? lesser_pivot : &node->childkeys[i-1]; - const DBT *curr_geq_pivot = (i==node->n_children-1) ? greatereq_pivot : &node->childkeys[i]; + DBT x, y; + const DBT *curr_less_pivot = (i==0) ? lesser_pivot : node->pivotkeys.fill_pivot(i - 1, &x); + const DBT *curr_geq_pivot = (i==node->n_children-1) ? 
greatereq_pivot : node->pivotkeys.fill_pivot(i, &y); if (node->height > 0) { - MSN last_msn = ZERO_MSN; - // Verify that messages in the buffers are in the right place. NONLEAF_CHILDINFO bnc = BNC(node, i); - VERIFY_ASSERTION(verify_sorted_by_key_msn(ft_handle, bnc->buffer, bnc->fresh_message_tree) == 0, i, "fresh_message_tree"); - VERIFY_ASSERTION(verify_sorted_by_key_msn(ft_handle, bnc->buffer, bnc->stale_message_tree) == 0, i, "stale_message_tree"); - FIFO_ITERATE(bnc->buffer, key, keylen, data, datalen, itype, msn, xid, is_fresh, - ({ - enum ft_msg_type type = (enum ft_msg_type) itype; - int r = verify_msg_in_child_buffer(ft_handle, type, msn, key, keylen, data, datalen, xid, - curr_less_pivot, - curr_geq_pivot); - VERIFY_ASSERTION(r==0, i, "A message in the buffer is out of place"); - VERIFY_ASSERTION((msn.msn > last_msn.msn), i, "msn per msg must be monotonically increasing toward newer messages in buffer"); - VERIFY_ASSERTION((msn.msn <= this_msn.msn), i, "all messages must have msn within limit of this node's max_msn_applied_to_node_in_memory"); - if (ft_msg_type_applies_once(type)) { - int count; - DBT keydbt; - toku_fill_dbt(&keydbt, key, keylen); - int total_count = 0; - count = count_eq_key_msn(ft_handle, bnc->buffer, bnc->fresh_message_tree, toku_fill_dbt(&keydbt, key, keylen), msn); - total_count += count; - if (is_fresh) { - VERIFY_ASSERTION(count == 1, i, "a fresh message was not found in the fresh message tree"); - } else if (messages_have_been_moved) { - VERIFY_ASSERTION(count == 0, i, "a stale message was found in the fresh message tree"); - } - VERIFY_ASSERTION(count <= 1, i, "a message was found multiple times in the fresh message tree"); - count = count_eq_key_msn(ft_handle, bnc->buffer, bnc->stale_message_tree, &keydbt, msn); - - total_count += count; - if (is_fresh) { - VERIFY_ASSERTION(count == 0, i, "a fresh message was found in the stale message tree"); - } else if (messages_have_been_moved) { - VERIFY_ASSERTION(count == 1, i, "a stale 
message was not found in the stale message tree"); - } - VERIFY_ASSERTION(count <= 1, i, "a message was found multiple times in the stale message tree"); - - VERIFY_ASSERTION(total_count <= 1, i, "a message was found in both message trees (or more than once in a single tree)"); - VERIFY_ASSERTION(total_count >= 1, i, "a message was not found in either message tree"); - } else { - VERIFY_ASSERTION(ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type), i, "a message was found that does not apply either to all or to only one key"); - struct count_msgs_extra extra = { .count = 0, .msn = msn, .fifo = bnc->buffer }; - bnc->broadcast_list.iterate(&extra); - VERIFY_ASSERTION(extra.count == 1, i, "a broadcast message was not found in the broadcast list"); - } - last_msn = msn; - })); - struct verify_message_tree_extra extra = { .fifo = bnc->buffer, .broadcast = false, .is_fresh = true, .i = i, .verbose = verbose, .blocknum = node->thisnodename, .keep_going_on_failure = keep_going_on_failure, .messages_have_been_moved = messages_have_been_moved }; - int r = bnc->fresh_message_tree.iterate(&extra); + // Verify that messages in the buffers are in the right place. 
+ VERIFY_ASSERTION(verify_sorted_by_key_msn(ft_handle, &bnc->msg_buffer, bnc->fresh_message_tree) == 0, i, "fresh_message_tree"); + VERIFY_ASSERTION(verify_sorted_by_key_msn(ft_handle, &bnc->msg_buffer, bnc->stale_message_tree) == 0, i, "stale_message_tree"); + + verify_msg_fn verify_msg(ft_handle, bnc, curr_less_pivot, curr_geq_pivot, + blocknum, this_msn, verbose, keep_going_on_failure, messages_have_been_moved); + int r = bnc->msg_buffer.iterate(verify_msg); + if (r != 0) { result = r; goto done; } + + struct verify_message_tree_extra extra = { .msg_buffer = &bnc->msg_buffer, .broadcast = false, .is_fresh = true, .i = i, .verbose = verbose, .blocknum = node->blocknum, .keep_going_on_failure = keep_going_on_failure, .messages_have_been_moved = messages_have_been_moved }; + r = bnc->fresh_message_tree.iterate(&extra); if (r != 0) { result = r; goto done; } extra.is_fresh = false; r = bnc->stale_message_tree.iterate(&extra); @@ -460,7 +495,7 @@ MSN this_msn; //printf("%s:%d pin %p\n", __FILE__, __LINE__, node_v); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); this_msn = node->max_msn_applied_to_node_on_disk; int result = 0; @@ -489,14 +524,15 @@ for (int i = 0; i < node->n_children; i++) { FTNODE child_node; toku_get_node_for_verify(BP_BLOCKNUM(node, i), ft_handle, &child_node); + DBT x, y; int r = toku_verify_ftnode(ft_handle, rootmsn, (toku_bnc_n_entries(BNC(node, i)) > 0 ? this_msn : parentmsn_with_messages), messages_exist_above || toku_bnc_n_entries(BNC(node, i)) > 0, child_node, node->height-1, - (i==0) ? lesser_pivot : &node->childkeys[i-1], - (i==node->n_children-1) ? greatereq_pivot : &node->childkeys[i], + (i==0) ? lesser_pivot : node->pivotkeys.fill_pivot(i - 1, &x), + (i==node->n_children-1) ? 
greatereq_pivot : node->pivotkeys.fill_pivot(i, &y), progress_callback, progress_extra, recurse, verbose, keep_going_on_failure); if (r) { diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ftverify.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ftverify.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ftverify.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ftverify.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,507 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -//////////////////////////////////////////////////////////////////// -// ftverify - Command line tool that checks the validity of a given -// fractal tree file, one block at a time. -//////////////////////////////////////////////////////////////////// - -#include "fttypes.h" -#include "ft-internal.h" -#include "ft_layout_version.h" -#include "block_table.h" -#include "rbuf.h" -#include "sub_block.h" - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -static int num_cores = 0; // cache the number of cores for the parallelization -static struct toku_thread_pool *ft_pool = NULL; -static FILE *outf; -static double pct = 0.5; - -// Struct for reporting sub block stats. 
-struct verify_block_extra { - BLOCKNUM b; - int n_sub_blocks; - uint32_t header_length; - uint32_t calc_xsum; - uint32_t stored_xsum; - bool header_valid; - bool sub_blocks_valid; - struct sub_block_info *sub_block_results; -}; - -// Initialization function for the sub block stats. -static void -init_verify_block_extra(BLOCKNUM b, struct verify_block_extra *e) -{ - static const struct verify_block_extra default_vbe = - { - .b = { 0 }, - .n_sub_blocks = 0, - .header_length = 0, - .calc_xsum = 0, - .stored_xsum = 0, - .header_valid = true, - .sub_blocks_valid = true, - .sub_block_results = NULL - }; - *e = default_vbe; - e->b = b; -} - -// Reports percentage of completed blocks. -static void -report(int64_t blocks_done, int64_t blocks_failed, int64_t total_blocks) -{ - int64_t blocks_per_report = llrint(pct * total_blocks / 100.0); - if (blocks_per_report < 1) { - blocks_per_report = 1; - } - if (blocks_done % blocks_per_report == 0) { - double pct_actually_done = (100.0 * blocks_done) / total_blocks; - printf("% 3.3lf%% | %" PRId64 " blocks checked, %" PRId64 " bad block(s) detected\n", - pct_actually_done, blocks_done, blocks_failed); - fflush(stdout); - } -} - -// Helper function to deserialize one of the two headers for the ft -// we are checking. 
-static void -deserialize_headers(int fd, struct ft **h1p, struct ft **h2p) -{ - struct rbuf rb_0; - struct rbuf rb_1; - uint64_t checkpoint_count_0; - uint64_t checkpoint_count_1; - LSN checkpoint_lsn_0; - LSN checkpoint_lsn_1; - uint32_t version_0, version_1; - bool h0_acceptable = false; - bool h1_acceptable = false; - int r0, r1; - int r; - - { - toku_off_t header_0_off = 0; - r0 = deserialize_ft_from_fd_into_rbuf( - fd, - header_0_off, - &rb_0, - &checkpoint_count_0, - &checkpoint_lsn_0, - &version_0 - ); - if ((r0==0) && (checkpoint_lsn_0.lsn <= MAX_LSN.lsn)) { - h0_acceptable = true; - } - } - { - toku_off_t header_1_off = BLOCK_ALLOCATOR_HEADER_RESERVE; - r1 = deserialize_ft_from_fd_into_rbuf( - fd, - header_1_off, - &rb_1, - &checkpoint_count_1, - &checkpoint_lsn_1, - &version_1 - ); - if ((r1==0) && (checkpoint_lsn_1.lsn <= MAX_LSN.lsn)) { - h1_acceptable = true; - } - } - - // If either header is too new, the dictionary is unreadable - if (r0 == TOKUDB_DICTIONARY_TOO_NEW || r1 == TOKUDB_DICTIONARY_TOO_NEW) { - fprintf(stderr, "This dictionary was created with too new a version of TokuDB. Aborting.\n"); - abort(); - } - if (h0_acceptable) { - printf("Found dictionary header 1 with LSN %" PRIu64 "\n", checkpoint_lsn_0.lsn); - r = deserialize_ft_versioned(fd, &rb_0, h1p, version_0); - - if (r != 0) { - printf("---Header Error----\n"); - } - - } else { - *h1p = NULL; - } - if (h1_acceptable) { - printf("Found dictionary header 2 with LSN %" PRIu64 "\n", checkpoint_lsn_1.lsn); - r = deserialize_ft_versioned(fd, &rb_1, h2p, version_1); - if (r != 0) { - printf("---Header Error----\n"); - } - } else { - *h2p = NULL; - } - - if (rb_0.buf) toku_free(rb_0.buf); - if (rb_1.buf) toku_free(rb_1.buf); -} - -// Helper struct for tracking block checking progress. -struct check_block_table_extra { - int fd; - int64_t blocks_done, blocks_failed, total_blocks; - struct ft *h; -}; - -// Check non-upgraded (legacy) node. 
-// NOTE: These nodes have less checksumming than more -// recent nodes. This effectively means that we are -// skipping over these nodes. -static int -check_old_node(FTNODE node, struct rbuf *rb, int version) -{ - int r = 0; - read_legacy_node_info(node, rb, version); - // For version 14 nodes, advance the buffer to the end - // and verify the checksum. - if (version == FT_FIRST_LAYOUT_VERSION_WITH_END_TO_END_CHECKSUM) { - // Advance the buffer to the end. - rb->ndone = rb->size - 4; - r = check_legacy_end_checksum(rb); - } - - return r; -} - -// Read, decompress, and check the given block. -static int -check_block(BLOCKNUM blocknum, int64_t UU(blocksize), int64_t UU(address), void *extra) -{ - int r = 0; - int failure = 0; - struct check_block_table_extra *CAST_FROM_VOIDP(cbte, extra); - int fd = cbte->fd; - FT ft = cbte->h; - - struct verify_block_extra be; - init_verify_block_extra(blocknum, &be); - - // Let's read the block off of disk and fill a buffer with that - // block. - struct rbuf rb = RBUF_INITIALIZER; - read_block_from_fd_into_rbuf(fd, blocknum, ft, &rb); - - // Allocate the node. - FTNODE XMALLOC(node); - - initialize_ftnode(node, blocknum); - - r = read_and_check_magic(&rb); - if (r == DB_BADFORMAT) { - printf(" Magic failed.\n"); - failure++; - } - - r = read_and_check_version(node, &rb); - if (r != 0) { - printf(" Version check failed.\n"); - failure++; - } - - int version = node->layout_version_read_from_disk; - - //////////////////////////// - // UPGRADE FORK GOES HERE // - //////////////////////////// - - // Check nodes before major layout changes in version 15. - // All newer versions should follow the same layout, for now. - // This predicate would need to be changed if the layout - // of the nodes on disk does indeed change in the future. - if (version < FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) - { - struct rbuf nrb; - // Use old decompression method for legacy nodes. 
- r = decompress_from_raw_block_into_rbuf(rb.buf, rb.size, &nrb, blocknum); - if (r != 0) { - failure++; - goto cleanup; - } - - // Check the end-to-end checksum. - r = check_old_node(node, &nrb, version); - if (r != 0) { - failure++; - } - goto cleanup; - } - - read_node_info(node, &rb, version); - - FTNODE_DISK_DATA ndd; - allocate_and_read_partition_offsets(node, &rb, &ndd); - - r = check_node_info_checksum(&rb); - if (r == TOKUDB_BAD_CHECKSUM) { - printf(" Node info checksum failed.\n"); - failure++; - } - - // Get the partition info sub block. - struct sub_block sb; - sub_block_init(&sb); - r = read_compressed_sub_block(&rb, &sb); - if (r != 0) { - printf(" Partition info checksum failed.\n"); - failure++; - } - - just_decompress_sub_block(&sb); - - // If we want to inspect the data inside the partitions, we need - // to call setup_ftnode_partitions(node, bfe, true) - - // TODO: Create function for this. - // Using the node info, decompress all the keys and pivots to - // detect any corruptions. - for (int i = 0; i < node->n_children; ++i) { - uint32_t curr_offset = BP_START(ndd,i); - uint32_t curr_size = BP_SIZE(ndd,i); - struct rbuf curr_rbuf = {.buf = NULL, .size = 0, .ndone = 0}; - rbuf_init(&curr_rbuf, rb.buf + curr_offset, curr_size); - struct sub_block curr_sb; - sub_block_init(&curr_sb); - - r = read_compressed_sub_block(&rb, &sb); - if (r != 0) { - printf(" Compressed child partition %d checksum failed.\n", i); - failure++; - } - just_decompress_sub_block(&sb); - - r = verify_ftnode_sub_block(&sb); - if (r != 0) { - printf(" Uncompressed child partition %d checksum failed.\n", i); - failure++; - } - - // If needed, we can print row and/or pivot info at this - // point. - } - -cleanup: - // Cleanup and error incrementing. - if (failure) { - cbte->blocks_failed++; - } - - cbte->blocks_done++; - - if (node) { - toku_free(node); - } - - // Print the status of this block to the console. 
- report(cbte->blocks_done, cbte->blocks_failed, cbte->total_blocks); - // We need to ALWAYS return 0 if we want to continue iterating - // through the nodes in the file. - r = 0; - return r; -} - -// This calls toku_blocktable_iterate on the given block table. -// Passes our check_block() function to be called as we iterate over -// the block table. This will print any interesting failures and -// update us on our progress. -static void -check_block_table(int fd, BLOCK_TABLE bt, struct ft *h) -{ - int64_t num_blocks = toku_block_get_blocks_in_use_unlocked(bt); - printf("Starting verification of checkpoint containing"); - printf(" %" PRId64 " blocks.\n", num_blocks); - fflush(stdout); - - struct check_block_table_extra extra = { .fd = fd, - .blocks_done = 0, - .blocks_failed = 0, - .total_blocks = num_blocks, - .h = h }; - int r = 0; - r = toku_blocktable_iterate(bt, - TRANSLATION_CURRENT, - check_block, - &extra, - true, - true); - if (r != 0) { - // We can print more information here if necessary. - } - - assert(extra.blocks_done == extra.total_blocks); - printf("Finished verification. "); - printf(" %" PRId64 " blocks checked,", extra.blocks_done); - printf(" %" PRId64 " bad block(s) detected\n", extra.blocks_failed); - fflush(stdout); -} - -int -main(int argc, char const * const argv[]) -{ - // open the file - int r = 0; - int dictfd; - const char *dictfname, *outfname; - if (argc < 3 || argc > 4) { - fprintf(stderr, "%s: Invalid arguments.\n", argv[0]); - fprintf(stderr, "Usage: %s [report%%]\n", argv[0]); - r = EX_USAGE; - goto exit; - } - - assert(argc == 3 || argc == 4); - dictfname = argv[1]; - outfname = argv[2]; - if (argc == 4) { - set_errno(0); - pct = strtod(argv[3], NULL); - assert_zero(get_maybe_error_errno()); - assert(pct > 0.0 && pct <= 100.0); - } - - // Open the file as read-only. 
- dictfd = open(dictfname, O_RDONLY | O_BINARY, S_IRWXU | S_IRWXG | S_IRWXO); - if (dictfd < 0) { - perror(dictfname); - fflush(stderr); - abort(); - } - outf = fopen(outfname, "w"); - if (!outf) { - perror(outfname); - fflush(stderr); - abort(); - } - - // body of toku_ft_serialize_init(); - num_cores = toku_os_get_number_active_processors(); - r = toku_thread_pool_create(&ft_pool, num_cores); lazy_assert_zero(r); - assert_zero(r); - - // deserialize the header(s) - struct ft *h1, *h2; - deserialize_headers(dictfd, &h1, &h2); - - // walk over the block table and check blocks - if (h1) { - printf("Checking dictionary from header 1.\n"); - check_block_table(dictfd, h1->blocktable, h1); - } - if (h2) { - printf("Checking dictionary from header 2.\n"); - check_block_table(dictfd, h2->blocktable, h2); - } - if (h1 == NULL && h2 == NULL) { - printf("Both headers have a corruption and could not be used.\n"); - } - - toku_thread_pool_destroy(&ft_pool); -exit: - return r; -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/key.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/key.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/key.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/key.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,189 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include "key.h" -#include "fttypes.h" -#include - -#if 0 -int toku_keycompare (bytevec key1b, ITEMLEN key1len, bytevec key2b, ITEMLEN key2len) { - const unsigned char *key1 = key1b; - const unsigned char *key2 = key2b; - while (key1len > 0 && key2len > 0) { - unsigned char b1 = key1[0]; - unsigned char b2 = key2[0]; - if (b1b2) return 1; - key1len--; key1++; - key2len--; key2++; - } - if (key1lenkey2len) return 1; - return 0; -} - -#elif 0 -int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len) { - if (key1len==key2len) { - return memcmp(key1,key2,key1len); - } else if (key1len=0) return 1; /* If the keys are the same up to 2's length, then return 1 since key1 is longer than key2 */ - else return -1; - } -} -#elif 0 -/* This one looks tighter, but it does use memcmp... */ -int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len) { - int comparelen = key1len0; - k1++, k2++, comparelen--) { - if (*k1 != *k2) { - return (int)*k1-(int)*k2; - } - } - if (key1lenkey2len) return 1; - return 0; -} -#else -/* unroll that one four times */ -// when a and b are chars, return a-b is safe here because return type is int. No over/underflow possible. 
-int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len) { - int comparelen = key1len4; - k1+=4, k2+=4, comparelen-=4) { - { int v1=k1[0], v2=k2[0]; if (v1!=v2) return v1-v2; } - { int v1=k1[1], v2=k2[1]; if (v1!=v2) return v1-v2; } - { int v1=k1[2], v2=k2[2]; if (v1!=v2) return v1-v2; } - { int v1=k1[3], v2=k2[3]; if (v1!=v2) return v1-v2; } - } - for (; - comparelen>0; - k1++, k2++, comparelen--) { - if (*k1 != *k2) { - return (int)*k1-(int)*k2; - } - } - if (key1lenkey2len) return 1; - return 0; -} - -#endif - -int -toku_builtin_compare_fun (DB *db __attribute__((__unused__)), const DBT *a, const DBT*b) { - return toku_keycompare(a->data, a->size, b->data, b->size); -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/key.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/key.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/key.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/key.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,104 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_KEY_H -#define TOKU_KEY_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include "ybt.h" -#include "fttypes.h" - -int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len); - -void toku_test_keycompare (void) ; - -int toku_builtin_compare_fun (DB *, const DBT *, const DBT*) __attribute__((__visibility__("default"))); - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/leafentry.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/leafentry.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/leafentry.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/leafentry.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +89,7 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "wbuf.h" +#include "serialize/wbuf.h" #include "leafentry.h" void wbuf_nocrc_LEAFENTRY(struct wbuf *w, LEAFENTRY le) { diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/leafentry.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/leafentry.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/leafentry.h 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/leafentry.h 2014-10-08 13:19:51.000000000 +0000 @@ -1,9 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_LEAFENTRY_H -#define TOKU_LEAFENTRY_H - #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -33,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -90,6 +87,8 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -98,8 +97,9 @@ #include #include -#include "txn_manager.h" -#include "rbuf.h" +#include "ft/txn/txn_manager.h" +#include "ft/serialize/rbuf.h" +#include "ft/msg.h" /* Memory format of packed leaf entry @@ -211,6 +211,7 @@ int print_klpair (FILE *outf, const void* key, uint32_t keylen, LEAFENTRY v); // Print a leafentry out in human-readable form. int le_latest_is_del(LEAFENTRY le); // Return true if it is a provisional delete. +int le_val_is_del(LEAFENTRY le, bool is_snapshot_read, TOKUTXN txn); // Returns true if the value that is to be read is empty bool le_is_clean(LEAFENTRY le); //Return how many xids exist (0 does not count) bool le_has_xids(LEAFENTRY le, XIDS xids); // Return true transaction represented by xids is still provisional in this leafentry (le's xid stack is a superset or equal to xids) void* le_latest_val (LEAFENTRY le); // Return the latest val (return NULL for provisional deletes) @@ -227,10 +228,13 @@ // r|r!=0&&r!=TOKUDB_ACCEPT: Quit early, return r, because something unexpected went wrong (error case) typedef int(*LE_ITERATE_CALLBACK)(TXNID id, TOKUTXN context); -int le_iterate_is_del(LEAFENTRY le, LE_ITERATE_CALLBACK f, bool *is_empty, TOKUTXN context); - int le_iterate_val(LEAFENTRY le, LE_ITERATE_CALLBACK f, void** valpp, uint32_t *vallenp, TOKUTXN context); +void le_extract_val(LEAFENTRY le, + // should we return the entire leafentry as the val? 
+ bool is_leaf_mode, bool is_snapshot_read, + TOKUTXN ttxn, uint32_t *vallen, void **val); + size_t leafentry_disksize_13(LEAFENTRY_13 le); @@ -241,11 +245,14 @@ size_t *new_leafentry_memorysize, LEAFENTRY *new_leafentry_p); +class bn_data; + void -toku_le_apply_msg(FT_MSG msg, +toku_le_apply_msg(const ft_msg &msg, LEAFENTRY old_leafentry, // NULL if there was no stored data. bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced + uint32_t old_keylen, txn_gc_info *gc_info, LEAFENTRY *new_leafentry_p, int64_t * numbytes_delta_p); @@ -261,6 +268,3 @@ txn_gc_info *gc_info, LEAFENTRY *new_leaf_entry, int64_t * numbytes_delta_p); - -#endif /* TOKU_LEAFENTRY_H */ - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/le-cursor.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/le-cursor.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/le-cursor.cc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/le-cursor.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,9 +89,12 @@ #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "ft.h" -#include "ft-internal.h" -#include "le-cursor.h" +#include + +#include "ft/ft.h" +#include "ft/ft-internal.h" +#include "ft/le-cursor.h" +#include "ft/cursor.h" // A LE_CURSOR is a special purpose FT_CURSOR that: // - enables prefetching @@ -100,10 +103,6 @@ // A LE_CURSOR is good for scanning a FT from beginning to end. Useful for hot indexing. struct le_cursor { - // TODO: remove DBs from the ft layer comparison function - // so this is never necessary - // use a fake db for comparisons. - struct __toku_db fake_db; FT_CURSOR ft_cursor; bool neg_infinity; // true when the le cursor is positioned at -infinity (initial setting) bool pos_infinity; // true when the le cursor is positioned at +infinity (when _next returns DB_NOTFOUND) @@ -123,8 +122,6 @@ toku_ft_cursor_set_leaf_mode(le_cursor->ft_cursor); le_cursor->neg_infinity = false; le_cursor->pos_infinity = true; - // zero out the fake DB. this is a rare operation so it's not too slow. - memset(&le_cursor->fake_db, 0, sizeof(le_cursor->fake_db)); } } @@ -169,13 +166,9 @@ } else if (le_cursor->pos_infinity) { result = false; // all keys are less than +infinity } else { - // get the comparison function and descriptor from the cursor's ft - FT_HANDLE ft_handle = le_cursor->ft_cursor->ft_handle; - ft_compare_func keycompare = toku_ft_get_bt_compare(ft_handle); - le_cursor->fake_db.cmp_descriptor = toku_ft_get_cmp_descriptor(ft_handle); + FT ft = le_cursor->ft_cursor->ft_handle->ft; // get the current position from the cursor and compare it to the given key. 
- DBT *cursor_key = &le_cursor->ft_cursor->key; - int r = keycompare(&le_cursor->fake_db, cursor_key, key); + int r = ft->cmp(&le_cursor->ft_cursor->key, key); if (r <= 0) { result = true; // key is right of the cursor key } else { diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/le-cursor.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/le-cursor.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/le-cursor.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/le-cursor.h 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,13 +86,12 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef LE_CURSOR_H -#define LE_CURSOR_H - -#include "ft-ops.h" +#include "ft/ft-internal.h" // A leaf entry cursor (LE_CURSOR) is a special type of FT_CURSOR that visits all of the leaf entries in a tree // and returns the leaf entry to the caller. It maintains a copy of the key that it was last positioned over to @@ -127,5 +126,3 @@ // extracts position of le_cursor into estimate. Responsibility of caller to handle // thread safety. 
Caller (the indexer), does so by ensuring indexer lock is held void toku_le_cursor_update_estimate(LE_CURSOR le_cursor, DBT* estimate); - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/loader/callbacks.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/loader/callbacks.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/loader/callbacks.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/loader/callbacks.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,199 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. 
+ +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include +#include +#include +#include +#include + +#include "loader/loader-internal.h" +#include "util/dbt.h" + +static void error_callback_lock(ft_loader_error_callback loader_error) { + toku_mutex_lock(&loader_error->mutex); +} + +static void error_callback_unlock(ft_loader_error_callback loader_error) { + toku_mutex_unlock(&loader_error->mutex); +} + +void ft_loader_init_error_callback(ft_loader_error_callback loader_error) { + memset(loader_error, 0, sizeof *loader_error); + toku_init_dbt(&loader_error->key); + toku_init_dbt(&loader_error->val); + toku_mutex_init(&loader_error->mutex, NULL); +} + +void ft_loader_destroy_error_callback(ft_loader_error_callback loader_error) { + toku_mutex_destroy(&loader_error->mutex); + toku_destroy_dbt(&loader_error->key); + toku_destroy_dbt(&loader_error->val); + memset(loader_error, 0, sizeof *loader_error); +} + +int ft_loader_get_error(ft_loader_error_callback loader_error) { + error_callback_lock(loader_error); + int r = loader_error->error; + error_callback_unlock(loader_error); + return r; +} + +void ft_loader_set_error_function(ft_loader_error_callback loader_error, ft_loader_error_func error_function, void *error_extra) { + loader_error->error_callback = error_function; + loader_error->extra = error_extra; +} + +int ft_loader_set_error(ft_loader_error_callback loader_error, int error, DB *db, int which_db, DBT *key, DBT *val) { + int r; + error_callback_lock(loader_error); + if (loader_error->error) { // there can be only one + r = EEXIST; + } else { + r = 0; + loader_error->error = error; // set the error + loader_error->db = db; + loader_error->which_db = which_db; + if (key != nullptr) { + toku_clone_dbt(&loader_error->key, *key); + } + if (val != nullptr) { + toku_clone_dbt(&loader_error->val, *val); + } + } + error_callback_unlock(loader_error); + return r; +} + +int ft_loader_call_error_function(ft_loader_error_callback loader_error) { + int r; + error_callback_lock(loader_error); + r = loader_error->error; 
+ if (r && loader_error->error_callback && !loader_error->did_callback) { + loader_error->did_callback = true; + loader_error->error_callback(loader_error->db, + loader_error->which_db, + loader_error->error, + &loader_error->key, + &loader_error->val, + loader_error->extra); + } + error_callback_unlock(loader_error); + return r; +} + +int ft_loader_set_error_and_callback(ft_loader_error_callback loader_error, int error, DB *db, int which_db, DBT *key, DBT *val) { + int r = ft_loader_set_error(loader_error, error, db, which_db, key, val); + if (r == 0) + r = ft_loader_call_error_function(loader_error); + return r; +} + +int ft_loader_init_poll_callback(ft_loader_poll_callback p) { + memset(p, 0, sizeof *p); + return 0; +} + +void ft_loader_destroy_poll_callback(ft_loader_poll_callback p) { + memset(p, 0, sizeof *p); +} + +void ft_loader_set_poll_function(ft_loader_poll_callback p, ft_loader_poll_func poll_function, void *poll_extra) { + p->poll_function = poll_function; + p->poll_extra = poll_extra; +} + +int ft_loader_call_poll_function(ft_loader_poll_callback p, float progress) { + int r = 0; + if (p->poll_function) + r = p->poll_function(p->poll_extra, progress); + return r; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/loader/dbufio.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/loader/dbufio.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/loader/dbufio.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/loader/dbufio.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,631 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source 
code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. 
+ + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include + +#include +#include +#include + +#include "portability/toku_assert.h" +#include "portability/memory.h" + +#include "ft/ft-internal.h" +#include "ft/serialize/ft_node-serialize.h" +#include "loader/dbufio.h" +#include "loader/loader-internal.h" + +struct dbufio_file { + // i/o thread owns these + int fd; + + // consumers own these + size_t offset_in_buf; + toku_off_t offset_in_uncompressed_file; + + // need the mutex to modify these + struct dbufio_file *next; + bool second_buf_ready; // if true, the i/o thread is not touching anything. + + // consumers own [0], i/o thread owns [1], they are swapped by the consumer only when the condition mutex is held and second_buf_ready is true. + char *buf[2]; + size_t n_in_buf[2]; + int error_code[2]; // includes errno or eof. [0] is the error code associated with buf[0], [1] is the code for buf[1] + + bool io_done; +}; + + +/* A dbufio_fileset */ +struct dbufio_fileset { + // The mutex/condition variables protect + // the singly-linked list of files that need I/O (head/tail in the fileset, and next in each file) + // in each file: + // the second_buf_ready boolean (which says the second buffer is full of data). + // the swapping of the buf[], n_in_buf[], and error_code[] values. + toku_mutex_t mutex; + toku_cond_t cond; + int N; // How many files. This is constant once established. + int n_not_done; // how many of the files require more I/O? Owned by the i/o thread. + struct dbufio_file *files; // an array of length N. + struct dbufio_file *head, *tail; // must have the mutex to fiddle with these. + size_t bufsize; // the bufsize is the constant (the same for all buffers). 
+ + bool panic; + bool compressed; + int panic_errno; + toku_pthread_t iothread; +}; + + +static void enq (DBUFIO_FILESET bfs, struct dbufio_file *f) { + if (bfs->tail==NULL) { + bfs->head = f; + } else { + bfs->tail->next = f; + } + bfs->tail = f; + f->next = NULL; +} + +static void panic (DBUFIO_FILESET bfs, int r) { + if (bfs->panic) return; + bfs->panic_errno = r; // Don't really care about a race on this variable... Writes to it are atomic, so at least one good panic reason will be stored. + bfs->panic = true; + return; +} + +static bool paniced (DBUFIO_FILESET bfs) { + return bfs->panic; +} + +static ssize_t dbf_read_some_compressed(struct dbufio_file *dbf, char *buf, size_t bufsize) { + ssize_t ret; + invariant(bufsize >= MAX_UNCOMPRESSED_BUF); + unsigned char *raw_block = NULL; + + // deserialize the sub block header + + // total_size + // num_sub_blocks + // compressed_size,uncompressed_size,xsum (repeated num_sub_blocks times) + ssize_t readcode; + const uint32_t header_size = sizeof(uint32_t); + char header[header_size]; + + readcode = toku_os_read(dbf->fd, &header, header_size); + if (readcode < 0) { + ret = -1; + goto exit; + } + if (readcode == 0) { + ret = 0; + goto exit; + } + if (readcode < header_size) { + errno = TOKUDB_NO_DATA; + ret = -1; + goto exit; + } + uint32_t total_size; + { + uint32_t *p = (uint32_t *) &header[0]; + total_size = toku_dtoh32(p[0]); + } + if (total_size == 0 || total_size > (1<<30)) { + errno = toku_db_badformat(); + ret = -1; + goto exit; + } + + //Cannot use XMALLOC + MALLOC_N(total_size, raw_block); + if (raw_block == nullptr) { + errno = ENOMEM; + ret = -1; + goto exit; + } + readcode = toku_os_read(dbf->fd, raw_block, total_size); + if (readcode < 0) { + ret = -1; + goto exit; + } + if (readcode < total_size) { + errno = TOKUDB_NO_DATA; + ret = -1; + goto exit; + } + + struct sub_block sub_block[max_sub_blocks]; + uint32_t *sub_block_header; + sub_block_header = (uint32_t *) &raw_block[0]; + int32_t n_sub_blocks; + 
n_sub_blocks = toku_dtoh32(sub_block_header[0]); + sub_block_header++; + size_t size_subblock_header; + size_subblock_header = sub_block_header_size(n_sub_blocks); + if (n_sub_blocks == 0 || n_sub_blocks > max_sub_blocks || size_subblock_header > total_size) { + errno = toku_db_badformat(); + ret = -1; + goto exit; + } + for (int i = 0; i < n_sub_blocks; i++) { + sub_block_init(&sub_block[i]); + sub_block[i].compressed_size = toku_dtoh32(sub_block_header[0]); + sub_block[i].uncompressed_size = toku_dtoh32(sub_block_header[1]); + sub_block[i].xsum = toku_dtoh32(sub_block_header[2]); + sub_block_header += 3; + } + + // verify sub block sizes + size_t total_compressed_size; + total_compressed_size = 0; + for (int i = 0; i < n_sub_blocks; i++) { + uint32_t compressed_size = sub_block[i].compressed_size; + if (compressed_size<=0 || compressed_size>(1<<30)) { + errno = toku_db_badformat(); + ret = -1; + goto exit; + } + + uint32_t uncompressed_size = sub_block[i].uncompressed_size; + if (uncompressed_size<=0 || uncompressed_size>(1<<30)) { + errno = toku_db_badformat(); + ret = -1; + goto exit; + } + total_compressed_size += compressed_size; + } + if (total_size != total_compressed_size + size_subblock_header) { + errno = toku_db_badformat(); + ret = -1; + goto exit; + } + + // sum up the uncompressed size of the sub blocks + size_t uncompressed_size; + uncompressed_size = get_sum_uncompressed_size(n_sub_blocks, sub_block); + if (uncompressed_size > bufsize || uncompressed_size > MAX_UNCOMPRESSED_BUF) { + errno = toku_db_badformat(); + ret = -1; + goto exit; + } + + unsigned char *uncompressed_data; + uncompressed_data = (unsigned char *)buf; + + // point at the start of the compressed data (past the node header, the sub block header, and the header checksum) + unsigned char *compressed_data; + compressed_data = raw_block + size_subblock_header; + + // decompress all the compressed sub blocks into the uncompressed buffer + { + int r; + r = 
decompress_all_sub_blocks(n_sub_blocks, sub_block, compressed_data, uncompressed_data, get_num_cores(), get_ft_pool()); + if (r != 0) { + fprintf(stderr, "%s:%d loader failed %d at %p size %" PRIu32"\n", __FUNCTION__, __LINE__, r, raw_block, total_size); + dump_bad_block(raw_block, total_size); + errno = r; + ret = -1; + goto exit; + } + } + ret = uncompressed_size; +exit: + if (raw_block) { + toku_free(raw_block); + } + return ret; +} + +static ssize_t dbf_read_compressed(struct dbufio_file *dbf, char *buf, size_t bufsize) { + invariant(bufsize >= MAX_UNCOMPRESSED_BUF); + size_t count = 0; + + while (count + MAX_UNCOMPRESSED_BUF <= bufsize) { + ssize_t readcode = dbf_read_some_compressed(dbf, buf + count, bufsize - count); + if (readcode < 0) { + return readcode; + } + count += readcode; + if (readcode == 0) { + break; + } + } + return count; +} + +static void* io_thread (void *v) +// The dbuf_thread does all the asynchronous I/O. +{ + DBUFIO_FILESET bfs = (DBUFIO_FILESET)v; + toku_mutex_lock(&bfs->mutex); + //printf("%s:%d Locked\n", __FILE__, __LINE__); + while (1) { + + if (paniced(bfs)) { + toku_mutex_unlock(&bfs->mutex); // ignore any error + return 0; + } + //printf("n_not_done=%d\n", bfs->n_not_done); + if (bfs->n_not_done==0) { + // all done (meaning we stored EOF (or another error) in error_code[0] for the file. + //printf("unlocked\n"); + toku_mutex_unlock(&bfs->mutex); + return 0; + } + + struct dbufio_file *dbf = bfs->head; + if (dbf==NULL) { + // No I/O needs to be done yet. + // Wait until something happens that will wake us up. + toku_cond_wait(&bfs->cond, &bfs->mutex); + if (paniced(bfs)) { + toku_mutex_unlock(&bfs->mutex); // ignore any error + return 0; + } + // Have the lock so go around. + } else { + // Some I/O needs to be done. 
+ //printf("%s:%d Need I/O\n", __FILE__, __LINE__); + assert(dbf->second_buf_ready == false); + assert(!dbf->io_done); + bfs->head = dbf->next; + if (bfs->head==NULL) bfs->tail=NULL; + + // Unlock the mutex now that we have ownership of dbf to allow consumers to get the mutex and perform swaps. They won't swap + // this buffer because second_buf_ready is false. + toku_mutex_unlock(&bfs->mutex); + //printf("%s:%d Doing read fd=%d\n", __FILE__, __LINE__, dbf->fd); + { + ssize_t readcode; + if (bfs->compressed) { + readcode = dbf_read_compressed(dbf, dbf->buf[1], bfs->bufsize); + } + else { + readcode = toku_os_read(dbf->fd, dbf->buf[1], bfs->bufsize); + } + //printf("%s:%d readcode=%ld\n", __FILE__, __LINE__, readcode); + if (readcode==-1) { + // a real error. Save the real error. + int the_errno = get_error_errno(); + fprintf(stderr, "%s:%d dbf=%p fd=%d errno=%d\n", __FILE__, __LINE__, dbf, dbf->fd, the_errno); + dbf->error_code[1] = the_errno; + dbf->n_in_buf[1] = 0; + } else if (readcode==0) { + // End of file. Save it. 
+ dbf->error_code[1] = EOF; + dbf->n_in_buf[1] = 0; + dbf->io_done = true; + + } else { + dbf->error_code[1] = 0; + dbf->n_in_buf[1] = readcode; + } + + //printf("%s:%d locking mutex again=%ld\n", __FILE__, __LINE__, readcode); + { + toku_mutex_lock(&bfs->mutex); + if (paniced(bfs)) { + toku_mutex_unlock(&bfs->mutex); // ignore any error + return 0; + } + } + // Now that we have the mutex, we can decrement n_not_done (if applicable) and set second_buf_ready + if (readcode<=0) { + bfs->n_not_done--; + } + //printf("%s:%d n_not_done=%d\n", __FILE__, __LINE__, bfs->n_not_done); + dbf->second_buf_ready = true; + toku_cond_broadcast(&bfs->cond); + //printf("%s:%d did broadcast=%d\n", __FILE__, __LINE__, bfs->n_not_done); + // Still have the lock so go around the loop + } + } + } +} + +int create_dbufio_fileset (DBUFIO_FILESET *bfsp, int N, int fds[/*N*/], size_t bufsize, bool compressed) { + //printf("%s:%d here\n", __FILE__, __LINE__); + int result = 0; + DBUFIO_FILESET CALLOC(bfs); + if (bfs==0) { result = get_error_errno(); } + + bfs->compressed = compressed; + + bool mutex_inited = false, cond_inited = false; + if (result==0) { + CALLOC_N(N, bfs->files); + if (bfs->files==NULL) { result = get_error_errno(); } + else { + for (int i=0; ifiles[i].buf[0] = bfs->files[i].buf[1] = NULL; + } + } + } + //printf("%s:%d here\n", __FILE__, __LINE__); + if (result==0) { + toku_mutex_init(&bfs->mutex, NULL); + mutex_inited = true; + } + if (result==0) { + toku_cond_init(&bfs->cond, NULL); + cond_inited = true; + } + if (result==0) { + bfs->N = N; + bfs->n_not_done = N; + bfs->head = bfs->tail = NULL; + for (int i=0; ifiles[i].fd = fds[i]; + bfs->files[i].offset_in_buf = 0; + bfs->files[i].offset_in_uncompressed_file = 0; + bfs->files[i].next = NULL; + bfs->files[i].second_buf_ready = false; + for (int j=0; j<2; j++) { + if (result==0) { + MALLOC_N(bufsize, bfs->files[i].buf[j]); + if (bfs->files[i].buf[j]==NULL) { result=get_error_errno(); } + } + bfs->files[i].n_in_buf[j] = 0; 
+ bfs->files[i].error_code[j] = 0; + } + bfs->files[i].io_done = false; + ssize_t r; + if (bfs->compressed) { + r = dbf_read_compressed(&bfs->files[i], bfs->files[i].buf[0], bufsize); + } else { + r = toku_os_read(bfs->files[i].fd, bfs->files[i].buf[0], bufsize); + } + { + if (r<0) { + result=get_error_errno(); + break; + } else if (r==0) { + // it's EOF + bfs->files[i].io_done = true; + bfs->n_not_done--; + bfs->files[i].error_code[0] = EOF; + } else { + bfs->files[i].n_in_buf[0] = r; + //printf("%s:%d enq [%d]\n", __FILE__, __LINE__, i); + enq(bfs, &bfs->files[i]); + } + } + } + bfs->bufsize = bufsize; + bfs->panic = false; + bfs->panic_errno = 0; + } + //printf("Creating IO thread\n"); + if (result==0) { + result = toku_pthread_create(&bfs->iothread, NULL, io_thread, (void*)bfs); + } + if (result==0) { *bfsp = bfs; return 0; } + // Now undo everything. + // If we got here, there is no thread (either result was zero before the thread was created, or else the thread creation itself failed. + if (bfs) { + if (bfs->files) { + // the files were allocated, so we have to free all the bufs. 
+ for (int i=0; ifiles[i].buf[j]) + toku_free(bfs->files[i].buf[j]); + bfs->files[i].buf[j]=NULL; + } + } + toku_free(bfs->files); + bfs->files=NULL; + } + if (cond_inited) { + toku_cond_destroy(&bfs->cond); // don't check error status + } + if (mutex_inited) { + toku_mutex_destroy(&bfs->mutex); // don't check error status + } + toku_free(bfs); + } + return result; +} + +int panic_dbufio_fileset(DBUFIO_FILESET bfs, int error) { + toku_mutex_lock(&bfs->mutex); + panic(bfs, error); + toku_cond_broadcast(&bfs->cond); + toku_mutex_unlock(&bfs->mutex); + return 0; +} + +int destroy_dbufio_fileset (DBUFIO_FILESET bfs) { + int result = 0; + { + void *retval; + int r = toku_pthread_join(bfs->iothread, &retval); + assert(r==0); + assert(retval==NULL); + } + { + toku_mutex_destroy(&bfs->mutex); + } + { + toku_cond_destroy(&bfs->cond); + } + if (bfs->files) { + for (int i=0; iN; i++) { + for (int j=0; j<2; j++) { + //printf("%s:%d free([%d][%d]=%p\n", __FILE__, __LINE__, i,j, bfs->files[i].buf[j]); + toku_free(bfs->files[i].buf[j]); + } + } + toku_free(bfs->files); + } + toku_free(bfs); + return result; +} + +int dbufio_fileset_read (DBUFIO_FILESET bfs, int filenum, void *buf_v, size_t count, size_t *n_read) { + char *buf = (char*)buf_v; + struct dbufio_file *dbf = &bfs->files[filenum]; + if (dbf->error_code[0]!=0) return dbf->error_code[0]; + if (dbf->offset_in_buf + count <= dbf->n_in_buf[0]) { + // Enough data is present to do it all now + memcpy(buf, dbf->buf[0]+dbf->offset_in_buf, count); + dbf->offset_in_buf += count; + dbf->offset_in_uncompressed_file += count; + *n_read = count; + return 0; + } else if (dbf->n_in_buf[0] > dbf->offset_in_buf) { + // There is something in buf[0] + size_t this_count = dbf->n_in_buf[0]-dbf->offset_in_buf; + assert(dbf->offset_in_buf + this_count <= bfs->bufsize); + memcpy(buf, dbf->buf[0]+dbf->offset_in_buf, this_count); + dbf->offset_in_buf += this_count; + dbf->offset_in_uncompressed_file += this_count; + size_t sub_n_read; + int r = 
dbufio_fileset_read(bfs, filenum, buf+this_count, count-this_count, &sub_n_read); + if (r==0) { + *n_read = this_count + sub_n_read; + return 0; + } else { + // The error code will have been saved. We got some data so return that + *n_read = this_count; + return 0; + } + } else { + // There is nothing in buf[0]. So we need to swap buffers + toku_mutex_lock(&bfs->mutex); + while (1) { + if (dbf->second_buf_ready) { + dbf->n_in_buf[0] = dbf->n_in_buf[1]; + { + char *tmp = dbf->buf[0]; + dbf->buf[0] = dbf->buf[1]; + dbf->buf[1] = tmp; + } + dbf->error_code[0] = dbf->error_code[1]; + dbf->second_buf_ready = false; + dbf->offset_in_buf = 0; + if (!dbf->io_done) { + // Don't enqueue it if the I/O is all done. + //printf("%s:%d enq [%ld]\n", __FILE__, __LINE__, dbf-&bfs->files[0]); + enq(bfs, dbf); + } + toku_cond_broadcast(&bfs->cond); + toku_mutex_unlock(&bfs->mutex); + if (dbf->error_code[0]==0) { + assert(dbf->n_in_buf[0]>0); + return dbufio_fileset_read(bfs, filenum, buf_v, count, n_read); + } else { + *n_read = 0; + return dbf->error_code[0]; + } + } else { + toku_cond_wait(&bfs->cond, &bfs->mutex); + } + } + assert(0); // cannot get here. 
+ } +} + +void +dbufio_print(DBUFIO_FILESET bfs) { + fprintf(stderr, "%s:%d bfs=%p", __FILE__, __LINE__, bfs); + if (bfs->panic) + fprintf(stderr, " panic=%d", bfs->panic_errno); + fprintf(stderr, " N=%d %d %" PRIuMAX, bfs->N, bfs->n_not_done, bfs->bufsize); + for (int i = 0; i < bfs->N; i++) { + struct dbufio_file *dbf = &bfs->files[i]; + if (dbf->error_code[0] || dbf->error_code[1]) + fprintf(stderr, " %d=[%d,%d]", i, dbf->error_code[0], dbf->error_code[1]); + } + fprintf(stderr, "\n"); + +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/loader/dbufio.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/loader/dbufio.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/loader/dbufio.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/loader/dbufio.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,110 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." + +#include +#include + +/* Maintain a set of files for reading, with double buffering for the reads. */ + +/* A DBUFIO_FILESET is a set of files. The files are indexed from 0 to N-1, where N is specified when the set is created (and the files are also provided when the set is creaed). */ +/* An implementation would typically use a separate thread or asynchronous I/O to fetch ahead data for each file. The system will typically fill two buffers of size M for each file. One buffer is being read out of using dbuf_read(), and the other buffer is either empty (waiting on the asynchronous I/O to start), being filled in by the asynchronous I/O mechanism, or is waiting for the caller to read data from it. 
*/ +typedef struct dbufio_fileset *DBUFIO_FILESET; + +int create_dbufio_fileset (DBUFIO_FILESET *bfsp, int N, int fds[/*N*/], size_t bufsize, bool compressed); + +int destroy_dbufio_fileset(DBUFIO_FILESET); + +int dbufio_fileset_read (DBUFIO_FILESET bfs, int filenum, void *buf_v, size_t count, size_t *n_read); + +int panic_dbufio_fileset(DBUFIO_FILESET, int error); + +void dbufio_print(DBUFIO_FILESET); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/loader/loader.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/loader/loader.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/loader/loader.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/loader/loader.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,3342 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. 
+ +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include "ft/ft.h" +#include "ft/ft-internal.h" +#include "ft/leafentry.h" +#include "ft/loader/loader-internal.h" +#include "ft/loader/pqueue.h" +#include "ft/loader/dbufio.h" +#include "ft/logger/log-internal.h" +#include "ft/node.h" +#include "ft/serialize/block_table.h" +#include "ft/serialize/ft-serialize.h" +#include "ft/serialize/ft_node-serialize.h" +#include "ft/serialize/sub_block.h" + +#include "util/x1764.h" + +static size_t (*os_fwrite_fun)(const void *,size_t,size_t,FILE*)=NULL; +void ft_loader_set_os_fwrite (size_t (*fwrite_fun)(const void*,size_t,size_t,FILE*)) { + os_fwrite_fun=fwrite_fun; +} + +static size_t do_fwrite (const void *ptr, size_t size, size_t nmemb, FILE *stream) { + if (os_fwrite_fun) { + return os_fwrite_fun(ptr, size, nmemb, stream); + } else { + return fwrite(ptr, size, nmemb, stream); + } +} + + +// 1024 is the right size_factor for production. 
+// Different values for these sizes may be used for testing. +static uint32_t size_factor = 1024; +static uint32_t default_loader_nodesize = FT_DEFAULT_NODE_SIZE; +static uint32_t default_loader_basementnodesize = FT_DEFAULT_BASEMENT_NODE_SIZE; + +void +toku_ft_loader_set_size_factor(uint32_t factor) { +// For test purposes only + size_factor = factor; + default_loader_nodesize = (size_factor==1) ? (1<<15) : FT_DEFAULT_NODE_SIZE; +} + +uint64_t +toku_ft_loader_get_rowset_budget_for_testing (void) +// For test purposes only. In production, the rowset size is determined by negotation with the cachetable for some memory. (See #2613). +{ + return 16ULL*size_factor*1024ULL; +} + +void ft_loader_lock_init(FTLOADER bl) { + invariant(!bl->mutex_init); + toku_mutex_init(&bl->mutex, NULL); + bl->mutex_init = true; +} + +void ft_loader_lock_destroy(FTLOADER bl) { + if (bl->mutex_init) { + toku_mutex_destroy(&bl->mutex); + bl->mutex_init = false; + } +} + +static void ft_loader_lock(FTLOADER bl) { + invariant(bl->mutex_init); + toku_mutex_lock(&bl->mutex); +} + +static void ft_loader_unlock(FTLOADER bl) { + invariant(bl->mutex_init); + toku_mutex_unlock(&bl->mutex); +} + +static int add_big_buffer(struct file_info *file) { + int result = 0; + bool newbuffer = false; + if (file->buffer == NULL) { + file->buffer = toku_malloc(file->buffer_size); + if (file->buffer == NULL) + result = get_error_errno(); + else + newbuffer = true; + } + if (result == 0) { + int r = setvbuf(file->file, (char *) file->buffer, _IOFBF, file->buffer_size); + if (r != 0) { + result = get_error_errno(); + if (newbuffer) { + toku_free(file->buffer); + file->buffer = NULL; + } + } + } + return result; +} + +static void cleanup_big_buffer(struct file_info *file) { + if (file->buffer) { + toku_free(file->buffer); + file->buffer = NULL; + } +} + +int ft_loader_init_file_infos (struct file_infos *fi) { + int result = 0; + toku_mutex_init(&fi->lock, NULL); + fi->n_files = 0; + fi->n_files_limit = 1; + 
fi->n_files_open = 0; + fi->n_files_extant = 0; + MALLOC_N(fi->n_files_limit, fi->file_infos); + if (fi->file_infos == NULL) + result = get_error_errno(); + return result; +} + +void ft_loader_fi_destroy (struct file_infos *fi, bool is_error) +// Effect: Free the resources in the fi. +// If is_error then we close and unlink all the temp files. +// If !is_error then requires that all the temp files have been closed and destroyed +// No error codes are returned. If anything goes wrong with closing and unlinking then it's only in an is_error case, so we don't care. +{ + if (fi->file_infos == NULL) { + // ft_loader_init_file_infos guarantees this isn't null, so if it is, we know it hasn't been inited yet and we don't need to destroy it. + return; + } + toku_mutex_destroy(&fi->lock); + if (!is_error) { + invariant(fi->n_files_open==0); + invariant(fi->n_files_extant==0); + } + for (int i=0; in_files; i++) { + if (fi->file_infos[i].is_open) { + invariant(is_error); + toku_os_fclose(fi->file_infos[i].file); // don't check for errors, since we are in an error case. 
+ } + if (fi->file_infos[i].is_extant) { + invariant(is_error); + unlink(fi->file_infos[i].fname); + toku_free(fi->file_infos[i].fname); + } + cleanup_big_buffer(&fi->file_infos[i]); + } + toku_free(fi->file_infos); + fi->n_files=0; + fi->n_files_limit=0; + fi->file_infos = NULL; +} + +static int open_file_add (struct file_infos *fi, + FILE *file, + char *fname, + /* out */ FIDX *idx) +{ + int result = 0; + toku_mutex_lock(&fi->lock); + if (fi->n_files >= fi->n_files_limit) { + fi->n_files_limit *=2; + XREALLOC_N(fi->n_files_limit, fi->file_infos); + } + invariant(fi->n_files < fi->n_files_limit); + fi->file_infos[fi->n_files].is_open = true; + fi->file_infos[fi->n_files].is_extant = true; + fi->file_infos[fi->n_files].fname = fname; + fi->file_infos[fi->n_files].file = file; + fi->file_infos[fi->n_files].n_rows = 0; + fi->file_infos[fi->n_files].buffer_size = FILE_BUFFER_SIZE; + fi->file_infos[fi->n_files].buffer = NULL; + result = add_big_buffer(&fi->file_infos[fi->n_files]); + if (result == 0) { + idx->idx = fi->n_files; + fi->n_files++; + fi->n_files_extant++; + fi->n_files_open++; + } + toku_mutex_unlock(&fi->lock); + return result; +} + +int ft_loader_fi_reopen (struct file_infos *fi, FIDX idx, const char *mode) { + int result = 0; + toku_mutex_lock(&fi->lock); + int i = idx.idx; + invariant(i>=0 && in_files); + invariant(!fi->file_infos[i].is_open); + invariant(fi->file_infos[i].is_extant); + fi->file_infos[i].file = toku_os_fopen(fi->file_infos[i].fname, mode); + if (fi->file_infos[i].file == NULL) { + result = get_error_errno(); + } else { + fi->file_infos[i].is_open = true; + // No longer need the big buffer for reopened files. Don't allocate the space, we need it elsewhere. 
+ //add_big_buffer(&fi->file_infos[i]); + fi->n_files_open++; + } + toku_mutex_unlock(&fi->lock); + return result; +} + +int ft_loader_fi_close (struct file_infos *fi, FIDX idx, bool require_open) +{ + int result = 0; + toku_mutex_lock(&fi->lock); + invariant(idx.idx >=0 && idx.idx < fi->n_files); + if (fi->file_infos[idx.idx].is_open) { + invariant(fi->n_files_open>0); // loader-cleanup-test failure + fi->n_files_open--; + fi->file_infos[idx.idx].is_open = false; + int r = toku_os_fclose(fi->file_infos[idx.idx].file); + if (r) + result = get_error_errno(); + cleanup_big_buffer(&fi->file_infos[idx.idx]); + } else if (require_open) + result = EINVAL; + toku_mutex_unlock(&fi->lock); + return result; +} + +int ft_loader_fi_unlink (struct file_infos *fi, FIDX idx) { + int result = 0; + toku_mutex_lock(&fi->lock); + int id = idx.idx; + invariant(id >=0 && id < fi->n_files); + if (fi->file_infos[id].is_extant) { // must still exist + invariant(fi->n_files_extant>0); + fi->n_files_extant--; + invariant(!fi->file_infos[id].is_open); // must be closed before we unlink + fi->file_infos[id].is_extant = false; + int r = unlink(fi->file_infos[id].fname); + if (r != 0) + result = get_error_errno(); + toku_free(fi->file_infos[id].fname); + fi->file_infos[id].fname = NULL; + } else + result = EINVAL; + toku_mutex_unlock(&fi->lock); + return result; +} + +int +ft_loader_fi_close_all(struct file_infos *fi) { + int rval = 0; + for (int i = 0; i < fi->n_files; i++) { + int r; + FIDX idx = { i }; + r = ft_loader_fi_close(fi, idx, false); // ignore files that are already closed + if (rval == 0 && r) + rval = r; // capture first error + } + return rval; +} + +int ft_loader_open_temp_file (FTLOADER bl, FIDX *file_idx) +/* Effect: Open a temporary file in read-write mode. Save enough information to close and delete the file later. + * Return value: 0 on success, an error number otherwise. + * On error, *file_idx and *fnamep will be unmodified. 
+ * The open file will be saved in bl->file_infos so that even if errors happen we can free them all. + */ +{ + int result = 0; + if (result) // debug hack + return result; + FILE *f = NULL; + int fd = -1; + char *fname = toku_strdup(bl->temp_file_template); + if (fname == NULL) + result = get_error_errno(); + else { + fd = mkstemp(fname); + if (fd < 0) { + result = get_error_errno(); + } else { + f = toku_os_fdopen(fd, "r+"); + if (f == NULL) + result = get_error_errno(); + else + result = open_file_add(&bl->file_infos, f, fname, file_idx); + } + } + if (result != 0) { + if (fd >= 0) { + toku_os_close(fd); + unlink(fname); + } + if (f != NULL) + toku_os_fclose(f); // don't check for error because we're already in an error case + if (fname != NULL) + toku_free(fname); + } + return result; +} + +void toku_ft_loader_internal_destroy (FTLOADER bl, bool is_error) { + ft_loader_lock_destroy(bl); + + // These frees rely on the fact that if you free a NULL pointer then nothing bad happens. + toku_free(bl->dbs); + toku_free(bl->descriptors); + toku_free(bl->root_xids_that_created); + if (bl->new_fnames_in_env) { + for (int i = 0; i < bl->N; i++) + toku_free((char*)bl->new_fnames_in_env[i]); + toku_free(bl->new_fnames_in_env); + } + toku_free(bl->extracted_datasizes); + toku_free(bl->bt_compare_funs); + toku_free((char*)bl->temp_file_template); + ft_loader_fi_destroy(&bl->file_infos, is_error); + + for (int i = 0; i < bl->N; i++) + destroy_rowset(&bl->rows[i]); + toku_free(bl->rows); + + for (int i = 0; i < bl->N; i++) + destroy_merge_fileset(&bl->fs[i]); + toku_free(bl->fs); + + if (bl->last_key) { + for (int i=0; i < bl->N; i++) { + toku_free(bl->last_key[i].data); + } + toku_free(bl->last_key); + bl->last_key = NULL; + } + + destroy_rowset(&bl->primary_rowset); + if (bl->primary_rowset_queue) { + toku_queue_destroy(bl->primary_rowset_queue); + bl->primary_rowset_queue = nullptr; + } + + for (int i=0; iN; i++) { + if ( bl->fractal_queues ) { + 
invariant(bl->fractal_queues[i]==NULL); + } + } + toku_free(bl->fractal_threads); + toku_free(bl->fractal_queues); + toku_free(bl->fractal_threads_live); + + if (bl->did_reserve_memory) { + invariant(bl->cachetable); + toku_cachetable_release_reserved_memory(bl->cachetable, bl->reserved_memory); + } + + ft_loader_destroy_error_callback(&bl->error_callback); + ft_loader_destroy_poll_callback(&bl->poll_callback); + + //printf("Progress=%d/%d\n", bl->progress, PROGRESS_MAX); + + toku_free(bl); +} + +static void *extractor_thread (void*); + +#define MAX(a,b) (((a)<(b)) ? (b) : (a)) + +static uint64_t memory_per_rowset_during_extract (FTLOADER bl) +// Return how much memory can be allocated for each rowset. +{ + if (size_factor==1) { + return 16*1024; + } else { + // There is a primary rowset being maintained by the foreground thread. + // There could be two more in the queue. + // There is one rowset for each index (bl->N) being filled in. + // Later we may have sort_and_write operations spawning in parallel, and will need to account for that. + int n_copies = (1 // primary rowset + +EXTRACTOR_QUEUE_DEPTH // the number of primaries in the queue + +bl->N // the N rowsets being constructed by the extractor thread. + +bl->N // the N sort buffers + +1 // Give the extractor thread one more so that it can have temporary space for sorting. This is overkill. + ); + int64_t extra_reserved_memory = bl->N * FILE_BUFFER_SIZE; // for each index we are writing to a file at any given time. 
+ int64_t tentative_rowset_size = ((int64_t)(bl->reserved_memory - extra_reserved_memory))/(n_copies); + return MAX(tentative_rowset_size, (int64_t)MIN_ROWSET_MEMORY); + } +} + +static unsigned ft_loader_get_fractal_workers_count(FTLOADER bl) { + unsigned w = 0; + while (1) { + ft_loader_lock(bl); + w = bl->fractal_workers; + ft_loader_unlock(bl); + if (w != 0) + break; + toku_pthread_yield(); // maybe use a cond var instead + } + return w; +} + +static void ft_loader_set_fractal_workers_count(FTLOADER bl) { + ft_loader_lock(bl); + if (bl->fractal_workers == 0) + bl->fractal_workers = 1; + ft_loader_unlock(bl); +} + +// To compute a merge, we have a certain amount of memory to work with. +// We perform only one fanin at a time. +// If the fanout is F then we are using +// F merges. Each merge uses +// DBUFIO_DEPTH buffers for double buffering. Each buffer is of size at least MERGE_BUF_SIZE +// so the memory is +// F*MERGE_BUF_SIZE*DBUFIO_DEPTH storage. +// We use some additional space to buffer the outputs. +// That's FILE_BUFFER_SIZE for writing to a merge file if we are writing to a mergefile. +// And we have FRACTAL_WRITER_ROWSETS*MERGE_BUF_SIZE per queue +// And if we are doing a fractal, each worker could have have a fractal tree that it's working on. 
+// +// DBUFIO_DEPTH*F*MERGE_BUF_SIZE + FRACTAL_WRITER_ROWSETS*MERGE_BUF_SIZE + WORKERS*NODESIZE*2 <= RESERVED_MEMORY + +static int64_t memory_avail_during_merge(FTLOADER bl, bool is_fractal_node) { + // avail memory = reserved memory - WORKERS*NODESIZE*2 for the last merge stage only + int64_t avail_memory = bl->reserved_memory; + if (is_fractal_node) { + // reserve space for the fractal writer thread buffers + avail_memory -= (int64_t)ft_loader_get_fractal_workers_count(bl) * (int64_t)default_loader_nodesize * 2; // compressed and uncompressed buffers + } + return avail_memory; +} + +static int merge_fanin (FTLOADER bl, bool is_fractal_node) { + // return number of temp files to read in this pass + int64_t memory_avail = memory_avail_during_merge(bl, is_fractal_node); + int64_t nbuffers = memory_avail / (int64_t)TARGET_MERGE_BUF_SIZE; + if (is_fractal_node) + nbuffers -= FRACTAL_WRITER_ROWSETS; + return MAX(nbuffers / (int64_t)DBUFIO_DEPTH, (int)MIN_MERGE_FANIN); +} + +static uint64_t memory_per_rowset_during_merge (FTLOADER bl, int merge_factor, bool is_fractal_node // if it is being sent to a q + ) { + int64_t memory_avail = memory_avail_during_merge(bl, is_fractal_node); + int64_t nbuffers = DBUFIO_DEPTH * merge_factor; + if (is_fractal_node) + nbuffers += FRACTAL_WRITER_ROWSETS; + return MAX(memory_avail / nbuffers, (int64_t)MIN_MERGE_BUF_SIZE); +} + +int toku_ft_loader_internal_init (/* out */ FTLOADER *blp, + CACHETABLE cachetable, + generate_row_for_put_func g, + DB *src_db, + int N, FT_HANDLE fts[/*N*/], DB* dbs[/*N*/], + const char *new_fnames_in_env[/*N*/], + ft_compare_func bt_compare_functions[/*N*/], + const char *temp_file_template, + LSN load_lsn, + TOKUTXN txn, + bool reserve_memory, + uint64_t reserve_memory_size, + bool compress_intermediates, + bool allow_puts) +// Effect: Allocate and initialize a FTLOADER, but do not create the extractor thread. 
+{ + FTLOADER CALLOC(bl); // initialized to all zeros (hence CALLOC) + if (!bl) return get_error_errno(); + + bl->generate_row_for_put = g; + bl->cachetable = cachetable; + if (reserve_memory && bl->cachetable) { + bl->did_reserve_memory = true; + bl->reserved_memory = toku_cachetable_reserve_memory(bl->cachetable, 2.0/3.0, reserve_memory_size); // allocate 2/3 of the unreserved part (which is 3/4 of the memory to start with). + } + else { + bl->did_reserve_memory = false; + bl->reserved_memory = 512*1024*1024; // if no cache table use 512MB. + } + bl->compress_intermediates = compress_intermediates; + bl->allow_puts = allow_puts; + bl->src_db = src_db; + bl->N = N; + bl->load_lsn = load_lsn; + if (txn) { + bl->load_root_xid = txn->txnid.parent_id64; + } + else { + bl->load_root_xid = TXNID_NONE; + } + + ft_loader_init_error_callback(&bl->error_callback); + ft_loader_init_poll_callback(&bl->poll_callback); + +#define MY_CALLOC_N(n,v) CALLOC_N(n,v); if (!v) { int r = get_error_errno(); toku_ft_loader_internal_destroy(bl, true); return r; } +#define SET_TO_MY_STRDUP(lval, s) do { char *v = toku_strdup(s); if (!v) { int r = get_error_errno(); toku_ft_loader_internal_destroy(bl, true); return r; } lval = v; } while (0) + + MY_CALLOC_N(N, bl->root_xids_that_created); + for (int i=0; iroot_xids_that_created[i]=fts[i]->ft->h->root_xid_that_created; + MY_CALLOC_N(N, bl->dbs); + for (int i=0; idbs[i]=dbs[i]; + MY_CALLOC_N(N, bl->descriptors); + for (int i=0; idescriptors[i]=&fts[i]->ft->descriptor; + MY_CALLOC_N(N, bl->new_fnames_in_env); + for (int i=0; inew_fnames_in_env[i], new_fnames_in_env[i]); + MY_CALLOC_N(N, bl->extracted_datasizes); // the calloc_n zeroed everything, which is what we want + MY_CALLOC_N(N, bl->bt_compare_funs); + for (int i=0; ibt_compare_funs[i] = bt_compare_functions[i]; + + MY_CALLOC_N(N, bl->fractal_queues); + for (int i=0; ifractal_queues[i]=NULL; + MY_CALLOC_N(N, bl->fractal_threads); + MY_CALLOC_N(N, bl->fractal_threads_live); + for (int i=0; 
ifractal_threads_live[i] = false; + + { + int r = ft_loader_init_file_infos(&bl->file_infos); + if (r!=0) { toku_ft_loader_internal_destroy(bl, true); return r; } + } + + SET_TO_MY_STRDUP(bl->temp_file_template, temp_file_template); + + bl->n_rows = 0; + bl->progress = 0; + bl->progress_callback_result = 0; + + MY_CALLOC_N(N, bl->rows); + MY_CALLOC_N(N, bl->fs); + MY_CALLOC_N(N, bl->last_key); + for(int i=0;irows[i], memory_per_rowset_during_extract(bl)); + if (r!=0) { toku_ft_loader_internal_destroy(bl, true); return r; } + } + init_merge_fileset(&bl->fs[i]); + bl->last_key[i].flags = DB_DBT_REALLOC; // don't really need this, but it's nice to maintain it. We use ulen to keep track of the realloced space. + } + + { + int r = init_rowset(&bl->primary_rowset, memory_per_rowset_during_extract(bl)); + if (r!=0) { toku_ft_loader_internal_destroy(bl, true); return r; } + } + { int r = toku_queue_create(&bl->primary_rowset_queue, EXTRACTOR_QUEUE_DEPTH); + if (r!=0) { toku_ft_loader_internal_destroy(bl, true); return r; } + } + { + ft_loader_lock_init(bl); + } + + *blp = bl; + + return 0; +} + +int toku_ft_loader_open (FTLOADER *blp, /* out */ + CACHETABLE cachetable, + generate_row_for_put_func g, + DB *src_db, + int N, FT_HANDLE fts[/*N*/], DB* dbs[/*N*/], + const char *new_fnames_in_env[/*N*/], + ft_compare_func bt_compare_functions[/*N*/], + const char *temp_file_template, + LSN load_lsn, + TOKUTXN txn, + bool reserve_memory, + uint64_t reserve_memory_size, + bool compress_intermediates, + bool allow_puts) { +// Effect: called by DB_ENV->create_loader to create an ft loader. +// Arguments: +// blp Return a ft loader ("bulk loader") here. +// g The function for generating a row +// src_db The source database. Needed by g. May be NULL if that's ok with g. +// N The number of dbs to create. +// dbs An array of open databases. Used by g. The data will be put in these database. 
+// new_fnames The file names (these strings are owned by the caller: we make a copy for our own purposes). +// temp_file_template A template suitable for mkstemp() +// reserve_memory Cause the loader to reserve memory for its use from the cache table. +// compress_intermediates Cause the loader to compress intermediate loader files. +// allow_puts Prepare the loader for rows to insert. When puts are disabled, the loader does not run the +// extractor or the fractal tree writer threads. +// Return value: 0 on success, an error number otherwise. + int result = 0; + { + int r = toku_ft_loader_internal_init(blp, cachetable, g, src_db, + N, fts, dbs, + new_fnames_in_env, + bt_compare_functions, + temp_file_template, + load_lsn, + txn, + reserve_memory, + reserve_memory_size, + compress_intermediates, + allow_puts); + if (r!=0) result = r; + } + if (result==0 && allow_puts) { + FTLOADER bl = *blp; + int r = toku_pthread_create(&bl->extractor_thread, NULL, extractor_thread, (void*)bl); + if (r==0) { + bl->extractor_live = true; + } else { + result = r; + (void) toku_ft_loader_internal_destroy(bl, true); + } + } + return result; +} + +static void ft_loader_set_panic(FTLOADER bl, int error, bool callback, int which_db, DBT *key, DBT *val) { + DB *db = nullptr; + if (bl && bl->dbs && which_db >= 0 && which_db < bl->N) { + db = bl->dbs[which_db]; + } + int r = ft_loader_set_error(&bl->error_callback, error, db, which_db, key, val); + if (r == 0 && callback) + ft_loader_call_error_function(&bl->error_callback); +} + +// One of the tests uses this. 
+FILE *toku_bl_fidx2file (FTLOADER bl, FIDX i) { + toku_mutex_lock(&bl->file_infos.lock); + invariant(i.idx >=0 && i.idx < bl->file_infos.n_files); + invariant(bl->file_infos.file_infos[i.idx].is_open); + FILE *result=bl->file_infos.file_infos[i.idx].file; + toku_mutex_unlock(&bl->file_infos.lock); + return result; +} + +static int bl_finish_compressed_write(FILE *stream, struct wbuf *wb) { + int r; + char *compressed_buf = NULL; + const size_t data_size = wb->ndone; + invariant(data_size > 0); + invariant(data_size <= MAX_UNCOMPRESSED_BUF); + + int n_sub_blocks = 0; + int sub_block_size = 0; + + r = choose_sub_block_size(wb->ndone, max_sub_blocks, &sub_block_size, &n_sub_blocks); + invariant(r==0); + invariant(0 < n_sub_blocks && n_sub_blocks <= max_sub_blocks); + invariant(sub_block_size > 0); + + struct sub_block sub_block[max_sub_blocks]; + // set the initial sub block size for all of the sub blocks + for (int i = 0; i < n_sub_blocks; i++) { + sub_block_init(&sub_block[i]); + } + set_all_sub_block_sizes(data_size, sub_block_size, n_sub_blocks, sub_block); + + size_t compressed_len = get_sum_compressed_size_bound(n_sub_blocks, sub_block, TOKU_DEFAULT_COMPRESSION_METHOD); + const size_t sub_block_header_len = sub_block_header_size(n_sub_blocks); + const size_t other_overhead = sizeof(uint32_t); //total_size + const size_t header_len = sub_block_header_len + other_overhead; + MALLOC_N(header_len + compressed_len, compressed_buf); + if (compressed_buf == nullptr) { + return ENOMEM; + } + + // compress all of the sub blocks + char *uncompressed_ptr = (char*)wb->buf; + char *compressed_ptr = compressed_buf + header_len; + compressed_len = compress_all_sub_blocks(n_sub_blocks, sub_block, uncompressed_ptr, compressed_ptr, + get_num_cores(), get_ft_pool(), TOKU_DEFAULT_COMPRESSION_METHOD); + + //total_size does NOT include itself + uint32_t total_size = compressed_len + sub_block_header_len; + // serialize the sub block header + uint32_t *ptr = (uint32_t 
*)(compressed_buf); + *ptr++ = toku_htod32(total_size); + *ptr++ = toku_htod32(n_sub_blocks); + for (int i=0; indone = 0; + + size_t size_to_write = total_size + 4; // Includes writing total_size + + { + size_t written = do_fwrite(compressed_buf, 1, size_to_write, stream); + if (written!=size_to_write) { + if (os_fwrite_fun) // if using hook to induce artificial errors (for testing) ... + r = get_maybe_error_errno(); // ... then there is no error in the stream, but there is one in errno + else + r = ferror(stream); + invariant(r!=0); + goto exit; + } + } + r = 0; +exit: + if (compressed_buf) { + toku_free(compressed_buf); + } + return r; +} + +static int bl_compressed_write(void *ptr, size_t nbytes, FILE *stream, struct wbuf *wb) { + invariant(wb->size <= MAX_UNCOMPRESSED_BUF); + size_t bytes_left = nbytes; + char *buf = (char*)ptr; + + while (bytes_left > 0) { + size_t bytes_to_copy = bytes_left; + if (wb->ndone + bytes_to_copy > wb->size) { + bytes_to_copy = wb->size - wb->ndone; + } + wbuf_nocrc_literal_bytes(wb, buf, bytes_to_copy); + if (wb->ndone == wb->size) { + //Compress, write to disk, and empty out wb + int r = bl_finish_compressed_write(stream, wb); + if (r != 0) { + errno = r; + return -1; + } + wb->ndone = 0; + } + bytes_left -= bytes_to_copy; + buf += bytes_to_copy; + } + return 0; +} + +static int bl_fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream, struct wbuf *wb, FTLOADER bl) +/* Effect: this is a wrapper for fwrite that returns 0 on success, otherwise returns an error number. + * Arguments: + * ptr the data to be writen. + * size the amount of data to be written. + * nmemb the number of units of size to be written. + * stream write the data here. + * wb where to write uncompressed data (if we're compressing) or ignore if NULL + * bl passed so we can panic the ft_loader if something goes wrong (recording the error number). + * Return value: 0 on success, an error number otherwise. 
+ */ +{ + if (!bl->compress_intermediates || !wb) { + size_t r = do_fwrite(ptr, size, nmemb, stream); + if (r!=nmemb) { + int e; + if (os_fwrite_fun) // if using hook to induce artificial errors (for testing) ... + e = get_maybe_error_errno(); // ... then there is no error in the stream, but there is one in errno + else + e = ferror(stream); + invariant(e!=0); + return e; + } + } else { + size_t num_bytes = size * nmemb; + int r = bl_compressed_write(ptr, num_bytes, stream, wb); + if (r != 0) { + return r; + } + } + return 0; +} + +static int bl_fread (void *ptr, size_t size, size_t nmemb, FILE *stream) +/* Effect: this is a wrapper for fread that returns 0 on success, otherwise returns an error number. + * Arguments: + * ptr read data into here. + * size size of data element to be read. + * nmemb number of data elements to be read. + * stream where to read the data from. + * Return value: 0 on success, an error number otherwise. + */ +{ + size_t r = fread(ptr, size, nmemb, stream); + if (r==0) { + if (feof(stream)) return EOF; + else { + do_error: ; + int e = ferror(stream); + // r == 0 && !feof && e == 0, how does this happen? 
invariant(e!=0); + return e; + } + } else if (rsize; + if ((r=bl_fwrite(&dlen, sizeof(dlen), 1, datafile, wb, bl))) return r; + if ((r=bl_fwrite(dbt->data, 1, dlen, datafile, wb, bl))) return r; + if (dataoff) + *dataoff += dlen + sizeof(dlen); + return 0; +} + +static int bl_read_dbt (/*in*/DBT *dbt, FILE *stream) +{ + int len; + { + int r; + if ((r = bl_fread(&len, sizeof(len), 1, stream))) return r; + invariant(len>=0); + } + if ((int)dbt->ulenulen=len; dbt->data=toku_xrealloc(dbt->data, len); } + { + int r; + if ((r = bl_fread(dbt->data, 1, len, stream))) return r; + } + dbt->size = len; + return 0; +} + +static int bl_read_dbt_from_dbufio (/*in*/DBT *dbt, DBUFIO_FILESET bfs, int filenum) +{ + int result = 0; + uint32_t len; + { + size_t n_read; + int r = dbufio_fileset_read(bfs, filenum, &len, sizeof(len), &n_read); + if (r!=0) { + result = r; + } else if (n_readulendata, len); + if (data==NULL) { + result = get_error_errno(); + } else { + dbt->ulen=len; + dbt->data=data; + } + } + } + if (result==0) { + size_t n_read; + int r = dbufio_fileset_read(bfs, filenum, dbt->data, len, &n_read); + if (r!=0) { + result = r; + } else if (n_readsize = len; + } + } + return result; +} + + +int loader_write_row(DBT *key, DBT *val, FIDX data, FILE *dataf, uint64_t *dataoff, struct wbuf *wb, FTLOADER bl) +/* Effect: Given a key and a val (both DBTs), write them to a file. Increment *dataoff so that it's up to date. + * Arguments: + * key, val write these. + * data the file to write them to + * dataoff a pointer to a counter that keeps track of the amount of data written so far. + * wb a pointer (possibly NULL) to buffer uncompressed output + * bl the ft_loader (passed so we can panic if needed). + * Return value: 0 on success, an error number otherwise. + */ +{ + //int klen = key->size; + //int vlen = val->size; + int r; + // we have a chance to handle the errors because when we close we can delete all the files. 
+ if ((r=bl_write_dbt(key, dataf, dataoff, wb, bl))) return r; + if ((r=bl_write_dbt(val, dataf, dataoff, wb, bl))) return r; + toku_mutex_lock(&bl->file_infos.lock); + bl->file_infos.file_infos[data.idx].n_rows++; + toku_mutex_unlock(&bl->file_infos.lock); + return 0; +} + +int loader_read_row (FILE *f, DBT *key, DBT *val) +/* Effect: Read a key value pair from a file. The DBTs must have DB_DBT_REALLOC set. + * Arguments: + * f where to read it from. + * key, val read it into these. + * bl passed so we can panic if needed. + * Return value: 0 on success, an error number otherwise. + * Requires: The DBTs must have DB_DBT_REALLOC + */ +{ + { + int r = bl_read_dbt(key, f); + if (r!=0) return r; + } + { + int r = bl_read_dbt(val, f); + if (r!=0) return r; + } + return 0; +} + +static int loader_read_row_from_dbufio (DBUFIO_FILESET bfs, int filenum, DBT *key, DBT *val) +/* Effect: Read a key value pair from a file. The DBTs must have DB_DBT_REALLOC set. + * Arguments: + * f where to read it from. + * key, val read it into these. + * bl passed so we can panic if needed. + * Return value: 0 on success, an error number otherwise. + * Requires: The DBTs must have DB_DBT_REALLOC + */ +{ + { + int r = bl_read_dbt_from_dbufio(key, bfs, filenum); + if (r!=0) return r; + } + { + int r = bl_read_dbt_from_dbufio(val, bfs, filenum); + if (r!=0) return r; + } + return 0; +} + + +int init_rowset (struct rowset *rows, uint64_t memory_budget) +/* Effect: Initialize a collection of rows to be empty. */ +{ + int result = 0; + + rows->memory_budget = memory_budget; + + rows->rows = NULL; + rows->data = NULL; + + rows->n_rows = 0; + rows->n_rows_limit = 100; + MALLOC_N(rows->n_rows_limit, rows->rows); + if (rows->rows == NULL) + result = get_error_errno(); + rows->n_bytes = 0; + rows->n_bytes_limit = (size_factor==1) ? 
1024*size_factor*16 : memory_budget; + //printf("%s:%d n_bytes_limit=%ld (size_factor based limit=%d)\n", __FILE__, __LINE__, rows->n_bytes_limit, 1024*size_factor*16); + rows->data = (char *) toku_malloc(rows->n_bytes_limit); + if (rows->rows==NULL || rows->data==NULL) { + if (result == 0) + result = get_error_errno(); + toku_free(rows->rows); + toku_free(rows->data); + rows->rows = NULL; + rows->data = NULL; + } + return result; +} + +static void zero_rowset (struct rowset *rows) { + memset(rows, 0, sizeof(*rows)); +} + +void destroy_rowset (struct rowset *rows) { + if ( rows ) { + toku_free(rows->data); + toku_free(rows->rows); + zero_rowset(rows); + } +} + +static int row_wont_fit (struct rowset *rows, size_t size) +/* Effect: Return nonzero if adding a row of size SIZE would be too big (bigger than the buffer limit) */ +{ + // Account for the memory used by the data and also the row structures. + size_t memory_in_use = (rows->n_rows*sizeof(struct row) + + rows->n_bytes); + return (rows->memory_budget < memory_in_use + size); +} + +int add_row (struct rowset *rows, DBT *key, DBT *val) +/* Effect: add a row to a collection. 
*/ +{ + int result = 0; + if (rows->n_rows >= rows->n_rows_limit) { + struct row *old_rows = rows->rows; + size_t old_n_rows_limit = rows->n_rows_limit; + rows->n_rows_limit *= 2; + REALLOC_N(rows->n_rows_limit, rows->rows); + if (rows->rows == NULL) { + result = get_error_errno(); + rows->rows = old_rows; + rows->n_rows_limit = old_n_rows_limit; + return result; + } + } + size_t off = rows->n_bytes; + size_t next_off = off + key->size + val->size; + + struct row newrow; + memset(&newrow, 0, sizeof newrow); newrow.off = off; newrow.klen = key->size; newrow.vlen = val->size; + + rows->rows[rows->n_rows++] = newrow; + if (next_off > rows->n_bytes_limit) { + size_t old_n_bytes_limit = rows->n_bytes_limit; + while (next_off > rows->n_bytes_limit) { + rows->n_bytes_limit = rows->n_bytes_limit*2; + } + invariant(next_off <= rows->n_bytes_limit); + char *old_data = rows->data; + REALLOC_N(rows->n_bytes_limit, rows->data); + if (rows->data == NULL) { + result = get_error_errno(); + rows->data = old_data; + rows->n_bytes_limit = old_n_bytes_limit; + return result; + } + } + memcpy(rows->data+off, key->data, key->size); + memcpy(rows->data+off+key->size, val->data, val->size); + rows->n_bytes = next_off; + return result; +} + +static int process_primary_rows (FTLOADER bl, struct rowset *primary_rowset); + +static int finish_primary_rows_internal (FTLOADER bl) +// now we have been asked to finish up. +// Be sure to destroy the rowsets. +{ + int *MALLOC_N(bl->N, ra); + if (ra==NULL) return get_error_errno(); + + for (int i = 0; i < bl->N; i++) { + //printf("%s:%d extractor finishing index %d with %ld rows\n", __FILE__, __LINE__, i, rows->n_rows); + ra[i] = sort_and_write_rows(bl->rows[i], &(bl->fs[i]), bl, i, bl->dbs[i], bl->bt_compare_funs[i]); + zero_rowset(&bl->rows[i]); + } + + // accept any of the error codes (in this case, the last one). 
+ int r = 0; + for (int i = 0; i < bl->N; i++) + if (ra[i] != 0) + r = ra[i]; + + toku_free(ra); + return r; +} + +static int finish_primary_rows (FTLOADER bl) { + return finish_primary_rows_internal (bl); +} + +static void* extractor_thread (void *blv) { + FTLOADER bl = (FTLOADER)blv; + int r = 0; + while (1) { + void *item; + { + int rq = toku_queue_deq(bl->primary_rowset_queue, &item, NULL, NULL); + if (rq==EOF) break; + invariant(rq==0); // other errors are arbitrarily bad. + } + struct rowset *primary_rowset = (struct rowset *)item; + + //printf("%s:%d extractor got %ld rows\n", __FILE__, __LINE__, primary_rowset.n_rows); + + // Now we have some rows to output + { + r = process_primary_rows(bl, primary_rowset); + if (r) + ft_loader_set_panic(bl, r, false, 0, nullptr, nullptr); + } + } + + //printf("%s:%d extractor finishing\n", __FILE__, __LINE__); + if (r == 0) { + r = finish_primary_rows(bl); + if (r) + ft_loader_set_panic(bl, r, false, 0, nullptr, nullptr); + + } + return NULL; +} + +static void enqueue_for_extraction (FTLOADER bl) { + //printf("%s:%d enqueing %ld items\n", __FILE__, __LINE__, bl->primary_rowset.n_rows); + struct rowset *XMALLOC(enqueue_me); + *enqueue_me = bl->primary_rowset; + zero_rowset(&bl->primary_rowset); + int r = toku_queue_enq(bl->primary_rowset_queue, (void*)enqueue_me, 1, NULL); + resource_assert_zero(r); +} + +static int loader_do_put(FTLOADER bl, + DBT *pkey, + DBT *pval) +{ + int result; + result = add_row(&bl->primary_rowset, pkey, pval); + if (result == 0 && row_wont_fit(&bl->primary_rowset, 0)) { + // queue the rows for further processing by the extractor thread. 
+ //printf("%s:%d please extract %ld\n", __FILE__, __LINE__, bl->primary_rowset.n_rows); + enqueue_for_extraction(bl); + { + int r = init_rowset(&bl->primary_rowset, memory_per_rowset_during_extract(bl)); + // bl->primary_rowset will get destroyed by toku_ft_loader_abort + if (r != 0) + result = r; + } + } + return result; +} + +static int +finish_extractor (FTLOADER bl) { + //printf("%s:%d now finishing extraction\n", __FILE__, __LINE__); + + int rval; + + if (bl->primary_rowset.n_rows>0) { + enqueue_for_extraction(bl); + } else { + destroy_rowset(&bl->primary_rowset); + } + //printf("%s:%d please finish extraction\n", __FILE__, __LINE__); + { + int r = toku_queue_eof(bl->primary_rowset_queue); + invariant(r==0); + } + //printf("%s:%d joining\n", __FILE__, __LINE__); + { + void *toku_pthread_retval; + int r = toku_pthread_join(bl->extractor_thread, &toku_pthread_retval); + resource_assert_zero(r); + invariant(toku_pthread_retval == NULL); + bl->extractor_live = false; + } + { + int r = toku_queue_destroy(bl->primary_rowset_queue); + invariant(r==0); + bl->primary_rowset_queue = nullptr; + } + + rval = ft_loader_fi_close_all(&bl->file_infos); + + //printf("%s:%d joined\n", __FILE__, __LINE__); + return rval; +} + +static const DBT zero_dbt = {0,0,0,0}; + +static DBT make_dbt (void *data, uint32_t size) { + DBT result = zero_dbt; + result.data = data; + result.size = size; + return result; +} + +#define inc_error_count() error_count++ + +static TXNID leafentry_xid(FTLOADER bl, int which_db) { + TXNID le_xid = TXNID_NONE; + if (bl->root_xids_that_created && bl->load_root_xid != bl->root_xids_that_created[which_db]) + le_xid = bl->load_root_xid; + return le_xid; +} + +size_t ft_loader_leafentry_size(size_t key_size, size_t val_size, TXNID xid) { + size_t s = 0; + if (xid == TXNID_NONE) + s = LE_CLEAN_MEMSIZE(val_size) + key_size + sizeof(uint32_t); + else + s = LE_MVCC_COMMITTED_MEMSIZE(val_size) + key_size + sizeof(uint32_t); + return s; +} + +static int 
process_primary_rows_internal (FTLOADER bl, struct rowset *primary_rowset) +// process the rows in primary_rowset, and then destroy the rowset. +// if FLUSH is true then write all the buffered rows out. +// if primary_rowset is NULL then treat it as empty. +{ + int error_count = 0; + int *XMALLOC_N(bl->N, error_codes); + + // If we parallelize the first for loop, dest_keys/dest_vals init&cleanup need to move inside + DBT_ARRAY dest_keys; + DBT_ARRAY dest_vals; + toku_dbt_array_init(&dest_keys, 1); + toku_dbt_array_init(&dest_vals, 1); + + for (int i = 0; i < bl->N; i++) { + unsigned int klimit,vlimit; // maximum row sizes. + toku_ft_get_maximum_advised_key_value_lengths(&klimit, &vlimit); + + error_codes[i] = 0; + struct rowset *rows = &(bl->rows[i]); + struct merge_fileset *fs = &(bl->fs[i]); + ft_compare_func compare = bl->bt_compare_funs[i]; + + // Don't parallelize this loop, or we have to lock access to add_row() which would be a lot of overehad. + // Also this way we can reuse the DB_DBT_REALLOC'd values inside dest_keys/dest_vals without a race. 
+ for (size_t prownum=0; prownumn_rows; prownum++) { + if (error_count) break; + + struct row *prow = &primary_rowset->rows[prownum]; + DBT pkey = zero_dbt; + DBT pval = zero_dbt; + pkey.data = primary_rowset->data + prow->off; + pkey.size = prow->klen; + pval.data = primary_rowset->data + prow->off + prow->klen; + pval.size = prow->vlen; + + + DBT_ARRAY key_array; + DBT_ARRAY val_array; + if (bl->dbs[i] != bl->src_db) { + int r = bl->generate_row_for_put(bl->dbs[i], bl->src_db, &dest_keys, &dest_vals, &pkey, &pval); + if (r != 0) { + error_codes[i] = r; + inc_error_count(); + break; + } + paranoid_invariant(dest_keys.size <= dest_keys.capacity); + paranoid_invariant(dest_vals.size <= dest_vals.capacity); + paranoid_invariant(dest_keys.size == dest_vals.size); + + key_array = dest_keys; + val_array = dest_vals; + } else { + key_array.size = key_array.capacity = 1; + key_array.dbts = &pkey; + + val_array.size = val_array.capacity = 1; + val_array.dbts = &pval; + } + for (uint32_t row = 0; row < key_array.size; row++) { + DBT *dest_key = &key_array.dbts[row]; + DBT *dest_val = &val_array.dbts[row]; + if (dest_key->size > klimit) { + error_codes[i] = EINVAL; + fprintf(stderr, "Key too big (keysize=%d bytes, limit=%d bytes)\n", dest_key->size, klimit); + inc_error_count(); + break; + } + if (dest_val->size > vlimit) { + error_codes[i] = EINVAL; + fprintf(stderr, "Row too big (rowsize=%d bytes, limit=%d bytes)\n", dest_val->size, vlimit); + inc_error_count(); + break; + } + + bl->extracted_datasizes[i] += ft_loader_leafentry_size(dest_key->size, dest_val->size, leafentry_xid(bl, i)); + + if (row_wont_fit(rows, dest_key->size + dest_val->size)) { + //printf("%s:%d rows.n_rows=%ld rows.n_bytes=%ld\n", __FILE__, __LINE__, rows->n_rows, rows->n_bytes); + int r = sort_and_write_rows(*rows, fs, bl, i, bl->dbs[i], compare); // cannot spawn this because of the race on rows. 
If we were to create a new rows, and if sort_and_write_rows were to destroy the rows it is passed, we could spawn it, however. + // If we do spawn this, then we must account for the additional storage in the memory_per_rowset() function. + init_rowset(rows, memory_per_rowset_during_extract(bl)); // we passed the contents of rows to sort_and_write_rows. + if (r != 0) { + error_codes[i] = r; + inc_error_count(); + break; + } + } + int r = add_row(rows, dest_key, dest_val); + if (r != 0) { + error_codes[i] = r; + inc_error_count(); + break; + } + } + } + } + toku_dbt_array_destroy(&dest_keys); + toku_dbt_array_destroy(&dest_vals); + + destroy_rowset(primary_rowset); + toku_free(primary_rowset); + int r = 0; + if (error_count > 0) { + for (int i=0; iN; i++) { + if (error_codes[i]) { + r = error_codes[i]; + ft_loader_set_panic(bl, r, false, i, nullptr, nullptr); + } + } + invariant(r); // found the error + } + toku_free(error_codes); + return r; +} + +static int process_primary_rows (FTLOADER bl, struct rowset *primary_rowset) { + int r = process_primary_rows_internal (bl, primary_rowset); + return r; +} + +int toku_ft_loader_put (FTLOADER bl, DBT *key, DBT *val) +/* Effect: Put a key-value pair into the ft loader. Called by DB_LOADER->put(). + * Return value: 0 on success, an error number otherwise. + */ +{ + if (!bl->allow_puts || ft_loader_get_error(&bl->error_callback)) + return EINVAL; // previous panic + bl->n_rows++; + return loader_do_put(bl, key, val); +} + +void toku_ft_loader_set_n_rows(FTLOADER bl, uint64_t n_rows) { + bl->n_rows = n_rows; +} + +uint64_t toku_ft_loader_get_n_rows(FTLOADER bl) { + return bl->n_rows; +} + +int merge_row_arrays_base (struct row dest[/*an+bn*/], struct row a[/*an*/], int an, struct row b[/*bn*/], int bn, + int which_db, DB *dest_db, ft_compare_func compare, + + FTLOADER bl, + struct rowset *rowset) +/* Effect: Given two arrays of rows, a and b, merge them using the comparison function, and write them into dest. 
+ * This function is suitable for use in a mergesort. + * If a pair of duplicate keys is ever noticed, then call the error_callback function (if it exists), and return DB_KEYEXIST. + * Arguments: + * dest write the rows here + * a,b the rows being merged + * an,bn the lenth of a and b respectively. + * dest_db We need the dest_db to run the comparison function. + * compare We need the compare function for the dest_db. + */ +{ + while (an>0 && bn>0) { + DBT akey; memset(&akey, 0, sizeof akey); akey.data=rowset->data+a->off; akey.size=a->klen; + DBT bkey; memset(&bkey, 0, sizeof bkey); bkey.data=rowset->data+b->off; bkey.size=b->klen; + + int compare_result = compare(dest_db, &akey, &bkey); + if (compare_result==0) { + if (bl->error_callback.error_callback) { + DBT aval; memset(&aval, 0, sizeof aval); aval.data=rowset->data + a->off + a->klen; aval.size = a->vlen; + ft_loader_set_error(&bl->error_callback, DB_KEYEXIST, dest_db, which_db, &akey, &aval); + } + return DB_KEYEXIST; + } else if (compare_result<0) { + // a is smaller + *dest = *a; + dest++; a++; an--; + } else { + *dest = *b; + dest++; b++; bn--; + } + } + while (an>0) { + *dest = *a; + dest++; a++; an--; + } + while (bn>0) { + *dest = *b; + dest++; b++; bn--; + } + return 0; +} + +static int binary_search (int *location, + const DBT *key, + struct row a[/*an*/], int an, + int abefore, + int which_db, DB *dest_db, ft_compare_func compare, + FTLOADER bl, + struct rowset *rowset) +// Given a sorted array of rows a, and a dbt key, find the first row in a that is > key. +// If no such row exists, then consider the result to be equal to an. +// On success store abefore+the index into *location +// Return 0 on success. +// Return DB_KEYEXIST if we find a row that is equal to key. 
+{ + if (an==0) { + *location = abefore; + return 0; + } else { + int a2 = an/2; + DBT akey = make_dbt(rowset->data+a[a2].off, a[a2].klen); + int compare_result = compare(dest_db, key, &akey); + if (compare_result==0) { + if (bl->error_callback.error_callback) { + DBT aval = make_dbt(rowset->data + a[a2].off + a[a2].klen, a[a2].vlen); + ft_loader_set_error(&bl->error_callback, DB_KEYEXIST, dest_db, which_db, &akey, &aval); + } + return DB_KEYEXIST; + } else if (compare_result<0) { + // key is before a2 + if (an==1) { + *location = abefore; + return 0; + } else { + return binary_search(location, key, + a, a2, + abefore, + which_db, dest_db, compare, bl, rowset); + } + } else { + // key is after a2 + if (an==1) { + *location = abefore + 1; + return 0; + } else { + return binary_search(location, key, + a+a2, an-a2, + abefore+a2, + which_db, dest_db, compare, bl, rowset); + } + } + } +} + + +#define SWAP(typ,x,y) { typ tmp = x; x=y; y=tmp; } + +static int merge_row_arrays (struct row dest[/*an+bn*/], struct row a[/*an*/], int an, struct row b[/*bn*/], int bn, + int which_db, DB *dest_db, ft_compare_func compare, + FTLOADER bl, + struct rowset *rowset) +/* Effect: Given two sorted arrays of rows, a and b, merge them using the comparison function, and write them into dest. + * Arguments: + * dest write the rows here + * a,b the rows being merged + * an,bn the lenth of a and b respectively. + * dest_db We need the dest_db to run the comparison function. + * compare We need the compare function for the dest_db. + */ +{ + if (an + bn < 10000) { + return merge_row_arrays_base(dest, a, an, b, bn, which_db, dest_db, compare, bl, rowset); + } + if (an < bn) { + SWAP(struct row *,a, b) + SWAP(int ,an,bn) + } + // an >= bn + int a2 = an/2; + DBT akey = make_dbt(rowset->data+a[a2].off, a[a2].klen); + int b2 = 0; // initialize to zero so we can add the answer in. 
+ { + int r = binary_search(&b2, &akey, b, bn, 0, which_db, dest_db, compare, bl, rowset); + if (r!=0) return r; // for example if we found a duplicate, called the error_callback, and now we return an error code. + } + int ra, rb; + ra = merge_row_arrays(dest, a, a2, b, b2, which_db, dest_db, compare, bl, rowset); + rb = merge_row_arrays(dest+a2+b2, a+a2, an-a2, b+b2, bn-b2, which_db, dest_db, compare, bl, rowset); + if (ra!=0) return ra; + else return rb; +} + +int mergesort_row_array (struct row rows[/*n*/], int n, int which_db, DB *dest_db, ft_compare_func compare, FTLOADER bl, struct rowset *rowset) +/* Sort an array of rows (using mergesort). + * Arguments: + * rows sort this array of rows. + * n the length of the array. + * dest_db used by the comparison function. + * compare the compare function + */ +{ + if (n<=1) return 0; // base case is sorted + int mid = n/2; + int r1, r2; + r1 = mergesort_row_array (rows, mid, which_db, dest_db, compare, bl, rowset); + + // Don't spawn this one explicitly + r2 = mergesort_row_array (rows+mid, n-mid, which_db, dest_db, compare, bl, rowset); + + if (r1!=0) return r1; + if (r2!=0) return r2; + + struct row *MALLOC_N(n, tmp); + if (tmp == NULL) return get_error_errno(); + { + int r = merge_row_arrays(tmp, rows, mid, rows+mid, n-mid, which_db, dest_db, compare, bl, rowset); + if (r!=0) { + toku_free(tmp); + return r; + } + } + memcpy(rows, tmp, sizeof(*tmp)*n); + toku_free(tmp); + return 0; +} + +// C function for testing mergesort_row_array +int ft_loader_mergesort_row_array (struct row rows[/*n*/], int n, int which_db, DB *dest_db, ft_compare_func compare, FTLOADER bl, struct rowset *rowset) { + return mergesort_row_array (rows, n, which_db, dest_db, compare, bl, rowset); +} + +static int sort_rows (struct rowset *rows, int which_db, DB *dest_db, ft_compare_func compare, + FTLOADER bl) +/* Effect: Sort a collection of rows. + * If any duplicates are found, then call the error_callback function and return non zero. 
+ * Otherwise return 0. + * Arguments: + * rowset the */ +{ + return mergesort_row_array(rows->rows, rows->n_rows, which_db, dest_db, compare, bl, rows); +} + +/* filesets Maintain a collection of files. Typically these files are each individually sorted, and we will merge them. + * These files have two parts, one is for the data rows, and the other is a collection of offsets so we an more easily parallelize the manipulation (e.g., by allowing us to find the offset of the ith row quickly). */ + +void init_merge_fileset (struct merge_fileset *fs) +/* Effect: Initialize a fileset */ +{ + fs->have_sorted_output = false; + fs->sorted_output = FIDX_NULL; + fs->prev_key = zero_dbt; + fs->prev_key.flags = DB_DBT_REALLOC; + + fs->n_temp_files = 0; + fs->n_temp_files_limit = 0; + fs->data_fidxs = NULL; +} + +void destroy_merge_fileset (struct merge_fileset *fs) +/* Effect: Destroy a fileset. */ +{ + if ( fs ) { + toku_destroy_dbt(&fs->prev_key); + fs->n_temp_files = 0; + fs->n_temp_files_limit = 0; + toku_free(fs->data_fidxs); + fs->data_fidxs = NULL; + } +} + + +static int extend_fileset (FTLOADER bl, struct merge_fileset *fs, FIDX*ffile) +/* Effect: Add two files (one for data and one for idx) to the fileset. + * Arguments: + * bl the ft_loader (needed to panic if anything goes wrong, and also to get the temp_file_template. 
+ * fs the fileset + * ffile the data file (which will be open) + * fidx the index file (which will be open) + */ +{ + FIDX sfile; + int r; + r = ft_loader_open_temp_file(bl, &sfile); if (r!=0) return r; + + if (fs->n_temp_files+1 > fs->n_temp_files_limit) { + fs->n_temp_files_limit = (fs->n_temp_files+1)*2; + XREALLOC_N(fs->n_temp_files_limit, fs->data_fidxs); + } + fs->data_fidxs[fs->n_temp_files] = sfile; + fs->n_temp_files++; + + *ffile = sfile; + return 0; +} + +// RFP maybe this should be buried in the ft_loader struct +static toku_mutex_t update_progress_lock = TOKU_MUTEX_INITIALIZER; + +static int update_progress (int N, + FTLOADER bl, + const char *UU(message)) +{ + // Must protect the increment and the call to the poll_function. + toku_mutex_lock(&update_progress_lock); + bl->progress+=N; + + int result; + if (bl->progress_callback_result == 0) { + //printf(" %20s: %d ", message, bl->progress); + result = ft_loader_call_poll_function(&bl->poll_callback, (float)bl->progress/(float)PROGRESS_MAX); + if (result!=0) { + bl->progress_callback_result = result; + } + } else { + result = bl->progress_callback_result; + } + toku_mutex_unlock(&update_progress_lock); + return result; +} + + +static int write_rowset_to_file (FTLOADER bl, FIDX sfile, const struct rowset rows) { + int r = 0; + // Allocate a buffer if we're compressing intermediates. 
+ char *uncompressed_buffer = nullptr; + if (bl->compress_intermediates) { + MALLOC_N(MAX_UNCOMPRESSED_BUF, uncompressed_buffer); + if (uncompressed_buffer == nullptr) { + return ENOMEM; + } + } + struct wbuf wb; + wbuf_init(&wb, uncompressed_buffer, MAX_UNCOMPRESSED_BUF); + + FILE *sstream = toku_bl_fidx2file(bl, sfile); + for (size_t i=0; icompress_intermediates && wb.ndone > 0) { + r = bl_finish_compressed_write(sstream, &wb); + if (r != 0) { + goto exit; + } + } + r = 0; +exit: + if (uncompressed_buffer) { + toku_free(uncompressed_buffer); + } + return r; +} + + +int sort_and_write_rows (struct rowset rows, struct merge_fileset *fs, FTLOADER bl, int which_db, DB *dest_db, ft_compare_func compare) +/* Effect: Given a rowset, sort it and write it to a temporary file. + * Note: The loader maintains for each index the most recently written-to file, as well as the DBT for the last key written into that file. + * If this rowset is sorted and all greater than that dbt, then we append to the file (skipping the sort, and reducing the number of temporary files). + * Arguments: + * rows the rowset + * fs the fileset into which the sorted data will be added + * bl the ft_loader + * dest_db the DB, needed for the comparison function. + * compare The comparison function. + * Returns 0 on success, otherwise an error number. + * Destroy the rowset after finishing it. + * Note: There is no sense in trying to calculate progress by this function since it's done concurrently with the loader->put operation. 
+ * Note first time called: invariant: fs->have_sorted_output == false + */ +{ + //printf(" sort_and_write use %d progress=%d fin at %d\n", progress_allocation, bl->progress, bl->progress+progress_allocation); + + // TODO: erase the files, and deal with all the cleanup on error paths + //printf("%s:%d sort_rows n_rows=%ld\n", __FILE__, __LINE__, rows->n_rows); + //bl_time_t before_sort = bl_time_now(); + + int result; + if (rows.n_rows == 0) { + result = 0; + } else { + result = sort_rows(&rows, which_db, dest_db, compare, bl); + + //bl_time_t after_sort = bl_time_now(); + + if (result == 0) { + DBT min_rowset_key = make_dbt(rows.data+rows.rows[0].off, rows.rows[0].klen); + if (fs->have_sorted_output && compare(dest_db, &fs->prev_key, &min_rowset_key) < 0) { + // write everything to the same output if the max key in the temp file (prev_key) is < min of the sorted rowset + result = write_rowset_to_file(bl, fs->sorted_output, rows); + if (result == 0) { + // set the max key in the temp file to the max key in the sorted rowset + result = toku_dbt_set(rows.rows[rows.n_rows-1].klen, rows.data + rows.rows[rows.n_rows-1].off, &fs->prev_key, NULL); + } + } else { + // write the sorted rowset into a new temp file + if (fs->have_sorted_output) { + fs->have_sorted_output = false; + result = ft_loader_fi_close(&bl->file_infos, fs->sorted_output, true); + } + if (result == 0) { + FIDX sfile = FIDX_NULL; + result = extend_fileset(bl, fs, &sfile); + if (result == 0) { + result = write_rowset_to_file(bl, sfile, rows); + if (result == 0) { + fs->have_sorted_output = true; fs->sorted_output = sfile; + // set the max key in the temp file to the max key in the sorted rowset + result = toku_dbt_set(rows.rows[rows.n_rows-1].klen, rows.data + rows.rows[rows.n_rows-1].off, &fs->prev_key, NULL); + } + } + } + // Note: if result == 0 then invariant fs->have_sorted_output == true + } + } + } + + destroy_rowset(&rows); + + //bl_time_t after_write = bl_time_now(); + + return result; +} + +// C 
function for testing sort_and_write_rows +int ft_loader_sort_and_write_rows (struct rowset *rows, struct merge_fileset *fs, FTLOADER bl, int which_db, DB *dest_db, ft_compare_func compare) { + return sort_and_write_rows (*rows, fs, bl, which_db, dest_db, compare); +} + +int toku_merge_some_files_using_dbufio (const bool to_q, FIDX dest_data, QUEUE q, int n_sources, DBUFIO_FILESET bfs, FIDX srcs_fidxs[/*n_sources*/], FTLOADER bl, int which_db, DB *dest_db, ft_compare_func compare, int progress_allocation) +/* Effect: Given an array of FILE*'s each containing sorted, merge the data and write it to an output. All the files remain open after the merge. + * This merge is performed in one pass, so don't pass too many files in. If you need a tree of merges do it elsewhere. + * If TO_Q is true then we write rowsets into queue Q. Otherwise we write into dest_data. + * Modifies: May modify the arrays of files (but if modified, it must be a permutation so the caller can use that array to close everything.) + * Requires: The number of sources is at least one, and each of the input files must have at least one row in it. + * Arguments: + * to_q boolean indicating that output is queue (true) or a file (false) + * dest_data where to write the sorted data + * q where to write the sorted data + * n_sources how many source files. + * srcs_data the array of source data files. + * bl the ft_loader. + * dest_db the destination DB (used in the comparison function). + * Return value: 0 on success, otherwise an error number. + * The fidxs are not closed by this function. + */ +{ + int result = 0; + + FILE *dest_stream = to_q ? 
NULL : toku_bl_fidx2file(bl, dest_data); + + //printf(" merge_some_files progress=%d fin at %d\n", bl->progress, bl->progress+progress_allocation); + DBT keys[n_sources]; + DBT vals[n_sources]; + uint64_t dataoff[n_sources]; + DBT zero = zero_dbt; zero.flags=DB_DBT_REALLOC; + + for (int i=0; ierror_callback); + if (r!=0) result = r; + } + + uint64_t n_rows = 0; + if (result==0) { + // load pqueue with first value from each source + for (int i=0; ifile_infos.lock); + n_rows += bl->file_infos.file_infos[srcs_fidxs[i].idx].n_rows; + toku_mutex_unlock(&bl->file_infos.lock); + } + } + uint64_t n_rows_done = 0; + + struct rowset *output_rowset = NULL; + if (result==0 && to_q) { + XMALLOC(output_rowset); // freed in cleanup + int r = init_rowset(output_rowset, memory_per_rowset_during_merge(bl, n_sources, to_q)); + if (r!=0) result = r; + } + + // Allocate a buffer if we're compressing intermediates. + char *uncompressed_buffer = nullptr; + struct wbuf wb; + if (bl->compress_intermediates && !to_q) { + MALLOC_N(MAX_UNCOMPRESSED_BUF, uncompressed_buffer); + if (uncompressed_buffer == nullptr) { + result = ENOMEM; + } + } + wbuf_init(&wb, uncompressed_buffer, MAX_UNCOMPRESSED_BUF); + + //printf(" n_rows=%ld\n", n_rows); + while (result==0 && pqueue_size(pq)>0) { + int mini; + { + // get the minimum + pqueue_node_t *node; + int r = pqueue_pop(pq, &node); + if (r!=0) { + result = r; + invariant(0); + break; + } + mini = node->i; + } + if (to_q) { + if (row_wont_fit(output_rowset, keys[mini].size + vals[mini].size)) { + { + int r = toku_queue_enq(q, (void*)output_rowset, 1, NULL); + if (r!=0) { + result = r; + break; + } + } + XMALLOC(output_rowset); // freed in cleanup + { + int r = init_rowset(output_rowset, memory_per_rowset_during_merge(bl, n_sources, to_q)); + if (r!=0) { + result = r; + break; + } + } + } + { + int r = add_row(output_rowset, &keys[mini], &vals[mini]); + if (r!=0) { + result = r; + break; + } + } + } else { + // write it to the dest file + int r = 
loader_write_row(&keys[mini], &vals[mini], dest_data, dest_stream, &dataoff[mini], &wb, bl); + if (r!=0) { + result = r; + break; + } + } + + { + // read next row from file that just sourced min value + int r = loader_read_row_from_dbufio(bfs, mini, &keys[mini], &vals[mini]); + if (r!=0) { + if (r==EOF) { + // on feof, queue size permanently smaller + toku_free(keys[mini].data); keys[mini].data = NULL; + toku_free(vals[mini].data); vals[mini].data = NULL; + } else { + fprintf(stderr, "%s:%d r=%d errno=%d bfs=%p mini=%d\n", __FILE__, __LINE__, r, get_maybe_error_errno(), bfs, mini); + dbufio_print(bfs); + result = r; + break; + } + } else { + // insert value into queue (re-populate queue) + pq_nodes[mini].key = &keys[mini]; + r = pqueue_insert(pq, &pq_nodes[mini]); + if (r!=0) { + // Note: This error path tested by loader-dup-test1.tdbrun (and by loader-dup-test4) + result = r; + // printf("%s:%d returning\n", __FILE__, __LINE__); + break; + } + } + } + + n_rows_done++; + const uint64_t rows_per_report = size_factor*1024; + if (n_rows_done%rows_per_report==0) { + // need to update the progress. + double fraction_of_remaining_we_just_did = (double)rows_per_report / (double)(n_rows - n_rows_done + rows_per_report); + invariant(0<= fraction_of_remaining_we_just_did && fraction_of_remaining_we_just_did<=1); + int progress_just_done = fraction_of_remaining_we_just_did * progress_allocation; + progress_allocation -= progress_just_done; + // ignore the result from update_progress here, we'll call update_progress again below, which will give us the nonzero result. 
+ int r = update_progress(progress_just_done, bl, "in file merge"); + if (0) printf("%s:%d Progress=%d\n", __FILE__, __LINE__, r); + } + } + if (result == 0 && uncompressed_buffer != nullptr && wb.ndone > 0) { + result = bl_finish_compressed_write(dest_stream, &wb); + } + + if (result==0 && to_q) { + int r = toku_queue_enq(q, (void*)output_rowset, 1, NULL); + if (r!=0) + result = r; + else + output_rowset = NULL; + } + + // cleanup + if (uncompressed_buffer) { + toku_free(uncompressed_buffer); + } + for (int i=0; icompress_intermediates); + if (r!=0) { result = r; } + } + + if (result==0) { + int r = toku_merge_some_files_using_dbufio (to_q, dest_data, q, n_sources, bfs, srcs_fidxs, bl, which_db, dest_db, compare, progress_allocation); + if (r!=0) { result = r; } + } + + if (bfs!=NULL) { + if (result != 0) + (void) panic_dbufio_fileset(bfs, result); + int r = destroy_dbufio_fileset(bfs); + if (r!=0 && result==0) result=r; + bfs = NULL; + } + if (fds!=NULL) { + toku_free(fds); + fds = NULL; + } + return result; +} + +static int int_min (int a, int b) +{ + if (a1) { + N = (N+B-1)/B; + result++; + } + return result; +} + +int merge_files (struct merge_fileset *fs, + FTLOADER bl, + // These are needed for the comparison function and error callback. + int which_db, DB *dest_db, ft_compare_func compare, + int progress_allocation, + // Write rowsets into this queue. + QUEUE output_q + ) +/* Effect: Given a fileset, merge all the files writing all the answers into a queue. + * All the files in fs, and any temporary files will be closed and unlinked (and the fileset will be empty) + * Return value: 0 on success, otherwise an error number. + * On error *fs will contain no open files. All the files (including any temporary files) will be closed and unlinked. + * (however the fs will still need to be deallocated.) 
+ */ +{ + //printf(" merge_files %d files\n", fs->n_temp_files); + //printf(" merge_files use %d progress=%d fin at %d\n", progress_allocation, bl->progress, bl->progress+progress_allocation); + const int final_mergelimit = (size_factor == 1) ? 4 : merge_fanin(bl, true); // try for a merge to the leaf level + const int earlier_mergelimit = (size_factor == 1) ? 4 : merge_fanin(bl, false); // try for a merge at nonleaf. + int n_passes_left = (fs->n_temp_files<=final_mergelimit) + ? 1 + : 1+n_passes((fs->n_temp_files+final_mergelimit-1)/final_mergelimit, earlier_mergelimit); + // printf("%d files, %d on last pass, %d on earlier passes, %d passes\n", fs->n_temp_files, final_mergelimit, earlier_mergelimit, n_passes_left); + int result = 0; + while (fs->n_temp_files > 0) { + int progress_allocation_for_this_pass = progress_allocation/n_passes_left; + progress_allocation -= progress_allocation_for_this_pass; + //printf("%s:%d n_passes_left=%d progress_allocation_for_this_pass=%d\n", __FILE__, __LINE__, n_passes_left, progress_allocation_for_this_pass); + + invariant(fs->n_temp_files>0); + struct merge_fileset next_file_set; + bool to_queue = (bool)(fs->n_temp_files <= final_mergelimit); + init_merge_fileset(&next_file_set); + while (fs->n_temp_files>0) { + // grab some files and merge them. + int n_to_merge = int_min(to_queue?final_mergelimit:earlier_mergelimit, fs->n_temp_files); + + // We are about to do n_to_merge/n_temp_files of the remaining for this pass. 
+ int progress_allocation_for_this_subpass = progress_allocation_for_this_pass * (double)n_to_merge / (double)fs->n_temp_files; + // printf("%s:%d progress_allocation_for_this_subpass=%d n_temp_files=%d b=%llu\n", __FILE__, __LINE__, progress_allocation_for_this_subpass, fs->n_temp_files, (long long unsigned) memory_per_rowset_during_merge(bl, n_to_merge, to_queue)); + progress_allocation_for_this_pass -= progress_allocation_for_this_subpass; + + //printf("%s:%d merging\n", __FILE__, __LINE__); + FIDX merged_data = FIDX_NULL; + + FIDX *XMALLOC_N(n_to_merge, data_fidxs); + for (int i=0; in_temp_files -1 -i; + FIDX fidx = fs->data_fidxs[idx]; + result = ft_loader_fi_reopen(&bl->file_infos, fidx, "r"); + if (result) break; + data_fidxs[i] = fidx; + } + if (result==0 && !to_queue) { + result = extend_fileset(bl, &next_file_set, &merged_data); + } + + if (result==0) { + result = merge_some_files(to_queue, merged_data, output_q, n_to_merge, data_fidxs, bl, which_db, dest_db, compare, progress_allocation_for_this_subpass); + // if result!=0, fall through + if (result==0) { + /*nothing*/;// this is gratuitous, but we need something to give code coverage tools to help us know that it's important to distinguish between result==0 and result!=0 + } + } + + //printf("%s:%d merged\n", __FILE__, __LINE__); + for (int i=0; ifile_infos, data_fidxs[i], true); + if (r!=0 && result==0) result = r; + } + { + int r = ft_loader_fi_unlink(&bl->file_infos, data_fidxs[i]); + if (r!=0 && result==0) result = r; + } + data_fidxs[i] = FIDX_NULL; + } + } + + fs->n_temp_files -= n_to_merge; + if (!to_queue && !fidx_is_null(merged_data)) { + int r = ft_loader_fi_close(&bl->file_infos, merged_data, true); + if (r!=0 && result==0) result = r; + } + toku_free(data_fidxs); + + if (result!=0) break; + } + + destroy_merge_fileset(fs); + *fs = next_file_set; + + // Update the progress + n_passes_left--; + + if (result==0) { invariant(progress_allocation_for_this_pass==0); } + + if (result!=0) break; + } 
+ if (result) ft_loader_set_panic(bl, result, true, which_db, nullptr, nullptr); + + { + int r = toku_queue_eof(output_q); + if (r!=0 && result==0) result = r; + } + // It's conceivable that the progress_allocation could be nonzero (for example if bl->N==0) + { + int r = update_progress(progress_allocation, bl, "did merge_files"); + if (r!=0 && result==0) result = r; + } + return result; +} + +struct subtree_info { + int64_t block; +}; + +struct subtrees_info { + int64_t next_free_block; + int64_t n_subtrees; // was n_blocks + int64_t n_subtrees_limit; + struct subtree_info *subtrees; +}; + +static void subtrees_info_init(struct subtrees_info *p) { + p->next_free_block = p->n_subtrees = p->n_subtrees_limit = 0; + p->subtrees = NULL; +} + +static void subtrees_info_destroy(struct subtrees_info *p) { + toku_free(p->subtrees); + p->subtrees = NULL; +} + +static void allocate_node (struct subtrees_info *sts, int64_t b) { + if (sts->n_subtrees >= sts->n_subtrees_limit) { + sts->n_subtrees_limit *= 2; + XREALLOC_N(sts->n_subtrees_limit, sts->subtrees); + } + sts->subtrees[sts->n_subtrees].block = b; + sts->n_subtrees++; +} + +// dbuf will always contained 512-byte aligned buffer, but the length might not be a multiple of 512 bytes. If that's what you want, then pad it. 
+struct dbuf { + unsigned char *buf; + int buflen; + int off; + int error; +}; + +struct leaf_buf { + BLOCKNUM blocknum; + TXNID xid; + uint64_t nkeys, ndata, dsize; + FTNODE node; + XIDS xids; + uint64_t off; +}; + +struct translation { + int64_t off, size; +}; + +struct dbout { + int fd; + toku_off_t current_off; + + int64_t n_translations; + int64_t n_translations_limit; + struct translation *translation; + toku_mutex_t mutex; + FT ft; +}; + +static inline void dbout_init(struct dbout *out, FT ft) { + out->fd = -1; + out->current_off = 0; + out->n_translations = out->n_translations_limit = 0; + out->translation = NULL; + toku_mutex_init(&out->mutex, NULL); + out->ft = ft; +} + +static inline void dbout_destroy(struct dbout *out) { + if (out->fd >= 0) { + toku_os_close(out->fd); + out->fd = -1; + } + toku_free(out->translation); + out->translation = NULL; + toku_mutex_destroy(&out->mutex); +} + +static inline void dbout_lock(struct dbout *out) { + toku_mutex_lock(&out->mutex); +} + +static inline void dbout_unlock(struct dbout *out) { + toku_mutex_unlock(&out->mutex); +} + +static void seek_align_locked(struct dbout *out) { + toku_off_t old_current_off = out->current_off; + int alignment = 4096; + out->current_off += alignment-1; + out->current_off &= ~(alignment-1); + toku_off_t r = lseek(out->fd, out->current_off, SEEK_SET); + invariant(r==out->current_off); + invariant(out->current_off >= old_current_off); + invariant(out->current_off < old_current_off+alignment); + invariant(out->current_off % alignment == 0); +} + +static void seek_align(struct dbout *out) { + dbout_lock(out); + seek_align_locked(out); + dbout_unlock(out); +} + +static void dbuf_init (struct dbuf *dbuf) { + dbuf->buf = 0; + dbuf->buflen = 0; + dbuf->off = 0; + dbuf->error = 0; +} + +static void dbuf_destroy (struct dbuf *dbuf) { + toku_free(dbuf->buf); dbuf->buf = NULL; +} + +static int allocate_block (struct dbout *out, int64_t *ret_block_number) +// Return the new block number +{ + int 
result = 0; + dbout_lock(out); + int64_t block_number = out->n_translations; + if (block_number >= out->n_translations_limit) { + int64_t old_n_translations_limit = out->n_translations_limit; + struct translation *old_translation = out->translation; + if (out->n_translations_limit==0) { + out->n_translations_limit = 1; + } else { + out->n_translations_limit *= 2; + } + REALLOC_N(out->n_translations_limit, out->translation); + if (out->translation == NULL) { + result = get_error_errno(); + invariant(result); + out->n_translations_limit = old_n_translations_limit; + out->translation = old_translation; + goto cleanup; + } + } + out->n_translations++; + *ret_block_number = block_number; +cleanup: + dbout_unlock(out); + return result; +} + +static void putbuf_bytes (struct dbuf *dbuf, const void *bytes, int nbytes) { + if (!dbuf->error && dbuf->off + nbytes > dbuf->buflen) { + unsigned char *oldbuf = dbuf->buf; + int oldbuflen = dbuf->buflen; + dbuf->buflen += dbuf->off + nbytes; + dbuf->buflen *= 2; + REALLOC_N_ALIGNED(512, dbuf->buflen, dbuf->buf); + if (dbuf->buf == NULL) { + dbuf->error = get_error_errno(); + dbuf->buf = oldbuf; + dbuf->buflen = oldbuflen; + } + } + if (!dbuf->error) { + memcpy(dbuf->buf + dbuf->off, bytes, nbytes); + dbuf->off += nbytes; + } +} + +static void putbuf_int32 (struct dbuf *dbuf, int v) { + putbuf_bytes(dbuf, &v, 4); +} + +static void putbuf_int64 (struct dbuf *dbuf, long long v) { + putbuf_int32(dbuf, v>>32); + putbuf_int32(dbuf, v&0xFFFFFFFF); +} + +static struct leaf_buf *start_leaf (struct dbout *out, const DESCRIPTOR UU(desc), int64_t lblocknum, TXNID xid, uint32_t UU(target_nodesize)) { + invariant(lblocknum < out->n_translations_limit); + + struct leaf_buf *XMALLOC(lbuf); + lbuf->blocknum.b = lblocknum; + lbuf->xid = xid; + lbuf->nkeys = lbuf->ndata = lbuf->dsize = 0; + lbuf->off = 0; + + lbuf->xids = toku_xids_get_root_xids(); + if (xid != TXNID_NONE) { + XIDS new_xids = NULL; + int r = toku_xids_create_child(lbuf->xids, 
&new_xids, xid); + assert(r == 0 && new_xids); + toku_xids_destroy(&lbuf->xids); + lbuf->xids = new_xids; + } + + FTNODE XMALLOC(node); + toku_initialize_empty_ftnode(node, lbuf->blocknum, 0 /*height*/, 1 /*basement nodes*/, FT_LAYOUT_VERSION, 0); + BP_STATE(node, 0) = PT_AVAIL; + lbuf->node = node; + + return lbuf; +} + +static void finish_leafnode (struct dbout *out, struct leaf_buf *lbuf, int progress_allocation, FTLOADER bl, uint32_t target_basementnodesize, enum toku_compression_method target_compression_method); +static int write_nonleaves (FTLOADER bl, FIDX pivots_fidx, struct dbout *out, struct subtrees_info *sts, const DESCRIPTOR descriptor, uint32_t target_nodesize, uint32_t target_basementnodesize, enum toku_compression_method target_compression_method); +static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int keylen, unsigned char *val, int vallen, int this_leafentry_size, STAT64INFO stats_to_update); +static int write_translation_table (struct dbout *out, long long *off_of_translation_p); +static int write_header (struct dbout *out, long long translation_location_on_disk, long long translation_size_on_disk); + +static void drain_writer_q(QUEUE q) { + void *item; + while (1) { + int r = toku_queue_deq(q, &item, NULL, NULL); + if (r == EOF) + break; + invariant(r == 0); + struct rowset *rowset = (struct rowset *) item; + destroy_rowset(rowset); + toku_free(rowset); + } +} + +static void cleanup_maxkey(DBT *maxkey) { + if (maxkey->flags == DB_DBT_REALLOC) { + toku_free(maxkey->data); + maxkey->data = NULL; + maxkey->flags = 0; + } +} + +static void update_maxkey(DBT *maxkey, DBT *key) { + cleanup_maxkey(maxkey); + *maxkey = *key; +} + +static int copy_maxkey(DBT *maxkey) { + DBT newkey; + toku_init_dbt_flags(&newkey, DB_DBT_REALLOC); + int r = toku_dbt_set(maxkey->size, maxkey->data, &newkey, NULL); + if (r == 0) + update_maxkey(maxkey, &newkey); + return r; +} + +static int toku_loader_write_ft_from_q (FTLOADER bl, + const 
DESCRIPTOR descriptor, + int fd, // write to here + int progress_allocation, + QUEUE q, + uint64_t total_disksize_estimate, + int which_db, + uint32_t target_nodesize, + uint32_t target_basementnodesize, + enum toku_compression_method target_compression_method, + uint32_t target_fanout) +// Effect: Consume a sequence of rowsets work from a queue, creating a fractal tree. Closes fd. +{ + // set the number of fractal tree writer threads so that we can partition memory in the merger + ft_loader_set_fractal_workers_count(bl); + + int result = 0; + int r; + + // The pivots file will contain all the pivot strings (in the form ) + // The pivots_fname is the name of the pivots file. + // Note that the pivots file will have one extra pivot in it (the last key in the dictionary) which will not appear in the tree. + int64_t n_pivots=0; // number of pivots in pivots_file + FIDX pivots_file; // the file + + r = ft_loader_open_temp_file (bl, &pivots_file); + if (r) { + result = r; + drain_writer_q(q); + r = toku_os_close(fd); + assert_zero(r); + return result; + } + FILE *pivots_stream = toku_bl_fidx2file(bl, pivots_file); + + TXNID root_xid_that_created = TXNID_NONE; + if (bl->root_xids_that_created) + root_xid_that_created = bl->root_xids_that_created[which_db]; + + // TODO: (Zardosht/Yoni/Leif), do this code properly + struct ft ft; + toku_ft_init(&ft, (BLOCKNUM){0}, bl->load_lsn, root_xid_that_created, target_nodesize, target_basementnodesize, target_compression_method, target_fanout); + + struct dbout out; + ZERO_STRUCT(out); + dbout_init(&out, &ft); + out.fd = fd; + out.current_off = 8192; // leave 8K reserved at beginning + out.n_translations = 3; // 3 translations reserved at the beginning + out.n_translations_limit = 4; + MALLOC_N(out.n_translations_limit, out.translation); + if (out.translation == NULL) { + result = get_error_errno(); + dbout_destroy(&out); + drain_writer_q(q); + toku_free(ft.h); + return result; + } + + // The blocks_array will contain all the block 
numbers that correspond to the pivots. Generally there should be one more block than pivot. + struct subtrees_info sts; + subtrees_info_init(&sts); + sts.next_free_block = 3; + sts.n_subtrees = 0; + sts.n_subtrees_limit = 1; + MALLOC_N(sts.n_subtrees_limit, sts.subtrees); + if (sts.subtrees == NULL) { + result = get_error_errno(); + subtrees_info_destroy(&sts); + dbout_destroy(&out); + drain_writer_q(q); + toku_free(ft.h); + return result; + } + + out.translation[0].off = -2LL; out.translation[0].size = 0; // block 0 is NULL + invariant(1==RESERVED_BLOCKNUM_TRANSLATION); + invariant(2==RESERVED_BLOCKNUM_DESCRIPTOR); + out.translation[1].off = -1; // block 1 is the block translation, filled in later + out.translation[2].off = -1; // block 2 is the descriptor + seek_align(&out); + int64_t lblock = 0; // make gcc --happy + result = allocate_block(&out, &lblock); + invariant(result == 0); // can not fail since translations reserved above + + TXNID le_xid = leafentry_xid(bl, which_db); + struct leaf_buf *lbuf = start_leaf(&out, descriptor, lblock, le_xid, target_nodesize); + uint64_t n_rows_remaining = bl->n_rows; + uint64_t old_n_rows_remaining = bl->n_rows; + + uint64_t used_estimate = 0; // how much diskspace have we used up? 
+ + DBT maxkey = make_dbt(0, 0); // keep track of the max key of the current node + + STAT64INFO_S deltas = ZEROSTATS; + while (result == 0) { + void *item; + { + int rr = toku_queue_deq(q, &item, NULL, NULL); + if (rr == EOF) break; + if (rr != 0) { + ft_loader_set_panic(bl, rr, true, which_db, nullptr, nullptr); + break; + } + } + struct rowset *output_rowset = (struct rowset *)item; + + for (unsigned int i = 0; i < output_rowset->n_rows; i++) { + DBT key = make_dbt(output_rowset->data+output_rowset->rows[i].off, output_rowset->rows[i].klen); + DBT val = make_dbt(output_rowset->data+output_rowset->rows[i].off + output_rowset->rows[i].klen, output_rowset->rows[i].vlen); + + size_t this_leafentry_size = ft_loader_leafentry_size(key.size, val.size, le_xid); + + used_estimate += this_leafentry_size; + + // Spawn off a node if + // a) there is at least one row in it, and + // b) this item would make the nodesize too big, or + // c) the remaining amount won't fit in the current node and the current node's data is more than the remaining amount + uint64_t remaining_amount = total_disksize_estimate - used_estimate; + uint64_t used_here = lbuf->off + 1000; // leave 1000 for various overheads. + uint64_t target_size = (target_nodesize*7L)/8; // use only 7/8 of the node. 
+ uint64_t used_here_with_next_key = used_here + this_leafentry_size; + if (lbuf->nkeys > 0 && + ((used_here_with_next_key >= target_size) || (used_here + remaining_amount >= target_size && lbuf->off > remaining_amount))) { + + int progress_this_node = progress_allocation * (double)(old_n_rows_remaining - n_rows_remaining)/(double)old_n_rows_remaining; + progress_allocation -= progress_this_node; + old_n_rows_remaining = n_rows_remaining; + + allocate_node(&sts, lblock); + + n_pivots++; + + invariant(maxkey.data != NULL); + if ((r = bl_write_dbt(&maxkey, pivots_stream, NULL, nullptr, bl))) { + ft_loader_set_panic(bl, r, true, which_db, nullptr, nullptr); + if (result == 0) result = r; + break; + } + + finish_leafnode(&out, lbuf, progress_this_node, bl, target_basementnodesize, target_compression_method); + lbuf = NULL; + + r = allocate_block(&out, &lblock); + if (r != 0) { + ft_loader_set_panic(bl, r, true, which_db, nullptr, nullptr); + if (result == 0) result = r; + break; + } + lbuf = start_leaf(&out, descriptor, lblock, le_xid, target_nodesize); + } + + add_pair_to_leafnode(lbuf, (unsigned char *) key.data, key.size, (unsigned char *) val.data, val.size, this_leafentry_size, &deltas); + n_rows_remaining--; + + update_maxkey(&maxkey, &key); // set the new maxkey to the current key + } + + r = copy_maxkey(&maxkey); // make a copy of maxkey before the rowset is destroyed + if (result == 0) + result = r; + destroy_rowset(output_rowset); + toku_free(output_rowset); + + if (result == 0) + result = ft_loader_get_error(&bl->error_callback); // check if an error was posted and terminate this quickly + } + + if (deltas.numrows || deltas.numbytes) { + toku_ft_update_stats(&ft.in_memory_stats, deltas); + } + + cleanup_maxkey(&maxkey); + + if (lbuf) { + allocate_node(&sts, lblock); + { + int p = progress_allocation/2; + finish_leafnode(&out, lbuf, p, bl, target_basementnodesize, target_compression_method); + progress_allocation -= p; + } + } + + + if (result == 0) { + 
result = ft_loader_get_error(&bl->error_callback); // if there were any prior errors then exit + } + + if (result != 0) goto error; + + // We haven't paniced, so the sum should add up. + invariant(used_estimate == total_disksize_estimate); + + n_pivots++; + + { + DBT key = make_dbt(0,0); // must write an extra DBT into the pivots file. + r = bl_write_dbt(&key, pivots_stream, NULL, nullptr, bl); + if (r) { + result = r; goto error; + } + } + + r = write_nonleaves(bl, pivots_file, &out, &sts, descriptor, target_nodesize, target_basementnodesize, target_compression_method); + if (r) { + result = r; goto error; + } + + { + invariant(sts.n_subtrees==1); + out.ft->h->root_blocknum = make_blocknum(sts.subtrees[0].block); + toku_free(sts.subtrees); sts.subtrees = NULL; + + // write the descriptor + { + seek_align(&out); + invariant(out.n_translations >= RESERVED_BLOCKNUM_DESCRIPTOR); + invariant(out.translation[RESERVED_BLOCKNUM_DESCRIPTOR].off == -1); + out.translation[RESERVED_BLOCKNUM_DESCRIPTOR].off = out.current_off; + size_t desc_size = 4+toku_serialize_descriptor_size(descriptor); + invariant(desc_size>0); + out.translation[RESERVED_BLOCKNUM_DESCRIPTOR].size = desc_size; + struct wbuf wbuf; + char *XMALLOC_N(desc_size, buf); + wbuf_init(&wbuf, buf, desc_size); + toku_serialize_descriptor_contents_to_wbuf(&wbuf, descriptor); + uint32_t checksum = toku_x1764_finish(&wbuf.checksum); + wbuf_int(&wbuf, checksum); + invariant(wbuf.ndone==desc_size); + r = toku_os_write(out.fd, wbuf.buf, wbuf.ndone); + out.current_off += desc_size; + toku_free(buf); // wbuf_destroy + if (r) { + result = r; goto error; + } + } + + long long off_of_translation; + r = write_translation_table(&out, &off_of_translation); + if (r) { + result = r; goto error; + } + + r = write_header(&out, off_of_translation, (out.n_translations+1)*16+4); + if (r) { + result = r; goto error; + } + + r = update_progress(progress_allocation, bl, "wrote tdb file"); + if (r) { + result = r; goto error; + } + } + + r 
= fsync(out.fd); + if (r) { + result = get_error_errno(); goto error; + } + + // Do we need to pay attention to user_said_stop? Or should the guy at the other end of the queue pay attention and send in an EOF. + + error: + { + int rr = toku_os_close(fd); + if (rr) + result = get_error_errno(); + } + out.fd = -1; + + subtrees_info_destroy(&sts); + dbout_destroy(&out); + drain_writer_q(q); + toku_free(ft.h); + + return result; +} + +int toku_loader_write_ft_from_q_in_C (FTLOADER bl, + const DESCRIPTOR descriptor, + int fd, // write to here + int progress_allocation, + QUEUE q, + uint64_t total_disksize_estimate, + int which_db, + uint32_t target_nodesize, + uint32_t target_basementnodesize, + enum toku_compression_method target_compression_method, + uint32_t target_fanout) +// This is probably only for testing. +{ + target_nodesize = target_nodesize == 0 ? default_loader_nodesize : target_nodesize; + target_basementnodesize = target_basementnodesize == 0 ? default_loader_basementnodesize : target_basementnodesize; + return toku_loader_write_ft_from_q (bl, descriptor, fd, progress_allocation, q, total_disksize_estimate, which_db, target_nodesize, target_basementnodesize, target_compression_method, target_fanout); +} + + +static void* fractal_thread (void *ftav) { + struct fractal_thread_args *fta = (struct fractal_thread_args *)ftav; + int r = toku_loader_write_ft_from_q (fta->bl, fta->descriptor, fta->fd, fta->progress_allocation, fta->q, fta->total_disksize_estimate, fta->which_db, fta->target_nodesize, fta->target_basementnodesize, fta->target_compression_method, fta->target_fanout); + fta->errno_result = r; + return NULL; +} + +static int loader_do_i (FTLOADER bl, + int which_db, + DB *dest_db, + ft_compare_func compare, + const DESCRIPTOR descriptor, + const char *new_fname, + int progress_allocation // how much progress do I need to add into bl->progress by the end.. + ) +/* Effect: Handle the file creating for one particular DB in the bulk loader. 
*/ +/* Requires: The data is fully extracted, so we can do merges out of files and write the ft file. */ +{ + //printf("doing i use %d progress=%d fin at %d\n", progress_allocation, bl->progress, bl->progress+progress_allocation); + struct merge_fileset *fs = &(bl->fs[which_db]); + struct rowset *rows = &(bl->rows[which_db]); + invariant(rows->data==NULL); // the rows should be all cleaned up already + + int r = toku_queue_create(&bl->fractal_queues[which_db], FRACTAL_WRITER_QUEUE_DEPTH); + if (r) goto error; + + { + mode_t mode = S_IRUSR+S_IWUSR + S_IRGRP+S_IWGRP; + int fd = toku_os_open(new_fname, O_RDWR| O_CREAT | O_BINARY, mode); // #2621 + if (fd < 0) { + r = get_error_errno(); goto error; + } + + uint32_t target_nodesize, target_basementnodesize, target_fanout; + enum toku_compression_method target_compression_method; + r = dest_db->get_pagesize(dest_db, &target_nodesize); + invariant_zero(r); + r = dest_db->get_readpagesize(dest_db, &target_basementnodesize); + invariant_zero(r); + r = dest_db->get_compression_method(dest_db, &target_compression_method); + invariant_zero(r); + r = dest_db->get_fanout(dest_db, &target_fanout); + invariant_zero(r); + + if (bl->allow_puts) { + // a better allocation would be to figure out roughly how many merge passes we'll need. + int allocation_for_merge = (2*progress_allocation)/3; + progress_allocation -= allocation_for_merge; + + // This structure must stay live until the join below. 
+ struct fractal_thread_args fta = { + bl, + descriptor, + fd, + progress_allocation, + bl->fractal_queues[which_db], + bl->extracted_datasizes[which_db], + 0, + which_db, + target_nodesize, + target_basementnodesize, + target_compression_method, + target_fanout + }; + + r = toku_pthread_create(bl->fractal_threads+which_db, NULL, fractal_thread, (void*)&fta); + if (r) { + int r2 __attribute__((__unused__)) = toku_queue_destroy(bl->fractal_queues[which_db]); + // ignore r2, since we already have an error + bl->fractal_queues[which_db] = nullptr; + goto error; + } + invariant(bl->fractal_threads_live[which_db]==false); + bl->fractal_threads_live[which_db] = true; + + r = merge_files(fs, bl, which_db, dest_db, compare, allocation_for_merge, bl->fractal_queues[which_db]); + + { + void *toku_pthread_retval; + int r2 = toku_pthread_join(bl->fractal_threads[which_db], &toku_pthread_retval); + invariant(fta.bl==bl); // this is a gratuitous assertion to make sure that the fta struct is still live here. A previous bug put that struct into a C block statement. + resource_assert_zero(r2); + invariant(toku_pthread_retval==NULL); + invariant(bl->fractal_threads_live[which_db]); + bl->fractal_threads_live[which_db] = false; + if (r == 0) r = fta.errno_result; + } + } else { + toku_queue_eof(bl->fractal_queues[which_db]); + r = toku_loader_write_ft_from_q(bl, descriptor, fd, progress_allocation, + bl->fractal_queues[which_db], bl->extracted_datasizes[which_db], which_db, + target_nodesize, target_basementnodesize, target_compression_method, target_fanout); + } + } + + error: // this is the cleanup code. Even if r==0 (no error) we fall through to here. 
+ if (bl->fractal_queues[which_db]) { + int r2 = toku_queue_destroy(bl->fractal_queues[which_db]); + invariant(r2==0); + bl->fractal_queues[which_db] = nullptr; + } + + // if we get here we need to free up the merge_fileset and the rowset, as well as the keys + toku_free(rows->data); rows->data = NULL; + toku_free(rows->rows); rows->rows = NULL; + toku_free(fs->data_fidxs); fs->data_fidxs = NULL; + return r; +} + +static int toku_ft_loader_close_internal (FTLOADER bl) +/* Effect: Close the bulk loader. + * Return all the file descriptors in the array fds. */ +{ + int result = 0; + if (bl->N == 0) + result = update_progress(PROGRESS_MAX, bl, "done"); + else { + int remaining_progress = PROGRESS_MAX; + for (int i = 0; i < bl->N; i++) { + // Take the unallocated progress and divide it among the unfinished jobs. + // This calculation allocates all of the PROGRESS_MAX bits of progress to some job. + int allocate_here = remaining_progress/(bl->N - i); + remaining_progress -= allocate_here; + char *fname_in_cwd = toku_cachetable_get_fname_in_cwd(bl->cachetable, bl->new_fnames_in_env[i]); + result = loader_do_i(bl, i, bl->dbs[i], bl->bt_compare_funs[i], bl->descriptors[i], fname_in_cwd, allocate_here); + toku_free(fname_in_cwd); + if (result != 0) + goto error; + invariant(0 <= bl->progress && bl->progress <= PROGRESS_MAX); + } + if (result==0) invariant(remaining_progress==0); + + // fsync the directory containing the new tokudb files. 
+ char *fname0 = toku_cachetable_get_fname_in_cwd(bl->cachetable, bl->new_fnames_in_env[0]); + int r = toku_fsync_directory(fname0); + toku_free(fname0); + if (r != 0) { + result = r; goto error; + } + } + invariant(bl->file_infos.n_files_open == 0); + invariant(bl->file_infos.n_files_extant == 0); + invariant(bl->progress == PROGRESS_MAX); + error: + toku_ft_loader_internal_destroy(bl, (bool)(result!=0)); + return result; +} + +int toku_ft_loader_close (FTLOADER bl, + ft_loader_error_func error_function, void *error_extra, + ft_loader_poll_func poll_function, void *poll_extra + ) +{ + int result = 0; + + int r; + + //printf("Closing\n"); + + ft_loader_set_error_function(&bl->error_callback, error_function, error_extra); + + ft_loader_set_poll_function(&bl->poll_callback, poll_function, poll_extra); + + if (bl->extractor_live) { + r = finish_extractor(bl); + if (r) + result = r; + invariant(!bl->extractor_live); + } else { + r = finish_primary_rows(bl); + if (r) + result = r; + } + + // check for an error during extraction + if (result == 0) { + r = ft_loader_call_error_function(&bl->error_callback); + if (r) + result = r; + } + + if (result == 0) { + r = toku_ft_loader_close_internal(bl); + if (r && result == 0) + result = r; + } else + toku_ft_loader_internal_destroy(bl, true); + + return result; +} + +int toku_ft_loader_finish_extractor(FTLOADER bl) { + int result = 0; + if (bl->extractor_live) { + int r = finish_extractor(bl); + if (r) + result = r; + invariant(!bl->extractor_live); + } else + result = EINVAL; + return result; +} + +int toku_ft_loader_abort(FTLOADER bl, bool is_error) +/* Effect : Abort the bulk loader, free ft_loader resources */ +{ + int result = 0; + + // cleanup the extractor thread + if (bl->extractor_live) { + int r = finish_extractor(bl); + if (r) + result = r; + invariant(!bl->extractor_live); + } + + for (int i = 0; i < bl->N; i++) + invariant(!bl->fractal_threads_live[i]); + + toku_ft_loader_internal_destroy(bl, is_error); + return 
result; +} + +int toku_ft_loader_get_error(FTLOADER bl, int *error) { + *error = ft_loader_get_error(&bl->error_callback); + return 0; +} + +static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int keylen, unsigned char *val, int vallen, int this_leafentry_size, STAT64INFO stats_to_update) { + lbuf->nkeys++; + lbuf->ndata++; + lbuf->dsize += keylen + vallen; + lbuf->off += this_leafentry_size; + + // append this key val pair to the leafnode + // #3588 TODO just make a clean ule and append it to the omt + // #3588 TODO can do the rebalancing here and avoid a lot of work later + FTNODE leafnode = lbuf->node; + uint32_t idx = BLB_DATA(leafnode, 0)->num_klpairs(); + DBT kdbt, vdbt; + ft_msg msg(toku_fill_dbt(&kdbt, key, keylen), toku_fill_dbt(&vdbt, val, vallen), FT_INSERT, ZERO_MSN, lbuf->xids); + uint64_t workdone = 0; + // there's no mvcc garbage in a bulk-loaded FT, so there's no need to pass useful gc info + txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true); + toku_ft_bn_apply_msg_once(BLB(leafnode,0), msg, idx, keylen, NULL, &gc_info, &workdone, stats_to_update); +} + +static int write_literal(struct dbout *out, void*data, size_t len) { + invariant(out->current_off%4096==0); + int result = toku_os_write(out->fd, data, len); + if (result == 0) + out->current_off+=len; + return result; +} + +static void finish_leafnode (struct dbout *out, struct leaf_buf *lbuf, int progress_allocation, FTLOADER bl, uint32_t target_basementnodesize, enum toku_compression_method target_compression_method) { + int result = 0; + + // serialize leaf to buffer + size_t serialized_leaf_size = 0; + size_t uncompressed_serialized_leaf_size = 0; + char *serialized_leaf = NULL; + FTNODE_DISK_DATA ndd = NULL; + result = toku_serialize_ftnode_to_memory(lbuf->node, &ndd, target_basementnodesize, target_compression_method, true, true, &serialized_leaf_size, &uncompressed_serialized_leaf_size, &serialized_leaf); + + // write it out + if (result == 0) { + 
dbout_lock(out); + long long off_of_leaf = out->current_off; + result = write_literal(out, serialized_leaf, serialized_leaf_size); + if (result == 0) { + out->translation[lbuf->blocknum.b].off = off_of_leaf; + out->translation[lbuf->blocknum.b].size = serialized_leaf_size; + seek_align_locked(out); + } + dbout_unlock(out); + } + + // free the node + if (serialized_leaf) { + toku_free(ndd); + toku_free(serialized_leaf); + } + toku_ftnode_free(&lbuf->node); + toku_xids_destroy(&lbuf->xids); + toku_free(lbuf); + + //printf("Nodewrite %d (%.1f%%):", progress_allocation, 100.0*progress_allocation/PROGRESS_MAX); + if (result == 0) + result = update_progress(progress_allocation, bl, "wrote node"); + + if (result) + ft_loader_set_panic(bl, result, true, 0, nullptr, nullptr); +} + +static int write_translation_table (struct dbout *out, long long *off_of_translation_p) { + seek_align(out); + struct dbuf ttable; + dbuf_init(&ttable); + long long off_of_translation = out->current_off; + long long bt_size_on_disk = out->n_translations * 16 + 20; + putbuf_int64(&ttable, out->n_translations); // number of records + putbuf_int64(&ttable, -1LL); // the linked list + out->translation[1].off = off_of_translation; + out->translation[1].size = bt_size_on_disk; + for (int i=0; in_translations; i++) { + putbuf_int64(&ttable, out->translation[i].off); + putbuf_int64(&ttable, out->translation[i].size); + } + unsigned int checksum = toku_x1764_memory(ttable.buf, ttable.off); + putbuf_int32(&ttable, checksum); + // pad it to 512 zeros + long long encoded_length = ttable.off; + { + int nbytes_to_add = roundup_to_multiple(512, ttable.off) - encoded_length; + char zeros[nbytes_to_add]; + for (int i=0; ifd, ttable.buf, ttable.off, off_of_translation); + } + dbuf_destroy(&ttable); + *off_of_translation_p = off_of_translation; + return result; +} + +static int +write_header (struct dbout *out, long long translation_location_on_disk, long long translation_size_on_disk) { + int result = 0; + size_t 
size = toku_serialize_ft_size(out->ft->h); + size_t alloced_size = roundup_to_multiple(512, size); + struct wbuf wbuf; + char *MALLOC_N_ALIGNED(512, alloced_size, buf); + if (buf == NULL) { + result = get_error_errno(); + } else { + wbuf_init(&wbuf, buf, size); + out->ft->h->on_disk_stats = out->ft->in_memory_stats; + toku_serialize_ft_to_wbuf(&wbuf, out->ft->h, translation_location_on_disk, translation_size_on_disk); + for (size_t i=size; ifd, wbuf.buf, alloced_size, 0); + } + toku_free(buf); + } + return result; +} + +static int read_some_pivots (FIDX pivots_file, int n_to_read, FTLOADER bl, + /*out*/ DBT pivots[/*n_to_read*/]) +// pivots is an array to be filled in. The pivots array is uninitialized. +{ + for (int i = 0; i < n_to_read; i++) + pivots[i] = zero_dbt; + + FILE *pivots_stream = toku_bl_fidx2file(bl, pivots_file); + + int result = 0; + for (int i = 0; i < n_to_read; i++) { + int r = bl_read_dbt(&pivots[i], pivots_stream); + if (r != 0) { + result = r; + break; + } + } + return result; +} + +static void delete_pivots(DBT pivots[], int n) { + for (int i = 0; i < n; i++) + toku_free(pivots[i].data); + toku_free(pivots); +} + +static int setup_nonleaf_block (int n_children, + struct subtrees_info *subtrees, FIDX pivots_file, int64_t first_child_offset_in_subtrees, + struct subtrees_info *next_subtrees, FIDX next_pivots_file, + struct dbout *out, FTLOADER bl, + /*out*/int64_t *blocknum, + /*out*/struct subtree_info **subtrees_info_p, + /*out*/DBT **pivots_p) +// Do the serial part of setting up a non leaf block. +// Read the pivots out of the file, and store them in a newly allocated array of DBTs (returned in *pivots_p) There are (n_blocks_to_use-1) of these. +// Copy the final pivot into the next_pivots file instead of returning it. +// Copy the subtree_info from the subtrees structure, and store them in a newly allocated array of subtree_infos (return in *subtrees_info_p). There are n_blocks_to_use of these. 
+// Allocate a block number and return it in *blocknum. +// Store the blocknum in the next_blocks structure, so it can be combined with the pivots at the next level of the tree. +// Update n_blocks_used and n_translations. +// This code cannot be called in parallel because of all the race conditions. +// The actual creation of the node can be called in parallel after this work is done. +{ + //printf("Nonleaf has children :"); for(int i=0; isubtrees[i].block); printf("\n"); + + int result = 0; + + DBT *MALLOC_N(n_children, pivots); + if (pivots == NULL) { + result = get_error_errno(); + } + + if (result == 0) { + int r = read_some_pivots(pivots_file, n_children, bl, pivots); + if (r) + result = r; + } + + if (result == 0) { + FILE *next_pivots_stream = toku_bl_fidx2file(bl, next_pivots_file); + int r = bl_write_dbt(&pivots[n_children-1], next_pivots_stream, NULL, nullptr, bl); + if (r) + result = r; + } + + if (result == 0) { + // The last pivot was written to the next_pivots file, so we free it now instead of returning it. 
+ toku_free(pivots[n_children-1].data); + pivots[n_children-1] = zero_dbt; + + struct subtree_info *XMALLOC_N(n_children, subtrees_array); + for (int i = 0; i < n_children; i++) { + int64_t from_blocknum = first_child_offset_in_subtrees + i; + subtrees_array[i] = subtrees->subtrees[from_blocknum]; + } + + int r = allocate_block(out, blocknum); + if (r) { + toku_free(subtrees_array); + result = r; + } else { + allocate_node(next_subtrees, *blocknum); + + *pivots_p = pivots; + *subtrees_info_p = subtrees_array; + } + } + + if (result != 0) { + if (pivots) { + delete_pivots(pivots, n_children); pivots = NULL; + } + } + + return result; +} + +static void write_nonleaf_node (FTLOADER bl, struct dbout *out, int64_t blocknum_of_new_node, int n_children, + DBT *pivots, /* must free this array, as well as the things it points t */ + struct subtree_info *subtree_info, int height, const DESCRIPTOR UU(desc), uint32_t UU(target_nodesize), uint32_t target_basementnodesize, enum toku_compression_method target_compression_method) +{ + //Nodes do not currently touch descriptors + invariant(height > 0); + + int result = 0; + + FTNODE XMALLOC(node); + toku_initialize_empty_ftnode(node, make_blocknum(blocknum_of_new_node), height, n_children, + FT_LAYOUT_VERSION, 0); + node->pivotkeys.create_from_dbts(pivots, n_children - 1); + assert(node->bp); + for (int i=0; itranslation[blocknum_of_new_node].off = out->current_off; + out->translation[blocknum_of_new_node].size = n_bytes; + //fprintf(stderr, "Wrote internal node at %ld (%ld bytes)\n", out->current_off, n_bytes); + //for (uint32_t i=0; i=' ' && b<128) ? 
b : '*'); } + r = write_literal(out, bytes, n_bytes); + if (r) + result = r; + else + seek_align_locked(out); + dbout_unlock(out); + toku_free(bytes); + } + } + + for (int i=0; ibp); + node->pivotkeys.destroy(); + toku_free(node); + toku_free(ndd); + toku_free(subtree_info); + + if (result != 0) + ft_loader_set_panic(bl, result, true, 0, nullptr, nullptr); +} + +static int write_nonleaves (FTLOADER bl, FIDX pivots_fidx, struct dbout *out, struct subtrees_info *sts, const DESCRIPTOR descriptor, uint32_t target_nodesize, uint32_t target_basementnodesize, enum toku_compression_method target_compression_method) { + int result = 0; + int height = 1; + + // Watch out for the case where we saved the last pivot but didn't write any more nodes out. + // The trick is not to look at n_pivots, but to look at blocks.n_blocks + while (sts->n_subtrees > 1) { + // If there is more than one block in blocks, then we must build another level of the tree. + + // we need to create a pivots file for the pivots of the next level. + // and a blocks_array + // So for example. + // 1) we grab 16 pivots and 16 blocks. + // 2) We put the 15 pivots and 16 blocks into an non-leaf node. + // 3) We put the 16th pivot into the next pivots file. + { + int r = fseek(toku_bl_fidx2file(bl, pivots_fidx), 0, SEEK_SET); + if (r!=0) { return get_error_errno(); } + } + + FIDX next_pivots_file; + { + int r = ft_loader_open_temp_file (bl, &next_pivots_file); + if (r != 0) { result = r; break; } + } + + struct subtrees_info next_sts; + subtrees_info_init(&next_sts); + next_sts.n_subtrees = 0; + next_sts.n_subtrees_limit = 1; + XMALLOC_N(next_sts.n_subtrees_limit, next_sts.subtrees); + + const int n_per_block = 15; + int64_t n_subtrees_used = 0; + while (sts->n_subtrees - n_subtrees_used >= n_per_block*2) { + // grab the first N_PER_BLOCK and build a node. 
+ DBT *pivots; + int64_t blocknum_of_new_node; + struct subtree_info *subtree_info; + int r = setup_nonleaf_block (n_per_block, + sts, pivots_fidx, n_subtrees_used, + &next_sts, next_pivots_file, + out, bl, + &blocknum_of_new_node, &subtree_info, &pivots); + if (r) { + result = r; + break; + } else { + write_nonleaf_node(bl, out, blocknum_of_new_node, n_per_block, pivots, subtree_info, height, descriptor, target_nodesize, target_basementnodesize, target_compression_method); // frees all the data structures that go into making the node. + n_subtrees_used += n_per_block; + } + } + + int64_t n_blocks_left = sts->n_subtrees - n_subtrees_used; + if (result == 0) { + // Now we have a one or two blocks at the end to handle. + invariant(n_blocks_left>=2); + if (n_blocks_left > n_per_block) { + // Write half the remaining blocks + int64_t n_first = n_blocks_left/2; + DBT *pivots; + int64_t blocknum_of_new_node; + struct subtree_info *subtree_info; + int r = setup_nonleaf_block(n_first, + sts, pivots_fidx, n_subtrees_used, + &next_sts, next_pivots_file, + out, bl, + &blocknum_of_new_node, &subtree_info, &pivots); + if (r) { + result = r; + } else { + write_nonleaf_node(bl, out, blocknum_of_new_node, n_first, pivots, subtree_info, height, descriptor, target_nodesize, target_basementnodesize, target_compression_method); + n_blocks_left -= n_first; + n_subtrees_used += n_first; + } + } + } + if (result == 0) { + // Write the last block. 
+ DBT *pivots; + int64_t blocknum_of_new_node; + struct subtree_info *subtree_info; + int r = setup_nonleaf_block(n_blocks_left, + sts, pivots_fidx, n_subtrees_used, + &next_sts, next_pivots_file, + out, bl, + &blocknum_of_new_node, &subtree_info, &pivots); + if (r) { + result = r; + } else { + write_nonleaf_node(bl, out, blocknum_of_new_node, n_blocks_left, pivots, subtree_info, height, descriptor, target_nodesize, target_basementnodesize, target_compression_method); + n_subtrees_used += n_blocks_left; + } + } + if (result == 0) + invariant(n_subtrees_used == sts->n_subtrees); + + + if (result == 0) // pick up write_nonleaf_node errors + result = ft_loader_get_error(&bl->error_callback); + + // Now set things up for the next iteration. + int r = ft_loader_fi_close(&bl->file_infos, pivots_fidx, true); if (r != 0 && result == 0) result = r; + r = ft_loader_fi_unlink(&bl->file_infos, pivots_fidx); if (r != 0 && result == 0) result = r; + pivots_fidx = next_pivots_file; + toku_free(sts->subtrees); sts->subtrees = NULL; + *sts = next_sts; + height++; + + if (result) + break; + } + { int r = ft_loader_fi_close (&bl->file_infos, pivots_fidx, true); if (r != 0 && result == 0) result = r; } + { int r = ft_loader_fi_unlink(&bl->file_infos, pivots_fidx); if (r != 0 && result == 0) result = r; } + return result; +} + +void ft_loader_set_fractal_workers_count_from_c(FTLOADER bl) { + ft_loader_set_fractal_workers_count (bl); +} + + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/loader/loader.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/loader/loader.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/loader/loader.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/loader/loader.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,138 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free 
software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. 
+ +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
+#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include "ft/txn/txn.h" +#include "ft/cachetable/cachetable.h" +#include "ft/comparator.h" +#include "ft/ft-ops.h" + +// The loader callbacks are C functions and need to be defined as such + +typedef void (*ft_loader_error_func)(DB *, int which_db, int err, DBT *key, DBT *val, void *extra); + +typedef int (*ft_loader_poll_func)(void *extra, float progress); + +typedef struct ft_loader_s *FTLOADER; + +int toku_ft_loader_open (FTLOADER *bl, + CACHETABLE cachetable, + generate_row_for_put_func g, + DB *src_db, + int N, + FT_HANDLE ft_hs[/*N*/], DB* dbs[/*N*/], + const char * new_fnames_in_env[/*N*/], + ft_compare_func bt_compare_functions[/*N*/], + const char *temp_file_template, + LSN load_lsn, + TOKUTXN txn, + bool reserve_memory, + uint64_t reserve_memory_size, + bool compress_intermediates, + bool allow_puts); + +int toku_ft_loader_put (FTLOADER bl, DBT *key, DBT *val); + +int toku_ft_loader_close (FTLOADER bl, + ft_loader_error_func error_callback, void *error_callback_extra, + ft_loader_poll_func poll_callback, void *poll_callback_extra); + +int toku_ft_loader_abort(FTLOADER bl, + bool is_error); + +// For test purposes only +void toku_ft_loader_set_size_factor (uint32_t factor); + +void ft_loader_set_os_fwrite (size_t (*fwrite_fun)(const void*,size_t,size_t,FILE*)); + +size_t ft_loader_leafentry_size(size_t key_size, size_t val_size, TXNID xid); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/loader/loader-internal.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/loader/loader-internal.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/loader/loader-internal.h 1970-01-01 00:00:00.000000000 +0000 +++ 
mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/loader/loader-internal.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,365 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 
11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +#pragma once + +#ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." + +#include + +#include "portability/toku_pthread.h" + +#include "loader/dbufio.h" +#include "loader/loader.h" +#include "util/queue.h" + +enum { + EXTRACTOR_QUEUE_DEPTH = 2, + FILE_BUFFER_SIZE = 1<<24, + MIN_ROWSET_MEMORY = 1<<23, + MIN_MERGE_FANIN = 2, + FRACTAL_WRITER_QUEUE_DEPTH = 3, + FRACTAL_WRITER_ROWSETS = FRACTAL_WRITER_QUEUE_DEPTH + 2, + DBUFIO_DEPTH = 2, + TARGET_MERGE_BUF_SIZE = 1<<24, // we'd like the merge buffer to be this big. + MIN_MERGE_BUF_SIZE = 1<<20, // always use at least this much + MAX_UNCOMPRESSED_BUF = MIN_MERGE_BUF_SIZE +}; + +/* These functions are exported to allow the tests to compile. */ + +/* These structures maintain a collection of all the open temporary files used by the loader. */ +struct file_info { + bool is_open; + bool is_extant; // if true, the file must be unlinked. + char *fname; + FILE *file; + uint64_t n_rows; // how many rows were written into that file + size_t buffer_size; + void *buffer; +}; +struct file_infos { + int n_files; + int n_files_limit; + struct file_info *file_infos; + int n_files_open, n_files_extant; + toku_mutex_t lock; // must protect this data structure because current activity performs a REALLOC(fi->file_infos). +}; +typedef struct fidx { int idx; } FIDX; +static const FIDX FIDX_NULL __attribute__((__unused__)) = {-1}; +static int fidx_is_null (const FIDX f) __attribute__((__unused__)); +static int fidx_is_null (const FIDX f) { return f.idx==-1; } +FILE *toku_bl_fidx2file (FTLOADER bl, FIDX i); + +int ft_loader_open_temp_file (FTLOADER bl, FIDX*file_idx); + +/* These data structures are used for manipulating a collection of rows in main memory. */ +struct row { + size_t off; // the offset in the data array. 
+ int klen,vlen; +}; +struct rowset { + uint64_t memory_budget; + size_t n_rows, n_rows_limit; + struct row *rows; + size_t n_bytes, n_bytes_limit; + char *data; +}; + +int init_rowset (struct rowset *rows, uint64_t memory_budget); +void destroy_rowset (struct rowset *rows); +int add_row (struct rowset *rows, DBT *key, DBT *val); + +int loader_write_row(DBT *key, DBT *val, FIDX data, FILE*, uint64_t *dataoff, struct wbuf *wb, FTLOADER bl); +int loader_read_row (FILE *f, DBT *key, DBT *val); + +struct merge_fileset { + bool have_sorted_output; // Is there an previous key? + FIDX sorted_output; // this points to one of the data_fidxs. If output_is_sorted then this is the file containing sorted data. It's still open + DBT prev_key; // What is it? If it's here, its the last output in the merge fileset + + int n_temp_files, n_temp_files_limit; + FIDX *data_fidxs; +}; + +void init_merge_fileset (struct merge_fileset *fs); +void destroy_merge_fileset (struct merge_fileset *fs); + +struct poll_callback_s { + ft_loader_poll_func poll_function; + void *poll_extra; +}; +typedef struct poll_callback_s *ft_loader_poll_callback; + +int ft_loader_init_poll_callback(ft_loader_poll_callback); + +void ft_loader_destroy_poll_callback(ft_loader_poll_callback); + +void ft_loader_set_poll_function(ft_loader_poll_callback, ft_loader_poll_func poll_function, void *poll_extra); + +int ft_loader_call_poll_function(ft_loader_poll_callback, float progress); + +struct error_callback_s { + int error; + ft_loader_error_func error_callback; + void *extra; + DB *db; + int which_db; + DBT key; + DBT val; + bool did_callback; + toku_mutex_t mutex; +}; +typedef struct error_callback_s *ft_loader_error_callback; + +void ft_loader_init_error_callback(ft_loader_error_callback); + +void ft_loader_destroy_error_callback(ft_loader_error_callback); + +int ft_loader_get_error(ft_loader_error_callback); + +void ft_loader_set_error_function(ft_loader_error_callback, ft_loader_error_func error_function, void 
*extra); + +int ft_loader_set_error(ft_loader_error_callback, int error, DB *db, int which_db, DBT *key, DBT *val); + +int ft_loader_call_error_function(ft_loader_error_callback); + +int ft_loader_set_error_and_callback(ft_loader_error_callback, int error, DB *db, int which_db, DBT *key, DBT *val); + +struct ft_loader_s { + // These two are set in the close function, and used while running close + struct error_callback_s error_callback; + struct poll_callback_s poll_callback; + + generate_row_for_put_func generate_row_for_put; + ft_compare_func *bt_compare_funs; + + DB *src_db; + int N; + DB **dbs; // N of these + DESCRIPTOR *descriptors; // N of these. + TXNID *root_xids_that_created; // N of these. + const char **new_fnames_in_env; // N of these. The file names that the final data will be written to (relative to env). + + uint64_t *extracted_datasizes; // N of these. + + struct rowset primary_rowset; // the primary rows that have been put, but the secondary rows haven't been generated. + struct rowset primary_rowset_temp; // the primary rows that are being worked on by the extractor_thread. + + QUEUE primary_rowset_queue; // main thread enqueues rowsets in this queue (in maybe 64MB chunks). The extractor thread removes them, sorts them, adn writes to file. + toku_pthread_t extractor_thread; // the thread that takes primary rowset and does extraction and the first level sort and write to file. + bool extractor_live; + + DBT *last_key; // for each rowset, remember the most recently output key. The system may choose not to keep this up-to-date when a rowset is unsorted. These keys are malloced and ulen maintains the size of the malloced block. + + struct rowset *rows; // secondary rows that have been put, but haven't been sorted and written to a file. + uint64_t n_rows; // how many rows have been put? 
+ struct merge_fileset *fs; + + const char *temp_file_template; + + CACHETABLE cachetable; + bool did_reserve_memory; + bool compress_intermediates; + bool allow_puts; + uint64_t reserved_memory; // how much memory are we allowed to use? + + /* To make it easier to recover from errors, we don't use FILE*, instead we use an index into the file_infos. */ + struct file_infos file_infos; + +#define PROGRESS_MAX (1<<16) + int progress; // Progress runs from 0 to PROGRESS_MAX. When we call the poll function we convert to a float from 0.0 to 1.0 + // We use an integer so that we can add to the progress using a fetch-and-add instruction. + + int progress_callback_result; // initially zero, if any call to the poll function callback returns nonzero, we save the result here (and don't call the poll callback function again). + + LSN load_lsn; //LSN of the fsynced 'load' log entry. Write this LSN (as checkpoint_lsn) in ft headers made by this loader. + TXNID load_root_xid; //(Root) transaction that performed the load. + + QUEUE *fractal_queues; // an array of work queues, one for each secondary index. + toku_pthread_t *fractal_threads; + bool *fractal_threads_live; // an array of bools indicating that fractal_threads[i] is a live thread. (There is no NULL for a pthread_t, so we have to maintain this separately). + + unsigned fractal_workers; // number of fractal tree writer threads + + toku_mutex_t mutex; + bool mutex_init; +}; + +// Set the number of rows in the loader. Used for test. +void toku_ft_loader_set_n_rows(FTLOADER bl, uint64_t n_rows); + +// Get the number of rows in the loader. Used for test. +uint64_t toku_ft_loader_get_n_rows(FTLOADER bl); + +// The data passed into a fractal_thread via pthread_create. +struct fractal_thread_args { + FTLOADER bl; + const DESCRIPTOR descriptor; + int fd; // write the ft into fd. + int progress_allocation; + QUEUE q; + uint64_t total_disksize_estimate; + int errno_result; // the final result. 
+ int which_db; + uint32_t target_nodesize; + uint32_t target_basementnodesize; + enum toku_compression_method target_compression_method; + uint32_t target_fanout; +}; + +void toku_ft_loader_set_n_rows(FTLOADER bl, uint64_t n_rows); +uint64_t toku_ft_loader_get_n_rows(FTLOADER bl); + +int merge_row_arrays_base (struct row dest[/*an+bn*/], struct row a[/*an*/], int an, struct row b[/*bn*/], int bn, + int which_db, DB *dest_db, ft_compare_func, + FTLOADER, + struct rowset *); + +int merge_files (struct merge_fileset *fs, FTLOADER bl, int which_db, DB *dest_db, ft_compare_func, int progress_allocation, QUEUE); + +int sort_and_write_rows (struct rowset rows, struct merge_fileset *fs, FTLOADER bl, int which_db, DB *dest_db, ft_compare_func); + +int mergesort_row_array (struct row rows[/*n*/], int n, int which_db, DB *dest_db, ft_compare_func, FTLOADER, struct rowset *); + +//int write_file_to_dbfile (int outfile, FIDX infile, FTLOADER bl, const DESCRIPTOR descriptor, int progress_allocation); +int toku_merge_some_files_using_dbufio (const bool to_q, FIDX dest_data, QUEUE q, int n_sources, DBUFIO_FILESET bfs, FIDX srcs_fidxs[/*n_sources*/], FTLOADER bl, int which_db, DB *dest_db, ft_compare_func compare, int progress_allocation); + +int ft_loader_sort_and_write_rows (struct rowset *rows, struct merge_fileset *fs, FTLOADER bl, int which_db, DB *dest_db, ft_compare_func); + +// This is probably only for testing. 
+int toku_loader_write_ft_from_q_in_C (FTLOADER bl, + const DESCRIPTOR descriptor, + int fd, // write to here + int progress_allocation, + QUEUE q, + uint64_t total_disksize_estimate, + int which_db, + uint32_t target_nodesize, + uint32_t target_basementnodesize, + enum toku_compression_method target_compression_method, + uint32_t fanout); + +int ft_loader_mergesort_row_array (struct row rows[/*n*/], int n, int which_db, DB *dest_db, ft_compare_func, FTLOADER, struct rowset *); + +int ft_loader_write_file_to_dbfile (int outfile, FIDX infile, FTLOADER bl, const DESCRIPTOR descriptor, int progress_allocation); + +int ft_loader_init_file_infos (struct file_infos *fi); +void ft_loader_fi_destroy (struct file_infos *fi, bool is_error); +int ft_loader_fi_close (struct file_infos *fi, FIDX idx, bool require_open); +int ft_loader_fi_close_all (struct file_infos *fi); +int ft_loader_fi_reopen (struct file_infos *fi, FIDX idx, const char *mode); +int ft_loader_fi_unlink (struct file_infos *fi, FIDX idx); + +int toku_ft_loader_internal_init (/* out */ FTLOADER *blp, + CACHETABLE cachetable, + generate_row_for_put_func g, + DB *src_db, + int N, FT_HANDLE ft_hs[/*N*/], DB* dbs[/*N*/], + const char *new_fnames_in_env[/*N*/], + ft_compare_func bt_compare_functions[/*N*/], + const char *temp_file_template, + LSN load_lsn, + TOKUTXN txn, + bool reserve_memory, + uint64_t reserve_memory_size, + bool compress_intermediates, + bool allow_puts); + +void toku_ft_loader_internal_destroy (FTLOADER bl, bool is_error); + +// For test purposes only. (In production, the rowset size is determined by negotation with the cachetable for some memory. See #2613.) 
+uint64_t toku_ft_loader_get_rowset_budget_for_testing (void); + +int toku_ft_loader_finish_extractor(FTLOADER bl); + +int toku_ft_loader_get_error(FTLOADER bl, int *loader_errno); + +void ft_loader_lock_init(FTLOADER bl); +void ft_loader_lock_destroy(FTLOADER bl); +void ft_loader_set_fractal_workers_count_from_c(FTLOADER bl); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/loader/pqueue.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/loader/pqueue.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/loader/pqueue.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/loader/pqueue.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,235 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. 
+ Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include + +#include +#include "toku_os.h" +#include "ft-internal.h" +#include "loader/loader-internal.h" +#include "loader/pqueue.h" + +#define pqueue_left(i) ((i) << 1) +#define pqueue_right(i) (((i) << 1) + 1) +#define pqueue_parent(i) ((i) >> 1) + +int pqueue_init(pqueue_t **result, size_t n, int which_db, DB *db, ft_compare_func compare, struct error_callback_s *err_callback) +{ + pqueue_t *MALLOC(q); + if (!q) { + return get_error_errno(); + } + + /* Need to allocate n+1 elements since element 0 isn't used. 
*/ + MALLOC_N(n + 1, q->d); + if (!q->d) { + int r = get_error_errno(); + toku_free(q); + return r; + } + q->size = 1; + q->avail = q->step = (n+1); /* see comment above about n+1 */ + + q->which_db = which_db; + q->db = db; + q->compare = compare; + q->dup_error = 0; + + q->error_callback = err_callback; + + *result = q; + return 0; +} + +void pqueue_free(pqueue_t *q) +{ + toku_free(q->d); + toku_free(q); +} + + +size_t pqueue_size(pqueue_t *q) +{ + /* queue element 0 exists but doesn't count since it isn't used. */ + return (q->size - 1); +} + +static int pqueue_compare(pqueue_t *q, DBT *next_key, DBT *next_val, DBT *curr_key) +{ + int r = q->compare(q->db, next_key, curr_key); + if ( r == 0 ) { // duplicate key : next_key == curr_key + q->dup_error = 1; + if (q->error_callback) + ft_loader_set_error_and_callback(q->error_callback, DB_KEYEXIST, q->db, q->which_db, next_key, next_val); + } + return ( r > -1 ); +} + +static void pqueue_bubble_up(pqueue_t *q, size_t i) +{ + size_t parent_node; + pqueue_node_t *moving_node = q->d[i]; + DBT *moving_key = moving_node->key; + + for (parent_node = pqueue_parent(i); + ((i > 1) && pqueue_compare(q, q->d[parent_node]->key, q->d[parent_node]->val, moving_key)); + i = parent_node, parent_node = pqueue_parent(i)) + { + q->d[i] = q->d[parent_node]; + } + + q->d[i] = moving_node; +} + + +static size_t pqueue_maxchild(pqueue_t *q, size_t i) +{ + size_t child_node = pqueue_left(i); + + if (child_node >= q->size) + return 0; + + if ((child_node+1) < q->size && + pqueue_compare(q, q->d[child_node]->key, q->d[child_node]->val, q->d[child_node+1]->key)) + child_node++; /* use right child instead of left */ + + return child_node; +} + + +static void pqueue_percolate_down(pqueue_t *q, size_t i) +{ + size_t child_node; + pqueue_node_t *moving_node = q->d[i]; + DBT *moving_key = moving_node->key; + DBT *moving_val = moving_node->val; + + while ((child_node = pqueue_maxchild(q, i)) && + pqueue_compare(q, moving_key, moving_val, 
q->d[child_node]->key)) + { + q->d[i] = q->d[child_node]; + i = child_node; + } + + q->d[i] = moving_node; +} + + +int pqueue_insert(pqueue_t *q, pqueue_node_t *d) +{ + size_t i; + + if (!q) return 1; + if (q->size >= q->avail) return 1; + + /* insert item */ + i = q->size++; + q->d[i] = d; + pqueue_bubble_up(q, i); + + if ( q->dup_error ) return DB_KEYEXIST; + return 0; +} + +int pqueue_pop(pqueue_t *q, pqueue_node_t **d) +{ + if (!q || q->size == 1) { + *d = NULL; + return 0; + } + + *d = q->d[1]; + q->d[1] = q->d[--q->size]; + pqueue_percolate_down(q, 1); + + if ( q->dup_error ) return DB_KEYEXIST; + return 0; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/loader/pqueue.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/loader/pqueue.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/loader/pqueue.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/loader/pqueue.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,123 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ + +typedef struct ft_pqueue_node_t +{ + DBT *key; + DBT *val; + int i; +} pqueue_node_t; + +typedef struct ft_pqueue_t +{ + size_t size; + size_t avail; + size_t step; + + int which_db; + DB *db; // needed for compare function + ft_compare_func compare; + pqueue_node_t **d; + int dup_error; + + struct error_callback_s *error_callback; + +} pqueue_t; + +int pqueue_init(pqueue_t **result, size_t n, int which_db, DB *db, ft_compare_func compare, struct error_callback_s *err_callback); +void pqueue_free(pqueue_t *q); +size_t pqueue_size(pqueue_t *q); +int pqueue_insert(pqueue_t *q, pqueue_node_t *d); +int pqueue_pop(pqueue_t *q, pqueue_node_t **d); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logcursor.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logcursor.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logcursor.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logcursor.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,553 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include "log-internal.h" -#include "logcursor.h" -#include -#include - -enum lc_direction { LC_FORWARD, LC_BACKWARD, LC_FIRST, LC_LAST }; - -struct toku_logcursor { - char *logdir; // absolute directory name - char **logfiles; - int n_logfiles; - int cur_logfiles_index; - FILE *cur_fp; - size_t buffer_size; - void *buffer; - bool is_open; - struct log_entry entry; - bool entry_valid; - LSN cur_lsn; - enum lc_direction last_direction; -}; - -#define LC_LSN_ERROR (DB_RUNRECOVERY) - -void toku_logcursor_print(TOKULOGCURSOR lc) { - printf("lc = %p\n", lc); - printf(" logdir = %s\n", lc->logdir); - printf(" logfiles = %p\n", lc->logfiles); - for (int lf=0;lfn_logfiles;lf++) { - printf(" logfile[%d] = %p (%s)\n", lf, lc->logfiles[lf], lc->logfiles[lf]); - } - printf(" n_logfiles = %d\n", lc->n_logfiles); - printf(" cur_logfiles_index = %d\n", lc->cur_logfiles_index); - printf(" cur_fp = %p\n", lc->cur_fp); - printf(" cur_lsn = %" PRIu64 "\n", lc->cur_lsn.lsn); - printf(" last_direction = %d\n", (int) lc->last_direction); -} - -static int lc_close_cur_logfile(TOKULOGCURSOR lc) { - int r=0; - if ( lc->is_open ) { - r = fclose(lc->cur_fp); - assert(0==r); - lc->is_open = false; - } - return 0; -} - -static toku_off_t lc_file_len(const char *name) { - toku_struct_stat buf; - int r = toku_stat(name, &buf); - assert(r == 0); - return buf.st_size; -} - -// Cat the file and throw away the contents. This brings the file into the file system cache -// and makes subsequent accesses to it fast. The intention is to speed up backward scans of the -// file. 
-static void lc_catfile(const char *fname, void *buffer, size_t buffer_size) { - int fd = open(fname, O_RDONLY); - if (fd >= 0) { - while (1) { - ssize_t r = read(fd, buffer, buffer_size); - if ((int)r <= 0) - break; - } - close(fd); - } -} - -static int lc_open_logfile(TOKULOGCURSOR lc, int index) { - int r=0; - assert( !lc->is_open ); - if( index == -1 || index >= lc->n_logfiles) return DB_NOTFOUND; - lc_catfile(lc->logfiles[index], lc->buffer, lc->buffer_size); - lc->cur_fp = fopen(lc->logfiles[index], "rb"); - if ( lc->cur_fp == NULL ) - return DB_NOTFOUND; - r = setvbuf(lc->cur_fp, (char *) lc->buffer, _IOFBF, lc->buffer_size); - assert(r == 0); - // position fp past header, ignore 0 length file (t:2384) - unsigned int version=0; - if ( lc_file_len(lc->logfiles[index]) >= 12 ) { - r = toku_read_logmagic(lc->cur_fp, &version); - if (r!=0) - return DB_BADFORMAT; - if (version < TOKU_LOG_MIN_SUPPORTED_VERSION || version > TOKU_LOG_VERSION) - return DB_BADFORMAT; - } - // mark as open - lc->is_open = true; - return r; -} - -static int lc_check_lsn(TOKULOGCURSOR lc, int dir) { - int r=0; - LSN lsn = toku_log_entry_get_lsn(&(lc->entry)); - if (((dir == LC_FORWARD) && ( lsn.lsn != lc->cur_lsn.lsn + 1 )) || - ((dir == LC_BACKWARD) && ( lsn.lsn != lc->cur_lsn.lsn - 1 ))) { -// int index = lc->cur_logfiles_index; -// fprintf(stderr, "Bad LSN: %d %s direction = %d, lsn.lsn = %" PRIu64 ", cur_lsn.lsn=%" PRIu64 "\n", -// index, lc->logfiles[index], dir, lsn.lsn, lc->cur_lsn.lsn); - if (tokudb_recovery_trace) - printf("DB_RUNRECOVERY: %s:%d r=%d\n", __FUNCTION__, __LINE__, 0); - return LC_LSN_ERROR; - } - lc->cur_lsn.lsn = lsn.lsn; - return r; -} - -// toku_logcursor_create() -// - returns a pointer to a logcursor - -static int lc_create(TOKULOGCURSOR *lc, const char *log_dir) { - - // malloc a cursor - TOKULOGCURSOR cursor = (TOKULOGCURSOR) toku_xmalloc(sizeof(struct toku_logcursor)); - // find logfiles in logdir - cursor->is_open = false; - cursor->cur_logfiles_index = 0; 
- cursor->entry_valid = false; - cursor->buffer_size = 1<<20; // use a 1MB stream buffer (setvbuf) - cursor->buffer = toku_malloc(cursor->buffer_size); // it does not matter if it failes - // cursor->logdir must be an absolute path - if (toku_os_is_absolute_name(log_dir)) { - cursor->logdir = (char *) toku_xmalloc(strlen(log_dir)+1); - sprintf(cursor->logdir, "%s", log_dir); - } else { - char cwdbuf[PATH_MAX]; - char *cwd = getcwd(cwdbuf, PATH_MAX); - assert(cwd); - cursor->logdir = (char *) toku_xmalloc(strlen(cwd)+strlen(log_dir)+2); - sprintf(cursor->logdir, "%s/%s", cwd, log_dir); - } - cursor->logfiles = NULL; - cursor->n_logfiles = 0; - cursor->cur_fp = NULL; - cursor->cur_lsn.lsn=0; - cursor->last_direction=LC_FIRST; - - *lc = cursor; - return 0; -} - -static int lc_fix_bad_logfile(TOKULOGCURSOR lc); - -int toku_logcursor_create(TOKULOGCURSOR *lc, const char *log_dir) { - TOKULOGCURSOR cursor; - int r = lc_create(&cursor, log_dir); - if ( r!=0 ) - return r; - - r = toku_logger_find_logfiles(cursor->logdir, &(cursor->logfiles), &(cursor->n_logfiles)); - if (r!=0) { - toku_logcursor_destroy(&cursor); - } else { - *lc = cursor; - } - return r; -} - -int toku_logcursor_create_for_file(TOKULOGCURSOR *lc, const char *log_dir, const char *log_file) { - int r = lc_create(lc, log_dir); - if ( r!=0 ) - return r; - - TOKULOGCURSOR cursor = *lc; - int fullnamelen = strlen(cursor->logdir) + strlen(log_file) + 3; - char *XMALLOC_N(fullnamelen, log_file_fullname); - sprintf(log_file_fullname, "%s/%s", cursor->logdir, log_file); - - cursor->n_logfiles=1; - - char **XMALLOC(logfiles); - cursor->logfiles = logfiles; - cursor->logfiles[0] = log_file_fullname; - *lc = cursor; - return 0; -} - -int toku_logcursor_destroy(TOKULOGCURSOR *lc) { - int r=0; - if ( *lc ) { - if ( (*lc)->entry_valid ) { - toku_log_free_log_entry_resources(&((*lc)->entry)); - (*lc)->entry_valid = false; - } - r = lc_close_cur_logfile(*lc); - int lf; - for(lf=0;lf<(*lc)->n_logfiles;lf++) { - if ( 
(*lc)->logfiles[lf] ) toku_free((*lc)->logfiles[lf]); - } - if ( (*lc)->logfiles ) toku_free((*lc)->logfiles); - if ( (*lc)->logdir ) toku_free((*lc)->logdir); - if ( (*lc)->buffer ) toku_free((*lc)->buffer); - toku_free(*lc); - *lc = NULL; - } - return r; -} - -static int lc_log_read(TOKULOGCURSOR lc) -{ - int r = toku_log_fread(lc->cur_fp, &(lc->entry)); - while ( r == EOF ) { - // move to next file - r = lc_close_cur_logfile(lc); - if (r!=0) return r; - if ( lc->cur_logfiles_index == lc->n_logfiles-1) return DB_NOTFOUND; - lc->cur_logfiles_index++; - r = lc_open_logfile(lc, lc->cur_logfiles_index); - if (r!=0) return r; - r = toku_log_fread(lc->cur_fp, &(lc->entry)); - } - if (r!=0) { - toku_log_free_log_entry_resources(&(lc->entry)); - time_t tnow = time(NULL); - if (r==DB_BADFORMAT) { - fprintf(stderr, "%.24s Tokudb bad log format in %s\n", ctime(&tnow), lc->logfiles[lc->cur_logfiles_index]); - } - else { - fprintf(stderr, "%.24s Tokudb unexpected log format error '%s' in %s\n", ctime(&tnow), strerror(r), lc->logfiles[lc->cur_logfiles_index]); - } - } - return r; -} - -static int lc_log_read_backward(TOKULOGCURSOR lc) -{ - int r = toku_log_fread_backward(lc->cur_fp, &(lc->entry)); - while ( -1 == r) { // if within header length of top of file - // move to previous file - r = lc_close_cur_logfile(lc); - if (r!=0) - return r; - if ( lc->cur_logfiles_index == 0 ) - return DB_NOTFOUND; - lc->cur_logfiles_index--; - r = lc_open_logfile(lc, lc->cur_logfiles_index); - if (r!=0) - return r; - // seek to end - r = fseek(lc->cur_fp, 0, SEEK_END); - assert(0==r); - r = toku_log_fread_backward(lc->cur_fp, &(lc->entry)); - } - if (r!=0) { - toku_log_free_log_entry_resources(&(lc->entry)); - time_t tnow = time(NULL); - if (r==DB_BADFORMAT) { - fprintf(stderr, "%.24s Tokudb bad log format in %s\n", ctime(&tnow), lc->logfiles[lc->cur_logfiles_index]); - } - else { - fprintf(stderr, "%.24s Tokudb uUnexpected log format error '%s' in %s\n", ctime(&tnow), strerror(r), 
lc->logfiles[lc->cur_logfiles_index]); - } - } - return r; -} - -int toku_logcursor_next(TOKULOGCURSOR lc, struct log_entry **le) { - int r=0; - if ( lc->entry_valid ) { - toku_log_free_log_entry_resources(&(lc->entry)); - lc->entry_valid = false; - if (lc->last_direction == LC_BACKWARD) { - struct log_entry junk; - r = toku_log_fread(lc->cur_fp, &junk); - assert(r == 0); - toku_log_free_log_entry_resources(&junk); - } - } else { - r = toku_logcursor_first(lc, le); - return r; - } - // read the entry - r = lc_log_read(lc); - if (r!=0) return r; - r = lc_check_lsn(lc, LC_FORWARD); - if (r!=0) return r; - lc->last_direction = LC_FORWARD; - lc->entry_valid = true; - *le = &(lc->entry); - return r; -} - -int toku_logcursor_prev(TOKULOGCURSOR lc, struct log_entry **le) { - int r=0; - if ( lc->entry_valid ) { - toku_log_free_log_entry_resources(&(lc->entry)); - lc->entry_valid = false; - if (lc->last_direction == LC_FORWARD) { - struct log_entry junk; - r = toku_log_fread_backward(lc->cur_fp, &junk); - assert(r == 0); - toku_log_free_log_entry_resources(&junk); - } - } else { - r = toku_logcursor_last(lc, le); - return r; - } - // read the entry - r = lc_log_read_backward(lc); - if (r!=0) return r; - r = lc_check_lsn(lc, LC_BACKWARD); - if (r!=0) return r; - lc->last_direction = LC_BACKWARD; - lc->entry_valid = true; - *le = &(lc->entry); - return r; -} - -int toku_logcursor_first(TOKULOGCURSOR lc, struct log_entry **le) { - int r=0; - if ( lc->entry_valid ) { - toku_log_free_log_entry_resources(&(lc->entry)); - lc->entry_valid = false; - } - // close any but the first log file - if ( lc->cur_logfiles_index != 0 ) { - lc_close_cur_logfile(lc); - } - // open first log file if needed - if ( !lc->is_open ) { - r = lc_open_logfile(lc, 0); - if (r!=0) - return r; - lc->cur_logfiles_index = 0; - } - // read the entry - r = lc_log_read(lc); - if (r!=0) return r; - - r = lc_check_lsn(lc, LC_FIRST); - if (r!=0) return r; - lc->last_direction = LC_FIRST; - lc->entry_valid = true; 
- *le = &(lc->entry); - return r; -} - -//get last entry in the logfile specified by logcursor -int toku_logcursor_last(TOKULOGCURSOR lc, struct log_entry **le) { - int r=0; - if ( lc->entry_valid ) { - toku_log_free_log_entry_resources(&(lc->entry)); - lc->entry_valid = false; - } - // close any but last log file - if ( lc->cur_logfiles_index != lc->n_logfiles-1 ) { - lc_close_cur_logfile(lc); - } - // open last log file if needed - if ( !lc->is_open ) { - r = lc_open_logfile(lc, lc->n_logfiles-1); - if (r!=0) - return r; - lc->cur_logfiles_index = lc->n_logfiles-1; - } - while (1) { - // seek to end - r = fseek(lc->cur_fp, 0, SEEK_END); assert(r==0); - // read backward - r = toku_log_fread_backward(lc->cur_fp, &(lc->entry)); - if (r==0) // got a good entry - break; - if (r>0) { - toku_log_free_log_entry_resources(&(lc->entry)); - // got an error, - // probably a corrupted last log entry due to a crash - // try scanning forward from the beginning to find the last good entry - time_t tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery repairing log\n", ctime(&tnow)); - r = lc_fix_bad_logfile(lc); - if ( r != 0 ) { - fprintf(stderr, "%.24s Tokudb recovery repair unsuccessful\n", ctime(&tnow)); - return DB_BADFORMAT; - } - // try reading again - r = toku_log_fread_backward(lc->cur_fp, &(lc->entry)); - if (r==0) // got a good entry - break; - } - // move to previous file - r = lc_close_cur_logfile(lc); - if (r!=0) - return r; - if ( lc->cur_logfiles_index == 0 ) - return DB_NOTFOUND; - lc->cur_logfiles_index--; - r = lc_open_logfile(lc, lc->cur_logfiles_index); - if (r!=0) - return r; - } - r = lc_check_lsn(lc, LC_LAST); - if (r!=0) - return r; - lc->last_direction = LC_LAST; - lc->entry_valid = true; - *le = &(lc->entry); - return r; -} - -// return 0 if log exists, ENOENT if no log -int -toku_logcursor_log_exists(const TOKULOGCURSOR lc) { - int r; - - if (lc->n_logfiles) - r = 0; - else - r = ENOENT; - - return r; -} - -// fix a logfile with a bad last entry 
-// - return with fp pointing to end-of-file so that toku_logcursor_last can be retried -static int lc_fix_bad_logfile(TOKULOGCURSOR lc) { - struct log_entry le; - unsigned int version=0; - int r = 0; - - r = fseek(lc->cur_fp, 0, SEEK_SET); - if ( r!=0 ) - return r; - r = toku_read_logmagic(lc->cur_fp, &version); - if ( r!=0 ) - return r; - if (version != TOKU_LOG_VERSION) - return -1; - - toku_off_t last_good_pos; - last_good_pos = ftello(lc->cur_fp); - while (1) { - // initialize le - // - reading incomplete entries can result in fields that cannot be freed - memset(&le, 0, sizeof(le)); - r = toku_log_fread(lc->cur_fp, &le); - toku_log_free_log_entry_resources(&le); - if ( r!=0 ) - break; - last_good_pos = ftello(lc->cur_fp); - } - // now have position of last good entry - // 1) close the file - // 2) truncate the file to remove the error - // 3) reopen the file - // 4) set the pos to last - r = lc_close_cur_logfile(lc); - if ( r!=0 ) - return r; - r = truncate(lc->logfiles[lc->n_logfiles - 1], last_good_pos); - if ( r!=0 ) - return r; - r = lc_open_logfile(lc, lc->n_logfiles-1); - if ( r!=0 ) - return r; - r = fseek(lc->cur_fp, 0, SEEK_END); - if ( r!=0 ) - return r; - return 0; -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logcursor.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logcursor.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logcursor.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logcursor.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,132 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKULOGCURSOR_H -#define TOKULOGCURSOR_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are 
met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. 
- - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include - - -struct toku_logcursor; -typedef struct toku_logcursor *TOKULOGCURSOR; - -// All routines return 0 on success - -// toku_logcursor_create() -// - creates a logcursor (lc) -// - following toku_logcursor_create() -// if toku_logcursor_next() is called, it returns the first entry in the log -// if toku_logcursor_prev() is called, it returns the last entry in the log -int toku_logcursor_create(TOKULOGCURSOR *lc, const char *log_dir); -// toku_logcursor_create_for_file() -// - creates a logcusor (lc) that only knows about the file log_file -int toku_logcursor_create_for_file(TOKULOGCURSOR *lc, const char *log_dir, const char *log_file); -// toku_logcursor_destroy() -// - frees all resources associated with the logcursor, including the log_entry -// associated with the latest cursor action -int toku_logcursor_destroy(TOKULOGCURSOR *lc); - -// toku_logcursor_[next,prev,first,last] take care of malloc'ing and free'ing log_entrys. -// - routines NULL out the **le pointers on entry, then set the **le pointers to -// the malloc'ed entries when successful, -int toku_logcursor_next(TOKULOGCURSOR lc, struct log_entry **le); -int toku_logcursor_prev(TOKULOGCURSOR lc, struct log_entry **le); - -int toku_logcursor_first(const TOKULOGCURSOR lc, struct log_entry **le); -int toku_logcursor_last(const TOKULOGCURSOR lc, struct log_entry **le); - -// return 0 if log exists, ENOENT if no log -int toku_logcursor_log_exists(const TOKULOGCURSOR lc); - -void toku_logcursor_print(TOKULOGCURSOR lc); - - -#endif // TOKULOGCURSOR_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logfilemgr.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logfilemgr.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logfilemgr.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logfilemgr.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,260 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: 
ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. 
- -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
-#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "log-internal.h" -#include "logcursor.h" -#include "logfilemgr.h" - -// for now, implement with singlely-linked-list -// first = oldest (delete from beginning) -// last = newest (add to end) - -struct lfm_entry { - TOKULOGFILEINFO lf_info; - struct lfm_entry *next; -}; - -struct toku_logfilemgr { - struct lfm_entry *first; - struct lfm_entry *last; - int n_entries; -}; - -int toku_logfilemgr_create(TOKULOGFILEMGR *lfm) { - // malloc a logfilemgr - TOKULOGFILEMGR XMALLOC(mgr); - mgr->first = NULL; - mgr->last = NULL; - mgr->n_entries = 0; - *lfm = mgr; - return 0; -} - -int toku_logfilemgr_destroy(TOKULOGFILEMGR *lfm) { - int r=0; - if ( *lfm != NULL ) { // be tolerant of being passed a NULL - TOKULOGFILEMGR mgr = *lfm; - while ( mgr->n_entries > 0 ) { - toku_logfilemgr_delete_oldest_logfile_info(mgr); - } - toku_free(*lfm); - *lfm = NULL; - } - return r; -} - -int toku_logfilemgr_init(TOKULOGFILEMGR lfm, const char *log_dir, TXNID *last_xid_if_clean_shutdown) { - invariant_notnull(lfm); - invariant_notnull(last_xid_if_clean_shutdown); - - int r; - int n_logfiles; - char **logfiles; - r = toku_logger_find_logfiles(log_dir, &logfiles, &n_logfiles); - if (r!=0) - return r; - - TOKULOGCURSOR cursor; - struct log_entry *entry; - TOKULOGFILEINFO lf_info; - long long index = -1; - char *basename; - LSN tmp_lsn = {0}; - TXNID last_xid = TXNID_NONE; - for(int i=0;i=TOKU_LOG_MIN_SUPPORTED_VERSION); - assert(version<=TOKU_LOG_VERSION); - lf_info->index = index; - lf_info->version = version; - // find last LSN in logfile - r = toku_logcursor_create_for_file(&cursor, log_dir, basename); - if (r!=0) { - return r; - } - r = toku_logcursor_last(cursor, 
&entry); // set "entry" to last log entry in logfile - if (r == 0) { - lf_info->maxlsn = toku_log_entry_get_lsn(entry); - - invariant(lf_info->maxlsn.lsn >= tmp_lsn.lsn); - tmp_lsn = lf_info->maxlsn; - if (entry->cmd == LT_shutdown) { - last_xid = entry->u.shutdown.last_xid; - } else { - last_xid = TXNID_NONE; - } - } - else { - lf_info->maxlsn = tmp_lsn; // handle empty logfile (no LSN in file) case - } - - // add to logfilemgr - toku_logfilemgr_add_logfile_info(lfm, lf_info); - toku_logcursor_destroy(&cursor); - } - for(int i=0;in_entries; -} - -int toku_logfilemgr_add_logfile_info(TOKULOGFILEMGR lfm, TOKULOGFILEINFO lf_info) { - assert(lfm); - struct lfm_entry *XMALLOC(entry); - entry->lf_info = lf_info; - entry->next = NULL; - if ( lfm->n_entries != 0 ) - lfm->last->next = entry; - lfm->last = entry; - lfm->n_entries++; - if (lfm->n_entries == 1 ) { - lfm->first = lfm->last; - } - return 0; -} - -TOKULOGFILEINFO toku_logfilemgr_get_oldest_logfile_info(TOKULOGFILEMGR lfm) { - assert(lfm); - return lfm->first->lf_info; -} - -void toku_logfilemgr_delete_oldest_logfile_info(TOKULOGFILEMGR lfm) { - assert(lfm); - if ( lfm->n_entries > 0 ) { - struct lfm_entry *entry = lfm->first; - toku_free(entry->lf_info); - lfm->first = entry->next; - toku_free(entry); - lfm->n_entries--; - if ( lfm->n_entries == 0 ) { - lfm->last = lfm->first = NULL; - } - } -} - -LSN toku_logfilemgr_get_last_lsn(TOKULOGFILEMGR lfm) { - assert(lfm); - if ( lfm->n_entries == 0 ) { - LSN lsn; - lsn.lsn = 0; - return lsn; - } - return lfm->last->lf_info->maxlsn; -} - -void toku_logfilemgr_update_last_lsn(TOKULOGFILEMGR lfm, LSN lsn) { - assert(lfm); - assert(lfm->last!=NULL); - lfm->last->lf_info->maxlsn = lsn; -} - -void toku_logfilemgr_print(TOKULOGFILEMGR lfm) { - assert(lfm); - printf("toku_logfilemgr_print [%p] : %d entries \n", lfm, lfm->n_entries); - struct lfm_entry *entry = lfm->first; - for (int i=0;in_entries;i++) { - printf(" entry %d : index = %" PRId64 ", maxlsn = %" PRIu64 "\n", i, 
entry->lf_info->index, entry->lf_info->maxlsn.lsn); - entry = entry->next; - } -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logfilemgr.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logfilemgr.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logfilemgr.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logfilemgr.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,123 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKULOGFILEMGR_H -#define TOKULOGFILEMGR_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include - - -// this is the basic information we need to keep per logfile -struct toku_logfile_info { - int64_t index; - LSN maxlsn; - uint32_t version; -}; -typedef struct toku_logfile_info *TOKULOGFILEINFO; - -struct toku_logfilemgr; -typedef struct toku_logfilemgr *TOKULOGFILEMGR; - -int toku_logfilemgr_create(TOKULOGFILEMGR *lfm); -int toku_logfilemgr_destroy(TOKULOGFILEMGR *lfm); - -int toku_logfilemgr_init(TOKULOGFILEMGR lfm, const char *log_dir, TXNID *last_xid_if_clean_shutdown); -int toku_logfilemgr_num_logfiles(TOKULOGFILEMGR lfm); -int toku_logfilemgr_add_logfile_info(TOKULOGFILEMGR lfm, TOKULOGFILEINFO lf_info); -TOKULOGFILEINFO toku_logfilemgr_get_oldest_logfile_info(TOKULOGFILEMGR lfm); -void toku_logfilemgr_delete_oldest_logfile_info(TOKULOGFILEMGR lfm); -LSN toku_logfilemgr_get_last_lsn(TOKULOGFILEMGR lfm); -void toku_logfilemgr_update_last_lsn(TOKULOGFILEMGR lfm, LSN lsn); - -void toku_logfilemgr_print(TOKULOGFILEMGR lfm); - - -#endif //TOKULOGFILEMGR_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logformat.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logformat.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logformat.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logformat.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,881 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -/* This file defines the logformat in an executable fashion. - * This code is used to generate - * The code that writes into the log. - * The code that reads the log and prints it to stdout (the log_print utility) - * The code that reads the log for recovery. - * The struct definitions. - * The Latex documentation. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -typedef struct field { - const char *type; - const char *name; - const char *format; // optional format string -} F; - -#define NULLFIELD {0,0,0} -#define FA (F[]) - -enum log_begin_action { - IGNORE_LOG_BEGIN, - SHOULD_LOG_BEGIN, - ASSERT_BEGIN_WAS_LOGGED, - LOG_BEGIN_ACTION_NA = IGNORE_LOG_BEGIN -}; - -struct logtype { - const char *name; - unsigned int command_and_flags; - struct field *fields; - enum log_begin_action log_begin_action; -}; - -// In the fields, don't mention the command, the LSN, the CRC or the trailing LEN. - -const struct logtype rollbacks[] = { - //TODO: #2037 Add dname - {"fdelete", 'U', FA{{"FILENUM", "filenum", 0}, - NULLFIELD}, LOG_BEGIN_ACTION_NA}, - //TODO: #2037 Add dname - {"fcreate", 'F', FA{{"FILENUM", "filenum", 0}, - {"BYTESTRING", "iname", 0}, - NULLFIELD}, LOG_BEGIN_ACTION_NA}, - // cmdinsert is used to insert a key-value pair into a DB. For rollback we don't need the data. 
- {"cmdinsert", 'i', FA{ - {"FILENUM", "filenum", 0}, - {"BYTESTRING", "key", 0}, - NULLFIELD}, LOG_BEGIN_ACTION_NA}, - {"cmddelete", 'd', FA{ - {"FILENUM", "filenum", 0}, - {"BYTESTRING", "key", 0}, - NULLFIELD}, LOG_BEGIN_ACTION_NA}, - {"rollinclude", 'r', FA{{"TXNID_PAIR", "xid", 0}, - {"uint64_t", "num_nodes", 0}, - {"BLOCKNUM", "spilled_head", 0}, - {"BLOCKNUM", "spilled_tail", 0}, - NULLFIELD}, LOG_BEGIN_ACTION_NA}, - {"load", 'l', FA{{"FILENUM", "old_filenum", 0}, - {"BYTESTRING", "new_iname", 0}, - NULLFIELD}, LOG_BEGIN_ACTION_NA}, - // #2954 - {"hot_index", 'h', FA{{"FILENUMS", "hot_index_filenums", 0}, - NULLFIELD}, LOG_BEGIN_ACTION_NA}, - {"dictionary_redirect", 'R', FA{{"FILENUM", "old_filenum", 0}, - {"FILENUM", "new_filenum", 0}, - NULLFIELD}, LOG_BEGIN_ACTION_NA}, - {"cmdupdate", 'u', FA{{"FILENUM", "filenum", 0}, - {"BYTESTRING", "key", 0}, - NULLFIELD}, LOG_BEGIN_ACTION_NA}, - {"cmdupdatebroadcast", 'B', FA{{"FILENUM", "filenum", 0}, - {"bool", "is_resetting_op", 0}, - NULLFIELD}, LOG_BEGIN_ACTION_NA}, - {"change_fdescriptor", 'D', FA{{"FILENUM", "filenum", 0}, - {"BYTESTRING", "old_descriptor", 0}, - NULLFIELD}, LOG_BEGIN_ACTION_NA}, - {0,0,FA{NULLFIELD}, LOG_BEGIN_ACTION_NA} -}; - -const struct logtype logtypes[] = { - // Records produced by checkpoints -#if 0 // no longer used, but reserve the type - {"local_txn_checkpoint", 'c', FA{{"TXNID", "xid", 0}, NULLFIELD}}, -#endif - {"begin_checkpoint", 'x', FA{{"uint64_t", "timestamp", 0}, {"TXNID", "last_xid", 0}, NULLFIELD}, IGNORE_LOG_BEGIN}, - {"end_checkpoint", 'X', FA{{"LSN", "lsn_begin_checkpoint", 0}, - {"uint64_t", "timestamp", 0}, - {"uint32_t", "num_fassociate_entries", 0}, // how many files were checkpointed - {"uint32_t", "num_xstillopen_entries", 0}, // how many txns were checkpointed - NULLFIELD}, IGNORE_LOG_BEGIN}, - //TODO: #2037 Add dname - {"fassociate", 'f', FA{{"FILENUM", "filenum", 0}, - {"uint32_t", "treeflags", 0}, - {"BYTESTRING", "iname", 0}, // pathname of file - {"uint8_t", 
"unlink_on_close", 0}, - NULLFIELD}, IGNORE_LOG_BEGIN}, - //We do not use a TXNINFO struct since recovery log has - //FILENUMS and TOKUTXN has FTs (for open_fts) - {"xstillopen", 's', FA{{"TXNID_PAIR", "xid", 0}, - {"TXNID_PAIR", "parentxid", 0}, - {"uint64_t", "rollentry_raw_count", 0}, - {"FILENUMS", "open_filenums", 0}, - {"uint8_t", "force_fsync_on_commit", 0}, - {"uint64_t", "num_rollback_nodes", 0}, - {"uint64_t", "num_rollentries", 0}, - {"BLOCKNUM", "spilled_rollback_head", 0}, - {"BLOCKNUM", "spilled_rollback_tail", 0}, - {"BLOCKNUM", "current_rollback", 0}, - NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED}, // record all transactions - // prepared txns need a gid - {"xstillopenprepared", 'p', FA{{"TXNID_PAIR", "xid", 0}, - {"XIDP", "xa_xid", 0}, // prepared transactions need a gid, and have no parentxid. - {"uint64_t", "rollentry_raw_count", 0}, - {"FILENUMS", "open_filenums", 0}, - {"uint8_t", "force_fsync_on_commit", 0}, - {"uint64_t", "num_rollback_nodes", 0}, - {"uint64_t", "num_rollentries", 0}, - {"BLOCKNUM", "spilled_rollback_head", 0}, - {"BLOCKNUM", "spilled_rollback_tail", 0}, - {"BLOCKNUM", "current_rollback", 0}, - NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED}, // record all transactions - // Records produced by transactions - {"xbegin", 'b', FA{{"TXNID_PAIR", "xid", 0},{"TXNID_PAIR", "parentxid", 0},NULLFIELD}, IGNORE_LOG_BEGIN}, - {"xcommit",'C', FA{{"TXNID_PAIR", "xid", 0},NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED}, - {"xprepare",'P', FA{{"TXNID_PAIR", "xid", 0}, {"XIDP", "xa_xid", 0}, NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED}, - {"xabort", 'q', FA{{"TXNID_PAIR", "xid", 0},NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED}, - //TODO: #2037 Add dname - {"fcreate", 'F', FA{{"TXNID_PAIR", "xid", 0}, - {"FILENUM", "filenum", 0}, - {"BYTESTRING", "iname", 0}, - {"uint32_t", "mode", "0%o"}, - {"uint32_t", "treeflags", 0}, - {"uint32_t", "nodesize", 0}, - {"uint32_t", "basementnodesize", 0}, - {"uint32_t", "compression_method", 0}, - NULLFIELD}, SHOULD_LOG_BEGIN}, - //TODO: #2037 Add dname - 
{"fopen", 'O', FA{{"BYTESTRING", "iname", 0}, - {"FILENUM", "filenum", 0}, - {"uint32_t", "treeflags", 0}, - NULLFIELD}, IGNORE_LOG_BEGIN}, - //TODO: #2037 Add dname - {"fclose", 'e', FA{{"BYTESTRING", "iname", 0}, - {"FILENUM", "filenum", 0}, - NULLFIELD}, IGNORE_LOG_BEGIN}, - //TODO: #2037 Add dname - {"fdelete", 'U', FA{{"TXNID_PAIR", "xid", 0}, - {"FILENUM", "filenum", 0}, - NULLFIELD}, SHOULD_LOG_BEGIN}, - {"enq_insert", 'I', FA{{"FILENUM", "filenum", 0}, - {"TXNID_PAIR", "xid", 0}, - {"BYTESTRING", "key", 0}, - {"BYTESTRING", "value", 0}, - NULLFIELD}, SHOULD_LOG_BEGIN}, - {"enq_insert_no_overwrite", 'i', FA{{"FILENUM", "filenum", 0}, - {"TXNID_PAIR", "xid", 0}, - {"BYTESTRING", "key", 0}, - {"BYTESTRING", "value", 0}, - NULLFIELD}, SHOULD_LOG_BEGIN}, - {"enq_delete_any", 'E', FA{{"FILENUM", "filenum", 0}, - {"TXNID_PAIR", "xid", 0}, - {"BYTESTRING", "key", 0}, - NULLFIELD}, SHOULD_LOG_BEGIN}, - {"enq_insert_multiple", 'm', FA{{"FILENUM", "src_filenum", 0}, - {"FILENUMS", "dest_filenums", 0}, - {"TXNID_PAIR", "xid", 0}, - {"BYTESTRING", "src_key", 0}, - {"BYTESTRING", "src_val", 0}, - NULLFIELD}, SHOULD_LOG_BEGIN}, - {"enq_delete_multiple", 'M', FA{{"FILENUM", "src_filenum", 0}, - {"FILENUMS", "dest_filenums", 0}, - {"TXNID_PAIR", "xid", 0}, - {"BYTESTRING", "src_key", 0}, - {"BYTESTRING", "src_val", 0}, - NULLFIELD}, SHOULD_LOG_BEGIN}, - {"comment", 'T', FA{{"uint64_t", "timestamp", 0}, - {"BYTESTRING", "comment", 0}, - NULLFIELD}, IGNORE_LOG_BEGIN}, - // Note: shutdown_up_to_19 log entry is NOT ALLOWED TO BE CHANGED. - // Do not change the letter ('Q'), do not add fields, - // do not remove fields. - // TODO: Kill this logentry entirely once we no longer support version 19. - {"shutdown_up_to_19", 'Q', FA{{"uint64_t", "timestamp", 0}, - NULLFIELD}, IGNORE_LOG_BEGIN}, - // Note: Shutdown log entry is NOT ALLOWED TO BE CHANGED. - // Do not change the letter ('0'), do not add fields, - // do not remove fields. 
- // You CAN leave this alone and add a new one, but then you have - // to deal with the upgrade mechanism again. - // This is how we detect clean shutdowns from OLDER VERSIONS. - // This log entry must always be readable for future versions. - // If you DO change it, you need to write a separate log upgrade mechanism. - {"shutdown", '0', FA{{"uint64_t", "timestamp", 0}, - {"TXNID", "last_xid", 0}, - NULLFIELD}, IGNORE_LOG_BEGIN}, - {"load", 'l', FA{{"TXNID_PAIR", "xid", 0}, - {"FILENUM", "old_filenum", 0}, - {"BYTESTRING", "new_iname", 0}, - NULLFIELD}, SHOULD_LOG_BEGIN}, - // #2954 - {"hot_index", 'h', FA{{"TXNID_PAIR", "xid", 0}, - {"FILENUMS", "hot_index_filenums", 0}, - NULLFIELD}, SHOULD_LOG_BEGIN}, - {"enq_update", 'u', FA{{"FILENUM", "filenum", 0}, - {"TXNID_PAIR", "xid", 0}, - {"BYTESTRING", "key", 0}, - {"BYTESTRING", "extra", 0}, - NULLFIELD}, SHOULD_LOG_BEGIN}, - {"enq_updatebroadcast", 'B', FA{{"FILENUM", "filenum", 0}, - {"TXNID_PAIR", "xid", 0}, - {"BYTESTRING", "extra", 0}, - {"bool", "is_resetting_op", 0}, - NULLFIELD}, SHOULD_LOG_BEGIN}, - {"change_fdescriptor", 'D', FA{{"FILENUM", "filenum", 0}, - {"TXNID_PAIR", "xid", 0}, - {"BYTESTRING", "old_descriptor", 0}, - {"BYTESTRING", "new_descriptor", 0}, - {"bool", "update_cmp_descriptor", 0}, - NULLFIELD}, SHOULD_LOG_BEGIN}, - {0,0,FA{NULLFIELD}, (enum log_begin_action) 0} -}; - - -#define DO_STRUCTS(lt, array, body) do { \ - const struct logtype *lt; \ - for (lt=&array[0]; lt->name; lt++) { \ - body; \ - } } while (0) - -#define DO_ROLLBACKS(lt, body) DO_STRUCTS(lt, rollbacks, body) - -#define DO_LOGTYPES(lt, body) DO_STRUCTS(lt, logtypes, body) - -#define DO_LOGTYPES_AND_ROLLBACKS(lt, body) (DO_ROLLBACKS(lt,body), DO_LOGTYPES(lt, body)) - -#define DO_FIELDS(fld, lt, body) do { \ - struct field *fld; \ - for (fld=lt->fields; fld->type; fld++) { \ - body; \ - } } while (0) - - -static void __attribute__((format (printf, 3, 4))) fprintf2 (FILE *f1, FILE *f2, const char *format, ...) 
{ - va_list ap; - int r; - va_start(ap, format); - r=vfprintf(f1, format, ap); assert(r>=0); - va_end(ap); - va_start(ap, format); - r=vfprintf(f2, format, ap); assert(r>=0); - va_end(ap); -} - -FILE *hf=0, *cf=0, *pf=0; - -static void -generate_enum_internal (const char *enum_name, const char *enum_prefix, const struct logtype *lts) { - char used_cmds[256]; - int count=0; - memset(used_cmds, 0, 256); - fprintf(hf, "enum %s {", enum_name); - DO_STRUCTS(lt, lts, - { - unsigned char cmd = (unsigned char)(lt->command_and_flags&0xff); - if (count!=0) fprintf(hf, ","); - count++; - fprintf(hf, "\n"); - fprintf(hf," %s_%-16s = '%c'", enum_prefix, lt->name, cmd); - if (used_cmds[cmd]!=0) { fprintf(stderr, "%s:%d: error: Command %d (%c) was used twice (second time for %s)\n", __FILE__, __LINE__, cmd, cmd, lt->name); abort(); } - used_cmds[cmd]=1; - }); - fprintf(hf, "\n};\n\n"); - -} - -static void -generate_enum (void) { - generate_enum_internal("lt_cmd", "LT", logtypes); - generate_enum_internal("rt_cmd", "RT", rollbacks); -} - -static void -generate_log_struct (void) { - DO_LOGTYPES(lt, - { fprintf(hf, "struct logtype_%s {\n", lt->name); - fprintf(hf, " %-16s lsn;\n", "LSN"); - DO_FIELDS(field_type, lt, - fprintf(hf, " %-16s %s;\n", field_type->type, field_type->name)); - fprintf(hf, " %-16s crc;\n", "uint32_t"); - fprintf(hf, " %-16s len;\n", "uint32_t"); - fprintf(hf, "};\n"); - //fprintf(hf, "void toku_recover_%s (LSN lsn", lt->name); - //DO_FIELDS(field_type, lt, fprintf(hf, ", %s %s", field_type->type, field_type->name)); - //fprintf(hf, ");\n"); - }); - DO_ROLLBACKS(lt, - { fprintf(hf, "struct rolltype_%s {\n", lt->name); - DO_FIELDS(field_type, lt, - fprintf(hf, " %-16s %s;\n", field_type->type, field_type->name)); - fprintf(hf, "};\n"); - fprintf(hf, "int toku_rollback_%s (", lt->name); - DO_FIELDS(field_type, lt, fprintf(hf, "%s %s,", field_type->type, field_type->name)); - fprintf(hf, "TOKUTXN txn, LSN oplsn);\n"); - fprintf(hf, "int toku_commit_%s (", 
lt->name); - DO_FIELDS(field_type, lt, fprintf(hf, "%s %s,", field_type->type, field_type->name)); - fprintf(hf, "TOKUTXN txn, LSN oplsn);\n"); - }); - fprintf(hf, "struct log_entry {\n"); - fprintf(hf, " enum lt_cmd cmd;\n"); - fprintf(hf, " union {\n"); - DO_LOGTYPES(lt, fprintf(hf," struct logtype_%s %s;\n", lt->name, lt->name)); - fprintf(hf, " } u;\n"); - fprintf(hf, "};\n"); - - fprintf(hf, "struct roll_entry {\n"); - fprintf(hf, " enum rt_cmd cmd;\n"); - fprintf(hf, " struct roll_entry *prev; /* for in-memory list of log entries. Threads from newest to oldest. */\n"); - fprintf(hf, " union {\n"); - DO_ROLLBACKS(lt, fprintf(hf," struct rolltype_%s %s;\n", lt->name, lt->name)); - fprintf(hf, " } u;\n"); - fprintf(hf, "};\n"); - -} - -static void -generate_dispatch (void) { - fprintf(hf, "#define rolltype_dispatch(s, funprefix) ({ switch((s)->cmd) {\\\n"); - DO_ROLLBACKS(lt, fprintf(hf, " case RT_%s: funprefix ## %s (&(s)->u.%s); break;\\\n", lt->name, lt->name, lt->name)); - fprintf(hf, " }})\n"); - - fprintf(hf, "#define logtype_dispatch_assign(s, funprefix, var, ...) do { switch((s)->cmd) {\\\n"); - DO_LOGTYPES(lt, fprintf(hf, " case LT_%s: var = funprefix ## %s (&(s)->u.%s, __VA_ARGS__); break;\\\n", lt->name, lt->name, lt->name)); - fprintf(hf, " }} while (0)\n"); - - fprintf(hf, "#define rolltype_dispatch_assign(s, funprefix, var, ...) do { \\\n"); - fprintf(hf, " switch((s)->cmd) {\\\n"); - DO_ROLLBACKS(lt, { - fprintf(hf, " case RT_%s: var = funprefix ## %s (", lt->name, lt->name); - int fieldcount=0; - DO_FIELDS(field_type, lt, { - if (fieldcount>0) fprintf(hf, ","); - fprintf(hf, "(s)->u.%s.%s", lt->name, field_type->name); - fieldcount++; - }); - fprintf(hf, ", __VA_ARGS__); break;\\\n"); - }); - fprintf(hf, " default: assert(0);} } while (0)\n"); - - fprintf(hf, "#define logtype_dispatch_args(s, funprefix, ...) 
do { switch((s)->cmd) {\\\n"); - DO_LOGTYPES(lt, - { - fprintf(hf, " case LT_%s: funprefix ## %s ((s)->u.%s.lsn", lt->name, lt->name, lt->name); - DO_FIELDS(field_type, lt, fprintf(hf, ",(s)->u.%s.%s", lt->name, field_type->name)); - fprintf(hf, ", __VA_ARGS__); break;\\\n"); - }); - fprintf(hf, " }} while (0)\n"); -} - -static void -generate_get_timestamp(void) { - fprintf(cf, "static uint64_t toku_get_timestamp(void) {\n"); - fprintf(cf, " struct timeval tv; int r = gettimeofday(&tv, NULL);\n"); - fprintf(cf, " assert(r==0);\n"); - fprintf(cf, " return tv.tv_sec * 1000000ULL + tv.tv_usec;\n"); - fprintf(cf, "}\n"); -} - -static void -generate_log_writer (void) { - generate_get_timestamp(); - DO_LOGTYPES(lt, { - //TODO(yoni): The overhead variables are NOT correct for BYTESTRING, FILENUMS (or any other variable length type) - // We should switch to something like using toku_logsizeof_*. - fprintf(hf, "static const size_t toku_log_%s_overhead = (+4+1+8", lt->name); - DO_FIELDS(field_type, lt, fprintf(hf, "+sizeof(%s)", field_type->type)); - fprintf(hf, "+8);\n"); - fprintf2(cf, hf, "void toku_log_%s (TOKULOGGER logger, LSN *lsnp, int do_fsync", lt->name); - switch (lt->log_begin_action) { - case SHOULD_LOG_BEGIN: - case ASSERT_BEGIN_WAS_LOGGED: { - fprintf2(cf, hf, ", TOKUTXN txn"); - break; - } - case IGNORE_LOG_BEGIN: break; - } - DO_FIELDS(field_type, lt, fprintf2(cf, hf, ", %s %s", field_type->type, field_type->name)); - fprintf(hf, ");\n"); - fprintf(cf, ") {\n"); - fprintf(cf, " if (logger == NULL) {\n"); - fprintf(cf, " return;\n"); - fprintf(cf, " }\n"); - switch (lt->log_begin_action) { - case SHOULD_LOG_BEGIN: { - fprintf(cf, " //txn can be NULL during tests\n"); - fprintf(cf, " //never null when not checkpoint.\n"); - fprintf(cf, " if (txn && !txn->begin_was_logged) {\n"); - fprintf(cf, " invariant(!txn_declared_read_only(txn));\n"); - fprintf(cf, " toku_maybe_log_begin_txn_for_write_operation(txn);\n"); - fprintf(cf, " }\n"); - break; - } - case 
ASSERT_BEGIN_WAS_LOGGED: { - fprintf(cf, " //txn can be NULL during tests\n"); - fprintf(cf, " invariant(!txn || txn->begin_was_logged);\n"); - fprintf(cf, " invariant(!txn || !txn_declared_read_only(txn));\n"); - break; - } - case IGNORE_LOG_BEGIN: break; - } - fprintf(cf, " if (!logger->write_log_files) {\n"); - fprintf(cf, " ml_lock(&logger->input_lock);\n"); - fprintf(cf, " logger->lsn.lsn++;\n"); - fprintf(cf, " if (lsnp) *lsnp=logger->lsn;\n"); - fprintf(cf, " ml_unlock(&logger->input_lock);\n"); - fprintf(cf, " return;\n"); - fprintf(cf, " }\n"); - fprintf(cf, " const unsigned int buflen= (+4 // len at the beginning\n"); - fprintf(cf, " +1 // log command\n"); - fprintf(cf, " +8 // lsn\n"); - DO_FIELDS(field_type, lt, - fprintf(cf, " +toku_logsizeof_%s(%s)\n", field_type->type, field_type->name)); - fprintf(cf, " +8 // crc + len\n"); - fprintf(cf, " );\n"); - fprintf(cf, " struct wbuf wbuf;\n"); - fprintf(cf, " ml_lock(&logger->input_lock);\n"); - fprintf(cf, " toku_logger_make_space_in_inbuf(logger, buflen);\n"); - fprintf(cf, " wbuf_nocrc_init(&wbuf, logger->inbuf.buf+logger->inbuf.n_in_buf, buflen);\n"); - fprintf(cf, " wbuf_nocrc_int(&wbuf, buflen);\n"); - fprintf(cf, " wbuf_nocrc_char(&wbuf, '%c');\n", (char)(0xff<->command_and_flags)); - fprintf(cf, " logger->lsn.lsn++;\n"); - fprintf(cf, " logger->inbuf.max_lsn_in_buf = logger->lsn;\n"); - fprintf(cf, " wbuf_nocrc_LSN(&wbuf, logger->lsn);\n"); - fprintf(cf, " if (lsnp) *lsnp=logger->lsn;\n"); - DO_FIELDS(field_type, lt, - if (strcmp(field_type->name, "timestamp") == 0) - fprintf(cf, " if (timestamp == 0) timestamp = toku_get_timestamp();\n"); - fprintf(cf, " wbuf_nocrc_%s(&wbuf, %s);\n", field_type->type, field_type->name)); - fprintf(cf, " wbuf_nocrc_int(&wbuf, toku_x1764_memory(wbuf.buf, wbuf.ndone));\n"); - fprintf(cf, " wbuf_nocrc_int(&wbuf, buflen);\n"); - fprintf(cf, " assert(wbuf.ndone==buflen);\n"); - fprintf(cf, " logger->inbuf.n_in_buf += buflen;\n"); - fprintf(cf, " 
toku_logger_maybe_fsync(logger, logger->lsn, do_fsync, true);\n"); - fprintf(cf, "}\n\n"); - }); -} - -static void -generate_log_reader (void) { - DO_LOGTYPES(lt, { - fprintf(cf, "static int toku_log_fread_%s (FILE *infile, uint32_t len1, struct logtype_%s *data, struct x1764 *checksum)", lt->name, lt->name); - fprintf(cf, " {\n"); - fprintf(cf, " int r=0;\n"); - fprintf(cf, " uint32_t actual_len=5; // 1 for the command, 4 for the first len.\n"); - fprintf(cf, " r=toku_fread_%-16s(infile, &data->%-16s, checksum, &actual_len); if (r!=0) return r;\n", "LSN", "lsn"); - DO_FIELDS(field_type, lt, - fprintf(cf, " r=toku_fread_%-16s(infile, &data->%-16s, checksum, &actual_len); if (r!=0) return r;\n", field_type->type, field_type->name)); - fprintf(cf, " uint32_t checksum_in_file, len_in_file;\n"); - fprintf(cf, " r=toku_fread_uint32_t_nocrclen(infile, &checksum_in_file); actual_len+=4; if (r!=0) return r;\n"); - fprintf(cf, " r=toku_fread_uint32_t_nocrclen(infile, &len_in_file); actual_len+=4; if (r!=0) return r;\n"); - fprintf(cf, " if (checksum_in_file!=toku_x1764_finish(checksum) || len_in_file!=actual_len || len1 != len_in_file) return DB_BADFORMAT;\n"); - fprintf(cf, " return 0;\n"); - fprintf(cf, "}\n\n"); - }); - fprintf2(cf, hf, "int toku_log_fread (FILE *infile, struct log_entry *le)"); - fprintf(hf, ";\n"); - fprintf(cf, " {\n"); - fprintf(cf, " uint32_t len1; int r;\n"); - fprintf(cf, " uint32_t ignorelen=0;\n"); - fprintf(cf, " struct x1764 checksum;\n"); - fprintf(cf, " toku_x1764_init(&checksum);\n"); - fprintf(cf, " r = toku_fread_uint32_t(infile, &len1, &checksum, &ignorelen); if (r!=0) return r;\n"); - fprintf(cf, " int cmd=fgetc(infile);\n"); - fprintf(cf, " if (cmd==EOF) return EOF;\n"); - fprintf(cf, " char cmdchar = (char)cmd;\n"); - fprintf(cf, " toku_x1764_add(&checksum, &cmdchar, 1);\n"); - fprintf(cf, " le->cmd=(enum lt_cmd)cmd;\n"); - fprintf(cf, " switch ((enum lt_cmd)cmd) {\n"); - DO_LOGTYPES(lt, { - fprintf(cf, " case LT_%s:\n", lt->name); - 
fprintf(cf, " return toku_log_fread_%s (infile, len1, &le->u.%s, &checksum);\n", lt->name, lt->name); - }); - fprintf(cf, " };\n"); - fprintf(cf, " return DB_BADFORMAT;\n"); // Should read past the record using the len field. - fprintf(cf, "}\n\n"); - //fprintf2(cf, hf, "// Return 0 if there is something to read, return -1 if nothing to read, abort if an error.\n"); - fprintf2(cf, hf, "// Return 0 if there is something to read, -1 if nothing to read, >0 on error\n"); - fprintf2(cf, hf, "int toku_log_fread_backward (FILE *infile, struct log_entry *le)"); - fprintf(hf, ";\n"); - fprintf(cf, "{\n"); - fprintf(cf, " memset(le, 0, sizeof(*le));\n"); - fprintf(cf, " long pos = ftell(infile);\n"); - fprintf(cf, " if (pos<=12) return -1;\n"); - fprintf(cf, " int r = fseek(infile, -4, SEEK_CUR); \n");// assert(r==0);\n"); - fprintf(cf, " if (r!=0) return get_error_errno();\n"); - fprintf(cf, " uint32_t len;\n"); - fprintf(cf, " r = toku_fread_uint32_t_nocrclen(infile, &len); \n");// assert(r==0);\n"); - fprintf(cf, " if (r!=0) return 1;\n"); - fprintf(cf, " r = fseek(infile, -(int)len, SEEK_CUR) ; \n");// assert(r==0);\n"); - fprintf(cf, " if (r!=0) return get_error_errno();\n"); - fprintf(cf, " r = toku_log_fread(infile, le); \n");// assert(r==0);\n"); - fprintf(cf, " if (r!=0) return 1;\n"); - fprintf(cf, " long afterpos = ftell(infile);\n"); - fprintf(cf, " if (afterpos != pos) return 1;\n"); - fprintf(cf, " r = fseek(infile, -(int)len, SEEK_CUR); \n");// assert(r==0);\n"); - fprintf(cf, " if (r!=0) return get_error_errno();\n"); - fprintf(cf, " return 0;\n"); - fprintf(cf, "}\n\n"); - - DO_LOGTYPES(lt, ({ - fprintf(cf, "static void toku_log_free_log_entry_%s_resources (struct logtype_%s *data", lt->name, lt->name); - if (!lt->fields->type) fprintf(cf, " __attribute__((__unused__))"); - fprintf(cf, ") {\n"); - DO_FIELDS(field_type, lt, - fprintf(cf, " toku_free_%s(data->%s);\n", field_type->type, field_type->name); - ); - fprintf(cf, "}\n\n"); - })); - fprintf2(cf, hf, 
"void toku_log_free_log_entry_resources (struct log_entry *le)"); - fprintf(hf, ";\n"); - fprintf(cf, " {\n"); - fprintf(cf, " switch ((enum lt_cmd)le->cmd) {\n"); - DO_LOGTYPES(lt, { - fprintf(cf, " case LT_%s:\n", lt->name); - fprintf(cf, " return toku_log_free_log_entry_%s_resources (&(le->u.%s));\n", lt->name, lt->name); - }); - fprintf(cf, " };\n"); - fprintf(cf, " return;\n"); - fprintf(cf, "}\n\n"); -} - -static void -generate_logprint (void) { - unsigned maxnamelen=0; - fprintf2(pf, hf, "int toku_logprint_one_record(FILE *outf, FILE *f)"); - fprintf(hf, ";\n"); - fprintf(pf, " {\n"); - fprintf(pf, " int cmd, r;\n"); - fprintf(pf, " uint32_t len1, crc_in_file;\n"); - fprintf(pf, " uint32_t ignorelen=0;\n"); - fprintf(pf, " struct x1764 checksum;\n"); - fprintf(pf, " toku_x1764_init(&checksum);\n"); - fprintf(pf, " r=toku_fread_uint32_t(f, &len1, &checksum, &ignorelen);\n"); - fprintf(pf, " if (r==EOF) return EOF;\n"); - fprintf(pf, " cmd=fgetc(f);\n"); - fprintf(pf, " if (cmd==EOF) return DB_BADFORMAT;\n"); - fprintf(pf, " uint32_t len_in_file, len=1+4; // cmd + len1\n"); - fprintf(pf, " char charcmd = (char)cmd;\n"); - fprintf(pf, " toku_x1764_add(&checksum, &charcmd, 1);\n"); - fprintf(pf, " switch ((enum lt_cmd)cmd) {\n"); - DO_LOGTYPES(lt, { if (strlen(lt->name)>maxnamelen) maxnamelen=strlen(lt->name); }); - DO_LOGTYPES(lt, { - unsigned char cmd = (unsigned char)(0xff<->command_and_flags); - fprintf(pf, " case LT_%s: \n", lt->name); - // We aren't using the log reader here because we want better diagnostics as soon as things go wrong. 
- fprintf(pf, " fprintf(outf, \"%%-%us \", \"%s\");\n", maxnamelen, lt->name); - if (isprint(cmd)) fprintf(pf," fprintf(outf, \" '%c':\");\n", cmd); - else fprintf(pf," fprintf(outf, \"0%03o:\");\n", cmd); - fprintf(pf, " r = toku_logprint_%-16s(outf, f, \"lsn\", &checksum, &len, 0); if (r!=0) return r;\n", "LSN"); - DO_FIELDS(field_type, lt, { - fprintf(pf, " r = toku_logprint_%-16s(outf, f, \"%s\", &checksum, &len,", field_type->type, field_type->name); - if (field_type->format) fprintf(pf, "\"%s\"", field_type->format); - else fprintf(pf, "0"); - fprintf(pf, "); if (r!=0) return r;\n"); - }); - fprintf(pf, " {\n"); - fprintf(pf, " uint32_t actual_murmur = toku_x1764_finish(&checksum);\n"); - fprintf(pf, " r = toku_fread_uint32_t_nocrclen (f, &crc_in_file); len+=4; if (r!=0) return r;\n"); - fprintf(pf, " fprintf(outf, \" crc=%%08x\", crc_in_file);\n"); - fprintf(pf, " if (crc_in_file!=actual_murmur) fprintf(outf, \" checksum=%%08x\", actual_murmur);\n"); - fprintf(pf, " r = toku_fread_uint32_t_nocrclen (f, &len_in_file); len+=4; if (r!=0) return r;\n"); - fprintf(pf, " fprintf(outf, \" len=%%u\", len_in_file);\n"); - fprintf(pf, " if (len_in_file!=len) fprintf(outf, \" actual_len=%%u\", len);\n"); - fprintf(pf, " if (len_in_file!=len || crc_in_file!=actual_murmur) return DB_BADFORMAT;\n"); - fprintf(pf, " };\n"); - fprintf(pf, " fprintf(outf, \"\\n\");\n"); - fprintf(pf, " return 0;\n\n"); - }); - fprintf(pf, " }\n"); - fprintf(pf, " fprintf(outf, \"Unknown command %%d ('%%c')\", cmd, cmd);\n"); - fprintf(pf, " return DB_BADFORMAT;\n"); - fprintf(pf, "}\n\n"); -} - -static void -generate_rollbacks (void) { - DO_ROLLBACKS(lt, { - fprintf2(cf, hf, "void toku_logger_save_rollback_%s (TOKUTXN txn", lt->name); - DO_FIELDS(field_type, lt, { - if ( strcmp(field_type->type, "BYTESTRING") == 0 ) { - fprintf2(cf, hf, ", BYTESTRING *%s_ptr", field_type->name); - } - else if ( strcmp(field_type->type, "FILENUMS") == 0 ) { - fprintf2(cf, hf, ", FILENUMS *%s_ptr", 
field_type->name); - } - else { - fprintf2(cf, hf, ", %s %s", field_type->type, field_type->name); - } - }); - - fprintf(hf, ");\n"); - fprintf(cf, ") {\n"); - fprintf(cf, " toku_txn_lock(txn);\n"); - fprintf(cf, " ROLLBACK_LOG_NODE log;\n"); - fprintf(cf, " toku_get_and_pin_rollback_log_for_new_entry(txn, &log);\n"); - // 'memdup' all BYTESTRINGS here - DO_FIELDS(field_type, lt, { - if ( strcmp(field_type->type, "BYTESTRING") == 0 ) { - fprintf(cf, " BYTESTRING %s = {\n" - " .len = %s_ptr->len,\n" - " .data = cast_to_typeof(%s.data) toku_memdup_in_rollback(log, %s_ptr->data, %s_ptr->len)\n" - " };\n", - field_type->name, field_type->name, field_type->name, field_type->name, field_type->name); - } - if ( strcmp(field_type->type, "FILENUMS") == 0 ) { - fprintf(cf, " FILENUMS %s = {\n" - " .num = %s_ptr->num,\n" - " .filenums = cast_to_typeof(%s.filenums) toku_memdup_in_rollback(log, %s_ptr->filenums, %s_ptr->num * (sizeof (FILENUM)))\n" - " };\n", - field_type->name, field_type->name, field_type->name, field_type->name, field_type->name); - } - }); - { - int count=0; - fprintf(cf, " uint32_t rollback_fsize = toku_logger_rollback_fsize_%s(", lt->name); - DO_FIELDS(field_type, lt, fprintf(cf, "%s%s", (count++>0)?", ":"", field_type->name)); - fprintf(cf, ");\n"); - } - fprintf(cf, " struct roll_entry *v;\n"); - fprintf(cf, " size_t mem_needed = sizeof(v->u.%s) + __builtin_offsetof(struct roll_entry, u.%s);\n", lt->name, lt->name); - fprintf(cf, " CAST_FROM_VOIDP(v, toku_malloc_in_rollback(log, mem_needed));\n"); - fprintf(cf, " assert(v);\n"); - fprintf(cf, " v->cmd = (enum rt_cmd)%u;\n", lt->command_and_flags&0xff); - DO_FIELDS(field_type, lt, fprintf(cf, " v->u.%s.%s = %s;\n", lt->name, field_type->name, field_type->name)); - fprintf(cf, " v->prev = log->newest_logentry;\n"); - fprintf(cf, " if (log->oldest_logentry==NULL) log->oldest_logentry=v;\n"); - fprintf(cf, " log->newest_logentry = v;\n"); - fprintf(cf, " log->rollentry_resident_bytecount += 
rollback_fsize;\n"); - fprintf(cf, " txn->roll_info.rollentry_raw_count += rollback_fsize;\n"); - fprintf(cf, " txn->roll_info.num_rollentries++;\n"); - fprintf(cf, " log->dirty = true;\n"); - fprintf(cf, " // spill and unpin assert success internally\n"); - fprintf(cf, " toku_maybe_spill_rollbacks(txn, log);\n"); - fprintf(cf, " toku_rollback_log_unpin(txn, log);\n"); - fprintf(cf, " toku_txn_unlock(txn);\n"); - fprintf(cf, "}\n"); - }); - - DO_ROLLBACKS(lt, { - fprintf2(cf, hf, "void toku_logger_rollback_wbuf_nocrc_write_%s (struct wbuf *wbuf", lt->name); - DO_FIELDS(field_type, lt, fprintf2(cf, hf, ", %s %s", field_type->type, field_type->name)); - fprintf2(cf, hf, ")"); - fprintf(hf, ";\n"); - fprintf(cf, " {\n"); - - { - int count=0; - fprintf(cf, " uint32_t rollback_fsize = toku_logger_rollback_fsize_%s(", lt->name); - DO_FIELDS(field_type, lt, fprintf(cf, "%s%s", (count++>0)?", ":"", field_type->name)); - fprintf(cf, ");\n"); - fprintf(cf, " wbuf_nocrc_int(wbuf, rollback_fsize);\n"); - } - fprintf(cf, " wbuf_nocrc_char(wbuf, '%c');\n", (char)(0xff<->command_and_flags)); - DO_FIELDS(field_type, lt, fprintf(cf, " wbuf_nocrc_%s(wbuf, %s);\n", field_type->type, field_type->name)); - fprintf(cf, "}\n"); - }); - fprintf2(cf, hf, "void toku_logger_rollback_wbuf_nocrc_write (struct wbuf *wbuf, struct roll_entry *r)"); - fprintf(hf, ";\n"); - fprintf(cf, " {\n switch (r->cmd) {\n"); - DO_ROLLBACKS(lt, { - fprintf(cf, " case RT_%s: toku_logger_rollback_wbuf_nocrc_write_%s(wbuf", lt->name, lt->name); - DO_FIELDS(field_type, lt, fprintf(cf, ", r->u.%s.%s", lt->name, field_type->name)); - fprintf(cf, "); return;\n"); - }); - fprintf(cf, " }\n assert(0);\n"); - fprintf(cf, "}\n"); - DO_ROLLBACKS(lt, { - fprintf2(cf, hf, "uint32_t toku_logger_rollback_fsize_%s (", lt->name); - int count=0; - DO_FIELDS(field_type, lt, fprintf2(cf, hf, "%s%s %s", (count++>0)?", ":"", field_type->type, field_type->name)); - fprintf(hf, ");\n"); - fprintf(cf, ") {\n"); - fprintf(cf, " return 1 
/* the cmd*/\n"); - fprintf(cf, " + 4 /* the int at the end saying the size */"); - DO_FIELDS(field_type, lt, - fprintf(cf, "\n + toku_logsizeof_%s(%s)", field_type->type, field_type->name)); - fprintf(cf, ";\n}\n"); - }); - fprintf2(cf, hf, "uint32_t toku_logger_rollback_fsize(struct roll_entry *item)"); - fprintf(hf, ";\n"); - fprintf(cf, "{\n switch(item->cmd) {\n"); - DO_ROLLBACKS(lt, { - fprintf(cf, " case RT_%s: return toku_logger_rollback_fsize_%s(", lt->name, lt->name); - int count=0; - DO_FIELDS(field_type, lt, fprintf(cf, "%sitem->u.%s.%s", (count++>0)?", ":"", lt->name, field_type->name)); - fprintf(cf, ");\n"); - }); - fprintf(cf, " }\n assert(0);\n return 0;\n"); - fprintf(cf, "}\n"); - - fprintf2(cf, hf, "int toku_parse_rollback(unsigned char *buf, uint32_t n_bytes, struct roll_entry **itemp, MEMARENA ma)"); - fprintf(hf, ";\n"); - fprintf(cf, " {\n assert(n_bytes>0);\n struct roll_entry *item;\n enum rt_cmd cmd = (enum rt_cmd)(buf[0]);\n size_t mem_needed;\n"); - fprintf(cf, " struct rbuf rc = {buf, n_bytes, 1};\n"); - fprintf(cf, " switch(cmd) {\n"); - DO_ROLLBACKS(lt, { - fprintf(cf, " case RT_%s:\n", lt->name); - fprintf(cf, " mem_needed = sizeof(item->u.%s) + __builtin_offsetof(struct roll_entry, u.%s);\n", lt->name, lt->name); - fprintf(cf, " CAST_FROM_VOIDP(item, toku_memarena_malloc(ma, mem_needed));\n"); - fprintf(cf, " item->cmd = cmd;\n"); - DO_FIELDS(field_type, lt, fprintf(cf, " rbuf_ma_%s(&rc, ma, &item->u.%s.%s);\n", field_type->type, lt->name, field_type->name)); - fprintf(cf, " *itemp = item;\n"); - fprintf(cf, " return 0;\n"); - }); - fprintf(cf, " }\n return EINVAL;\n}\n"); -} - -static void -generate_log_entry_functions(void) { - fprintf(hf, "LSN toku_log_entry_get_lsn(struct log_entry *);\n"); - fprintf(cf, "LSN toku_log_entry_get_lsn(struct log_entry *le) {\n"); - fprintf(cf, " return le->u.begin_checkpoint.lsn;\n"); - fprintf(cf, "}\n"); -} - -const char codefile[] = "log_code.cc"; -const char printfile[] = "log_print.cc"; 
-const char headerfile[] = "log_header.h"; -int main (int argc, const char *const argv[]) { - assert(argc==2); // the single argument is the directory into which to put things - const char *dir = argv[1]; - size_t codepathlen = sizeof(codefile) + strlen(dir) + 4; - size_t printpathlen = sizeof(printfile) + strlen(dir) + 4; - size_t headerpathlen = sizeof(headerfile) + strlen(dir) + 4; - char codepath[codepathlen]; - char printpath[printpathlen]; - char headerpath[headerpathlen]; - { int r = snprintf(codepath, codepathlen, "%s/%s", argv[1], codefile); assert(r<(int)codepathlen); } - { int r = snprintf(printpath, printpathlen, "%s/%s", argv[1], printfile); assert(r<(int)printpathlen); } - { int r = snprintf(headerpath, headerpathlen, "%s/%s", argv[1], headerfile); assert(r<(int)headerpathlen); } - chmod(codepath, S_IRUSR|S_IWUSR); - chmod(headerpath, S_IRUSR|S_IWUSR); - unlink(codepath); - unlink(headerpath); - cf = fopen(codepath, "w"); - if (cf==0) { int r = get_error_errno(); printf("fopen of %s failed because of errno=%d (%s)\n", codepath, r, strerror(r)); } // sometimes this is failing, so let's make a better diagnostic - assert(cf!=0); - hf = fopen(headerpath, "w"); assert(hf!=0); - pf = fopen(printpath, "w"); assert(pf!=0); - fprintf2(cf, hf, "/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */\n"); - fprintf2(cf, hf, "// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:\n"); - fprintf(hf, "#ifndef LOG_HEADER_H\n"); - fprintf(hf, "#define LOG_HEADER_H\n"); - fprintf2(cf, hf, "/* Do not edit this file. This code generated by logformat.c. Copyright (c) 2007-2013 Tokutek Inc. */\n"); - fprintf2(cf, hf, "#ident \"Copyright (c) 2007-2013 Tokutek Inc. 
All rights reserved.\"\n"); - fprintf2(cf, pf, "#include \n"); - fprintf2(cf, pf, "#include \n"); - fprintf2(cf, pf, "#include \n"); - fprintf2(cf, pf, "#include \n"); - fprintf(hf, "#include \n"); - fprintf(hf, "#include \n"); - generate_enum(); - generate_log_struct(); - generate_dispatch(); - generate_log_writer(); - generate_log_reader(); - generate_rollbacks(); - generate_log_entry_functions(); - generate_logprint(); - fprintf(hf, "#endif\n"); - { - int r=fclose(hf); assert(r==0); - r=fclose(cf); assert(r==0); - r=fclose(pf); assert(r==0); - // Make it tougher to modify by mistake - chmod(codepath, S_IRUSR|S_IRGRP|S_IROTH); - chmod(headerpath, S_IRUSR|S_IRGRP|S_IROTH); - } - return 0; -} - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/logcursor.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/logcursor.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/logcursor.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/logcursor.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,551 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include + +#include "log-internal.h" +#include "logger/logcursor.h" +#include +#include + +enum lc_direction { LC_FORWARD, LC_BACKWARD, LC_FIRST, LC_LAST }; + +struct toku_logcursor { + char *logdir; // absolute directory name + char **logfiles; + int n_logfiles; + int cur_logfiles_index; + FILE *cur_fp; + size_t buffer_size; + void *buffer; + bool is_open; + struct log_entry entry; + bool entry_valid; + LSN cur_lsn; + enum lc_direction last_direction; +}; + +#define LC_LSN_ERROR (DB_RUNRECOVERY) + +void toku_logcursor_print(TOKULOGCURSOR lc) { + printf("lc = %p\n", lc); + printf(" logdir = %s\n", lc->logdir); + printf(" logfiles = %p\n", lc->logfiles); + for (int lf=0;lfn_logfiles;lf++) { + printf(" logfile[%d] = %p (%s)\n", lf, lc->logfiles[lf], lc->logfiles[lf]); + } + printf(" n_logfiles = %d\n", lc->n_logfiles); + printf(" cur_logfiles_index = %d\n", lc->cur_logfiles_index); + printf(" cur_fp = %p\n", lc->cur_fp); + printf(" cur_lsn = %" PRIu64 "\n", lc->cur_lsn.lsn); + printf(" last_direction = %d\n", (int) lc->last_direction); +} + +static int lc_close_cur_logfile(TOKULOGCURSOR lc) { + int r=0; + if ( lc->is_open ) { + r = fclose(lc->cur_fp); + assert(0==r); + lc->is_open = false; + } + return 0; +} + +static toku_off_t lc_file_len(const char *name) { + toku_struct_stat buf; + int r = toku_stat(name, &buf); + assert(r == 0); + return buf.st_size; +} + +// Cat the file and throw away the contents. This brings the file into the file system cache +// and makes subsequent accesses to it fast. The intention is to speed up backward scans of the +// file. 
+static void lc_catfile(const char *fname, void *buffer, size_t buffer_size) { + int fd = open(fname, O_RDONLY); + if (fd >= 0) { + while (1) { + ssize_t r = read(fd, buffer, buffer_size); + if ((int)r <= 0) + break; + } + close(fd); + } +} + +static int lc_open_logfile(TOKULOGCURSOR lc, int index) { + int r=0; + assert( !lc->is_open ); + if( index == -1 || index >= lc->n_logfiles) return DB_NOTFOUND; + lc_catfile(lc->logfiles[index], lc->buffer, lc->buffer_size); + lc->cur_fp = fopen(lc->logfiles[index], "rb"); + if ( lc->cur_fp == NULL ) + return DB_NOTFOUND; + r = setvbuf(lc->cur_fp, (char *) lc->buffer, _IOFBF, lc->buffer_size); + assert(r == 0); + // position fp past header, ignore 0 length file (t:2384) + unsigned int version=0; + if ( lc_file_len(lc->logfiles[index]) >= 12 ) { + r = toku_read_logmagic(lc->cur_fp, &version); + if (r!=0) + return DB_BADFORMAT; + if (version < TOKU_LOG_MIN_SUPPORTED_VERSION || version > TOKU_LOG_VERSION) + return DB_BADFORMAT; + } + // mark as open + lc->is_open = true; + return r; +} + +static int lc_check_lsn(TOKULOGCURSOR lc, int dir) { + int r=0; + LSN lsn = toku_log_entry_get_lsn(&(lc->entry)); + if (((dir == LC_FORWARD) && ( lsn.lsn != lc->cur_lsn.lsn + 1 )) || + ((dir == LC_BACKWARD) && ( lsn.lsn != lc->cur_lsn.lsn - 1 ))) { +// int index = lc->cur_logfiles_index; +// fprintf(stderr, "Bad LSN: %d %s direction = %d, lsn.lsn = %" PRIu64 ", cur_lsn.lsn=%" PRIu64 "\n", +// index, lc->logfiles[index], dir, lsn.lsn, lc->cur_lsn.lsn); + if (tokuft_recovery_trace) + printf("DB_RUNRECOVERY: %s:%d r=%d\n", __FUNCTION__, __LINE__, 0); + return LC_LSN_ERROR; + } + lc->cur_lsn.lsn = lsn.lsn; + return r; +} + +// toku_logcursor_create() +// - returns a pointer to a logcursor + +static int lc_create(TOKULOGCURSOR *lc, const char *log_dir) { + + // malloc a cursor + TOKULOGCURSOR cursor = (TOKULOGCURSOR) toku_xmalloc(sizeof(struct toku_logcursor)); + // find logfiles in logdir + cursor->is_open = false; + cursor->cur_logfiles_index = 0; 
+ cursor->entry_valid = false; + cursor->buffer_size = 1<<20; // use a 1MB stream buffer (setvbuf) + cursor->buffer = toku_malloc(cursor->buffer_size); // it does not matter if it failes + // cursor->logdir must be an absolute path + if (toku_os_is_absolute_name(log_dir)) { + cursor->logdir = (char *) toku_xmalloc(strlen(log_dir)+1); + sprintf(cursor->logdir, "%s", log_dir); + } else { + char cwdbuf[PATH_MAX]; + char *cwd = getcwd(cwdbuf, PATH_MAX); + assert(cwd); + cursor->logdir = (char *) toku_xmalloc(strlen(cwd)+strlen(log_dir)+2); + sprintf(cursor->logdir, "%s/%s", cwd, log_dir); + } + cursor->logfiles = NULL; + cursor->n_logfiles = 0; + cursor->cur_fp = NULL; + cursor->cur_lsn.lsn=0; + cursor->last_direction=LC_FIRST; + + *lc = cursor; + return 0; +} + +static int lc_fix_bad_logfile(TOKULOGCURSOR lc); + +int toku_logcursor_create(TOKULOGCURSOR *lc, const char *log_dir) { + TOKULOGCURSOR cursor; + int r = lc_create(&cursor, log_dir); + if ( r!=0 ) + return r; + + r = toku_logger_find_logfiles(cursor->logdir, &(cursor->logfiles), &(cursor->n_logfiles)); + if (r!=0) { + toku_logcursor_destroy(&cursor); + } else { + *lc = cursor; + } + return r; +} + +int toku_logcursor_create_for_file(TOKULOGCURSOR *lc, const char *log_dir, const char *log_file) { + int r = lc_create(lc, log_dir); + if ( r!=0 ) + return r; + + TOKULOGCURSOR cursor = *lc; + int fullnamelen = strlen(cursor->logdir) + strlen(log_file) + 3; + char *XMALLOC_N(fullnamelen, log_file_fullname); + sprintf(log_file_fullname, "%s/%s", cursor->logdir, log_file); + + cursor->n_logfiles=1; + + char **XMALLOC(logfiles); + cursor->logfiles = logfiles; + cursor->logfiles[0] = log_file_fullname; + *lc = cursor; + return 0; +} + +int toku_logcursor_destroy(TOKULOGCURSOR *lc) { + int r=0; + if ( *lc ) { + if ( (*lc)->entry_valid ) { + toku_log_free_log_entry_resources(&((*lc)->entry)); + (*lc)->entry_valid = false; + } + r = lc_close_cur_logfile(*lc); + toku_logger_free_logfiles((*lc)->logfiles, (*lc)->n_logfiles); 
+ if ( (*lc)->logdir ) toku_free((*lc)->logdir); + if ( (*lc)->buffer ) toku_free((*lc)->buffer); + toku_free(*lc); + *lc = NULL; + } + return r; +} + +static int lc_log_read(TOKULOGCURSOR lc) +{ + int r = toku_log_fread(lc->cur_fp, &(lc->entry)); + while ( r == EOF ) { + // move to next file + r = lc_close_cur_logfile(lc); + if (r!=0) return r; + if ( lc->cur_logfiles_index == lc->n_logfiles-1) return DB_NOTFOUND; + lc->cur_logfiles_index++; + r = lc_open_logfile(lc, lc->cur_logfiles_index); + if (r!=0) return r; + r = toku_log_fread(lc->cur_fp, &(lc->entry)); + } + if (r!=0) { + toku_log_free_log_entry_resources(&(lc->entry)); + time_t tnow = time(NULL); + if (r==DB_BADFORMAT) { + fprintf(stderr, "%.24s TokuFT bad log format in %s\n", ctime(&tnow), lc->logfiles[lc->cur_logfiles_index]); + } + else { + fprintf(stderr, "%.24s TokuFT unexpected log format error '%s' in %s\n", ctime(&tnow), strerror(r), lc->logfiles[lc->cur_logfiles_index]); + } + } + return r; +} + +static int lc_log_read_backward(TOKULOGCURSOR lc) +{ + int r = toku_log_fread_backward(lc->cur_fp, &(lc->entry)); + while ( -1 == r) { // if within header length of top of file + // move to previous file + r = lc_close_cur_logfile(lc); + if (r!=0) + return r; + if ( lc->cur_logfiles_index == 0 ) + return DB_NOTFOUND; + lc->cur_logfiles_index--; + r = lc_open_logfile(lc, lc->cur_logfiles_index); + if (r!=0) + return r; + // seek to end + r = fseek(lc->cur_fp, 0, SEEK_END); + assert(0==r); + r = toku_log_fread_backward(lc->cur_fp, &(lc->entry)); + } + if (r!=0) { + toku_log_free_log_entry_resources(&(lc->entry)); + time_t tnow = time(NULL); + if (r==DB_BADFORMAT) { + fprintf(stderr, "%.24s TokuFT bad log format in %s\n", ctime(&tnow), lc->logfiles[lc->cur_logfiles_index]); + } + else { + fprintf(stderr, "%.24s TokuFT uUnexpected log format error '%s' in %s\n", ctime(&tnow), strerror(r), lc->logfiles[lc->cur_logfiles_index]); + } + } + return r; +} + +int toku_logcursor_next(TOKULOGCURSOR lc, struct 
log_entry **le) { + int r=0; + if ( lc->entry_valid ) { + toku_log_free_log_entry_resources(&(lc->entry)); + lc->entry_valid = false; + if (lc->last_direction == LC_BACKWARD) { + struct log_entry junk; + r = toku_log_fread(lc->cur_fp, &junk); + assert(r == 0); + toku_log_free_log_entry_resources(&junk); + } + } else { + r = toku_logcursor_first(lc, le); + return r; + } + // read the entry + r = lc_log_read(lc); + if (r!=0) return r; + r = lc_check_lsn(lc, LC_FORWARD); + if (r!=0) return r; + lc->last_direction = LC_FORWARD; + lc->entry_valid = true; + *le = &(lc->entry); + return r; +} + +int toku_logcursor_prev(TOKULOGCURSOR lc, struct log_entry **le) { + int r=0; + if ( lc->entry_valid ) { + toku_log_free_log_entry_resources(&(lc->entry)); + lc->entry_valid = false; + if (lc->last_direction == LC_FORWARD) { + struct log_entry junk; + r = toku_log_fread_backward(lc->cur_fp, &junk); + assert(r == 0); + toku_log_free_log_entry_resources(&junk); + } + } else { + r = toku_logcursor_last(lc, le); + return r; + } + // read the entry + r = lc_log_read_backward(lc); + if (r!=0) return r; + r = lc_check_lsn(lc, LC_BACKWARD); + if (r!=0) return r; + lc->last_direction = LC_BACKWARD; + lc->entry_valid = true; + *le = &(lc->entry); + return r; +} + +int toku_logcursor_first(TOKULOGCURSOR lc, struct log_entry **le) { + int r=0; + if ( lc->entry_valid ) { + toku_log_free_log_entry_resources(&(lc->entry)); + lc->entry_valid = false; + } + // close any but the first log file + if ( lc->cur_logfiles_index != 0 ) { + lc_close_cur_logfile(lc); + } + // open first log file if needed + if ( !lc->is_open ) { + r = lc_open_logfile(lc, 0); + if (r!=0) + return r; + lc->cur_logfiles_index = 0; + } + // read the entry + r = lc_log_read(lc); + if (r!=0) return r; + + r = lc_check_lsn(lc, LC_FIRST); + if (r!=0) return r; + lc->last_direction = LC_FIRST; + lc->entry_valid = true; + *le = &(lc->entry); + return r; +} + +//get last entry in the logfile specified by logcursor +int 
toku_logcursor_last(TOKULOGCURSOR lc, struct log_entry **le) { + int r=0; + if ( lc->entry_valid ) { + toku_log_free_log_entry_resources(&(lc->entry)); + lc->entry_valid = false; + } + // close any but last log file + if ( lc->cur_logfiles_index != lc->n_logfiles-1 ) { + lc_close_cur_logfile(lc); + } + // open last log file if needed + if ( !lc->is_open ) { + r = lc_open_logfile(lc, lc->n_logfiles-1); + if (r!=0) + return r; + lc->cur_logfiles_index = lc->n_logfiles-1; + } + while (1) { + // seek to end + r = fseek(lc->cur_fp, 0, SEEK_END); assert(r==0); + // read backward + r = toku_log_fread_backward(lc->cur_fp, &(lc->entry)); + if (r==0) // got a good entry + break; + if (r>0) { + toku_log_free_log_entry_resources(&(lc->entry)); + // got an error, + // probably a corrupted last log entry due to a crash + // try scanning forward from the beginning to find the last good entry + time_t tnow = time(NULL); + fprintf(stderr, "%.24s TokuFT recovery repairing log\n", ctime(&tnow)); + r = lc_fix_bad_logfile(lc); + if ( r != 0 ) { + fprintf(stderr, "%.24s TokuFT recovery repair unsuccessful\n", ctime(&tnow)); + return DB_BADFORMAT; + } + // try reading again + r = toku_log_fread_backward(lc->cur_fp, &(lc->entry)); + if (r==0) // got a good entry + break; + } + // move to previous file + r = lc_close_cur_logfile(lc); + if (r!=0) + return r; + if ( lc->cur_logfiles_index == 0 ) + return DB_NOTFOUND; + lc->cur_logfiles_index--; + r = lc_open_logfile(lc, lc->cur_logfiles_index); + if (r!=0) + return r; + } + r = lc_check_lsn(lc, LC_LAST); + if (r!=0) + return r; + lc->last_direction = LC_LAST; + lc->entry_valid = true; + *le = &(lc->entry); + return r; +} + +// return 0 if log exists, ENOENT if no log +int +toku_logcursor_log_exists(const TOKULOGCURSOR lc) { + int r; + + if (lc->n_logfiles) + r = 0; + else + r = ENOENT; + + return r; +} + +// fix a logfile with a bad last entry +// - return with fp pointing to end-of-file so that toku_logcursor_last can be retried +static int 
lc_fix_bad_logfile(TOKULOGCURSOR lc) { + struct log_entry le; + unsigned int version=0; + int r = 0; + + r = fseek(lc->cur_fp, 0, SEEK_SET); + if ( r!=0 ) + return r; + r = toku_read_logmagic(lc->cur_fp, &version); + if ( r!=0 ) + return r; + if (version != TOKU_LOG_VERSION) + return -1; + + toku_off_t last_good_pos; + last_good_pos = ftello(lc->cur_fp); + while (1) { + // initialize le + // - reading incomplete entries can result in fields that cannot be freed + memset(&le, 0, sizeof(le)); + r = toku_log_fread(lc->cur_fp, &le); + toku_log_free_log_entry_resources(&le); + if ( r!=0 ) + break; + last_good_pos = ftello(lc->cur_fp); + } + // now have position of last good entry + // 1) close the file + // 2) truncate the file to remove the error + // 3) reopen the file + // 4) set the pos to last + r = lc_close_cur_logfile(lc); + if ( r!=0 ) + return r; + r = truncate(lc->logfiles[lc->n_logfiles - 1], last_good_pos); + if ( r!=0 ) + return r; + r = lc_open_logfile(lc, lc->n_logfiles-1); + if ( r!=0 ) + return r; + r = fseek(lc->cur_fp, 0, SEEK_END); + if ( r!=0 ) + return r; + return 0; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/logcursor.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/logcursor.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/logcursor.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/logcursor.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,128 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the 
+ DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. 
+ + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include + +struct toku_logcursor; +typedef struct toku_logcursor *TOKULOGCURSOR; + +// All routines return 0 on success + +// toku_logcursor_create() +// - creates a logcursor (lc) +// - following toku_logcursor_create() +// if toku_logcursor_next() is called, it returns the first entry in the log +// if toku_logcursor_prev() is called, it returns the last entry in the log +int toku_logcursor_create(TOKULOGCURSOR *lc, const char *log_dir); +// toku_logcursor_create_for_file() +// - creates a logcusor (lc) that only knows about the file log_file +int toku_logcursor_create_for_file(TOKULOGCURSOR *lc, const char *log_dir, const char *log_file); +// toku_logcursor_destroy() +// - frees all resources associated with the logcursor, including the log_entry +// associated with the latest cursor action +int toku_logcursor_destroy(TOKULOGCURSOR *lc); + +// toku_logcursor_[next,prev,first,last] take care of malloc'ing and free'ing log_entrys. +// - routines NULL out the **le pointers on entry, then set the **le pointers to +// the malloc'ed entries when successful, +int toku_logcursor_next(TOKULOGCURSOR lc, struct log_entry **le); +int toku_logcursor_prev(TOKULOGCURSOR lc, struct log_entry **le); + +int toku_logcursor_first(const TOKULOGCURSOR lc, struct log_entry **le); +int toku_logcursor_last(const TOKULOGCURSOR lc, struct log_entry **le); + +// return 0 if log exists, ENOENT if no log +int toku_logcursor_log_exists(const TOKULOGCURSOR lc); + +void toku_logcursor_print(TOKULOGCURSOR lc); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/logfilemgr.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/logfilemgr.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/logfilemgr.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/logfilemgr.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,259 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: 
ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. 
+ +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
+#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include + +#include "logger/log-internal.h" +#include "logger/logcursor.h" +#include "logger/logfilemgr.h" + +// for now, implement with singlely-linked-list +// first = oldest (delete from beginning) +// last = newest (add to end) + +struct lfm_entry { + TOKULOGFILEINFO lf_info; + struct lfm_entry *next; +}; + +struct toku_logfilemgr { + struct lfm_entry *first; + struct lfm_entry *last; + int n_entries; +}; + +int toku_logfilemgr_create(TOKULOGFILEMGR *lfm) { + // malloc a logfilemgr + TOKULOGFILEMGR XMALLOC(mgr); + mgr->first = NULL; + mgr->last = NULL; + mgr->n_entries = 0; + *lfm = mgr; + return 0; +} + +int toku_logfilemgr_destroy(TOKULOGFILEMGR *lfm) { + int r=0; + if ( *lfm != NULL ) { // be tolerant of being passed a NULL + TOKULOGFILEMGR mgr = *lfm; + while ( mgr->n_entries > 0 ) { + toku_logfilemgr_delete_oldest_logfile_info(mgr); + } + toku_free(*lfm); + *lfm = NULL; + } + return r; +} + +int toku_logfilemgr_init(TOKULOGFILEMGR lfm, const char *log_dir, TXNID *last_xid_if_clean_shutdown) { + invariant_notnull(lfm); + invariant_notnull(last_xid_if_clean_shutdown); + + int r; + int n_logfiles; + char **logfiles; + r = toku_logger_find_logfiles(log_dir, &logfiles, &n_logfiles); + if (r!=0) + return r; + + TOKULOGCURSOR cursor; + struct log_entry *entry; + TOKULOGFILEINFO lf_info; + long long index = -1; + char *basename; + LSN tmp_lsn = {0}; + TXNID last_xid = TXNID_NONE; + for(int i=0;i=TOKU_LOG_MIN_SUPPORTED_VERSION); + assert(version<=TOKU_LOG_VERSION); + lf_info->index = index; + lf_info->version = version; + // find last LSN in logfile + r = toku_logcursor_create_for_file(&cursor, log_dir, basename); + if (r!=0) { + return r; + } + r = 
toku_logcursor_last(cursor, &entry); // set "entry" to last log entry in logfile + if (r == 0) { + lf_info->maxlsn = toku_log_entry_get_lsn(entry); + + invariant(lf_info->maxlsn.lsn >= tmp_lsn.lsn); + tmp_lsn = lf_info->maxlsn; + if (entry->cmd == LT_shutdown) { + last_xid = entry->u.shutdown.last_xid; + } else { + last_xid = TXNID_NONE; + } + } + else { + lf_info->maxlsn = tmp_lsn; // handle empty logfile (no LSN in file) case + } + + // add to logfilemgr + toku_logfilemgr_add_logfile_info(lfm, lf_info); + toku_logcursor_destroy(&cursor); + } + toku_logger_free_logfiles(logfiles, n_logfiles); + *last_xid_if_clean_shutdown = last_xid; + return 0; +} + +int toku_logfilemgr_num_logfiles(TOKULOGFILEMGR lfm) { + assert(lfm); + return lfm->n_entries; +} + +int toku_logfilemgr_add_logfile_info(TOKULOGFILEMGR lfm, TOKULOGFILEINFO lf_info) { + assert(lfm); + struct lfm_entry *XMALLOC(entry); + entry->lf_info = lf_info; + entry->next = NULL; + if ( lfm->n_entries != 0 ) + lfm->last->next = entry; + lfm->last = entry; + lfm->n_entries++; + if (lfm->n_entries == 1 ) { + lfm->first = lfm->last; + } + return 0; +} + +TOKULOGFILEINFO toku_logfilemgr_get_oldest_logfile_info(TOKULOGFILEMGR lfm) { + assert(lfm); + return lfm->first->lf_info; +} + +void toku_logfilemgr_delete_oldest_logfile_info(TOKULOGFILEMGR lfm) { + assert(lfm); + if ( lfm->n_entries > 0 ) { + struct lfm_entry *entry = lfm->first; + toku_free(entry->lf_info); + lfm->first = entry->next; + toku_free(entry); + lfm->n_entries--; + if ( lfm->n_entries == 0 ) { + lfm->last = lfm->first = NULL; + } + } +} + +LSN toku_logfilemgr_get_last_lsn(TOKULOGFILEMGR lfm) { + assert(lfm); + if ( lfm->n_entries == 0 ) { + LSN lsn; + lsn.lsn = 0; + return lsn; + } + return lfm->last->lf_info->maxlsn; +} + +void toku_logfilemgr_update_last_lsn(TOKULOGFILEMGR lfm, LSN lsn) { + assert(lfm); + assert(lfm->last!=NULL); + lfm->last->lf_info->maxlsn = lsn; +} + +void toku_logfilemgr_print(TOKULOGFILEMGR lfm) { + assert(lfm); + 
printf("toku_logfilemgr_print [%p] : %d entries \n", lfm, lfm->n_entries); + struct lfm_entry *entry = lfm->first; + for (int i=0;in_entries;i++) { + printf(" entry %d : index = %" PRId64 ", maxlsn = %" PRIu64 "\n", i, entry->lf_info->index, entry->lf_info->maxlsn.lsn); + entry = entry->next; + } +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/logfilemgr.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/logfilemgr.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/logfilemgr.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/logfilemgr.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,119 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. 
+ +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include + +// this is the basic information we need to keep per logfile +struct toku_logfile_info { + int64_t index; + LSN maxlsn; + uint32_t version; +}; +typedef struct toku_logfile_info *TOKULOGFILEINFO; + +struct toku_logfilemgr; +typedef struct toku_logfilemgr *TOKULOGFILEMGR; + +int toku_logfilemgr_create(TOKULOGFILEMGR *lfm); +int toku_logfilemgr_destroy(TOKULOGFILEMGR *lfm); + +int toku_logfilemgr_init(TOKULOGFILEMGR lfm, const char *log_dir, TXNID *last_xid_if_clean_shutdown); +int toku_logfilemgr_num_logfiles(TOKULOGFILEMGR lfm); +int toku_logfilemgr_add_logfile_info(TOKULOGFILEMGR lfm, TOKULOGFILEINFO lf_info); +TOKULOGFILEINFO toku_logfilemgr_get_oldest_logfile_info(TOKULOGFILEMGR lfm); +void toku_logfilemgr_delete_oldest_logfile_info(TOKULOGFILEMGR lfm); +LSN toku_logfilemgr_get_last_lsn(TOKULOGFILEMGR lfm); +void toku_logfilemgr_update_last_lsn(TOKULOGFILEMGR lfm, LSN lsn); + +void toku_logfilemgr_print(TOKULOGFILEMGR lfm); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/logformat.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/logformat.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/logformat.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/logformat.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,880 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +/* This file defines the logformat in an executable fashion. + * This code is used to generate + * The code that writes into the log. + * The code that reads the log and prints it to stdout (the log_print utility) + * The code that reads the log for recovery. + * The struct definitions. + * The Latex documentation. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +typedef struct field { + const char *type; + const char *name; + const char *format; // optional format string +} F; + +#define NULLFIELD {0,0,0} +#define FA (F[]) + +enum log_begin_action { + IGNORE_LOG_BEGIN, + SHOULD_LOG_BEGIN, + ASSERT_BEGIN_WAS_LOGGED, + LOG_BEGIN_ACTION_NA = IGNORE_LOG_BEGIN +}; + +struct logtype { + const char *name; + unsigned int command_and_flags; + struct field *fields; + enum log_begin_action log_begin_action; +}; + +// In the fields, don't mention the command, the LSN, the CRC or the trailing LEN. + +const struct logtype rollbacks[] = { + //TODO: #2037 Add dname + {"fdelete", 'U', FA{{"FILENUM", "filenum", 0}, + NULLFIELD}, LOG_BEGIN_ACTION_NA}, + //TODO: #2037 Add dname + {"fcreate", 'F', FA{{"FILENUM", "filenum", 0}, + {"BYTESTRING", "iname", 0}, + NULLFIELD}, LOG_BEGIN_ACTION_NA}, + // cmdinsert is used to insert a key-value pair into a DB. For rollback we don't need the data. 
+ {"cmdinsert", 'i', FA{ + {"FILENUM", "filenum", 0}, + {"BYTESTRING", "key", 0}, + NULLFIELD}, LOG_BEGIN_ACTION_NA}, + {"cmddelete", 'd', FA{ + {"FILENUM", "filenum", 0}, + {"BYTESTRING", "key", 0}, + NULLFIELD}, LOG_BEGIN_ACTION_NA}, + {"rollinclude", 'r', FA{{"TXNID_PAIR", "xid", 0}, + {"uint64_t", "num_nodes", 0}, + {"BLOCKNUM", "spilled_head", 0}, + {"BLOCKNUM", "spilled_tail", 0}, + NULLFIELD}, LOG_BEGIN_ACTION_NA}, + {"load", 'l', FA{{"FILENUM", "old_filenum", 0}, + {"BYTESTRING", "new_iname", 0}, + NULLFIELD}, LOG_BEGIN_ACTION_NA}, + // #2954 + {"hot_index", 'h', FA{{"FILENUMS", "hot_index_filenums", 0}, + NULLFIELD}, LOG_BEGIN_ACTION_NA}, + {"dictionary_redirect", 'R', FA{{"FILENUM", "old_filenum", 0}, + {"FILENUM", "new_filenum", 0}, + NULLFIELD}, LOG_BEGIN_ACTION_NA}, + {"cmdupdate", 'u', FA{{"FILENUM", "filenum", 0}, + {"BYTESTRING", "key", 0}, + NULLFIELD}, LOG_BEGIN_ACTION_NA}, + {"cmdupdatebroadcast", 'B', FA{{"FILENUM", "filenum", 0}, + {"bool", "is_resetting_op", 0}, + NULLFIELD}, LOG_BEGIN_ACTION_NA}, + {"change_fdescriptor", 'D', FA{{"FILENUM", "filenum", 0}, + {"BYTESTRING", "old_descriptor", 0}, + NULLFIELD}, LOG_BEGIN_ACTION_NA}, + {0,0,FA{NULLFIELD}, LOG_BEGIN_ACTION_NA} +}; + +const struct logtype logtypes[] = { + // Records produced by checkpoints +#if 0 // no longer used, but reserve the type + {"local_txn_checkpoint", 'c', FA{{"TXNID", "xid", 0}, NULLFIELD}}, +#endif + {"begin_checkpoint", 'x', FA{{"uint64_t", "timestamp", 0}, {"TXNID", "last_xid", 0}, NULLFIELD}, IGNORE_LOG_BEGIN}, + {"end_checkpoint", 'X', FA{{"LSN", "lsn_begin_checkpoint", 0}, + {"uint64_t", "timestamp", 0}, + {"uint32_t", "num_fassociate_entries", 0}, // how many files were checkpointed + {"uint32_t", "num_xstillopen_entries", 0}, // how many txns were checkpointed + NULLFIELD}, IGNORE_LOG_BEGIN}, + //TODO: #2037 Add dname + {"fassociate", 'f', FA{{"FILENUM", "filenum", 0}, + {"uint32_t", "treeflags", 0}, + {"BYTESTRING", "iname", 0}, // pathname of file + {"uint8_t", 
"unlink_on_close", 0}, + NULLFIELD}, IGNORE_LOG_BEGIN}, + //We do not use a txninfo struct since recovery log has + //FILENUMS and TOKUTXN has FTs (for open_fts) + {"xstillopen", 's', FA{{"TXNID_PAIR", "xid", 0}, + {"TXNID_PAIR", "parentxid", 0}, + {"uint64_t", "rollentry_raw_count", 0}, + {"FILENUMS", "open_filenums", 0}, + {"uint8_t", "force_fsync_on_commit", 0}, + {"uint64_t", "num_rollback_nodes", 0}, + {"uint64_t", "num_rollentries", 0}, + {"BLOCKNUM", "spilled_rollback_head", 0}, + {"BLOCKNUM", "spilled_rollback_tail", 0}, + {"BLOCKNUM", "current_rollback", 0}, + NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED}, // record all transactions + // prepared txns need a gid + {"xstillopenprepared", 'p', FA{{"TXNID_PAIR", "xid", 0}, + {"XIDP", "xa_xid", 0}, // prepared transactions need a gid, and have no parentxid. + {"uint64_t", "rollentry_raw_count", 0}, + {"FILENUMS", "open_filenums", 0}, + {"uint8_t", "force_fsync_on_commit", 0}, + {"uint64_t", "num_rollback_nodes", 0}, + {"uint64_t", "num_rollentries", 0}, + {"BLOCKNUM", "spilled_rollback_head", 0}, + {"BLOCKNUM", "spilled_rollback_tail", 0}, + {"BLOCKNUM", "current_rollback", 0}, + NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED}, // record all transactions + // Records produced by transactions + {"xbegin", 'b', FA{{"TXNID_PAIR", "xid", 0},{"TXNID_PAIR", "parentxid", 0},NULLFIELD}, IGNORE_LOG_BEGIN}, + {"xcommit",'C', FA{{"TXNID_PAIR", "xid", 0},NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED}, + {"xprepare",'P', FA{{"TXNID_PAIR", "xid", 0}, {"XIDP", "xa_xid", 0}, NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED}, + {"xabort", 'q', FA{{"TXNID_PAIR", "xid", 0},NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED}, + //TODO: #2037 Add dname + {"fcreate", 'F', FA{{"TXNID_PAIR", "xid", 0}, + {"FILENUM", "filenum", 0}, + {"BYTESTRING", "iname", 0}, + {"uint32_t", "mode", "0%o"}, + {"uint32_t", "treeflags", 0}, + {"uint32_t", "nodesize", 0}, + {"uint32_t", "basementnodesize", 0}, + {"uint32_t", "compression_method", 0}, + NULLFIELD}, SHOULD_LOG_BEGIN}, + //TODO: #2037 Add dname + 
{"fopen", 'O', FA{{"BYTESTRING", "iname", 0}, + {"FILENUM", "filenum", 0}, + {"uint32_t", "treeflags", 0}, + NULLFIELD}, IGNORE_LOG_BEGIN}, + //TODO: #2037 Add dname + {"fclose", 'e', FA{{"BYTESTRING", "iname", 0}, + {"FILENUM", "filenum", 0}, + NULLFIELD}, IGNORE_LOG_BEGIN}, + //TODO: #2037 Add dname + {"fdelete", 'U', FA{{"TXNID_PAIR", "xid", 0}, + {"FILENUM", "filenum", 0}, + NULLFIELD}, SHOULD_LOG_BEGIN}, + {"enq_insert", 'I', FA{{"FILENUM", "filenum", 0}, + {"TXNID_PAIR", "xid", 0}, + {"BYTESTRING", "key", 0}, + {"BYTESTRING", "value", 0}, + NULLFIELD}, SHOULD_LOG_BEGIN}, + {"enq_insert_no_overwrite", 'i', FA{{"FILENUM", "filenum", 0}, + {"TXNID_PAIR", "xid", 0}, + {"BYTESTRING", "key", 0}, + {"BYTESTRING", "value", 0}, + NULLFIELD}, SHOULD_LOG_BEGIN}, + {"enq_delete_any", 'E', FA{{"FILENUM", "filenum", 0}, + {"TXNID_PAIR", "xid", 0}, + {"BYTESTRING", "key", 0}, + NULLFIELD}, SHOULD_LOG_BEGIN}, + {"enq_insert_multiple", 'm', FA{{"FILENUM", "src_filenum", 0}, + {"FILENUMS", "dest_filenums", 0}, + {"TXNID_PAIR", "xid", 0}, + {"BYTESTRING", "src_key", 0}, + {"BYTESTRING", "src_val", 0}, + NULLFIELD}, SHOULD_LOG_BEGIN}, + {"enq_delete_multiple", 'M', FA{{"FILENUM", "src_filenum", 0}, + {"FILENUMS", "dest_filenums", 0}, + {"TXNID_PAIR", "xid", 0}, + {"BYTESTRING", "src_key", 0}, + {"BYTESTRING", "src_val", 0}, + NULLFIELD}, SHOULD_LOG_BEGIN}, + {"comment", 'T', FA{{"uint64_t", "timestamp", 0}, + {"BYTESTRING", "comment", 0}, + NULLFIELD}, IGNORE_LOG_BEGIN}, + // Note: shutdown_up_to_19 log entry is NOT ALLOWED TO BE CHANGED. + // Do not change the letter ('Q'), do not add fields, + // do not remove fields. + // TODO: Kill this logentry entirely once we no longer support version 19. + {"shutdown_up_to_19", 'Q', FA{{"uint64_t", "timestamp", 0}, + NULLFIELD}, IGNORE_LOG_BEGIN}, + // Note: Shutdown log entry is NOT ALLOWED TO BE CHANGED. + // Do not change the letter ('0'), do not add fields, + // do not remove fields. 
+ // You CAN leave this alone and add a new one, but then you have + // to deal with the upgrade mechanism again. + // This is how we detect clean shutdowns from OLDER VERSIONS. + // This log entry must always be readable for future versions. + // If you DO change it, you need to write a separate log upgrade mechanism. + {"shutdown", '0', FA{{"uint64_t", "timestamp", 0}, + {"TXNID", "last_xid", 0}, + NULLFIELD}, IGNORE_LOG_BEGIN}, + {"load", 'l', FA{{"TXNID_PAIR", "xid", 0}, + {"FILENUM", "old_filenum", 0}, + {"BYTESTRING", "new_iname", 0}, + NULLFIELD}, SHOULD_LOG_BEGIN}, + // #2954 + {"hot_index", 'h', FA{{"TXNID_PAIR", "xid", 0}, + {"FILENUMS", "hot_index_filenums", 0}, + NULLFIELD}, SHOULD_LOG_BEGIN}, + {"enq_update", 'u', FA{{"FILENUM", "filenum", 0}, + {"TXNID_PAIR", "xid", 0}, + {"BYTESTRING", "key", 0}, + {"BYTESTRING", "extra", 0}, + NULLFIELD}, SHOULD_LOG_BEGIN}, + {"enq_updatebroadcast", 'B', FA{{"FILENUM", "filenum", 0}, + {"TXNID_PAIR", "xid", 0}, + {"BYTESTRING", "extra", 0}, + {"bool", "is_resetting_op", 0}, + NULLFIELD}, SHOULD_LOG_BEGIN}, + {"change_fdescriptor", 'D', FA{{"FILENUM", "filenum", 0}, + {"TXNID_PAIR", "xid", 0}, + {"BYTESTRING", "old_descriptor", 0}, + {"BYTESTRING", "new_descriptor", 0}, + {"bool", "update_cmp_descriptor", 0}, + NULLFIELD}, SHOULD_LOG_BEGIN}, + {0,0,FA{NULLFIELD}, (enum log_begin_action) 0} +}; + + +#define DO_STRUCTS(lt, array, body) do { \ + const struct logtype *lt; \ + for (lt=&array[0]; lt->name; lt++) { \ + body; \ + } } while (0) + +#define DO_ROLLBACKS(lt, body) DO_STRUCTS(lt, rollbacks, body) + +#define DO_LOGTYPES(lt, body) DO_STRUCTS(lt, logtypes, body) + +#define DO_LOGTYPES_AND_ROLLBACKS(lt, body) (DO_ROLLBACKS(lt,body), DO_LOGTYPES(lt, body)) + +#define DO_FIELDS(fld, lt, body) do { \ + struct field *fld; \ + for (fld=lt->fields; fld->type; fld++) { \ + body; \ + } } while (0) + + +static void __attribute__((format (printf, 3, 4))) fprintf2 (FILE *f1, FILE *f2, const char *format, ...) 
{ + va_list ap; + int r; + va_start(ap, format); + r=vfprintf(f1, format, ap); assert(r>=0); + va_end(ap); + va_start(ap, format); + r=vfprintf(f2, format, ap); assert(r>=0); + va_end(ap); +} + +FILE *hf=0, *cf=0, *pf=0; + +static void +generate_enum_internal (const char *enum_name, const char *enum_prefix, const struct logtype *lts) { + char used_cmds[256]; + int count=0; + memset(used_cmds, 0, 256); + fprintf(hf, "enum %s {", enum_name); + DO_STRUCTS(lt, lts, + { + unsigned char cmd = (unsigned char)(lt->command_and_flags&0xff); + if (count!=0) fprintf(hf, ","); + count++; + fprintf(hf, "\n"); + fprintf(hf," %s_%-16s = '%c'", enum_prefix, lt->name, cmd); + if (used_cmds[cmd]!=0) { fprintf(stderr, "%s:%d: error: Command %d (%c) was used twice (second time for %s)\n", __FILE__, __LINE__, cmd, cmd, lt->name); abort(); } + used_cmds[cmd]=1; + }); + fprintf(hf, "\n};\n\n"); + +} + +static void +generate_enum (void) { + generate_enum_internal("lt_cmd", "LT", logtypes); + generate_enum_internal("rt_cmd", "RT", rollbacks); +} + +static void +generate_log_struct (void) { + DO_LOGTYPES(lt, + { fprintf(hf, "struct logtype_%s {\n", lt->name); + fprintf(hf, " %-16s lsn;\n", "LSN"); + DO_FIELDS(field_type, lt, + fprintf(hf, " %-16s %s;\n", field_type->type, field_type->name)); + fprintf(hf, " %-16s crc;\n", "uint32_t"); + fprintf(hf, " %-16s len;\n", "uint32_t"); + fprintf(hf, "};\n"); + //fprintf(hf, "void toku_recover_%s (LSN lsn", lt->name); + //DO_FIELDS(field_type, lt, fprintf(hf, ", %s %s", field_type->type, field_type->name)); + //fprintf(hf, ");\n"); + }); + DO_ROLLBACKS(lt, + { fprintf(hf, "struct rolltype_%s {\n", lt->name); + DO_FIELDS(field_type, lt, + fprintf(hf, " %-16s %s;\n", field_type->type, field_type->name)); + fprintf(hf, "};\n"); + fprintf(hf, "int toku_rollback_%s (", lt->name); + DO_FIELDS(field_type, lt, fprintf(hf, "%s %s,", field_type->type, field_type->name)); + fprintf(hf, "TOKUTXN txn, LSN oplsn);\n"); + fprintf(hf, "int toku_commit_%s (", 
lt->name); + DO_FIELDS(field_type, lt, fprintf(hf, "%s %s,", field_type->type, field_type->name)); + fprintf(hf, "TOKUTXN txn, LSN oplsn);\n"); + }); + fprintf(hf, "struct log_entry {\n"); + fprintf(hf, " enum lt_cmd cmd;\n"); + fprintf(hf, " union {\n"); + DO_LOGTYPES(lt, fprintf(hf," struct logtype_%s %s;\n", lt->name, lt->name)); + fprintf(hf, " } u;\n"); + fprintf(hf, "};\n"); + + fprintf(hf, "struct roll_entry {\n"); + fprintf(hf, " enum rt_cmd cmd;\n"); + fprintf(hf, " struct roll_entry *prev; /* for in-memory list of log entries. Threads from newest to oldest. */\n"); + fprintf(hf, " union {\n"); + DO_ROLLBACKS(lt, fprintf(hf," struct rolltype_%s %s;\n", lt->name, lt->name)); + fprintf(hf, " } u;\n"); + fprintf(hf, "};\n"); + +} + +static void +generate_dispatch (void) { + fprintf(hf, "#define rolltype_dispatch(s, funprefix) ({ switch((s)->cmd) {\\\n"); + DO_ROLLBACKS(lt, fprintf(hf, " case RT_%s: funprefix ## %s (&(s)->u.%s); break;\\\n", lt->name, lt->name, lt->name)); + fprintf(hf, " }})\n"); + + fprintf(hf, "#define logtype_dispatch_assign(s, funprefix, var, ...) do { switch((s)->cmd) {\\\n"); + DO_LOGTYPES(lt, fprintf(hf, " case LT_%s: var = funprefix ## %s (&(s)->u.%s, __VA_ARGS__); break;\\\n", lt->name, lt->name, lt->name)); + fprintf(hf, " }} while (0)\n"); + + fprintf(hf, "#define rolltype_dispatch_assign(s, funprefix, var, ...) do { \\\n"); + fprintf(hf, " switch((s)->cmd) {\\\n"); + DO_ROLLBACKS(lt, { + fprintf(hf, " case RT_%s: var = funprefix ## %s (", lt->name, lt->name); + int fieldcount=0; + DO_FIELDS(field_type, lt, { + if (fieldcount>0) fprintf(hf, ","); + fprintf(hf, "(s)->u.%s.%s", lt->name, field_type->name); + fieldcount++; + }); + fprintf(hf, ", __VA_ARGS__); break;\\\n"); + }); + fprintf(hf, " default: assert(0);} } while (0)\n"); + + fprintf(hf, "#define logtype_dispatch_args(s, funprefix, ...) 
do { switch((s)->cmd) {\\\n"); + DO_LOGTYPES(lt, + { + fprintf(hf, " case LT_%s: funprefix ## %s ((s)->u.%s.lsn", lt->name, lt->name, lt->name); + DO_FIELDS(field_type, lt, fprintf(hf, ",(s)->u.%s.%s", lt->name, field_type->name)); + fprintf(hf, ", __VA_ARGS__); break;\\\n"); + }); + fprintf(hf, " }} while (0)\n"); +} + +static void +generate_get_timestamp(void) { + fprintf(cf, "static uint64_t toku_get_timestamp(void) {\n"); + fprintf(cf, " struct timeval tv; int r = gettimeofday(&tv, NULL);\n"); + fprintf(cf, " assert(r==0);\n"); + fprintf(cf, " return tv.tv_sec * 1000000ULL + tv.tv_usec;\n"); + fprintf(cf, "}\n"); +} + +static void +generate_log_writer (void) { + generate_get_timestamp(); + DO_LOGTYPES(lt, { + //TODO(yoni): The overhead variables are NOT correct for BYTESTRING, FILENUMS (or any other variable length type) + // We should switch to something like using toku_logsizeof_*. + fprintf(hf, "static const size_t toku_log_%s_overhead = (+4+1+8", lt->name); + DO_FIELDS(field_type, lt, fprintf(hf, "+sizeof(%s)", field_type->type)); + fprintf(hf, "+8);\n"); + fprintf2(cf, hf, "void toku_log_%s (TOKULOGGER logger, LSN *lsnp, int do_fsync", lt->name); + switch (lt->log_begin_action) { + case SHOULD_LOG_BEGIN: + case ASSERT_BEGIN_WAS_LOGGED: { + fprintf2(cf, hf, ", TOKUTXN txn"); + break; + } + case IGNORE_LOG_BEGIN: break; + } + DO_FIELDS(field_type, lt, fprintf2(cf, hf, ", %s %s", field_type->type, field_type->name)); + fprintf(hf, ");\n"); + fprintf(cf, ") {\n"); + fprintf(cf, " if (logger == NULL) {\n"); + fprintf(cf, " return;\n"); + fprintf(cf, " }\n"); + switch (lt->log_begin_action) { + case SHOULD_LOG_BEGIN: { + fprintf(cf, " //txn can be NULL during tests\n"); + fprintf(cf, " //never null when not checkpoint.\n"); + fprintf(cf, " if (txn && !txn->begin_was_logged) {\n"); + fprintf(cf, " invariant(!txn_declared_read_only(txn));\n"); + fprintf(cf, " toku_maybe_log_begin_txn_for_write_operation(txn);\n"); + fprintf(cf, " }\n"); + break; + } + case 
ASSERT_BEGIN_WAS_LOGGED: { + fprintf(cf, " //txn can be NULL during tests\n"); + fprintf(cf, " invariant(!txn || txn->begin_was_logged);\n"); + fprintf(cf, " invariant(!txn || !txn_declared_read_only(txn));\n"); + break; + } + case IGNORE_LOG_BEGIN: break; + } + fprintf(cf, " if (!logger->write_log_files) {\n"); + fprintf(cf, " ml_lock(&logger->input_lock);\n"); + fprintf(cf, " logger->lsn.lsn++;\n"); + fprintf(cf, " if (lsnp) *lsnp=logger->lsn;\n"); + fprintf(cf, " ml_unlock(&logger->input_lock);\n"); + fprintf(cf, " return;\n"); + fprintf(cf, " }\n"); + fprintf(cf, " const unsigned int buflen= (+4 // len at the beginning\n"); + fprintf(cf, " +1 // log command\n"); + fprintf(cf, " +8 // lsn\n"); + DO_FIELDS(field_type, lt, + fprintf(cf, " +toku_logsizeof_%s(%s)\n", field_type->type, field_type->name)); + fprintf(cf, " +8 // crc + len\n"); + fprintf(cf, " );\n"); + fprintf(cf, " struct wbuf wbuf;\n"); + fprintf(cf, " ml_lock(&logger->input_lock);\n"); + fprintf(cf, " toku_logger_make_space_in_inbuf(logger, buflen);\n"); + fprintf(cf, " wbuf_nocrc_init(&wbuf, logger->inbuf.buf+logger->inbuf.n_in_buf, buflen);\n"); + fprintf(cf, " wbuf_nocrc_int(&wbuf, buflen);\n"); + fprintf(cf, " wbuf_nocrc_char(&wbuf, '%c');\n", (char)(0xff<->command_and_flags)); + fprintf(cf, " logger->lsn.lsn++;\n"); + fprintf(cf, " logger->inbuf.max_lsn_in_buf = logger->lsn;\n"); + fprintf(cf, " wbuf_nocrc_LSN(&wbuf, logger->lsn);\n"); + fprintf(cf, " if (lsnp) *lsnp=logger->lsn;\n"); + DO_FIELDS(field_type, lt, + if (strcmp(field_type->name, "timestamp") == 0) + fprintf(cf, " if (timestamp == 0) timestamp = toku_get_timestamp();\n"); + fprintf(cf, " wbuf_nocrc_%s(&wbuf, %s);\n", field_type->type, field_type->name)); + fprintf(cf, " wbuf_nocrc_int(&wbuf, toku_x1764_memory(wbuf.buf, wbuf.ndone));\n"); + fprintf(cf, " wbuf_nocrc_int(&wbuf, buflen);\n"); + fprintf(cf, " assert(wbuf.ndone==buflen);\n"); + fprintf(cf, " logger->inbuf.n_in_buf += buflen;\n"); + fprintf(cf, " 
toku_logger_maybe_fsync(logger, logger->lsn, do_fsync, true);\n"); + fprintf(cf, "}\n\n"); + }); +} + +static void +generate_log_reader (void) { + DO_LOGTYPES(lt, { + fprintf(cf, "static int toku_log_fread_%s (FILE *infile, uint32_t len1, struct logtype_%s *data, struct x1764 *checksum)", lt->name, lt->name); + fprintf(cf, " {\n"); + fprintf(cf, " int r=0;\n"); + fprintf(cf, " uint32_t actual_len=5; // 1 for the command, 4 for the first len.\n"); + fprintf(cf, " r=toku_fread_%-16s(infile, &data->%-16s, checksum, &actual_len); if (r!=0) return r;\n", "LSN", "lsn"); + DO_FIELDS(field_type, lt, + fprintf(cf, " r=toku_fread_%-16s(infile, &data->%-16s, checksum, &actual_len); if (r!=0) return r;\n", field_type->type, field_type->name)); + fprintf(cf, " uint32_t checksum_in_file, len_in_file;\n"); + fprintf(cf, " r=toku_fread_uint32_t_nocrclen(infile, &checksum_in_file); actual_len+=4; if (r!=0) return r;\n"); + fprintf(cf, " r=toku_fread_uint32_t_nocrclen(infile, &len_in_file); actual_len+=4; if (r!=0) return r;\n"); + fprintf(cf, " if (checksum_in_file!=toku_x1764_finish(checksum) || len_in_file!=actual_len || len1 != len_in_file) return DB_BADFORMAT;\n"); + fprintf(cf, " return 0;\n"); + fprintf(cf, "}\n\n"); + }); + fprintf2(cf, hf, "int toku_log_fread (FILE *infile, struct log_entry *le)"); + fprintf(hf, ";\n"); + fprintf(cf, " {\n"); + fprintf(cf, " uint32_t len1; int r;\n"); + fprintf(cf, " uint32_t ignorelen=0;\n"); + fprintf(cf, " struct x1764 checksum;\n"); + fprintf(cf, " toku_x1764_init(&checksum);\n"); + fprintf(cf, " r = toku_fread_uint32_t(infile, &len1, &checksum, &ignorelen); if (r!=0) return r;\n"); + fprintf(cf, " int cmd=fgetc(infile);\n"); + fprintf(cf, " if (cmd==EOF) return EOF;\n"); + fprintf(cf, " char cmdchar = (char)cmd;\n"); + fprintf(cf, " toku_x1764_add(&checksum, &cmdchar, 1);\n"); + fprintf(cf, " le->cmd=(enum lt_cmd)cmd;\n"); + fprintf(cf, " switch ((enum lt_cmd)cmd) {\n"); + DO_LOGTYPES(lt, { + fprintf(cf, " case LT_%s:\n", lt->name); + 
fprintf(cf, " return toku_log_fread_%s (infile, len1, &le->u.%s, &checksum);\n", lt->name, lt->name); + }); + fprintf(cf, " };\n"); + fprintf(cf, " return DB_BADFORMAT;\n"); // Should read past the record using the len field. + fprintf(cf, "}\n\n"); + //fprintf2(cf, hf, "// Return 0 if there is something to read, return -1 if nothing to read, abort if an error.\n"); + fprintf2(cf, hf, "// Return 0 if there is something to read, -1 if nothing to read, >0 on error\n"); + fprintf2(cf, hf, "int toku_log_fread_backward (FILE *infile, struct log_entry *le)"); + fprintf(hf, ";\n"); + fprintf(cf, "{\n"); + fprintf(cf, " memset(le, 0, sizeof(*le));\n"); + fprintf(cf, " long pos = ftell(infile);\n"); + fprintf(cf, " if (pos<=12) return -1;\n"); + fprintf(cf, " int r = fseek(infile, -4, SEEK_CUR); \n");// assert(r==0);\n"); + fprintf(cf, " if (r!=0) return get_error_errno();\n"); + fprintf(cf, " uint32_t len;\n"); + fprintf(cf, " r = toku_fread_uint32_t_nocrclen(infile, &len); \n");// assert(r==0);\n"); + fprintf(cf, " if (r!=0) return 1;\n"); + fprintf(cf, " r = fseek(infile, -(int)len, SEEK_CUR) ; \n");// assert(r==0);\n"); + fprintf(cf, " if (r!=0) return get_error_errno();\n"); + fprintf(cf, " r = toku_log_fread(infile, le); \n");// assert(r==0);\n"); + fprintf(cf, " if (r!=0) return 1;\n"); + fprintf(cf, " long afterpos = ftell(infile);\n"); + fprintf(cf, " if (afterpos != pos) return 1;\n"); + fprintf(cf, " r = fseek(infile, -(int)len, SEEK_CUR); \n");// assert(r==0);\n"); + fprintf(cf, " if (r!=0) return get_error_errno();\n"); + fprintf(cf, " return 0;\n"); + fprintf(cf, "}\n\n"); + + DO_LOGTYPES(lt, ({ + fprintf(cf, "static void toku_log_free_log_entry_%s_resources (struct logtype_%s *data", lt->name, lt->name); + if (!lt->fields->type) fprintf(cf, " __attribute__((__unused__))"); + fprintf(cf, ") {\n"); + DO_FIELDS(field_type, lt, + fprintf(cf, " toku_free_%s(data->%s);\n", field_type->type, field_type->name); + ); + fprintf(cf, "}\n\n"); + })); + fprintf2(cf, hf, 
"void toku_log_free_log_entry_resources (struct log_entry *le)"); + fprintf(hf, ";\n"); + fprintf(cf, " {\n"); + fprintf(cf, " switch ((enum lt_cmd)le->cmd) {\n"); + DO_LOGTYPES(lt, { + fprintf(cf, " case LT_%s:\n", lt->name); + fprintf(cf, " return toku_log_free_log_entry_%s_resources (&(le->u.%s));\n", lt->name, lt->name); + }); + fprintf(cf, " };\n"); + fprintf(cf, " return;\n"); + fprintf(cf, "}\n\n"); +} + +static void +generate_logprint (void) { + unsigned maxnamelen=0; + fprintf2(pf, hf, "int toku_logprint_one_record(FILE *outf, FILE *f)"); + fprintf(hf, ";\n"); + fprintf(pf, " {\n"); + fprintf(pf, " int cmd, r;\n"); + fprintf(pf, " uint32_t len1, crc_in_file;\n"); + fprintf(pf, " uint32_t ignorelen=0;\n"); + fprintf(pf, " struct x1764 checksum;\n"); + fprintf(pf, " toku_x1764_init(&checksum);\n"); + fprintf(pf, " r=toku_fread_uint32_t(f, &len1, &checksum, &ignorelen);\n"); + fprintf(pf, " if (r==EOF) return EOF;\n"); + fprintf(pf, " cmd=fgetc(f);\n"); + fprintf(pf, " if (cmd==EOF) return DB_BADFORMAT;\n"); + fprintf(pf, " uint32_t len_in_file, len=1+4; // cmd + len1\n"); + fprintf(pf, " char charcmd = (char)cmd;\n"); + fprintf(pf, " toku_x1764_add(&checksum, &charcmd, 1);\n"); + fprintf(pf, " switch ((enum lt_cmd)cmd) {\n"); + DO_LOGTYPES(lt, { if (strlen(lt->name)>maxnamelen) maxnamelen=strlen(lt->name); }); + DO_LOGTYPES(lt, { + unsigned char cmd = (unsigned char)(0xff<->command_and_flags); + fprintf(pf, " case LT_%s: \n", lt->name); + // We aren't using the log reader here because we want better diagnostics as soon as things go wrong. 
+ fprintf(pf, " fprintf(outf, \"%%-%us \", \"%s\");\n", maxnamelen, lt->name); + if (isprint(cmd)) fprintf(pf," fprintf(outf, \" '%c':\");\n", cmd); + else fprintf(pf," fprintf(outf, \"0%03o:\");\n", cmd); + fprintf(pf, " r = toku_logprint_%-16s(outf, f, \"lsn\", &checksum, &len, 0); if (r!=0) return r;\n", "LSN"); + DO_FIELDS(field_type, lt, { + fprintf(pf, " r = toku_logprint_%-16s(outf, f, \"%s\", &checksum, &len,", field_type->type, field_type->name); + if (field_type->format) fprintf(pf, "\"%s\"", field_type->format); + else fprintf(pf, "0"); + fprintf(pf, "); if (r!=0) return r;\n"); + }); + fprintf(pf, " {\n"); + fprintf(pf, " uint32_t actual_murmur = toku_x1764_finish(&checksum);\n"); + fprintf(pf, " r = toku_fread_uint32_t_nocrclen (f, &crc_in_file); len+=4; if (r!=0) return r;\n"); + fprintf(pf, " fprintf(outf, \" crc=%%08x\", crc_in_file);\n"); + fprintf(pf, " if (crc_in_file!=actual_murmur) fprintf(outf, \" checksum=%%08x\", actual_murmur);\n"); + fprintf(pf, " r = toku_fread_uint32_t_nocrclen (f, &len_in_file); len+=4; if (r!=0) return r;\n"); + fprintf(pf, " fprintf(outf, \" len=%%u\", len_in_file);\n"); + fprintf(pf, " if (len_in_file!=len) fprintf(outf, \" actual_len=%%u\", len);\n"); + fprintf(pf, " if (len_in_file!=len || crc_in_file!=actual_murmur) return DB_BADFORMAT;\n"); + fprintf(pf, " };\n"); + fprintf(pf, " fprintf(outf, \"\\n\");\n"); + fprintf(pf, " return 0;\n\n"); + }); + fprintf(pf, " }\n"); + fprintf(pf, " fprintf(outf, \"Unknown command %%d ('%%c')\", cmd, cmd);\n"); + fprintf(pf, " return DB_BADFORMAT;\n"); + fprintf(pf, "}\n\n"); +} + +static void +generate_rollbacks (void) { + DO_ROLLBACKS(lt, { + fprintf2(cf, hf, "void toku_logger_save_rollback_%s (TOKUTXN txn", lt->name); + DO_FIELDS(field_type, lt, { + if ( strcmp(field_type->type, "BYTESTRING") == 0 ) { + fprintf2(cf, hf, ", BYTESTRING *%s_ptr", field_type->name); + } + else if ( strcmp(field_type->type, "FILENUMS") == 0 ) { + fprintf2(cf, hf, ", FILENUMS *%s_ptr", 
field_type->name); + } + else { + fprintf2(cf, hf, ", %s %s", field_type->type, field_type->name); + } + }); + + fprintf(hf, ");\n"); + fprintf(cf, ") {\n"); + fprintf(cf, " toku_txn_lock(txn);\n"); + fprintf(cf, " ROLLBACK_LOG_NODE log;\n"); + fprintf(cf, " toku_get_and_pin_rollback_log_for_new_entry(txn, &log);\n"); + // 'memdup' all BYTESTRINGS here + DO_FIELDS(field_type, lt, { + if ( strcmp(field_type->type, "BYTESTRING") == 0 ) { + fprintf(cf, " BYTESTRING %s = {\n" + " .len = %s_ptr->len,\n" + " .data = cast_to_typeof(%s.data) toku_memdup_in_rollback(log, %s_ptr->data, %s_ptr->len)\n" + " };\n", + field_type->name, field_type->name, field_type->name, field_type->name, field_type->name); + } + if ( strcmp(field_type->type, "FILENUMS") == 0 ) { + fprintf(cf, " FILENUMS %s = {\n" + " .num = %s_ptr->num,\n" + " .filenums = cast_to_typeof(%s.filenums) toku_memdup_in_rollback(log, %s_ptr->filenums, %s_ptr->num * (sizeof (FILENUM)))\n" + " };\n", + field_type->name, field_type->name, field_type->name, field_type->name, field_type->name); + } + }); + { + int count=0; + fprintf(cf, " uint32_t rollback_fsize = toku_logger_rollback_fsize_%s(", lt->name); + DO_FIELDS(field_type, lt, fprintf(cf, "%s%s", (count++>0)?", ":"", field_type->name)); + fprintf(cf, ");\n"); + } + fprintf(cf, " struct roll_entry *v;\n"); + fprintf(cf, " size_t mem_needed = sizeof(v->u.%s) + __builtin_offsetof(struct roll_entry, u.%s);\n", lt->name, lt->name); + fprintf(cf, " CAST_FROM_VOIDP(v, toku_malloc_in_rollback(log, mem_needed));\n"); + fprintf(cf, " assert(v);\n"); + fprintf(cf, " v->cmd = (enum rt_cmd)%u;\n", lt->command_and_flags&0xff); + DO_FIELDS(field_type, lt, fprintf(cf, " v->u.%s.%s = %s;\n", lt->name, field_type->name, field_type->name)); + fprintf(cf, " v->prev = log->newest_logentry;\n"); + fprintf(cf, " if (log->oldest_logentry==NULL) log->oldest_logentry=v;\n"); + fprintf(cf, " log->newest_logentry = v;\n"); + fprintf(cf, " log->rollentry_resident_bytecount += 
rollback_fsize;\n"); + fprintf(cf, " txn->roll_info.rollentry_raw_count += rollback_fsize;\n"); + fprintf(cf, " txn->roll_info.num_rollentries++;\n"); + fprintf(cf, " log->dirty = true;\n"); + fprintf(cf, " // spill and unpin assert success internally\n"); + fprintf(cf, " toku_maybe_spill_rollbacks(txn, log);\n"); + fprintf(cf, " toku_rollback_log_unpin(txn, log);\n"); + fprintf(cf, " toku_txn_unlock(txn);\n"); + fprintf(cf, "}\n"); + }); + + DO_ROLLBACKS(lt, { + fprintf2(cf, hf, "void toku_logger_rollback_wbuf_nocrc_write_%s (struct wbuf *wbuf", lt->name); + DO_FIELDS(field_type, lt, fprintf2(cf, hf, ", %s %s", field_type->type, field_type->name)); + fprintf2(cf, hf, ")"); + fprintf(hf, ";\n"); + fprintf(cf, " {\n"); + + { + int count=0; + fprintf(cf, " uint32_t rollback_fsize = toku_logger_rollback_fsize_%s(", lt->name); + DO_FIELDS(field_type, lt, fprintf(cf, "%s%s", (count++>0)?", ":"", field_type->name)); + fprintf(cf, ");\n"); + fprintf(cf, " wbuf_nocrc_int(wbuf, rollback_fsize);\n"); + } + fprintf(cf, " wbuf_nocrc_char(wbuf, '%c');\n", (char)(0xff<->command_and_flags)); + DO_FIELDS(field_type, lt, fprintf(cf, " wbuf_nocrc_%s(wbuf, %s);\n", field_type->type, field_type->name)); + fprintf(cf, "}\n"); + }); + fprintf2(cf, hf, "void toku_logger_rollback_wbuf_nocrc_write (struct wbuf *wbuf, struct roll_entry *r)"); + fprintf(hf, ";\n"); + fprintf(cf, " {\n switch (r->cmd) {\n"); + DO_ROLLBACKS(lt, { + fprintf(cf, " case RT_%s: toku_logger_rollback_wbuf_nocrc_write_%s(wbuf", lt->name, lt->name); + DO_FIELDS(field_type, lt, fprintf(cf, ", r->u.%s.%s", lt->name, field_type->name)); + fprintf(cf, "); return;\n"); + }); + fprintf(cf, " }\n assert(0);\n"); + fprintf(cf, "}\n"); + DO_ROLLBACKS(lt, { + fprintf2(cf, hf, "uint32_t toku_logger_rollback_fsize_%s (", lt->name); + int count=0; + DO_FIELDS(field_type, lt, fprintf2(cf, hf, "%s%s %s", (count++>0)?", ":"", field_type->type, field_type->name)); + fprintf(hf, ");\n"); + fprintf(cf, ") {\n"); + fprintf(cf, " return 1 
/* the cmd*/\n"); + fprintf(cf, " + 4 /* the int at the end saying the size */"); + DO_FIELDS(field_type, lt, + fprintf(cf, "\n + toku_logsizeof_%s(%s)", field_type->type, field_type->name)); + fprintf(cf, ";\n}\n"); + }); + fprintf2(cf, hf, "uint32_t toku_logger_rollback_fsize(struct roll_entry *item)"); + fprintf(hf, ";\n"); + fprintf(cf, "{\n switch(item->cmd) {\n"); + DO_ROLLBACKS(lt, { + fprintf(cf, " case RT_%s: return toku_logger_rollback_fsize_%s(", lt->name, lt->name); + int count=0; + DO_FIELDS(field_type, lt, fprintf(cf, "%sitem->u.%s.%s", (count++>0)?", ":"", lt->name, field_type->name)); + fprintf(cf, ");\n"); + }); + fprintf(cf, " }\n assert(0);\n return 0;\n"); + fprintf(cf, "}\n"); + + fprintf2(cf, hf, "int toku_parse_rollback(unsigned char *buf, uint32_t n_bytes, struct roll_entry **itemp, memarena *ma)"); + fprintf(hf, ";\n"); + fprintf(cf, " {\n assert(n_bytes>0);\n struct roll_entry *item;\n enum rt_cmd cmd = (enum rt_cmd)(buf[0]);\n size_t mem_needed;\n"); + fprintf(cf, " struct rbuf rc = {buf, n_bytes, 1};\n"); + fprintf(cf, " switch(cmd) {\n"); + DO_ROLLBACKS(lt, { + fprintf(cf, " case RT_%s:\n", lt->name); + fprintf(cf, " mem_needed = sizeof(item->u.%s) + __builtin_offsetof(struct roll_entry, u.%s);\n", lt->name, lt->name); + fprintf(cf, " CAST_FROM_VOIDP(item, ma->malloc_from_arena(mem_needed));\n"); + fprintf(cf, " item->cmd = cmd;\n"); + DO_FIELDS(field_type, lt, fprintf(cf, " rbuf_ma_%s(&rc, ma, &item->u.%s.%s);\n", field_type->type, lt->name, field_type->name)); + fprintf(cf, " *itemp = item;\n"); + fprintf(cf, " return 0;\n"); + }); + fprintf(cf, " }\n return EINVAL;\n}\n"); +} + +static void +generate_log_entry_functions(void) { + fprintf(hf, "LSN toku_log_entry_get_lsn(struct log_entry *);\n"); + fprintf(cf, "LSN toku_log_entry_get_lsn(struct log_entry *le) {\n"); + fprintf(cf, " return le->u.begin_checkpoint.lsn;\n"); + fprintf(cf, "}\n"); +} + +const char codefile[] = "log_code.cc"; +const char printfile[] = "log_print.cc"; +const 
char headerfile[] = "log_header.h"; +int main (int argc, const char *const argv[]) { + assert(argc==2); // the single argument is the directory into which to put things + const char *dir = argv[1]; + size_t codepathlen = sizeof(codefile) + strlen(dir) + 4; + size_t printpathlen = sizeof(printfile) + strlen(dir) + 4; + size_t headerpathlen = sizeof(headerfile) + strlen(dir) + 4; + char codepath[codepathlen]; + char printpath[printpathlen]; + char headerpath[headerpathlen]; + { int r = snprintf(codepath, codepathlen, "%s/%s", argv[1], codefile); assert(r<(int)codepathlen); } + { int r = snprintf(printpath, printpathlen, "%s/%s", argv[1], printfile); assert(r<(int)printpathlen); } + { int r = snprintf(headerpath, headerpathlen, "%s/%s", argv[1], headerfile); assert(r<(int)headerpathlen); } + chmod(codepath, S_IRUSR|S_IWUSR); + chmod(headerpath, S_IRUSR|S_IWUSR); + unlink(codepath); + unlink(headerpath); + cf = fopen(codepath, "w"); + if (cf==0) { int r = get_error_errno(); printf("fopen of %s failed because of errno=%d (%s)\n", codepath, r, strerror(r)); } // sometimes this is failing, so let's make a better diagnostic + assert(cf!=0); + hf = fopen(headerpath, "w"); assert(hf!=0); + pf = fopen(printpath, "w"); assert(pf!=0); + fprintf2(cf, hf, "/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */\n"); + fprintf2(cf, hf, "// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:\n"); + fprintf(hf, "#pragma once\n"); + fprintf2(cf, hf, "/* Do not edit this file. This code generated by logformat.c. Copyright (c) 2007-2013 Tokutek Inc. */\n"); + fprintf2(cf, hf, "#ident \"Copyright (c) 2007-2013 Tokutek Inc. 
All rights reserved.\"\n"); + fprintf2(cf, pf, "#include \n"); + fprintf2(cf, pf, "#include \n"); + fprintf2(cf, pf, "#include \n"); + fprintf2(cf, pf, "#include \n"); + fprintf(hf, "#include \n"); + fprintf(hf, "#include \n"); + fprintf(hf, "#include \n"); + generate_enum(); + generate_log_struct(); + generate_dispatch(); + generate_log_writer(); + generate_log_reader(); + generate_rollbacks(); + generate_log_entry_functions(); + generate_logprint(); + { + int r=fclose(hf); assert(r==0); + r=fclose(cf); assert(r==0); + r=fclose(pf); assert(r==0); + // Make it tougher to modify by mistake + chmod(codepath, S_IRUSR|S_IRGRP|S_IROTH); + chmod(headerpath, S_IRUSR|S_IRGRP|S_IROTH); + } + return 0; +} + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/logger.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/logger.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/logger.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/logger.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,1483 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include + +#include +#include +#include +#include + +#include "ft/serialize/block_table.h" +#include "ft/ft.h" +#include "ft/logger/log-internal.h" +#include "ft/txn/txn_manager.h" +#include "ft/txn/rollback_log_node_cache.h" + +#include "util/status.h" + +static const int log_format_version=TOKU_LOG_VERSION; + +static int open_logfile (TOKULOGGER logger); +static void logger_write_buffer (TOKULOGGER logger, LSN *fsynced_lsn); +static void delete_logfile(TOKULOGGER logger, long long index, uint32_t version); +static void grab_output(TOKULOGGER logger, LSN *fsynced_lsn); +static void release_output(TOKULOGGER logger, LSN fsynced_lsn); + +static void toku_print_bytes (FILE *outf, uint32_t len, char *data) { + fprintf(outf, "\""); + uint32_t i; + for (i=0; iis_open=false; + result->write_log_files = true; + result->trim_log_files = true; + result->directory=0; + // fd is uninitialized on purpose + // ct is uninitialized on purpose + result->lg_max = 100<<20; // 100MB default + // lsn is uninitialized + result->inbuf = (struct logbuf) {0, LOGGER_MIN_BUF_SIZE, (char *) toku_xmalloc(LOGGER_MIN_BUF_SIZE), ZERO_LSN}; + result->outbuf = (struct logbuf) {0, LOGGER_MIN_BUF_SIZE, (char *) toku_xmalloc(LOGGER_MIN_BUF_SIZE), ZERO_LSN}; + // written_lsn is uninitialized + // fsynced_lsn is uninitialized + result->last_completed_checkpoint_lsn = ZERO_LSN; + // next_log_file_number is uninitialized + // n_in_file is uninitialized + result->write_block_size = FT_DEFAULT_NODE_SIZE; // default logging size is the same as the default ft block size + toku_logfilemgr_create(&result->logfilemgr); + *resultp=result; + ml_init(&result->input_lock); + toku_mutex_init(&result->output_condition_lock, NULL); + toku_cond_init(&result->output_condition, NULL); + result->rollback_cachefile = NULL; + result->output_is_available = true; + toku_txn_manager_init(&result->txn_manager); + return 0; +} + +static void fsync_logdir(TOKULOGGER logger) { + 
toku_fsync_dirfd_without_accounting(logger->dir); +} + +static int open_logdir(TOKULOGGER logger, const char *directory) { + if (toku_os_is_absolute_name(directory)) { + logger->directory = toku_strdup(directory); + } else { + char cwdbuf[PATH_MAX]; + char *cwd = getcwd(cwdbuf, PATH_MAX); + if (cwd == NULL) + return -1; + char *MALLOC_N(strlen(cwd) + strlen(directory) + 2, new_log_dir); + if (new_log_dir == NULL) { + return -2; + } + sprintf(new_log_dir, "%s/%s", cwd, directory); + logger->directory = new_log_dir; + } + if (logger->directory==0) return get_error_errno(); + + logger->dir = opendir(logger->directory); + if ( logger->dir == NULL ) return -1; + return 0; +} + +static int close_logdir(TOKULOGGER logger) { + return closedir(logger->dir); +} + +int +toku_logger_open_with_last_xid(const char *directory, TOKULOGGER logger, TXNID last_xid) { + if (logger->is_open) return EINVAL; + + int r; + TXNID last_xid_if_clean_shutdown = TXNID_NONE; + r = toku_logfilemgr_init(logger->logfilemgr, directory, &last_xid_if_clean_shutdown); + if ( r!=0 ) + return r; + logger->lsn = toku_logfilemgr_get_last_lsn(logger->logfilemgr); + logger->written_lsn = logger->lsn; + logger->fsynced_lsn = logger->lsn; + logger->inbuf.max_lsn_in_buf = logger->lsn; + logger->outbuf.max_lsn_in_buf = logger->lsn; + + // open directory, save pointer for fsyncing t:2445 + r = open_logdir(logger, directory); + if (r!=0) return r; + + long long nexti; + r = toku_logger_find_next_unused_log_file(logger->directory, &nexti); + if (r!=0) return r; + + logger->next_log_file_number = nexti; + r = open_logfile(logger); + if (r!=0) return r; + if (last_xid == TXNID_NONE) { + last_xid = last_xid_if_clean_shutdown; + } + toku_txn_manager_set_last_xid_from_logger(logger->txn_manager, last_xid); + + logger->is_open = true; + return 0; +} + +int toku_logger_open (const char *directory, TOKULOGGER logger) { + return toku_logger_open_with_last_xid(directory, logger, TXNID_NONE); +} + +bool 
toku_logger_rollback_is_open (TOKULOGGER logger) { + return logger->rollback_cachefile != NULL; +} + +#define MAX_CACHED_ROLLBACK_NODES 4096 + +void toku_logger_initialize_rollback_cache(TOKULOGGER logger, FT ft) { + ft->blocktable.free_unused_blocknums(ft->h->root_blocknum); + logger->rollback_cache.init(MAX_CACHED_ROLLBACK_NODES); +} + +int toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, bool create) { + assert(logger->is_open); + assert(!logger->rollback_cachefile); + + FT_HANDLE ft_handle = nullptr; // Note, there is no DB associated with this FT. + toku_ft_handle_create(&ft_handle); + int r = toku_ft_handle_open(ft_handle, toku_product_name_strings.rollback_cachefile, create, create, cachetable, nullptr); + if (r == 0) { + FT ft = ft_handle->ft; + logger->rollback_cachefile = ft->cf; + toku_logger_initialize_rollback_cache(logger, ft_handle->ft); + + // Verify it is empty + // Must have no data blocks (rollback logs or otherwise). + ft->blocktable.verify_no_data_blocks_except_root(ft->h->root_blocknum); + bool is_empty = toku_ft_is_empty_fast(ft_handle); + assert(is_empty); + } else { + toku_ft_handle_close(ft_handle); + } + return r; +} + + +// Requires: Rollback cachefile can only be closed immediately after a checkpoint, +// so it will always be clean (!h->dirty) when about to be closed. +// Rollback log can only be closed when there are no open transactions, +// so it will always be empty (no data blocks) when about to be closed. +void toku_logger_close_rollback_check_empty(TOKULOGGER logger, bool clean_shutdown) { + CACHEFILE cf = logger->rollback_cachefile; // stored in logger at rollback cachefile open + if (cf) { + FT_HANDLE ft_to_close; + { //Find "ft_to_close" + logger->rollback_cache.destroy(); + FT CAST_FROM_VOIDP(ft, toku_cachefile_get_userdata(cf)); + if (clean_shutdown) { + //Verify it is safe to close it. + assert(!ft->h->dirty); //Must not be dirty. 
+ ft->blocktable.free_unused_blocknums(ft->h->root_blocknum); + // Must have no data blocks (rollback logs or otherwise). + ft->blocktable.verify_no_data_blocks_except_root(ft->h->root_blocknum); + assert(!ft->h->dirty); + } else { + ft->h->dirty = 0; + } + ft_to_close = toku_ft_get_only_existing_ft_handle(ft); + if (clean_shutdown) { + bool is_empty; + is_empty = toku_ft_is_empty_fast(ft_to_close); + assert(is_empty); + assert(!ft->h->dirty); // it should not have been dirtied by the toku_ft_is_empty test. + } + } + + toku_ft_handle_close(ft_to_close); + //Set as dealt with already. + logger->rollback_cachefile = NULL; + } +} + +void toku_logger_close_rollback(TOKULOGGER logger) { + toku_logger_close_rollback_check_empty(logger, true); +} + +// No locks held on entry +// No locks held on exit. +// No locks are needed, since you cannot legally close the log concurrently with doing anything else. +// TODO: can't fail +int toku_logger_close(TOKULOGGER *loggerp) { + int r; + TOKULOGGER logger = *loggerp; + if (!logger->is_open) { + goto is_closed; + } + ml_lock(&logger->input_lock); + LSN fsynced_lsn; + grab_output(logger, &fsynced_lsn); + logger_write_buffer(logger, &fsynced_lsn); + if (logger->fd!=-1) { + if ( logger->write_log_files ) { + toku_file_fsync_without_accounting(logger->fd); + } + r = close(logger->fd); + assert(r == 0); + } + r = close_logdir(logger); + assert(r == 0); + logger->fd=-1; + release_output(logger, fsynced_lsn); + +is_closed: + toku_free(logger->inbuf.buf); + toku_free(logger->outbuf.buf); + // before destroying locks they must be left in the unlocked state. 
+ ml_destroy(&logger->input_lock); + toku_mutex_destroy(&logger->output_condition_lock); + toku_cond_destroy(&logger->output_condition); + toku_txn_manager_destroy(logger->txn_manager); + if (logger->directory) toku_free(logger->directory); + toku_logfilemgr_destroy(&logger->logfilemgr); + toku_free(logger); + *loggerp=0; + return 0; +} + +void toku_logger_shutdown(TOKULOGGER logger) { + if (logger->is_open) { + TXN_MANAGER mgr = logger->txn_manager; + if (toku_txn_manager_num_live_root_txns(mgr) == 0) { + TXNID last_xid = toku_txn_manager_get_last_xid(mgr); + toku_log_shutdown(logger, NULL, true, 0, last_xid); + } + } +} + +static int close_and_open_logfile (TOKULOGGER logger, LSN *fsynced_lsn) +// Effect: close the current file, and open the next one. +// Entry: This thread has permission to modify the output. +// Exit: This thread has permission to modify the output. +{ + int r; + if (logger->write_log_files) { + toku_file_fsync_without_accounting(logger->fd); + *fsynced_lsn = logger->written_lsn; + toku_logfilemgr_update_last_lsn(logger->logfilemgr, logger->written_lsn); // fixes t:2294 + } + r = close(logger->fd); if (r!=0) return get_error_errno(); + return open_logfile(logger); +} + +static int +max_int (int a, int b) +{ + if (a>b) return a; + return b; +} + +// *********************************************************** +// output mutex/condition manipulation routines +// *********************************************************** + +static void +wait_till_output_available (TOKULOGGER logger) +// Effect: Wait until output becomes available. +// Implementation hint: Use a pthread_cond_wait. 
+// Entry: Holds the output_condition_lock (but not the inlock) +// Exit: Holds the output_condition_lock and logger->output_is_available +// +{ + tokutime_t t0 = toku_time_now(); + while (!logger->output_is_available) { + toku_cond_wait(&logger->output_condition, &logger->output_condition_lock); + } + if (tokutime_to_seconds(toku_time_now() - t0) >= 0.100) { + logger->num_wait_buf_long++; + } +} + +static void +grab_output(TOKULOGGER logger, LSN *fsynced_lsn) +// Effect: Wait until output becomes available and get permission to modify output. +// Entry: Holds no lock (including not holding the input lock, since we never hold both at once). +// Exit: Hold permission to modify output (but none of the locks). +{ + toku_mutex_lock(&logger->output_condition_lock); + wait_till_output_available(logger); + logger->output_is_available = false; + if (fsynced_lsn) { + *fsynced_lsn = logger->fsynced_lsn; + } + toku_mutex_unlock(&logger->output_condition_lock); +} + +static bool +wait_till_output_already_written_or_output_buffer_available (TOKULOGGER logger, LSN lsn, LSN *fsynced_lsn) +// Effect: Wait until either the output is available or the lsn has been written. +// Return true iff the lsn has been written. +// If returning true, then on exit we don't hold output permission. +// If returning false, then on exit we do hold output permission. +// Entry: Hold no locks. +// Exit: Hold the output permission if returns false. +{ + bool result; + toku_mutex_lock(&logger->output_condition_lock); + while (1) { + if (logger->fsynced_lsn.lsn >= lsn.lsn) { // we can look at the fsynced lsn since we have the lock. + result = true; + break; + } + if (logger->output_is_available) { + logger->output_is_available = false; + result = false; + break; + } + // otherwise wait for a good time to look again. 
+ toku_cond_wait(&logger->output_condition, &logger->output_condition_lock); + } + *fsynced_lsn = logger->fsynced_lsn; + toku_mutex_unlock(&logger->output_condition_lock); + return result; +} + +static void +release_output (TOKULOGGER logger, LSN fsynced_lsn) +// Effect: Release output permission. +// Entry: Holds output permissions, but no locks. +// Exit: Holds neither locks nor output permission. +{ + toku_mutex_lock(&logger->output_condition_lock); + logger->output_is_available = true; + if (logger->fsynced_lsn.lsn < fsynced_lsn.lsn) { + logger->fsynced_lsn = fsynced_lsn; + } + toku_cond_broadcast(&logger->output_condition); + toku_mutex_unlock(&logger->output_condition_lock); +} + +static void +swap_inbuf_outbuf (TOKULOGGER logger) +// Effect: Swap the inbuf and outbuf +// Entry and exit: Hold the input lock and permission to modify output. +{ + struct logbuf tmp = logger->inbuf; + logger->inbuf = logger->outbuf; + logger->outbuf = tmp; + assert(logger->inbuf.n_in_buf == 0); +} + +static void +write_outbuf_to_logfile (TOKULOGGER logger, LSN *fsynced_lsn) +// Effect: Write the contents of outbuf to logfile. Don't necessarily fsync (but it might, in which case fynced_lsn is updated). +// If the logfile gets too big, open the next one (that's the case where an fsync might happen). +// Entry and exit: Holds permission to modify output (and doesn't let it go, so it's ok to also hold the inlock). +{ + if (logger->outbuf.n_in_buf>0) { + // Write the outbuf to disk, take accounting measurements + tokutime_t io_t0 = toku_time_now(); + toku_os_full_write(logger->fd, logger->outbuf.buf, logger->outbuf.n_in_buf); + tokutime_t io_t1 = toku_time_now(); + logger->num_writes_to_disk++; + logger->bytes_written_to_disk += logger->outbuf.n_in_buf; + logger->time_spent_writing_to_disk += (io_t1 - io_t0); + + assert(logger->outbuf.max_lsn_in_buf.lsn > logger->written_lsn.lsn); // since there is something in the buffer, its LSN must be bigger than what's previously written. 
+ logger->written_lsn = logger->outbuf.max_lsn_in_buf; + logger->n_in_file += logger->outbuf.n_in_buf; + logger->outbuf.n_in_buf = 0; + } + // If the file got too big, then open a new file. + if (logger->n_in_file > logger->lg_max) { + int r = close_and_open_logfile(logger, fsynced_lsn); + assert_zero(r); + } +} + +void +toku_logger_make_space_in_inbuf (TOKULOGGER logger, int n_bytes_needed) +// Entry: Holds the inlock +// Exit: Holds the inlock +// Effect: Upon exit, the inlock is held and there are at least n_bytes_needed in the buffer. +// May release the inlock (and then reacquire it), so this is not atomic. +// May obtain the output lock and output permission (but if it does so, it will have released the inlock, since we don't hold both locks at once). +// (But may hold output permission and inlock at the same time.) +// Implementation hint: Makes space in the inbuf, possibly by writing the inbuf to disk or increasing the size of the inbuf. There might not be an fsync. +// Arguments: logger: the logger (side effects) +// n_bytes_needed: how many bytes to make space for. +{ + if (logger->inbuf.n_in_buf + n_bytes_needed <= LOGGER_MIN_BUF_SIZE) { + return; + } + ml_unlock(&logger->input_lock); + LSN fsynced_lsn; + grab_output(logger, &fsynced_lsn); + + ml_lock(&logger->input_lock); + // Some other thread may have written the log out while we didn't have the lock. If we have space now, then be happy. + if (logger->inbuf.n_in_buf + n_bytes_needed <= LOGGER_MIN_BUF_SIZE) { + release_output(logger, fsynced_lsn); + return; + } + if (logger->inbuf.n_in_buf > 0) { + // There isn't enough space, and there is something in the buffer, so write the inbuf. + swap_inbuf_outbuf(logger); + + // Don't release the inlock in this case, because we don't want to get starved. + write_outbuf_to_logfile(logger, &fsynced_lsn); + } + // the inbuf is empty. Make it big enough (just in case it is somehow smaller than a single log entry). 
+ if (n_bytes_needed > logger->inbuf.buf_size) { + assert(n_bytes_needed < (1<<30)); // it seems unlikely to work if a logentry gets that big. + int new_size = max_int(logger->inbuf.buf_size * 2, n_bytes_needed); // make it at least twice as big, and big enough for n_bytes + assert(new_size < (1<<30)); + XREALLOC_N(new_size, logger->inbuf.buf); + logger->inbuf.buf_size = new_size; + } + release_output(logger, fsynced_lsn); +} + +void toku_logger_fsync (TOKULOGGER logger) +// Effect: This is the exported fsync used by ydb.c for env_log_flush. Group commit doesn't have to work. +// Entry: Holds no locks +// Exit: Holds no locks +// Implementation note: Acquire the output condition lock, then the output permission, then release the output condition lock, then get the input lock. +// Then release everything. +{ + toku_logger_maybe_fsync(logger, logger->inbuf.max_lsn_in_buf, true, false); +} + +void toku_logger_fsync_if_lsn_not_fsynced (TOKULOGGER logger, LSN lsn) { + if (logger->write_log_files) { + toku_logger_maybe_fsync(logger, lsn, true, false); + } +} + +int toku_logger_is_open(TOKULOGGER logger) { + if (logger==0) return 0; + return logger->is_open; +} + +void toku_logger_set_cachetable (TOKULOGGER logger, CACHETABLE ct) { + logger->ct = ct; +} + +int toku_logger_set_lg_max(TOKULOGGER logger, uint32_t lg_max) { + if (logger==0) return EINVAL; // no logger + if (logger->is_open) return EINVAL; + if (lg_max>(1<<30)) return EINVAL; // too big + logger->lg_max = lg_max; + return 0; +} +int toku_logger_get_lg_max(TOKULOGGER logger, uint32_t *lg_maxp) { + if (logger==0) return EINVAL; // no logger + *lg_maxp = logger->lg_max; + return 0; +} + +int toku_logger_set_lg_bsize(TOKULOGGER logger, uint32_t bsize) { + if (logger==0) return EINVAL; // no logger + if (logger->is_open) return EINVAL; + if (bsize<=0 || bsize>(1<<30)) return EINVAL; + logger->write_block_size = bsize; + return 0; +} + +int toku_logger_find_next_unused_log_file(const char *directory, long long 
*result) +// This is called during logger initialization, and no locks are required. +{ + DIR *d=opendir(directory); + long long maxf=-1; *result = maxf; + struct dirent *de; + if (d==0) return get_error_errno(); + while ((de=readdir(d))) { + if (de==0) return get_error_errno(); + long long thisl = -1; + if ( is_a_logfile(de->d_name, &thisl) ) { + if ((long long)thisl > maxf) maxf = thisl; + } + } + *result=maxf+1; + int r = closedir(d); + return r; +} + +// TODO: Put this in portability layer when ready +// in: file pathname that may have a dirname prefix +// return: file leaf name +static char * fileleafname(char *pathname) { + const char delimiter = '/'; + char *leafname = strrchr(pathname, delimiter); + if (leafname) + leafname++; + else + leafname = pathname; + return leafname; +} + +static int logfilenamecompare (const void *ap, const void *bp) { + char *a=*(char**)ap; + char *a_leafname = fileleafname(a); + char *b=*(char**)bp; + char * b_leafname = fileleafname(b); + int rval; + bool valid; + uint64_t num_a = 0; // placate compiler + uint64_t num_b = 0; + uint32_t ver_a = 0; + uint32_t ver_b = 0; + valid = is_a_logfile_any_version(a_leafname, &num_a, &ver_a); + invariant(valid); + valid = is_a_logfile_any_version(b_leafname, &num_b, &ver_b); + invariant(valid); + if (ver_a < ver_b) rval = -1; + else if (ver_a > ver_b) rval = +1; + else if (num_a < num_b) rval = -1; + else if (num_a > num_b) rval = +1; + else rval = 0; + return rval; +} + +// Return the log files in sorted order +// Return a null_terminated array of strings, and also return the number of strings in the array. +// Requires: Race conditions must be dealt with by caller. Either call during initialization or grab the output permission. 
+int toku_logger_find_logfiles (const char *directory, char ***resultp, int *n_logfiles) +{ + int result_limit=2; + int n_results=0; + char **MALLOC_N(result_limit, result); + assert(result!= NULL); + struct dirent *de; + DIR *d=opendir(directory); + if (d==0) { + int er = get_error_errno(); + toku_free(result); + return er; + } + int dirnamelen = strlen(directory); + while ((de=readdir(d))) { + uint64_t thisl; + uint32_t version_ignore; + if ( !(is_a_logfile_any_version(de->d_name, &thisl, &version_ignore)) ) continue; //#2424: Skip over files that don't match the exact logfile template + if (n_results+1>=result_limit) { + result_limit*=2; + XREALLOC_N(result_limit, result); + } + int fnamelen = dirnamelen + strlen(de->d_name) + 2; // One for the slash and one for the trailing NUL. + char *XMALLOC_N(fnamelen, fname); + snprintf(fname, fnamelen, "%s/%s", directory, de->d_name); + result[n_results++] = fname; + } + // Return them in increasing order. Set width to allow for newer log file names ("xxx.tokulog13") + // which are one character longer than old log file names ("xxx.tokulog2"). The comparison function + // won't look beyond the terminating NUL, so an extra character in the comparison string doesn't matter. + // Allow room for terminating NUL after "xxx.tokulog13" even if result[0] is of form "xxx.tokulog2." + int width = sizeof(result[0]+2); + qsort(result, n_results, width, logfilenamecompare); + *resultp = result; + *n_logfiles = n_results; + result[n_results]=0; // make a trailing null + return d ? closedir(d) : 0; +} + +void toku_logger_free_logfiles(char **logfiles, int n_logfiles) { + for (int i = 0; i < n_logfiles; i++) + toku_free(logfiles[i]); + toku_free(logfiles); +} + +static int open_logfile (TOKULOGGER logger) +// Entry and Exit: This thread has permission to modify the output. 
+{ + int fnamelen = strlen(logger->directory)+50; + char fname[fnamelen]; + snprintf(fname, fnamelen, "%s/log%012lld.tokulog%d", logger->directory, logger->next_log_file_number, TOKU_LOG_VERSION); + long long index = logger->next_log_file_number; + if (logger->write_log_files) { + logger->fd = open(fname, O_CREAT+O_WRONLY+O_TRUNC+O_EXCL+O_BINARY, S_IRUSR+S_IWUSR); + if (logger->fd==-1) { + return get_error_errno(); + } + fsync_logdir(logger); + logger->next_log_file_number++; + } else { + logger->fd = open(DEV_NULL_FILE, O_WRONLY+O_BINARY); + if (logger->fd==-1) { + return get_error_errno(); + } + } + toku_os_full_write(logger->fd, "tokulogg", 8); + int version_l = toku_htonl(log_format_version); //version MUST be in network byte order regardless of disk order + toku_os_full_write(logger->fd, &version_l, 4); + if ( logger->write_log_files ) { + TOKULOGFILEINFO XMALLOC(lf_info); + lf_info->index = index; + lf_info->maxlsn = logger->written_lsn; + lf_info->version = TOKU_LOG_VERSION; + toku_logfilemgr_add_logfile_info(logger->logfilemgr, lf_info); + } + logger->fsynced_lsn = logger->written_lsn; + logger->n_in_file = 12; + return 0; +} + +static void delete_logfile(TOKULOGGER logger, long long index, uint32_t version) +// Entry and Exit: This thread has permission to modify the output. +{ + int fnamelen = strlen(logger->directory)+50; + char fname[fnamelen]; + snprintf(fname, fnamelen, "%s/log%012lld.tokulog%d", logger->directory, index, version); + int r = remove(fname); + invariant_zero(r); +} + +void toku_logger_maybe_trim_log(TOKULOGGER logger, LSN trim_lsn) +// On entry and exit: No logger locks held. +// Acquires and releases output permission. 
+{ + LSN fsynced_lsn; + grab_output(logger, &fsynced_lsn); + TOKULOGFILEMGR lfm = logger->logfilemgr; + int n_logfiles = toku_logfilemgr_num_logfiles(lfm); + + TOKULOGFILEINFO lf_info = NULL; + + if ( logger->write_log_files && logger->trim_log_files) { + while ( n_logfiles > 1 ) { // don't delete current logfile + uint32_t log_version; + lf_info = toku_logfilemgr_get_oldest_logfile_info(lfm); + log_version = lf_info->version; + if ( lf_info->maxlsn.lsn >= trim_lsn.lsn ) { + // file contains an open LSN, can't delete this or any newer log files + break; + } + // need to save copy - toku_logfilemgr_delete_oldest_logfile_info free's the lf_info + long index = lf_info->index; + toku_logfilemgr_delete_oldest_logfile_info(lfm); + n_logfiles--; + delete_logfile(logger, index, log_version); + } + } + release_output(logger, fsynced_lsn); +} + +void toku_logger_write_log_files (TOKULOGGER logger, bool write_log_files) +// Called only during initialization (or just after recovery), so no locks are needed. +{ + logger->write_log_files = write_log_files; +} + +void toku_logger_trim_log_files (TOKULOGGER logger, bool trim_log_files) +// Called only during initialization, so no locks are needed. +{ + logger->trim_log_files = trim_log_files; +} + +bool toku_logger_txns_exist(TOKULOGGER logger) +// Called during close of environment to ensure that transactions don't exist +{ + return toku_txn_manager_txns_exist(logger->txn_manager); +} + + +void toku_logger_maybe_fsync(TOKULOGGER logger, LSN lsn, int do_fsync, bool holds_input_lock) +// Effect: If fsync is nonzero, then make sure that the log is flushed and synced at least up to lsn. +// Entry: Holds input lock iff 'holds_input_lock'. The log entry has already been written to the input buffer. +// Exit: Holds no locks. +// The input lock may be released and then reacquired. Thus this function does not run atomically with respect to other threads. 
+{ + if (holds_input_lock) { + ml_unlock(&logger->input_lock); + } + if (do_fsync) { + // reacquire the locks (acquire output permission first) + LSN fsynced_lsn; + bool already_done = wait_till_output_already_written_or_output_buffer_available(logger, lsn, &fsynced_lsn); + if (already_done) { + return; + } + + // otherwise we now own the output permission, and our lsn isn't outputed. + + ml_lock(&logger->input_lock); + + swap_inbuf_outbuf(logger); + + ml_unlock(&logger->input_lock); // release the input lock now, so other threads can fill the inbuf. (Thus enabling group commit.) + + write_outbuf_to_logfile(logger, &fsynced_lsn); + if (fsynced_lsn.lsn < lsn.lsn) { + // it may have gotten fsynced by the write_outbuf_to_logfile. + toku_file_fsync_without_accounting(logger->fd); + assert(fsynced_lsn.lsn <= logger->written_lsn.lsn); + fsynced_lsn = logger->written_lsn; + } + // the last lsn is only accessed while holding output permission or else when the log file is old. + if (logger->write_log_files) { + toku_logfilemgr_update_last_lsn(logger->logfilemgr, logger->written_lsn); + } + release_output(logger, fsynced_lsn); + } +} + +static void +logger_write_buffer(TOKULOGGER logger, LSN *fsynced_lsn) +// Entry: Holds the input lock and permission to modify output. +// Exit: Holds only the permission to modify output. +// Effect: Write the buffers to the output. If DO_FSYNC is true, then fsync. +// Note: Only called during single-threaded activity from toku_logger_restart, so locks aren't really needed. +{ + swap_inbuf_outbuf(logger); + ml_unlock(&logger->input_lock); + write_outbuf_to_logfile(logger, fsynced_lsn); + if (logger->write_log_files) { + toku_file_fsync_without_accounting(logger->fd); + toku_logfilemgr_update_last_lsn(logger->logfilemgr, logger->written_lsn); // t:2294 + } +} + +int toku_logger_restart(TOKULOGGER logger, LSN lastlsn) +// Entry and exit: Holds no locks (this is called only during single-threaded activity, such as initial start). 
+{ + int r; + + // flush out the log buffer + LSN fsynced_lsn; + grab_output(logger, &fsynced_lsn); + ml_lock(&logger->input_lock); + logger_write_buffer(logger, &fsynced_lsn); + + // close the log file + if ( logger->write_log_files) { // fsyncs don't work to /dev/null + toku_file_fsync_without_accounting(logger->fd); + } + r = close(logger->fd); assert(r == 0); + logger->fd = -1; + + // reset the LSN's to the lastlsn when the logger was opened + logger->lsn = logger->written_lsn = logger->fsynced_lsn = lastlsn; + logger->write_log_files = true; + logger->trim_log_files = true; + + // open a new log file + r = open_logfile(logger); + release_output(logger, fsynced_lsn); + return r; +} + +// fname is the iname +void toku_logger_log_fcreate (TOKUTXN txn, const char *fname, FILENUM filenum, uint32_t mode, + uint32_t treeflags, uint32_t nodesize, uint32_t basementnodesize, + enum toku_compression_method compression_method) { + if (txn) { + BYTESTRING bs_fname = { .len = (uint32_t) strlen(fname), .data = (char *) fname }; + // fsync log on fcreate + toku_log_fcreate (txn->logger, (LSN*)0, 1, txn, toku_txn_get_txnid(txn), filenum, + bs_fname, mode, treeflags, nodesize, basementnodesize, compression_method); + } +} + + +// We only do fdelete on open ft's, so we pass the filenum here +void toku_logger_log_fdelete (TOKUTXN txn, FILENUM filenum) { + if (txn) { + //No fsync. + toku_log_fdelete (txn->logger, (LSN*)0, 0, txn, toku_txn_get_txnid(txn), filenum); + } +} + + + +/* fopen isn't really an action. It's just for bookkeeping. We need to know the filename that goes with a filenum. 
*/ +void toku_logger_log_fopen (TOKUTXN txn, const char * fname, FILENUM filenum, uint32_t treeflags) { + if (txn) { + BYTESTRING bs; + bs.len = strlen(fname); + bs.data = (char*)fname; + toku_log_fopen (txn->logger, (LSN*)0, 0, bs, filenum, treeflags); + } +} + +static int toku_fread_uint8_t_nocrclen (FILE *f, uint8_t *v) { + int vi=fgetc(f); + if (vi==EOF) return -1; + uint8_t vc=(uint8_t)vi; + *v = vc; + return 0; +} + +int toku_fread_uint8_t (FILE *f, uint8_t *v, struct x1764 *mm, uint32_t *len) { + int vi=fgetc(f); + if (vi==EOF) return -1; + uint8_t vc=(uint8_t)vi; + toku_x1764_add(mm, &vc, 1); + (*len)++; + *v = vc; + return 0; +} + +int toku_fread_uint32_t_nocrclen (FILE *f, uint32_t *v) { + uint32_t result; + uint8_t *cp = (uint8_t*)&result; + int r; + r = toku_fread_uint8_t_nocrclen (f, cp+0); if (r!=0) return r; + r = toku_fread_uint8_t_nocrclen (f, cp+1); if (r!=0) return r; + r = toku_fread_uint8_t_nocrclen (f, cp+2); if (r!=0) return r; + r = toku_fread_uint8_t_nocrclen (f, cp+3); if (r!=0) return r; + *v = toku_dtoh32(result); + + return 0; +} +int toku_fread_uint32_t (FILE *f, uint32_t *v, struct x1764 *checksum, uint32_t *len) { + uint32_t result; + uint8_t *cp = (uint8_t*)&result; + int r; + r = toku_fread_uint8_t (f, cp+0, checksum, len); if(r!=0) return r; + r = toku_fread_uint8_t (f, cp+1, checksum, len); if(r!=0) return r; + r = toku_fread_uint8_t (f, cp+2, checksum, len); if(r!=0) return r; + r = toku_fread_uint8_t (f, cp+3, checksum, len); if(r!=0) return r; + *v = toku_dtoh32(result); + return 0; +} + +int toku_fread_uint64_t (FILE *f, uint64_t *v, struct x1764 *checksum, uint32_t *len) { + uint32_t v1,v2; + int r; + r=toku_fread_uint32_t(f, &v1, checksum, len); if (r!=0) return r; + r=toku_fread_uint32_t(f, &v2, checksum, len); if (r!=0) return r; + *v = (((uint64_t)v1)<<32 ) | ((uint64_t)v2); + return 0; +} + +int toku_fread_bool (FILE *f, bool *v, struct x1764 *mm, uint32_t *len) { + uint8_t iv; + int r = toku_fread_uint8_t(f, &iv, mm, 
len); + if (r == 0) { + *v = (iv!=0); + } + return r; +} + +int toku_fread_LSN (FILE *f, LSN *lsn, struct x1764 *checksum, uint32_t *len) { + return toku_fread_uint64_t (f, &lsn->lsn, checksum, len); +} + +int toku_fread_BLOCKNUM (FILE *f, BLOCKNUM *b, struct x1764 *checksum, uint32_t *len) { + return toku_fread_uint64_t (f, (uint64_t*)&b->b, checksum, len); +} + +int toku_fread_FILENUM (FILE *f, FILENUM *filenum, struct x1764 *checksum, uint32_t *len) { + return toku_fread_uint32_t (f, &filenum->fileid, checksum, len); +} + +int toku_fread_TXNID (FILE *f, TXNID *txnid, struct x1764 *checksum, uint32_t *len) { + return toku_fread_uint64_t (f, txnid, checksum, len); +} + +int toku_fread_TXNID_PAIR (FILE *f, TXNID_PAIR *txnid, struct x1764 *checksum, uint32_t *len) { + TXNID parent; + TXNID child; + int r; + r = toku_fread_TXNID(f, &parent, checksum, len); if (r != 0) { return r; } + r = toku_fread_TXNID(f, &child, checksum, len); if (r != 0) { return r; } + txnid->parent_id64 = parent; + txnid->child_id64 = child; + return 0; +} + + +int toku_fread_XIDP (FILE *f, XIDP *xidp, struct x1764 *checksum, uint32_t *len) { + // These reads are verbose because XA defined the fields as "long", but we use 4 bytes, 1 byte and 1 byte respectively. + TOKU_XA_XID *XMALLOC(xid); + { + uint32_t formatID; + int r = toku_fread_uint32_t(f, &formatID, checksum, len); + if (r!=0) return r; + xid->formatID = formatID; + } + { + uint8_t gtrid_length; + int r = toku_fread_uint8_t (f, >rid_length, checksum, len); + if (r!=0) return r; + xid->gtrid_length = gtrid_length; + } + { + uint8_t bqual_length; + int r = toku_fread_uint8_t (f, &bqual_length, checksum, len); + if (r!=0) return r; + xid->bqual_length = bqual_length; + } + for (int i=0; i< xid->gtrid_length + xid->bqual_length; i++) { + uint8_t byte; + int r = toku_fread_uint8_t(f, &byte, checksum, len); + if (r!=0) return r; + xid->data[i] = byte; + } + *xidp = xid; + return 0; +} + +// fills in the bs with malloced data. 
+int toku_fread_BYTESTRING (FILE *f, BYTESTRING *bs, struct x1764 *checksum, uint32_t *len) { + int r=toku_fread_uint32_t(f, (uint32_t*)&bs->len, checksum, len); + if (r!=0) return r; + XMALLOC_N(bs->len, bs->data); + uint32_t i; + for (i=0; ilen; i++) { + r=toku_fread_uint8_t(f, (uint8_t*)&bs->data[i], checksum, len); + if (r!=0) { + toku_free(bs->data); + bs->data=0; + return r; + } + } + return 0; +} + +// fills in the fs with malloced data. +int toku_fread_FILENUMS (FILE *f, FILENUMS *fs, struct x1764 *checksum, uint32_t *len) { + int r=toku_fread_uint32_t(f, (uint32_t*)&fs->num, checksum, len); + if (r!=0) return r; + XMALLOC_N(fs->num, fs->filenums); + uint32_t i; + for (i=0; inum; i++) { + r=toku_fread_FILENUM (f, &fs->filenums[i], checksum, len); + if (r!=0) { + toku_free(fs->filenums); + fs->filenums=0; + return r; + } + } + return 0; +} + +int toku_logprint_LSN (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))) { + LSN v; + int r = toku_fread_LSN(inf, &v, checksum, len); + if (r!=0) return r; + fprintf(outf, " %s=%" PRIu64, fieldname, v.lsn); + return 0; +} + +int toku_logprint_TXNID (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))) { + TXNID v; + int r = toku_fread_TXNID(inf, &v, checksum, len); + if (r!=0) return r; + fprintf(outf, " %s=%" PRIu64, fieldname, v); + return 0; +} + +int toku_logprint_TXNID_PAIR (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))) { + TXNID_PAIR v; + int r = toku_fread_TXNID_PAIR(inf, &v, checksum, len); + if (r!=0) return r; + fprintf(outf, " %s=%" PRIu64 ",%" PRIu64, fieldname, v.parent_id64, v.child_id64); + return 0; +} + +int toku_logprint_XIDP (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))) 
{ + XIDP vp; + int r = toku_fread_XIDP(inf, &vp, checksum, len); + if (r!=0) return r; + fprintf(outf, " %s={formatID=0x%lx gtrid_length=%ld bqual_length=%ld data=", fieldname, vp->formatID, vp->gtrid_length, vp->bqual_length); + toku_print_bytes(outf, vp->gtrid_length + vp->bqual_length, vp->data); + fprintf(outf, "}"); + toku_free(vp); + return 0; +} + +int toku_logprint_uint8_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format) { + uint8_t v; + int r = toku_fread_uint8_t(inf, &v, checksum, len); + if (r!=0) return r; + fprintf(outf, " %s=%d", fieldname, v); + if (format) fprintf(outf, format, v); + else if (v=='\'') fprintf(outf, "('\'')"); + else if (isprint(v)) fprintf(outf, "('%c')", v); + else {}/*nothing*/ + return 0; +} + +int toku_logprint_uint32_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format) { + uint32_t v; + int r = toku_fread_uint32_t(inf, &v, checksum, len); + if (r!=0) return r; + fprintf(outf, " %s=", fieldname); + fprintf(outf, format ? format : "%d", v); + return 0; +} + +int toku_logprint_uint64_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format) { + uint64_t v; + int r = toku_fread_uint64_t(inf, &v, checksum, len); + if (r!=0) return r; + fprintf(outf, " %s=", fieldname); + fprintf(outf, format ? format : "%" PRId64, v); + return 0; +} + +int toku_logprint_bool (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))) { + bool v; + int r = toku_fread_bool(inf, &v, checksum, len); + if (r!=0) return r; + fprintf(outf, " %s=%s", fieldname, v ? 
"true" : "false"); + return 0; + +} + +void toku_print_BYTESTRING (FILE *outf, uint32_t len, char *data) { + fprintf(outf, "{len=%u data=", len); + toku_print_bytes(outf, len, data); + fprintf(outf, "}"); + +} + +int toku_logprint_BYTESTRING (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))) { + BYTESTRING bs; + int r = toku_fread_BYTESTRING(inf, &bs, checksum, len); + if (r!=0) return r; + fprintf(outf, " %s=", fieldname); + toku_print_BYTESTRING(outf, bs.len, bs.data); + toku_free(bs.data); + return 0; +} + +int toku_logprint_BLOCKNUM (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format) { + return toku_logprint_uint64_t(outf, inf, fieldname, checksum, len, format); + +} + +int toku_logprint_FILENUM (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format) { + return toku_logprint_uint32_t(outf, inf, fieldname, checksum, len, format); + +} + +static void +toku_print_FILENUMS (FILE *outf, uint32_t num, FILENUM *filenums) { + fprintf(outf, "{num=%u filenums=\"", num); + uint32_t i; + for (i=0; i0) + fprintf(outf, ","); + fprintf(outf, "0x%" PRIx32, filenums[i].fileid); + } + fprintf(outf, "\"}"); + +} + +int toku_logprint_FILENUMS (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))) { + FILENUMS bs; + int r = toku_fread_FILENUMS(inf, &bs, checksum, len); + if (r!=0) return r; + fprintf(outf, " %s=", fieldname); + toku_print_FILENUMS(outf, bs.num, bs.filenums); + toku_free(bs.filenums); + return 0; +} + +int toku_read_and_print_logmagic (FILE *f, uint32_t *versionp) { + { + char magic[8]; + int r=fread(magic, 1, 8, f); + if (r!=8) { + return DB_BADFORMAT; + } + if (memcmp(magic, "tokulogg", 8)!=0) { + return DB_BADFORMAT; + } + } + { + int version; + int r=fread(&version, 1, 4, f); + if (r!=4) { + return 
DB_BADFORMAT; + } + printf("tokulog v.%u\n", toku_ntohl(version)); + //version MUST be in network order regardless of disk order + *versionp=toku_ntohl(version); + } + return 0; +} + +int toku_read_logmagic (FILE *f, uint32_t *versionp) { + { + char magic[8]; + int r=fread(magic, 1, 8, f); + if (r!=8) { + return DB_BADFORMAT; + } + if (memcmp(magic, "tokulogg", 8)!=0) { + return DB_BADFORMAT; + } + } + { + int version; + int r=fread(&version, 1, 4, f); + if (r!=4) { + return DB_BADFORMAT; + } + *versionp=toku_ntohl(version); + } + return 0; +} + +TXNID_PAIR toku_txn_get_txnid (TOKUTXN txn) { + TXNID_PAIR tp = { .parent_id64 = TXNID_NONE, .child_id64 = TXNID_NONE}; + if (txn==0) return tp; + else return txn->txnid; +} + +LSN toku_logger_last_lsn(TOKULOGGER logger) { + return logger->lsn; +} + +TOKULOGGER toku_txn_logger (TOKUTXN txn) { + return txn ? txn->logger : 0; +} + +void toku_txnid2txn(TOKULOGGER logger, TXNID_PAIR txnid, TOKUTXN *result) { + TOKUTXN root_txn = NULL; + toku_txn_manager_suspend(logger->txn_manager); + toku_txn_manager_id2txn_unlocked(logger->txn_manager, txnid, &root_txn); + if (root_txn == NULL || root_txn->txnid.child_id64 == txnid.child_id64) { + *result = root_txn; + } + else if (root_txn != NULL) { + root_txn->child_manager->suspend(); + root_txn->child_manager->find_tokutxn_by_xid_unlocked(txnid, result); + root_txn->child_manager->resume(); + } + toku_txn_manager_resume(logger->txn_manager); +} + +// Find the earliest LSN in a log. No locks are needed. 
+static int peek_at_log (TOKULOGGER logger, char* filename, LSN *first_lsn) { + int fd = open(filename, O_RDONLY+O_BINARY); + if (fd<0) { + int er = get_error_errno(); + if (logger->write_log_files) printf("couldn't open: %s\n", strerror(er)); + return er; + } + enum { SKIP = 12+1+4 }; // read the 12 byte header, the first message, and the first len + unsigned char header[SKIP+8]; + int r = read(fd, header, SKIP+8); + if (r!=SKIP+8) return 0; // cannot determine that it's archivable, so we'll assume no. If a later-log is archivable is then this one will be too. + + uint64_t lsn; + { + struct rbuf rb; + rb.buf = header+SKIP; + rb.size = 8; + rb.ndone = 0; + lsn = rbuf_ulonglong(&rb); + } + + r=close(fd); + if (r!=0) { return 0; } + + first_lsn->lsn=lsn; + return 0; +} + +// Return a malloc'd array of malloc'd strings which are the filenames that can be archived. +// Output permission are obtained briefly so we can get a list of the log files without conflicting. +int toku_logger_log_archive (TOKULOGGER logger, char ***logs_p, int flags) { + if (flags!=0) return EINVAL; // don't know what to do. + int all_n_logs; + int i; + char **all_logs; + int n_logfiles; + LSN fsynced_lsn; + grab_output(logger, &fsynced_lsn); + int r = toku_logger_find_logfiles (logger->directory, &all_logs, &n_logfiles); + release_output(logger, fsynced_lsn); + if (r!=0) return r; + + for (i=0; all_logs[i]; i++); + all_n_logs=i; + // get them into increasing order + qsort(all_logs, all_n_logs, sizeof(all_logs[0]), logfilenamecompare); + + LSN save_lsn = logger->last_completed_checkpoint_lsn; + + // Now starting at the last one, look for archivable ones. + // Count the total number of bytes, because we have to return a single big array. (That's the BDB interface. Bleah...) 
+ LSN earliest_lsn_in_logfile={(unsigned long long)(-1LL)}; + r = peek_at_log(logger, all_logs[all_n_logs-1], &earliest_lsn_in_logfile); // try to find the lsn that's in the most recent log + if (earliest_lsn_in_logfile.lsn <= save_lsn.lsn) { + i=all_n_logs-1; + } else { + for (i=all_n_logs-2; i>=0; i--) { // start at all_n_logs-2 because we never archive the most recent log + r = peek_at_log(logger, all_logs[i], &earliest_lsn_in_logfile); + if (r!=0) continue; // In case of error, just keep going + + if (earliest_lsn_in_logfile.lsn <= save_lsn.lsn) { + break; + } + } + } + + // all log files up to, but not including, i can be archived. + int n_to_archive=i; + int count_bytes=0; + for (i=0; iparent; +} + +void toku_logger_note_checkpoint(TOKULOGGER logger, LSN lsn) { + logger->last_completed_checkpoint_lsn = lsn; +} + +/////////////////////////////////////////////////////////////////////////////////// +// Engine status +// +// Status is intended for display to humans to help understand system behavior. +// It does not need to be perfectly thread-safe. + +static LOGGER_STATUS_S logger_status; + +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(logger_status, k, c, t, "logger: " l, inc) + +static void +status_init(void) { + // Note, this function initializes the keyname, type, and legend fields. + // Value fields are initialized to zero by compiler. 
+ STATUS_INIT(LOGGER_NEXT_LSN, nullptr, UINT64, "next LSN", TOKU_ENGINE_STATUS); + STATUS_INIT(LOGGER_NUM_WRITES, LOGGER_WRITES, UINT64, "writes", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(LOGGER_BYTES_WRITTEN, LOGGER_WRITES_BYTES, UINT64, "writes (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(LOGGER_UNCOMPRESSED_BYTES_WRITTEN, LOGGER_WRITES_UNCOMPRESSED_BYTES, UINT64, "writes (uncompressed bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(LOGGER_TOKUTIME_WRITES, LOGGER_WRITES_SECONDS, TOKUTIME, "writes (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(LOGGER_WAIT_BUF_LONG, LOGGER_WAIT_LONG, UINT64, "number of long logger write operations", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + logger_status.initialized = true; +} +#undef STATUS_INIT + +#define STATUS_VALUE(x) logger_status.status[x].value.num + +void +toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS statp) { + if (!logger_status.initialized) + status_init(); + if (logger) { + STATUS_VALUE(LOGGER_NEXT_LSN) = logger->lsn.lsn; + STATUS_VALUE(LOGGER_NUM_WRITES) = logger->num_writes_to_disk; + STATUS_VALUE(LOGGER_BYTES_WRITTEN) = logger->bytes_written_to_disk; + // No compression on logfiles so the uncompressed size is just number of bytes written + STATUS_VALUE(LOGGER_UNCOMPRESSED_BYTES_WRITTEN) = logger->bytes_written_to_disk; + STATUS_VALUE(LOGGER_TOKUTIME_WRITES) = logger->time_spent_writing_to_disk; + STATUS_VALUE(LOGGER_WAIT_BUF_LONG) = logger->num_wait_buf_long; + } + *statp = logger_status; +} + + + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// Used for upgrade: +// if any valid log files exist in log_dir, then +// set *found_any_logs to true and set *version_found to version number of latest log +int +toku_get_version_of_logs_on_disk(const char *log_dir, bool *found_any_logs, uint32_t *version_found) { + bool found = false; + uint32_t highest_version = 0; + int r = 0; + + struct 
dirent *de; + DIR *d=opendir(log_dir); + if (d==NULL) { + r = get_error_errno(); + } + else { + // Examine every file in the directory and find highest version + while ((de=readdir(d))) { + uint32_t this_log_version; + uint64_t this_log_number; + bool is_log = is_a_logfile_any_version(de->d_name, &this_log_number, &this_log_version); + if (is_log) { + if (!found) { // first log file found + found = true; + highest_version = this_log_version; + } + else + highest_version = highest_version > this_log_version ? highest_version : this_log_version; + } + } + int r2 = closedir(d); + if (r==0) r = r2; + } + if (r==0) { + *found_any_logs = found; + if (found) + *version_found = highest_version; + } + return r; +} + +TXN_MANAGER toku_logger_get_txn_manager(TOKULOGGER logger) { + return logger->txn_manager; +} + +#undef STATUS_VALUE diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/logger.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/logger.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/logger.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/logger.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,341 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include "ft/serialize/block_table.h" +#include "ft/serialize/ft_layout_version.h" +#include "ft/txn/txn.h" + +typedef struct tokulogger *TOKULOGGER; + +enum { + TOKU_LOG_VERSION_1 = 1, + TOKU_LOG_VERSION_2 = 2, + //After 2 we linked the log version to the FT_LAYOUT VERSION. 
+ //So it went from 2 to 13 (3-12 do not exist) + TOKU_LOG_VERSION_24 = 24, + TOKU_LOG_VERSION_25 = 25, // change rollinclude rollback log entry + TOKU_LOG_VERSION_26 = 26, // no change from 25 + TOKU_LOG_VERSION_27 = 27, // no change from 26 + TOKU_LOG_VERSION = FT_LAYOUT_VERSION, + TOKU_LOG_MIN_SUPPORTED_VERSION = FT_LAYOUT_MIN_SUPPORTED_VERSION, +}; + +int toku_logger_create (TOKULOGGER *resultp); +int toku_logger_open (const char *directory, TOKULOGGER logger); +int toku_logger_open_with_last_xid(const char *directory, TOKULOGGER logger, TXNID last_xid); +void toku_logger_shutdown(TOKULOGGER logger); +int toku_logger_close(TOKULOGGER *loggerp); +void toku_logger_initialize_rollback_cache(TOKULOGGER logger, struct ft *ft); +int toku_logger_open_rollback(TOKULOGGER logger, struct cachetable *ct, bool create); +void toku_logger_close_rollback(TOKULOGGER logger); +void toku_logger_close_rollback_check_empty(TOKULOGGER logger, bool clean_shutdown); +bool toku_logger_rollback_is_open (TOKULOGGER); // return true iff the rollback is open. + +void toku_logger_fsync (TOKULOGGER logger); +void toku_logger_fsync_if_lsn_not_fsynced(TOKULOGGER logger, LSN lsn); +int toku_logger_is_open(TOKULOGGER logger); +void toku_logger_set_cachetable (TOKULOGGER logger, struct cachetable *ct); +int toku_logger_set_lg_max(TOKULOGGER logger, uint32_t lg_max); +int toku_logger_get_lg_max(TOKULOGGER logger, uint32_t *lg_maxp); +int toku_logger_set_lg_bsize(TOKULOGGER logger, uint32_t bsize); + +void toku_logger_write_log_files (TOKULOGGER logger, bool write_log_files); +void toku_logger_trim_log_files(TOKULOGGER logger, bool trim_log_files); +bool toku_logger_txns_exist(TOKULOGGER logger); + +// Restart the logger. This function is used by recovery to really start +// logging. +// Effects: Flush the current log buffer, reset the logger's lastlsn, and +// open a new log file. 
+// Returns: 0 if success +int toku_logger_restart(TOKULOGGER logger, LSN lastlsn); + +// Maybe trim the log entries from the log that are older than the given LSN +// Effect: find all of the log files whose largest LSN is smaller than the +// given LSN and delete them. +void toku_logger_maybe_trim_log(TOKULOGGER logger, LSN oldest_open_lsn); + +// At the ft layer, a FILENUM uniquely identifies an open file. +struct FILENUM { + uint32_t fileid; +}; +static const FILENUM FILENUM_NONE = { .fileid = UINT32_MAX }; + +struct FILENUMS { + uint32_t num; + FILENUM *filenums; +}; + +void toku_logger_log_fcreate(TOKUTXN txn, const char *fname, FILENUM filenum, uint32_t mode, uint32_t flags, uint32_t nodesize, uint32_t basementnodesize, enum toku_compression_method compression_method); +void toku_logger_log_fdelete(TOKUTXN txn, FILENUM filenum); +void toku_logger_log_fopen(TOKUTXN txn, const char * fname, FILENUM filenum, uint32_t treeflags); + +// the log generation code requires a typedef if we want to pass by pointer +typedef TOKU_XA_XID *XIDP; + +int toku_fread_uint8_t (FILE *f, uint8_t *v, struct x1764 *mm, uint32_t *len); +int toku_fread_uint32_t_nocrclen (FILE *f, uint32_t *v); +int toku_fread_uint32_t (FILE *f, uint32_t *v, struct x1764 *checksum, uint32_t *len); +int toku_fread_uint64_t (FILE *f, uint64_t *v, struct x1764 *checksum, uint32_t *len); +int toku_fread_bool (FILE *f, bool *v, struct x1764 *checksum, uint32_t *len); +int toku_fread_LSN (FILE *f, LSN *lsn, struct x1764 *checksum, uint32_t *len); +int toku_fread_BLOCKNUM (FILE *f, BLOCKNUM *lsn, struct x1764 *checksum, uint32_t *len); +int toku_fread_FILENUM (FILE *f, FILENUM *filenum, struct x1764 *checksum, uint32_t *len); +int toku_fread_TXNID (FILE *f, TXNID *txnid, struct x1764 *checksum, uint32_t *len); +int toku_fread_TXNID_PAIR (FILE *f, TXNID_PAIR *txnid, struct x1764 *checksum, uint32_t *len); +int toku_fread_XIDP (FILE *f, XIDP *xidp, struct x1764 *checksum, uint32_t *len); +int 
toku_fread_BYTESTRING (FILE *f, BYTESTRING *bs, struct x1764 *checksum, uint32_t *len); +int toku_fread_FILENUMS (FILE *f, FILENUMS *fs, struct x1764 *checksum, uint32_t *len); + +int toku_logprint_LSN (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))); +int toku_logprint_TXNID (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))); +int toku_logprint_TXNID_PAIR (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))); +int toku_logprint_XIDP (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))); +int toku_logprint_uint8_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format); +int toku_logprint_uint32_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format); +int toku_logprint_BLOCKNUM (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format); +int toku_logprint_uint64_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format); +int toku_logprint_bool (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))); +void toku_print_BYTESTRING (FILE *outf, uint32_t len, char *data); +int toku_logprint_BYTESTRING (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))); +int toku_logprint_FILENUM (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format); +int toku_logprint_FILENUMS (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, 
const char *format); +int toku_read_and_print_logmagic (FILE *f, uint32_t *versionp); +int toku_read_logmagic (FILE *f, uint32_t *versionp); + +TXNID_PAIR toku_txn_get_txnid (TOKUTXN txn); +LSN toku_logger_last_lsn(TOKULOGGER logger); +TOKULOGGER toku_txn_logger (TOKUTXN txn); + +void toku_txnid2txn (TOKULOGGER logger, TXNID_PAIR txnid, TOKUTXN *result); + +int toku_logger_log_archive (TOKULOGGER logger, char ***logs_p, int flags); + +TOKUTXN toku_logger_txn_parent (TOKUTXN txn); +void toku_logger_note_checkpoint(TOKULOGGER logger, LSN lsn); + +void toku_logger_make_space_in_inbuf (TOKULOGGER logger, int n_bytes_needed); + +int toku_logger_write_inbuf (TOKULOGGER logger); +// Effect: Write the buffered data (from the inbuf) to a file. No fsync, however. +// As a side effect, the inbuf will be made empty. +// Return 0 on success, otherwise return an error number. +// Requires: The inbuf lock is currently held, and the outbuf lock is not held. +// Upon return, the inbuf lock will be held, and the outbuf lock is not held. +// However, no side effects should have been made to the logger. The lock was acquired simply to determine that the buffer will overflow if we try to put something into it. +// The inbuf lock will be released, so the operations before and after this function call will not be atomic. +// Rationale: When the buffer becomes nearly full, call this function so that more can be put in. +// Implementation note: Since the output lock is acquired first, we must release the input lock, and then grab both in the right order. + +void toku_logger_maybe_fsync (TOKULOGGER logger, LSN lsn, int do_fsync, bool holds_input_lock); +// Effect: If fsync is nonzero, then make sure that the log is flushed and synced at least up to lsn. +// Entry: Holds input lock iff 'holds_input_lock'. +// Exit: Holds no locks. + +// Discussion: How does the logger work: +// The logger has two buffers: an inbuf and an outbuf. +// There are two locks, called the inlock, and the outlock. 
To write, both locks must be held, and the outlock is acquired first. +// Roughly speaking, the inbuf is used to accumulate logged data, and the outbuf is used to write to disk. +// When something is to be logged we do the following: +// acquire the inlock. +// Make sure there is space in the inbuf for the logentry. (We know the size of the logentry in advance): +// if the inbuf doesn't have enough space then +// release the inlock +// acquire the outlock +// acquire the inlock +// it's possible that some other thread made space. +// if there still isn't space +// swap the inbuf and the outbuf +// release the inlock +// write the outbuf +// acquire the inlock +// release the outlock +// if the inbuf is still too small, then increase the size of the inbuf +// Increment the LSN and fill the inbuf. +// If fsync is required then +// release the inlock +// acquire the outlock +// acquire the inlock +// if the LSN has been flushed and fsynced (if so we are done. Some other thread did the flush.) 
+// release the locks +// if the LSN has been flushed but not fsynced up to the LSN: +// release the inlock +// fsync +// release the outlock +// otherwise: +// swap the outbuf and the inbuf +// release the inlock +// write the outbuf +// fsync +// release the outlock + +typedef enum { + LOGGER_NEXT_LSN = 0, + LOGGER_NUM_WRITES, + LOGGER_BYTES_WRITTEN, + LOGGER_UNCOMPRESSED_BYTES_WRITTEN, + LOGGER_TOKUTIME_WRITES, + LOGGER_WAIT_BUF_LONG, + LOGGER_STATUS_NUM_ROWS +} logger_status_entry; + +typedef struct { + bool initialized; + TOKU_ENGINE_STATUS_ROW_S status[LOGGER_STATUS_NUM_ROWS]; +} LOGGER_STATUS_S, *LOGGER_STATUS; + +void toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS s); + +int toku_get_version_of_logs_on_disk(const char *log_dir, bool *found_any_logs, uint32_t *version_found); + +struct txn_manager *toku_logger_get_txn_manager(TOKULOGGER logger); + +// For serialize / deserialize + +#include "ft/serialize/wbuf.h" + +static inline void wbuf_nocrc_FILENUM(struct wbuf *wb, FILENUM fileid) { + wbuf_nocrc_uint(wb, fileid.fileid); +} + +static inline void wbuf_FILENUM(struct wbuf *wb, FILENUM fileid) { + wbuf_uint(wb, fileid.fileid); +} + +static inline void wbuf_nocrc_FILENUMS(struct wbuf *wb, FILENUMS v) { + wbuf_nocrc_uint(wb, v.num); + for (uint32_t i = 0; i < v.num; i++) { + wbuf_nocrc_FILENUM(wb, v.filenums[i]); + } +} + +static inline void wbuf_FILENUMS(struct wbuf *wb, FILENUMS v) { + wbuf_uint(wb, v.num); + for (uint32_t i = 0; i < v.num; i++) { + wbuf_FILENUM(wb, v.filenums[i]); + } +} + +static inline void wbuf_nocrc_XIDP (struct wbuf *w, TOKU_XA_XID *xid) { + wbuf_nocrc_uint32_t(w, xid->formatID); + wbuf_nocrc_uint8_t(w, xid->gtrid_length); + wbuf_nocrc_uint8_t(w, xid->bqual_length); + wbuf_nocrc_literal_bytes(w, xid->data, xid->gtrid_length+xid->bqual_length); +} + +#include "ft/serialize/rbuf.h" + +static inline void rbuf_FILENUM(struct rbuf *rb, FILENUM *filenum) { + filenum->fileid = rbuf_int(rb); +} +static inline void 
rbuf_ma_FILENUM(struct rbuf *rb, memarena *UU(ma), FILENUM *filenum) { + rbuf_FILENUM(rb, filenum); +} + +static inline void rbuf_FILENUMS(struct rbuf *rb, FILENUMS *filenums) { + filenums->num = rbuf_int(rb); + XMALLOC_N(filenums->num, filenums->filenums); + for (uint32_t i = 0; i < filenums->num; i++) { + rbuf_FILENUM(rb, &(filenums->filenums[i])); + } +} + +static inline void rbuf_ma_FILENUMS(struct rbuf *rb, memarena *ma, FILENUMS *filenums) { + rbuf_ma_uint32_t(rb, ma, &(filenums->num)); + filenums->filenums = (FILENUM *) ma->malloc_from_arena(filenums->num * sizeof(FILENUM)); + assert(filenums->filenums != NULL); + for (uint32_t i = 0; i < filenums->num; i++) { + rbuf_ma_FILENUM(rb, ma, &(filenums->filenums[i])); + } +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/log.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/log.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/log.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/log.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,123 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include +#include + +#include "portability/memory.h" +#include "portability/toku_portability.h" + +#include "ft/logger/recover.h" +#include "ft/txn/rollback.h" +#include "ft/txn/txn.h" +#include "util/bytestring.h" + +struct roll_entry; + +static inline void toku_free_TXNID(TXNID txnid __attribute__((__unused__))) {} +static inline void toku_free_TXNID_PAIR(TXNID_PAIR txnid __attribute__((__unused__))) {} + +static inline void toku_free_LSN(LSN lsn __attribute__((__unused__))) {} +static inline void toku_free_uint64_t(uint64_t u __attribute__((__unused__))) {} +static inline void toku_free_uint32_t(uint32_t u __attribute__((__unused__))) {} +static inline void toku_free_uint8_t(uint8_t u __attribute__((__unused__))) {} +static inline void toku_free_FILENUM(FILENUM u __attribute__((__unused__))) {} +static inline void toku_free_BLOCKNUM(BLOCKNUM u __attribute__((__unused__))) {} +static inline void toku_free_bool(bool u __attribute__((__unused__))) {} +static inline void toku_free_XIDP(XIDP xidp) { toku_free(xidp); } +static inline void toku_free_BYTESTRING(BYTESTRING val) { toku_free(val.data); } +static inline void toku_free_FILENUMS(FILENUMS val) { toku_free(val.filenums); } + +int toku_maybe_upgrade_log (const char *env_dir, const char *log_dir, LSN * lsn_of_clean_shutdown, bool * upgrade_in_progress); +uint64_t toku_log_upgrade_get_footprint(void); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/log-internal.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/log-internal.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/log-internal.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/log-internal.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,279 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify 
+ it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. 
+ + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." + +#include +#include +#include +#include + +#include "portability/toku_list.h" +#include "portability/toku_pthread.h" +#include "ft/ft-internal.h" +#include "ft/logger/log.h" +#include "ft/logger/logfilemgr.h" +#include "ft/txn/txn.h" +#include "ft/txn/txn_manager.h" +#include "ft/txn/rollback_log_node_cache.h" + +#include "util/memarena.h" +#include "util/omt.h" + +using namespace toku; +// Locking for the logger +// For most purposes we use the big ydb lock. +// To log: grab the buf lock +// If the buf would overflow, then grab the file lock, swap file&buf, release buf lock, write the file, write the entry, release the file lock +// else append to buf & release lock + +#define LOGGER_MIN_BUF_SIZE (1<<24) + +// TODO: Remove mylock, it has no value +struct mylock { + toku_mutex_t lock; +}; + +static inline void ml_init(struct mylock *l) { + toku_mutex_init(&l->lock, 0); +} +static inline void ml_lock(struct mylock *l) { + toku_mutex_lock(&l->lock); +} +static inline void ml_unlock(struct mylock *l) { + toku_mutex_unlock(&l->lock); +} +static inline void ml_destroy(struct mylock *l) { + toku_mutex_destroy(&l->lock); +} + +struct logbuf { + int n_in_buf; + int buf_size; + char *buf; + LSN max_lsn_in_buf; +}; + +struct tokulogger { + struct mylock input_lock; + + toku_mutex_t output_condition_lock; // if you need both this lock and input_lock, acquire the output_lock first, then input_lock. More typical is to get the output_is_available condition to be false, and then acquire the input_lock. + toku_cond_t output_condition; // + bool output_is_available; // this is part of the predicate for the output condition. It's true if no thread is modifying the output (either doing an fsync or otherwise fiddling with the output). 
+ + bool is_open; + bool write_log_files; + bool trim_log_files; // for test purposes + char *directory; // file system directory + DIR *dir; // descriptor for directory + int fd; + CACHETABLE ct; + int lg_max; // The size of the single file in the log. Default is 100MB. + + // To access these, you must have the input lock + LSN lsn; // the next available lsn + struct logbuf inbuf; // data being accumulated for the write + + // To access these, you must have the output condition lock. + LSN written_lsn; // the last lsn written + LSN fsynced_lsn; // What is the LSN of the highest fsynced log entry (accessed only while holding the output lock, and updated only when the output lock and output permission are held) + LSN last_completed_checkpoint_lsn; // What is the LSN of the most recent completed checkpoint. + long long next_log_file_number; + struct logbuf outbuf; // data being written to the file + int n_in_file; // The amount of data in the current file + + // To access the logfilemgr you must have the output condition lock. + TOKULOGFILEMGR logfilemgr; + + uint32_t write_block_size; // How big should the blocks be written to various logs? + + uint64_t num_writes_to_disk; // how many times did we write to disk? + uint64_t bytes_written_to_disk; // how many bytes have been written to disk? + tokutime_t time_spent_writing_to_disk; // how much tokutime did we spend writing to disk? 
+ uint64_t num_wait_buf_long; // how many times we waited >= 100ms for the in buf + + CACHEFILE rollback_cachefile; + rollback_log_node_cache rollback_cache; + TXN_MANAGER txn_manager; +}; + +int toku_logger_find_next_unused_log_file(const char *directory, long long *result); +int toku_logger_find_logfiles (const char *directory, char ***resultp, int *n_logfiles); +void toku_logger_free_logfiles (char **logfiles, int n_logfiles); + +static inline int +txn_has_current_rollback_log(TOKUTXN txn) { + return txn->roll_info.current_rollback.b != ROLLBACK_NONE.b; +} + +static inline int +txn_has_spilled_rollback_logs(TOKUTXN txn) { + return txn->roll_info.spilled_rollback_tail.b != ROLLBACK_NONE.b; +} + +struct txninfo { + uint64_t rollentry_raw_count; // the total count of every byte in the transaction and all its children. + uint32_t num_fts; + FT *open_fts; + bool force_fsync_on_commit; //This transaction NEEDS an fsync once (if) it commits. (commit means root txn) + uint64_t num_rollback_nodes; + uint64_t num_rollentries; + BLOCKNUM spilled_rollback_head; + BLOCKNUM spilled_rollback_tail; + BLOCKNUM current_rollback; +}; + +static inline int toku_logsizeof_uint8_t (uint32_t v __attribute__((__unused__))) { + return 1; +} + +static inline int toku_logsizeof_uint32_t (uint32_t v __attribute__((__unused__))) { + return 4; +} + +static inline int toku_logsizeof_uint64_t (uint32_t v __attribute__((__unused__))) { + return 8; +} + +static inline int toku_logsizeof_bool (uint32_t v __attribute__((__unused__))) { + return 1; +} + +static inline int toku_logsizeof_FILENUM (FILENUM v __attribute__((__unused__))) { + return 4; +} + +static inline int toku_logsizeof_DISKOFF (DISKOFF v __attribute__((__unused__))) { + return 8; +} +static inline int toku_logsizeof_BLOCKNUM (BLOCKNUM v __attribute__((__unused__))) { + return 8; +} + +static inline int toku_logsizeof_LSN (LSN lsn __attribute__((__unused__))) { + return 8; +} + +static inline int toku_logsizeof_TXNID (TXNID txnid 
__attribute__((__unused__))) { + return 8; +} + +static inline int toku_logsizeof_TXNID_PAIR (TXNID_PAIR txnid __attribute__((__unused__))) { + return 16; +} + +static inline int toku_logsizeof_XIDP (XIDP xid) { + assert(0<=xid->gtrid_length && xid->gtrid_length<=64); + assert(0<=xid->bqual_length && xid->bqual_length<=64); + return xid->gtrid_length + + xid->bqual_length + + 4 // formatID + + 1 // gtrid_length + + 1; // bqual_length +} + +static inline int toku_logsizeof_FILENUMS (FILENUMS fs) { + static const FILENUM f = {0}; //fs could have .num==0 and then we cannot dereference + return 4 + fs.num * toku_logsizeof_FILENUM(f); +} + +static inline int toku_logsizeof_BYTESTRING (BYTESTRING bs) { + return 4+bs.len; +} + +static inline char *fixup_fname(BYTESTRING *f) { + assert(f->len>0); + char *fname = (char*)toku_xmalloc(f->len+1); + memcpy(fname, f->data, f->len); + fname[f->len]=0; + return fname; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/log_upgrade.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/log_upgrade.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/log_upgrade.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/log_upgrade.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,348 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include + +#include + +#include "log-internal.h" +#include "logger/logcursor.h" +#include "cachetable/checkpoint.h" + +static uint64_t footprint = 0; // for debug and accountability + +uint64_t +toku_log_upgrade_get_footprint(void) { + return footprint; +} + +// Footprint concept here is that each function increments a different decimal digit. +// The cumulative total shows the path taken for the upgrade. 
+// Each function must have a single return for this to work. +#define FOOTPRINT(x) function_footprint=(x*footprint_increment) +#define FOOTPRINTSETUP(increment) uint64_t function_footprint = 0; uint64_t footprint_increment=increment; +#define FOOTPRINTCAPTURE footprint+=function_footprint; + + +// return 0 if clean shutdown, TOKUDB_UPGRADE_FAILURE if not clean shutdown +static int +verify_clean_shutdown_of_log_version_current(const char *log_dir, LSN * last_lsn, TXNID *last_xid) { + int rval = TOKUDB_UPGRADE_FAILURE; + TOKULOGCURSOR cursor = NULL; + int r; + FOOTPRINTSETUP(100); + + FOOTPRINT(1); + + r = toku_logcursor_create(&cursor, log_dir); + assert(r == 0); + struct log_entry *le = NULL; + r = toku_logcursor_last(cursor, &le); + if (r == 0) { + FOOTPRINT(2); + if (le->cmd==LT_shutdown) { + LSN lsn = le->u.shutdown.lsn; + if (last_lsn) { + *last_lsn = lsn; + } + if (last_xid) { + *last_xid = le->u.shutdown.last_xid; + } + rval = 0; + } + } + r = toku_logcursor_destroy(&cursor); + assert(r == 0); + FOOTPRINTCAPTURE; + return rval; +} + + +// return 0 if clean shutdown, TOKUDB_UPGRADE_FAILURE if not clean shutdown +static int +verify_clean_shutdown_of_log_version_old(const char *log_dir, LSN * last_lsn, TXNID *last_xid, uint32_t version) { + int rval = TOKUDB_UPGRADE_FAILURE; + int r; + FOOTPRINTSETUP(10); + + FOOTPRINT(1); + + int n_logfiles; + char **logfiles; + r = toku_logger_find_logfiles(log_dir, &logfiles, &n_logfiles); + if (r!=0) return r; + + char *basename; + TOKULOGCURSOR cursor; + struct log_entry *entry; + // Only look at newest log + // basename points to first char after last / in file pathname + basename = strrchr(logfiles[n_logfiles-1], '/') + 1; + uint32_t version_name; + long long index = -1; + r = sscanf(basename, "log%lld.tokulog%u", &index, &version_name); + assert(r==2); // found index and version + invariant(version_name == version); + assert(version>=TOKU_LOG_MIN_SUPPORTED_VERSION); + assert(version< TOKU_LOG_VERSION); //Must be old + 
// find last LSN + r = toku_logcursor_create_for_file(&cursor, log_dir, basename); + if (r != 0) { + goto cleanup_no_logcursor; + } + r = toku_logcursor_last(cursor, &entry); + if (r != 0) { + goto cleanup; + } + FOOTPRINT(2); + //TODO: Remove this special case once FT_LAYOUT_VERSION_19 (and older) are not supported. + if (version <= FT_LAYOUT_VERSION_19) { + if (entry->cmd==LT_shutdown_up_to_19) { + LSN lsn = entry->u.shutdown_up_to_19.lsn; + if (last_lsn) { + *last_lsn = lsn; + } + if (last_xid) { + // Use lsn as last_xid. + *last_xid = lsn.lsn; + } + rval = 0; + } + } + else if (entry->cmd==LT_shutdown) { + LSN lsn = entry->u.shutdown.lsn; + if (last_lsn) { + *last_lsn = lsn; + } + if (last_xid) { + *last_xid = entry->u.shutdown.last_xid; + } + rval = 0; + } +cleanup: + r = toku_logcursor_destroy(&cursor); + assert(r == 0); +cleanup_no_logcursor: + toku_logger_free_logfiles(logfiles, n_logfiles); + FOOTPRINTCAPTURE; + return rval; +} + + +static int +verify_clean_shutdown_of_log_version(const char *log_dir, uint32_t version, LSN *last_lsn, TXNID *last_xid) { + // return 0 if clean shutdown, TOKUDB_UPGRADE_FAILURE if not clean shutdown + int r = 0; + FOOTPRINTSETUP(1000); + + if (version < TOKU_LOG_VERSION) { + FOOTPRINT(1); + r = verify_clean_shutdown_of_log_version_old(log_dir, last_lsn, last_xid, version); + } + else { + FOOTPRINT(2); + assert(version == TOKU_LOG_VERSION); + r = verify_clean_shutdown_of_log_version_current(log_dir, last_lsn, last_xid); + } + FOOTPRINTCAPTURE; + return r; +} + + +// Actually create a log file of the current version, making the environment be of the current version. 
+// TODO: can't fail +static int +upgrade_log(const char *env_dir, const char *log_dir, LSN last_lsn, TXNID last_xid) { // the real deal + int r; + FOOTPRINTSETUP(10000); + + LSN initial_lsn = last_lsn; + initial_lsn.lsn++; + CACHETABLE ct; + TOKULOGGER logger; + + FOOTPRINT(1); + + { //Create temporary environment + toku_cachetable_create(&ct, 1<<25, initial_lsn, NULL); + toku_cachetable_set_env_dir(ct, env_dir); + r = toku_logger_create(&logger); + assert(r == 0); + toku_logger_set_cachetable(logger, ct); + r = toku_logger_open_with_last_xid(log_dir, logger, last_xid); + assert(r==0); + } + { //Checkpoint + CHECKPOINTER cp = toku_cachetable_get_checkpointer(ct); + r = toku_checkpoint(cp, logger, NULL, NULL, NULL, NULL, UPGRADE_CHECKPOINT); //fsyncs log dir + assert(r == 0); + } + { //Close cachetable and logger + toku_logger_shutdown(logger); + toku_cachetable_close(&ct); + r = toku_logger_close(&logger); + assert(r==0); + } + { + r = verify_clean_shutdown_of_log_version(log_dir, TOKU_LOG_VERSION, NULL, NULL); + assert(r==0); + } + FOOTPRINTCAPTURE; + return 0; +} + +// If log on disk is old (environment is old) and clean shutdown, then create log of current version, +// which will make the environment of the current version (and delete the old logs). 
+int +toku_maybe_upgrade_log(const char *env_dir, const char *log_dir, LSN * lsn_of_clean_shutdown, bool * upgrade_in_progress) { + int r; + int lockfd = -1; + FOOTPRINTSETUP(100000); + + footprint = 0; + *upgrade_in_progress = false; // set true only if all criteria are met and we're actually doing an upgrade + + FOOTPRINT(1); + r = toku_recover_lock(log_dir, &lockfd); + if (r != 0) { + goto cleanup_no_lock; + } + FOOTPRINT(2); + assert(log_dir); + assert(env_dir); + + uint32_t version_of_logs_on_disk; + bool found_any_logs; + r = toku_get_version_of_logs_on_disk(log_dir, &found_any_logs, &version_of_logs_on_disk); + if (r != 0) { + goto cleanup; + } + FOOTPRINT(3); + if (!found_any_logs) + r = 0; //No logs means no logs to upgrade. + else if (version_of_logs_on_disk > TOKU_LOG_VERSION) + r = TOKUDB_DICTIONARY_TOO_NEW; + else if (version_of_logs_on_disk < TOKU_LOG_MIN_SUPPORTED_VERSION) + r = TOKUDB_DICTIONARY_TOO_OLD; + else if (version_of_logs_on_disk == TOKU_LOG_VERSION) + r = 0; //Logs are up to date + else { + FOOTPRINT(4); + LSN last_lsn = ZERO_LSN; + TXNID last_xid = TXNID_NONE; + r = verify_clean_shutdown_of_log_version(log_dir, version_of_logs_on_disk, &last_lsn, &last_xid); + if (r != 0) { + if (TOKU_LOG_VERSION_25 <= version_of_logs_on_disk && version_of_logs_on_disk <= TOKU_LOG_VERSION_27 + && TOKU_LOG_VERSION_27 == TOKU_LOG_VERSION) { + r = 0; // can do recovery on dirty shutdown + } else { + fprintf(stderr, "Cannot upgrade TokuFT version %d database.", version_of_logs_on_disk); + fprintf(stderr, " Previous improper shutdown detected.\n"); + } + goto cleanup; + } + FOOTPRINT(5); + *lsn_of_clean_shutdown = last_lsn; + *upgrade_in_progress = true; + r = upgrade_log(env_dir, log_dir, last_lsn, last_xid); + } +cleanup: + { + //Clean up + int rc; + rc = toku_recover_unlock(lockfd); + if (r==0) r = rc; + } +cleanup_no_lock: + FOOTPRINTCAPTURE; + return r; +} + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/recover.cc 
mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/recover.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/recover.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/recover.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,1679 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + + +#include + +#include "ft/cachetable/cachetable.h" +#include "ft/cachetable/checkpoint.h" +#include "ft/ft.h" +#include "ft/log_header.h" +#include "ft/logger/log-internal.h" +#include "ft/logger/logcursor.h" +#include "ft/txn/txn_manager.h" +#include "util/omt.h" + +int tokuft_recovery_trace = 0; // turn on recovery tracing, default off. 
+ +//#define DO_VERIFY_COUNTS +#ifdef DO_VERIFY_COUNTS +#define VERIFY_COUNTS(n) toku_verify_or_set_counts(n, false) +#else +#define VERIFY_COUNTS(n) ((void)0) +#endif + +// time in seconds between recovery progress reports +#define TOKUDB_RECOVERY_PROGRESS_TIME 15 + +enum ss { + BACKWARD_NEWER_CHECKPOINT_END = 1, + BACKWARD_BETWEEN_CHECKPOINT_BEGIN_END, + FORWARD_BETWEEN_CHECKPOINT_BEGIN_END, + FORWARD_NEWER_CHECKPOINT_END, +}; + +struct scan_state { + enum ss ss; + LSN checkpoint_begin_lsn; + LSN checkpoint_end_lsn; + uint64_t checkpoint_end_timestamp; + uint64_t checkpoint_begin_timestamp; + uint32_t checkpoint_num_fassociate; + uint32_t checkpoint_num_xstillopen; + TXNID last_xid; +}; + +static const char *scan_state_strings[] = { + "?", "bw_newer", "bw_between", "fw_between", "fw_newer", +}; + +static void scan_state_init(struct scan_state *ss) { + ss->ss = BACKWARD_NEWER_CHECKPOINT_END; + ss->checkpoint_begin_lsn = ZERO_LSN; + ss->checkpoint_end_lsn = ZERO_LSN; + ss->checkpoint_num_fassociate = 0; + ss->checkpoint_num_xstillopen = 0; + ss->last_xid = 0; +} + +static const char *scan_state_string(struct scan_state *ss) { + assert(BACKWARD_NEWER_CHECKPOINT_END <= ss->ss && ss->ss <= FORWARD_NEWER_CHECKPOINT_END); + return scan_state_strings[ss->ss]; +} + +// File map tuple +struct file_map_tuple { + FILENUM filenum; + FT_HANDLE ft_handle; // NULL ft_handle means it's a rollback file. 
+ char *iname; + struct __toku_db fake_db; +}; + +static void file_map_tuple_init(struct file_map_tuple *tuple, FILENUM filenum, FT_HANDLE ft_handle, char *iname) { + tuple->filenum = filenum; + tuple->ft_handle = ft_handle; + tuple->iname = iname; + // use a fake DB for comparisons, using the ft's cmp descriptor + memset(&tuple->fake_db, 0, sizeof(tuple->fake_db)); + tuple->fake_db.cmp_descriptor = &tuple->ft_handle->ft->cmp_descriptor; + tuple->fake_db.descriptor = &tuple->ft_handle->ft->descriptor; +} + +static void file_map_tuple_destroy(struct file_map_tuple *tuple) { + if (tuple->iname) { + toku_free(tuple->iname); + tuple->iname = NULL; + } +} + +// Map filenum to ft_handle +struct file_map { + toku::omt *filenums; +}; + +// The recovery environment +struct recover_env { + DB_ENV *env; + prepared_txn_callback_t prepared_txn_callback; // at the end of recovery, all the prepared txns are passed back to the ydb layer to make them into valid transactions. + keep_cachetable_callback_t keep_cachetable_callback; // after recovery, store the cachetable into the environment. + CACHETABLE ct; + TOKULOGGER logger; + CHECKPOINTER cp; + ft_compare_func bt_compare; + ft_update_func update_function; + generate_row_for_put_func generate_row_for_put; + generate_row_for_del_func generate_row_for_del; + DBT_ARRAY dest_keys; + DBT_ARRAY dest_vals; + struct scan_state ss; + struct file_map fmap; + bool goforward; + bool destroy_logger_at_end; // If true then destroy the logger when we are done. 
If false then set the logger into write-files mode when we are done with recovery.*/ +}; +typedef struct recover_env *RECOVER_ENV; + + +static void file_map_init(struct file_map *fmap) { + XMALLOC(fmap->filenums); + fmap->filenums->create(); +} + +static void file_map_destroy(struct file_map *fmap) { + fmap->filenums->destroy(); + toku_free(fmap->filenums); + fmap->filenums = nullptr; +} + +static uint32_t file_map_get_num_dictionaries(struct file_map *fmap) { + return fmap->filenums->size(); +} + +static void file_map_close_dictionaries(struct file_map *fmap, LSN oplsn) { + int r; + + while (1) { + uint32_t n = fmap->filenums->size(); + if (n == 0) { + break; + } + struct file_map_tuple *tuple; + r = fmap->filenums->fetch(n - 1, &tuple); + assert(r == 0); + r = fmap->filenums->delete_at(n - 1); + assert(r == 0); + assert(tuple->ft_handle); + // Logging is on again, but we must pass the right LSN into close. + if (tuple->ft_handle) { // it's a DB, not a rollback file + toku_ft_handle_close_recovery(tuple->ft_handle, oplsn); + } + file_map_tuple_destroy(tuple); + toku_free(tuple); + } +} + +static int file_map_h(struct file_map_tuple *const &a, const FILENUM &b) { + if (a->filenum.fileid < b.fileid) { + return -1; + } else if (a->filenum.fileid > b.fileid) { + return 1; + } else { + return 0; + } +} + +static int file_map_insert (struct file_map *fmap, FILENUM fnum, FT_HANDLE ft_handle, char *iname) { + struct file_map_tuple *XMALLOC(tuple); + file_map_tuple_init(tuple, fnum, ft_handle, iname); + int r = fmap->filenums->insert(tuple, fnum, nullptr); + return r; +} + +static void file_map_remove(struct file_map *fmap, FILENUM fnum) { + uint32_t idx; + struct file_map_tuple *tuple; + int r = fmap->filenums->find_zero(fnum, &tuple, &idx); + if (r == 0) { + r = fmap->filenums->delete_at(idx); + file_map_tuple_destroy(tuple); + toku_free(tuple); + } +} + +// Look up file info: given FILENUM, return file_map_tuple (or DB_NOTFOUND) +static int file_map_find(struct file_map 
*fmap, FILENUM fnum, struct file_map_tuple **file_map_tuple) { + uint32_t idx; + struct file_map_tuple *tuple; + int r = fmap->filenums->find_zero(fnum, &tuple, &idx); + if (r == 0) { + assert(tuple->filenum.fileid == fnum.fileid); + *file_map_tuple = tuple; + } else { + assert(r == DB_NOTFOUND); + } + return r; +} + +static int recover_env_init (RECOVER_ENV renv, + const char *env_dir, + DB_ENV *env, + prepared_txn_callback_t prepared_txn_callback, + keep_cachetable_callback_t keep_cachetable_callback, + TOKULOGGER logger, + ft_compare_func bt_compare, + ft_update_func update_function, + generate_row_for_put_func generate_row_for_put, + generate_row_for_del_func generate_row_for_del, + size_t cachetable_size) { + int r = 0; + + // If we are passed a logger use it, otherwise create one. + renv->destroy_logger_at_end = logger==NULL; + if (logger) { + renv->logger = logger; + } else { + r = toku_logger_create(&renv->logger); + assert(r == 0); + } + toku_logger_write_log_files(renv->logger, false); + toku_cachetable_create(&renv->ct, cachetable_size ? 
cachetable_size : 1<<25, (LSN){0}, renv->logger); + toku_cachetable_set_env_dir(renv->ct, env_dir); + if (keep_cachetable_callback) keep_cachetable_callback(env, renv->ct); + toku_logger_set_cachetable(renv->logger, renv->ct); + renv->env = env; + renv->prepared_txn_callback = prepared_txn_callback; + renv->keep_cachetable_callback = keep_cachetable_callback; + renv->bt_compare = bt_compare; + renv->update_function = update_function; + renv->generate_row_for_put = generate_row_for_put; + renv->generate_row_for_del = generate_row_for_del; + file_map_init(&renv->fmap); + renv->goforward = false; + renv->cp = toku_cachetable_get_checkpointer(renv->ct); + toku_dbt_array_init(&renv->dest_keys, 1); + toku_dbt_array_init(&renv->dest_vals, 1); + if (tokuft_recovery_trace) + fprintf(stderr, "%s:%d\n", __FUNCTION__, __LINE__); + return r; +} + +static void recover_env_cleanup (RECOVER_ENV renv) { + int r; + + invariant_zero(renv->fmap.filenums->size()); + file_map_destroy(&renv->fmap); + + if (renv->destroy_logger_at_end) { + toku_logger_close_rollback(renv->logger); + r = toku_logger_close(&renv->logger); + assert(r == 0); + } else { + toku_logger_write_log_files(renv->logger, true); + } + + if (renv->keep_cachetable_callback) { + renv->ct = NULL; + } else { + toku_cachetable_close(&renv->ct); + } + toku_dbt_array_destroy(&renv->dest_keys); + toku_dbt_array_destroy(&renv->dest_vals); + + if (tokuft_recovery_trace) + fprintf(stderr, "%s:%d\n", __FUNCTION__, __LINE__); +} + +static const char *recover_state(RECOVER_ENV renv) { + return scan_state_string(&renv->ss); +} + +// Open the file if it is not already open. If it is already open, then do nothing. 
+static int internal_recover_fopen_or_fcreate (RECOVER_ENV renv, bool must_create, int UU(mode), BYTESTRING *bs_iname, FILENUM filenum, uint32_t treeflags, + TOKUTXN txn, uint32_t nodesize, uint32_t basementnodesize, enum toku_compression_method compression_method, LSN max_acceptable_lsn) { + int r = 0; + FT_HANDLE ft_handle = NULL; + char *iname = fixup_fname(bs_iname); + + toku_ft_handle_create(&ft_handle); + toku_ft_set_flags(ft_handle, treeflags); + + if (nodesize != 0) { + toku_ft_handle_set_nodesize(ft_handle, nodesize); + } + + if (basementnodesize != 0) { + toku_ft_handle_set_basementnodesize(ft_handle, basementnodesize); + } + + if (compression_method != TOKU_DEFAULT_COMPRESSION_METHOD) { + toku_ft_handle_set_compression_method(ft_handle, compression_method); + } + + // set the key compare functions + if (!(treeflags & TOKU_DB_KEYCMP_BUILTIN) && renv->bt_compare) { + toku_ft_set_bt_compare(ft_handle, renv->bt_compare); + } + + if (renv->update_function) { + toku_ft_set_update(ft_handle, renv->update_function); + } + + // TODO mode (FUTURE FEATURE) + //mode = mode; + + r = toku_ft_handle_open_recovery(ft_handle, iname, must_create, must_create, renv->ct, txn, filenum, max_acceptable_lsn); + if (r != 0) { + //Note: If ft_handle_open fails, then close_ft will NOT write a header to disk. + //No need to provide lsn, so use the regular toku_ft_handle_close function + toku_ft_handle_close(ft_handle); + toku_free(iname); + if (r == ENOENT) //Not an error to simply be missing. 
+ r = 0; + return r; + } + + file_map_insert(&renv->fmap, filenum, ft_handle, iname); + return 0; +} + +static int toku_recover_begin_checkpoint (struct logtype_begin_checkpoint *l, RECOVER_ENV renv) { + int r; + TXN_MANAGER mgr = toku_logger_get_txn_manager(renv->logger); + switch (renv->ss.ss) { + case FORWARD_BETWEEN_CHECKPOINT_BEGIN_END: + assert(l->lsn.lsn == renv->ss.checkpoint_begin_lsn.lsn); + invariant(renv->ss.last_xid == TXNID_NONE); + renv->ss.last_xid = l->last_xid; + toku_txn_manager_set_last_xid_from_recovered_checkpoint(mgr, l->last_xid); + + r = 0; + break; + case FORWARD_NEWER_CHECKPOINT_END: + assert(l->lsn.lsn > renv->ss.checkpoint_end_lsn.lsn); + // Verify last_xid is no older than the previous begin + invariant(l->last_xid >= renv->ss.last_xid); + // Verify last_xid is no older than the newest txn + invariant(l->last_xid >= toku_txn_manager_get_last_xid(mgr)); + + r = 0; // ignore it (log only has a begin checkpoint) + break; + default: + fprintf(stderr, "TokuFT recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss); + abort(); + break; + } + return r; +} + +static int toku_recover_backward_begin_checkpoint (struct logtype_begin_checkpoint *l, RECOVER_ENV renv) { + int r; + time_t tnow = time(NULL); + fprintf(stderr, "%.24s TokuFT recovery bw_begin_checkpoint at %" PRIu64 " timestamp %" PRIu64 " (%s)\n", ctime(&tnow), l->lsn.lsn, l->timestamp, recover_state(renv)); + switch (renv->ss.ss) { + case BACKWARD_NEWER_CHECKPOINT_END: + // incomplete checkpoint, nothing to do + r = 0; + break; + case BACKWARD_BETWEEN_CHECKPOINT_BEGIN_END: + assert(l->lsn.lsn == renv->ss.checkpoint_begin_lsn.lsn); + renv->ss.ss = FORWARD_BETWEEN_CHECKPOINT_BEGIN_END; + renv->ss.checkpoint_begin_timestamp = l->timestamp; + renv->goforward = true; + tnow = time(NULL); + fprintf(stderr, "%.24s TokuFT recovery turning around at begin checkpoint %" PRIu64 " time %" PRIu64 "\n", + ctime(&tnow), l->lsn.lsn, + renv->ss.checkpoint_end_timestamp - 
renv->ss.checkpoint_begin_timestamp); + r = 0; + break; + default: + fprintf(stderr, "TokuFT recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss); + abort(); + break; + } + return r; +} + +static int toku_recover_end_checkpoint (struct logtype_end_checkpoint *l, RECOVER_ENV renv) { + int r; + switch (renv->ss.ss) { + case FORWARD_BETWEEN_CHECKPOINT_BEGIN_END: + assert(l->lsn_begin_checkpoint.lsn == renv->ss.checkpoint_begin_lsn.lsn); + assert(l->lsn.lsn == renv->ss.checkpoint_end_lsn.lsn); + assert(l->num_fassociate_entries == renv->ss.checkpoint_num_fassociate); + assert(l->num_xstillopen_entries == renv->ss.checkpoint_num_xstillopen); + renv->ss.ss = FORWARD_NEWER_CHECKPOINT_END; + r = 0; + break; + case FORWARD_NEWER_CHECKPOINT_END: + assert(0); + return 0; + default: + assert(0); + return 0; + } + return r; +} + +static int toku_recover_backward_end_checkpoint (struct logtype_end_checkpoint *l, RECOVER_ENV renv) { + time_t tnow = time(NULL); + fprintf(stderr, "%.24s TokuFT recovery bw_end_checkpoint at %" PRIu64 " timestamp %" PRIu64 " xid %" PRIu64 " (%s)\n", ctime(&tnow), l->lsn.lsn, l->timestamp, l->lsn_begin_checkpoint.lsn, recover_state(renv)); + switch (renv->ss.ss) { + case BACKWARD_NEWER_CHECKPOINT_END: + renv->ss.ss = BACKWARD_BETWEEN_CHECKPOINT_BEGIN_END; + renv->ss.checkpoint_begin_lsn.lsn = l->lsn_begin_checkpoint.lsn; + renv->ss.checkpoint_end_lsn.lsn = l->lsn.lsn; + renv->ss.checkpoint_end_timestamp = l->timestamp; + return 0; + case BACKWARD_BETWEEN_CHECKPOINT_BEGIN_END: + fprintf(stderr, "TokuFT recovery %s:%d Should not see two end_checkpoint log entries without an intervening begin_checkpoint\n", __FILE__, __LINE__); + abort(); + default: + break; + } + fprintf(stderr, "TokuFT recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss); + abort(); +} + +static int toku_recover_fassociate (struct logtype_fassociate *l, RECOVER_ENV renv) { + struct file_map_tuple *tuple = NULL; + int r = 
file_map_find(&renv->fmap, l->filenum, &tuple); + char *fname = fixup_fname(&l->iname); + switch (renv->ss.ss) { + case FORWARD_BETWEEN_CHECKPOINT_BEGIN_END: + renv->ss.checkpoint_num_fassociate++; + assert(r==DB_NOTFOUND); //Not open + // Open it if it exists. + // If rollback file, specify which checkpointed version of file we need (not just the latest) + // because we cannot use a rollback log that is later than the last complete checkpoint. See #3113. + { + bool rollback_file = (0==strcmp(fname, toku_product_name_strings.rollback_cachefile)); + LSN max_acceptable_lsn = MAX_LSN; + if (rollback_file) { + max_acceptable_lsn = renv->ss.checkpoint_begin_lsn; + FT_HANDLE t; + toku_ft_handle_create(&t); + r = toku_ft_handle_open_recovery(t, toku_product_name_strings.rollback_cachefile, false, false, renv->ct, (TOKUTXN)NULL, l->filenum, max_acceptable_lsn); + renv->logger->rollback_cachefile = t->ft->cf; + toku_logger_initialize_rollback_cache(renv->logger, t->ft); + } else { + r = internal_recover_fopen_or_fcreate(renv, false, 0, &l->iname, l->filenum, l->treeflags, NULL, 0, 0, TOKU_DEFAULT_COMPRESSION_METHOD, max_acceptable_lsn); + assert(r==0); + } + } + // try to open the file again and if we get it, restore + // the unlink on close bit. 
+ int ret; + ret = file_map_find(&renv->fmap, l->filenum, &tuple); + if (ret == 0 && l->unlink_on_close) { + toku_cachefile_unlink_on_close(tuple->ft_handle->ft->cf); + } + break; + case FORWARD_NEWER_CHECKPOINT_END: + if (r == 0) { //IF it is open + // assert that the filenum maps to the correct iname + assert(strcmp(fname, tuple->iname) == 0); + } + r = 0; + break; + default: + assert(0); + return 0; + } + toku_free(fname); + + return r; +} + +static int toku_recover_backward_fassociate (struct logtype_fassociate *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +static int +recover_transaction(TOKUTXN *txnp, TXNID_PAIR xid, TXNID_PAIR parentxid, TOKULOGGER logger) { + int r; + + // lookup the parent + TOKUTXN parent = NULL; + if (!txn_pair_is_none(parentxid)) { + toku_txnid2txn(logger, parentxid, &parent); + assert(parent!=NULL); + } + else { + invariant(xid.child_id64 == TXNID_NONE); + } + + // create a transaction and bind it to the transaction id + TOKUTXN txn = NULL; + { + //Verify it does not yet exist. + toku_txnid2txn(logger, xid, &txn); + assert(txn==NULL); + } + r = toku_txn_begin_with_xid( + parent, + &txn, + logger, + xid, + TXN_SNAPSHOT_NONE, + NULL, + true, // for_recovery + false // read_only + ); + assert(r == 0); + // We only know about it because it was logged. Restore the log bit. + // Logging is 'off' but it will still set the bit. 
+ toku_maybe_log_begin_txn_for_write_operation(txn); + if (txnp) *txnp = txn; + return 0; +} + +static int recover_xstillopen_internal (TOKUTXN *txnp, + LSN UU(lsn), + TXNID_PAIR xid, + TXNID_PAIR parentxid, + uint64_t rollentry_raw_count, + FILENUMS open_filenums, + bool force_fsync_on_commit, + uint64_t num_rollback_nodes, + uint64_t num_rollentries, + BLOCKNUM spilled_rollback_head, + BLOCKNUM spilled_rollback_tail, + BLOCKNUM current_rollback, + uint32_t UU(crc), + uint32_t UU(len), + RECOVER_ENV renv) { + int r; + *txnp = NULL; + switch (renv->ss.ss) { + case FORWARD_BETWEEN_CHECKPOINT_BEGIN_END: { + renv->ss.checkpoint_num_xstillopen++; + invariant(renv->ss.last_xid != TXNID_NONE); + invariant(xid.parent_id64 <= renv->ss.last_xid); + TOKUTXN txn = NULL; + { //Create the transaction. + r = recover_transaction(&txn, xid, parentxid, renv->logger); + assert(r==0); + assert(txn!=NULL); + *txnp = txn; + } + { //Recover rest of transaction. +#define COPY_TO_INFO(field) .field = field + struct txninfo info = { + COPY_TO_INFO(rollentry_raw_count), + .num_fts = 0, //Set afterwards + .open_fts = NULL, //Set afterwards + COPY_TO_INFO(force_fsync_on_commit), + COPY_TO_INFO(num_rollback_nodes), + COPY_TO_INFO(num_rollentries), + COPY_TO_INFO(spilled_rollback_head), + COPY_TO_INFO(spilled_rollback_tail), + COPY_TO_INFO(current_rollback) + }; +#undef COPY_TO_INFO + //Generate open_fts + FT array[open_filenums.num]; //Allocate maximum possible requirement + info.open_fts = array; + uint32_t i; + for (i = 0; i < open_filenums.num; i++) { + //open_filenums.filenums[] + struct file_map_tuple *tuple = NULL; + r = file_map_find(&renv->fmap, open_filenums.filenums[i], &tuple); + if (r==0) { + info.open_fts[info.num_fts++] = tuple->ft_handle->ft; + } + else { + assert(r==DB_NOTFOUND); + } + } + r = toku_txn_load_txninfo(txn, &info); + assert(r==0); + } + break; + } + case FORWARD_NEWER_CHECKPOINT_END: { + // assert that the transaction exists + TOKUTXN txn = NULL; + 
toku_txnid2txn(renv->logger, xid, &txn); + r = 0; + *txnp = txn; + break; + } + default: + assert(0); + return 0; + } + return r; +} + +static int toku_recover_xstillopen (struct logtype_xstillopen *l, RECOVER_ENV renv) { + TOKUTXN txn; + return recover_xstillopen_internal (&txn, + l->lsn, + l->xid, + l->parentxid, + l->rollentry_raw_count, + l->open_filenums, + l->force_fsync_on_commit, + l->num_rollback_nodes, + l->num_rollentries, + l->spilled_rollback_head, + l->spilled_rollback_tail, + l->current_rollback, + l->crc, + l->len, + renv); +} + +static int toku_recover_xstillopenprepared (struct logtype_xstillopenprepared *l, RECOVER_ENV renv) { + TOKUTXN txn; + int r = recover_xstillopen_internal (&txn, + l->lsn, + l->xid, + TXNID_PAIR_NONE, + l->rollentry_raw_count, + l->open_filenums, + l->force_fsync_on_commit, + l->num_rollback_nodes, + l->num_rollentries, + l->spilled_rollback_head, + l->spilled_rollback_tail, + l->current_rollback, + l->crc, + l->len, + renv); + if (r != 0) { + goto exit; + } + switch (renv->ss.ss) { + case FORWARD_BETWEEN_CHECKPOINT_BEGIN_END: { + toku_txn_prepare_txn(txn, l->xa_xid); + break; + } + case FORWARD_NEWER_CHECKPOINT_END: { + assert(txn->state == TOKUTXN_PREPARING); + break; + } + default: { + assert(0); + } + } +exit: + return r; +} + +static int toku_recover_backward_xstillopen (struct logtype_xstillopen *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} +static int toku_recover_backward_xstillopenprepared (struct logtype_xstillopenprepared *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +static int toku_recover_xbegin (struct logtype_xbegin *l, RECOVER_ENV renv) { + int r; + r = recover_transaction(NULL, l->xid, l->parentxid, renv->logger); + return r; +} + +static int toku_recover_backward_xbegin (struct logtype_xbegin *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +static int toku_recover_xcommit (struct logtype_xcommit *l, RECOVER_ENV renv) { + // find the transaction by transaction id 
+ TOKUTXN txn = NULL; + toku_txnid2txn(renv->logger, l->xid, &txn); + assert(txn!=NULL); + + // commit the transaction + int r = toku_txn_commit_with_lsn(txn, true, l->lsn, + NULL, NULL); + assert(r == 0); + + // close the transaction + toku_txn_close_txn(txn); + + return 0; +} + +static int toku_recover_backward_xcommit (struct logtype_xcommit *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +static int toku_recover_xprepare (struct logtype_xprepare *l, RECOVER_ENV renv) { + // find the transaction by transaction id + TOKUTXN txn = NULL; + toku_txnid2txn(renv->logger, l->xid, &txn); + assert(txn!=NULL); + + // Save the transaction + toku_txn_prepare_txn(txn, l->xa_xid); + + return 0; +} + +static int toku_recover_backward_xprepare (struct logtype_xprepare *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + + + +static int toku_recover_xabort (struct logtype_xabort *l, RECOVER_ENV renv) { + int r; + + // find the transaction by transaction id + TOKUTXN txn = NULL; + toku_txnid2txn(renv->logger, l->xid, &txn); + assert(txn!=NULL); + + // abort the transaction + r = toku_txn_abort_with_lsn(txn, l->lsn, NULL, NULL); + assert(r == 0); + + // close the transaction + toku_txn_close_txn(txn); + + return 0; +} + +static int toku_recover_backward_xabort (struct logtype_xabort *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +// fcreate is like fopen except that the file must be created. +static int toku_recover_fcreate (struct logtype_fcreate *l, RECOVER_ENV renv) { + int r; + + TOKUTXN txn = NULL; + toku_txnid2txn(renv->logger, l->xid, &txn); + + // assert that filenum is closed + struct file_map_tuple *tuple = NULL; + r = file_map_find(&renv->fmap, l->filenum, &tuple); + assert(r==DB_NOTFOUND); + + assert(txn!=NULL); + + //unlink if it exists (recreate from scratch). 
+ char *iname = fixup_fname(&l->iname); + char *iname_in_cwd = toku_cachetable_get_fname_in_cwd(renv->ct, iname); + r = unlink(iname_in_cwd); + if (r != 0) { + int er = get_error_errno(); + if (er != ENOENT) { + fprintf(stderr, "TokuFT recovery %s:%d unlink %s %d\n", __FUNCTION__, __LINE__, iname, er); + toku_free(iname); + return r; + } + } + assert(0!=strcmp(iname, toku_product_name_strings.rollback_cachefile)); //Creation of rollback cachefile never gets logged. + toku_free(iname_in_cwd); + toku_free(iname); + + bool must_create = true; + r = internal_recover_fopen_or_fcreate(renv, must_create, l->mode, &l->iname, l->filenum, l->treeflags, txn, l->nodesize, l->basementnodesize, (enum toku_compression_method) l->compression_method, MAX_LSN); + return r; +} + +static int toku_recover_backward_fcreate (struct logtype_fcreate *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + + + +static int toku_recover_fopen (struct logtype_fopen *l, RECOVER_ENV renv) { + int r; + + // assert that filenum is closed + struct file_map_tuple *tuple = NULL; + r = file_map_find(&renv->fmap, l->filenum, &tuple); + assert(r==DB_NOTFOUND); + + bool must_create = false; + TOKUTXN txn = NULL; + char *fname = fixup_fname(&l->iname); + + assert(0!=strcmp(fname, toku_product_name_strings.rollback_cachefile)); //Rollback cachefile can be opened only via fassociate. 
+ r = internal_recover_fopen_or_fcreate(renv, must_create, 0, &l->iname, l->filenum, l->treeflags, txn, 0, 0, TOKU_DEFAULT_COMPRESSION_METHOD, MAX_LSN); + + toku_free(fname); + return r; +} + +static int toku_recover_backward_fopen (struct logtype_fopen *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +static int toku_recover_change_fdescriptor (struct logtype_change_fdescriptor *l, RECOVER_ENV renv) { + int r; + struct file_map_tuple *tuple = NULL; + r = file_map_find(&renv->fmap, l->filenum, &tuple); + if (r==0) { + TOKUTXN txn = NULL; + //Maybe do the descriptor (lsn filter) + toku_txnid2txn(renv->logger, l->xid, &txn); + DBT old_descriptor, new_descriptor; + toku_fill_dbt( + &old_descriptor, + l->old_descriptor.data, + l->old_descriptor.len + ); + toku_fill_dbt( + &new_descriptor, + l->new_descriptor.data, + l->new_descriptor.len + ); + toku_ft_change_descriptor( + tuple->ft_handle, + &old_descriptor, + &new_descriptor, + false, + txn, + l->update_cmp_descriptor + ); + } + return 0; +} + +static int toku_recover_backward_change_fdescriptor (struct logtype_change_fdescriptor *UU(l), RECOVER_ENV UU(renv)) { + return 0; +} + + +// if file referred to in l is open, close it +static int toku_recover_fclose (struct logtype_fclose *l, RECOVER_ENV renv) { + struct file_map_tuple *tuple = NULL; + int r = file_map_find(&renv->fmap, l->filenum, &tuple); + if (r == 0) { // if file is open + char *iname = fixup_fname(&l->iname); + assert(strcmp(tuple->iname, iname) == 0); // verify that file_map has same iname as log entry + + if (0!=strcmp(iname, toku_product_name_strings.rollback_cachefile)) { + //Rollback cachefile is closed manually at end of recovery, not here + toku_ft_handle_close_recovery(tuple->ft_handle, l->lsn); + } + file_map_remove(&renv->fmap, l->filenum); + toku_free(iname); + } + return 0; +} + +static int toku_recover_backward_fclose (struct logtype_fclose *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +// fdelete is a 
transactional file delete. +static int toku_recover_fdelete (struct logtype_fdelete *l, RECOVER_ENV renv) { + TOKUTXN txn = NULL; + toku_txnid2txn(renv->logger, l->xid, &txn); + assert(txn != NULL); + + // if the forward scan in recovery found this file and opened it, we + // need to mark the txn to remove the ft on commit. if the file was + // not found and not opened, we don't need to do anything - the ft + // is already gone, so we're happy. + struct file_map_tuple *tuple; + int r = file_map_find(&renv->fmap, l->filenum, &tuple); + if (r == 0) { + toku_ft_unlink_on_commit(tuple->ft_handle, txn); + } + return 0; +} + +static int toku_recover_backward_fdelete (struct logtype_fdelete *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +static int toku_recover_enq_insert (struct logtype_enq_insert *l, RECOVER_ENV renv) { + int r; + TOKUTXN txn = NULL; + toku_txnid2txn(renv->logger, l->xid, &txn); + assert(txn!=NULL); + struct file_map_tuple *tuple = NULL; + r = file_map_find(&renv->fmap, l->filenum, &tuple); + if (r==0) { + //Maybe do the insertion if we found the cachefile. + DBT keydbt, valdbt; + toku_fill_dbt(&keydbt, l->key.data, l->key.len); + toku_fill_dbt(&valdbt, l->value.data, l->value.len); + toku_ft_maybe_insert(tuple->ft_handle, &keydbt, &valdbt, txn, true, l->lsn, false, FT_INSERT); + toku_txn_maybe_note_ft(txn, tuple->ft_handle->ft); + } + return 0; +} + +static int toku_recover_backward_enq_insert (struct logtype_enq_insert *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +static int toku_recover_enq_insert_no_overwrite (struct logtype_enq_insert_no_overwrite *l, RECOVER_ENV renv) { + int r; + TOKUTXN txn = NULL; + toku_txnid2txn(renv->logger, l->xid, &txn); + assert(txn!=NULL); + struct file_map_tuple *tuple = NULL; + r = file_map_find(&renv->fmap, l->filenum, &tuple); + if (r==0) { + //Maybe do the insertion if we found the cachefile. 
+ DBT keydbt, valdbt; + toku_fill_dbt(&keydbt, l->key.data, l->key.len); + toku_fill_dbt(&valdbt, l->value.data, l->value.len); + toku_ft_maybe_insert(tuple->ft_handle, &keydbt, &valdbt, txn, true, l->lsn, false, FT_INSERT_NO_OVERWRITE); + } + return 0; +} + +static int toku_recover_backward_enq_insert_no_overwrite (struct logtype_enq_insert_no_overwrite *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +static int toku_recover_enq_delete_any (struct logtype_enq_delete_any *l, RECOVER_ENV renv) { + int r; + TOKUTXN txn = NULL; + toku_txnid2txn(renv->logger, l->xid, &txn); + assert(txn!=NULL); + struct file_map_tuple *tuple = NULL; + r = file_map_find(&renv->fmap, l->filenum, &tuple); + if (r==0) { + //Maybe do the deletion if we found the cachefile. + DBT keydbt; + toku_fill_dbt(&keydbt, l->key.data, l->key.len); + toku_ft_maybe_delete(tuple->ft_handle, &keydbt, txn, true, l->lsn, false); + } + return 0; +} + +static int toku_recover_backward_enq_delete_any (struct logtype_enq_delete_any *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +static int toku_recover_enq_insert_multiple (struct logtype_enq_insert_multiple *l, RECOVER_ENV renv) { + int r; + TOKUTXN txn = NULL; + toku_txnid2txn(renv->logger, l->xid, &txn); + assert(txn!=NULL); + DB *src_db = NULL; + bool do_inserts = true; + { + struct file_map_tuple *tuple = NULL; + r = file_map_find(&renv->fmap, l->src_filenum, &tuple); + if (l->src_filenum.fileid == FILENUM_NONE.fileid) + assert(r==DB_NOTFOUND); + else { + if (r == 0) + src_db = &tuple->fake_db; + else + do_inserts = false; // src file was probably deleted, #3129 + } + } + + if (do_inserts) { + DBT src_key, src_val; + + toku_fill_dbt(&src_key, l->src_key.data, l->src_key.len); + toku_fill_dbt(&src_val, l->src_val.data, l->src_val.len); + + for (uint32_t file = 0; file < l->dest_filenums.num; file++) { + struct file_map_tuple *tuple = NULL; + r = file_map_find(&renv->fmap, l->dest_filenums.filenums[file], &tuple); + if (r==0) { + 
// We found the cachefile. (maybe) Do the insert. + DB *db = &tuple->fake_db; + + DBT_ARRAY key_array; + DBT_ARRAY val_array; + if (db != src_db) { + r = renv->generate_row_for_put(db, src_db, &renv->dest_keys, &renv->dest_vals, &src_key, &src_val); + assert(r==0); + invariant(renv->dest_keys.size <= renv->dest_keys.capacity); + invariant(renv->dest_vals.size <= renv->dest_vals.capacity); + invariant(renv->dest_keys.size == renv->dest_vals.size); + key_array = renv->dest_keys; + val_array = renv->dest_vals; + } else { + key_array.size = key_array.capacity = 1; + key_array.dbts = &src_key; + + val_array.size = val_array.capacity = 1; + val_array.dbts = &src_val; + } + for (uint32_t i = 0; i < key_array.size; i++) { + toku_ft_maybe_insert(tuple->ft_handle, &key_array.dbts[i], &val_array.dbts[i], txn, true, l->lsn, false, FT_INSERT); + } + } + } + } + + return 0; +} + +static int toku_recover_backward_enq_insert_multiple (struct logtype_enq_insert_multiple *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +static int toku_recover_enq_delete_multiple (struct logtype_enq_delete_multiple *l, RECOVER_ENV renv) { + int r; + TOKUTXN txn = NULL; + toku_txnid2txn(renv->logger, l->xid, &txn); + assert(txn!=NULL); + DB *src_db = NULL; + bool do_deletes = true; + { + struct file_map_tuple *tuple = NULL; + r = file_map_find(&renv->fmap, l->src_filenum, &tuple); + if (l->src_filenum.fileid == FILENUM_NONE.fileid) + assert(r==DB_NOTFOUND); + else { + if (r == 0) { + src_db = &tuple->fake_db; + } else { + do_deletes = false; // src file was probably deleted, #3129 + } + } + } + + if (do_deletes) { + DBT src_key, src_val; + toku_fill_dbt(&src_key, l->src_key.data, l->src_key.len); + toku_fill_dbt(&src_val, l->src_val.data, l->src_val.len); + + for (uint32_t file = 0; file < l->dest_filenums.num; file++) { + struct file_map_tuple *tuple = NULL; + r = file_map_find(&renv->fmap, l->dest_filenums.filenums[file], &tuple); + if (r==0) { + // We found the cachefile. 
(maybe) Do the delete. + DB *db = &tuple->fake_db; + + DBT_ARRAY key_array; + if (db != src_db) { + r = renv->generate_row_for_del(db, src_db, &renv->dest_keys, &src_key, &src_val); + assert(r==0); + invariant(renv->dest_keys.size <= renv->dest_keys.capacity); + key_array = renv->dest_keys; + } else { + key_array.size = key_array.capacity = 1; + key_array.dbts = &src_key; + } + for (uint32_t i = 0; i < key_array.size; i++) { + toku_ft_maybe_delete(tuple->ft_handle, &key_array.dbts[i], txn, true, l->lsn, false); + } + } + } + } + + return 0; +} + +static int toku_recover_backward_enq_delete_multiple (struct logtype_enq_delete_multiple *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +static int toku_recover_enq_update(struct logtype_enq_update *l, RECOVER_ENV renv) { + int r; + TOKUTXN txn = NULL; + toku_txnid2txn(renv->logger, l->xid, &txn); + assert(txn != NULL); + struct file_map_tuple *tuple = NULL; + r = file_map_find(&renv->fmap, l->filenum, &tuple); + if (r == 0) { + // Maybe do the update if we found the cachefile. + DBT key, extra; + toku_fill_dbt(&key, l->key.data, l->key.len); + toku_fill_dbt(&extra, l->extra.data, l->extra.len); + toku_ft_maybe_update(tuple->ft_handle, &key, &extra, txn, true, l->lsn, false); + } + return 0; +} + +static int toku_recover_enq_updatebroadcast(struct logtype_enq_updatebroadcast *l, RECOVER_ENV renv) { + int r; + TOKUTXN txn = NULL; + toku_txnid2txn(renv->logger, l->xid, &txn); + assert(txn != NULL); + struct file_map_tuple *tuple = NULL; + r = file_map_find(&renv->fmap, l->filenum, &tuple); + if (r == 0) { + // Maybe do the update broadcast if we found the cachefile. 
+ DBT extra; + toku_fill_dbt(&extra, l->extra.data, l->extra.len); + toku_ft_maybe_update_broadcast(tuple->ft_handle, &extra, txn, true, + l->lsn, false, l->is_resetting_op); + } + return 0; +} + +static int toku_recover_backward_enq_update(struct logtype_enq_update *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +static int toku_recover_backward_enq_updatebroadcast(struct logtype_enq_updatebroadcast *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +static int toku_recover_comment (struct logtype_comment *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +static int toku_recover_backward_comment (struct logtype_comment *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +static int toku_recover_shutdown_up_to_19 (struct logtype_shutdown_up_to_19 *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +static int toku_recover_backward_shutdown_up_to_19 (struct logtype_shutdown_up_to_19 *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +static int toku_recover_shutdown (struct logtype_shutdown *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +static int toku_recover_backward_shutdown (struct logtype_shutdown *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +static int toku_recover_load(struct logtype_load *UU(l), RECOVER_ENV UU(renv)) { + TOKUTXN txn = NULL; + toku_txnid2txn(renv->logger, l->xid, &txn); + assert(txn!=NULL); + char *new_iname = fixup_fname(&l->new_iname); + + toku_ft_load_recovery(txn, l->old_filenum, new_iname, 0, 0, (LSN*)NULL); + + toku_free(new_iname); + return 0; +} + +static int toku_recover_backward_load(struct logtype_load *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +// #2954 +static int toku_recover_hot_index(struct logtype_hot_index *UU(l), RECOVER_ENV UU(renv)) { + TOKUTXN txn = NULL; + toku_txnid2txn(renv->logger, l->xid, &txn); + assert(txn!=NULL); + // just make an entry in the rollback log + // - set do_log = 0 -> don't write to 
recovery log + toku_ft_hot_index_recovery(txn, l->hot_index_filenums, 0, 0, (LSN*)NULL); + return 0; +} + +// #2954 +static int toku_recover_backward_hot_index(struct logtype_hot_index *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + +// Effects: If there are no log files, or if there is a clean "shutdown" at +// the end of the log, then we don't need recovery to run. +// Returns: true if we need recovery, otherwise false. +int tokuft_needs_recovery(const char *log_dir, bool ignore_log_empty) { + int needs_recovery; + int r; + TOKULOGCURSOR logcursor = NULL; + + r = toku_logcursor_create(&logcursor, log_dir); + if (r != 0) { + needs_recovery = true; goto exit; + } + + struct log_entry *le; + le = NULL; + r = toku_logcursor_last(logcursor, &le); + if (r == 0) { + needs_recovery = le->cmd != LT_shutdown; + } + else { + needs_recovery = !(r == DB_NOTFOUND && ignore_log_empty); + } + exit: + if (logcursor) { + r = toku_logcursor_destroy(&logcursor); + assert(r == 0); + } + return needs_recovery; +} + +static uint32_t recover_get_num_live_txns(RECOVER_ENV renv) { + return toku_txn_manager_num_live_root_txns(renv->logger->txn_manager); +} + +static int is_txn_unprepared(TOKUTXN txn, void* extra) { + TOKUTXN* ptxn = (TOKUTXN *)extra; + if (txn->state != TOKUTXN_PREPARING) { + *ptxn = txn; + return -1; // return -1 to get iterator to return + } + return 0; +} + + +static int find_an_unprepared_txn (RECOVER_ENV renv, TOKUTXN *txnp) { + TOKUTXN txn = nullptr; + int r = toku_txn_manager_iter_over_live_root_txns( + renv->logger->txn_manager, + is_txn_unprepared, + &txn + ); + assert(r == 0 || r == -1); + if (txn != nullptr) { + *txnp = txn; + return 0; + } + return DB_NOTFOUND; +} + +static int call_prepare_txn_callback_iter(TOKUTXN txn, void* extra) { + RECOVER_ENV* renv = (RECOVER_ENV *)extra; + invariant(txn->state == TOKUTXN_PREPARING); + invariant(txn->child == NULL); + (*renv)->prepared_txn_callback((*renv)->env, txn); + return 0; +} + +static void 
recover_abort_live_txn(TOKUTXN txn) { + // recursively abort all children first + if (txn->child != NULL) { + recover_abort_live_txn(txn->child); + } + // sanity check that the recursive call successfully NULLs out txn->child + invariant(txn->child == NULL); + // abort the transaction + int r = toku_txn_abort_txn(txn, NULL, NULL); + assert(r == 0); + + // close the transaction + toku_txn_close_txn(txn); +} + +// abort all of the remaining live transactions in descending transaction id order +static void recover_abort_all_live_txns(RECOVER_ENV renv) { + while (1) { + TOKUTXN txn; + int r = find_an_unprepared_txn(renv, &txn); + if (r==0) { + recover_abort_live_txn(txn); + } else if (r==DB_NOTFOUND) { + break; + } else { + abort(); + } + } + + // Now we have only prepared txns. These prepared txns don't have full DB_TXNs in them, so we need to make some. + int r = toku_txn_manager_iter_over_live_root_txns( + renv->logger->txn_manager, + call_prepare_txn_callback_iter, + &renv + ); + assert_zero(r); +} + +static void recover_trace_le(const char *f, int l, int r, struct log_entry *le) { + if (le) { + LSN thislsn = toku_log_entry_get_lsn(le); + fprintf(stderr, "%s:%d r=%d cmd=%c lsn=%" PRIu64 "\n", f, l, r, le->cmd, thislsn.lsn); + } else + fprintf(stderr, "%s:%d r=%d cmd=?\n", f, l, r); +} + +// For test purposes only. 
+static void (*recover_callback_fx)(void*) = NULL; +static void * recover_callback_args = NULL; +static void (*recover_callback2_fx)(void*) = NULL; +static void * recover_callback2_args = NULL; + + +static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_dir) { + int r; + int rr = 0; + TOKULOGCURSOR logcursor = NULL; + struct log_entry *le = NULL; + + time_t tnow = time(NULL); + fprintf(stderr, "%.24s TokuFT recovery starting in env %s\n", ctime(&tnow), env_dir); + + char org_wd[1000]; + { + char *wd=getcwd(org_wd, sizeof(org_wd)); + assert(wd!=0); + } + + r = toku_logger_open(log_dir, renv->logger); + assert(r == 0); + + // grab the last LSN so that it can be restored when the log is restarted + LSN lastlsn = toku_logger_last_lsn(renv->logger); + LSN thislsn; + + // there must be at least one log entry + r = toku_logcursor_create(&logcursor, log_dir); + assert(r == 0); + + r = toku_logcursor_last(logcursor, &le); + if (r != 0) { + if (tokuft_recovery_trace) + fprintf(stderr, "RUNRECOVERY: %s:%d r=%d\n", __FUNCTION__, __LINE__, r); + rr = DB_RUNRECOVERY; goto errorexit; + } + + r = toku_logcursor_destroy(&logcursor); + assert(r == 0); + + r = toku_logcursor_create(&logcursor, log_dir); + assert(r == 0); + + { + toku_struct_stat buf; + if (toku_stat(env_dir, &buf)!=0) { + rr = get_error_errno(); + fprintf(stderr, "%.24s TokuFT recovery error: directory does not exist: %s\n", ctime(&tnow), env_dir); + goto errorexit; + } else if (!S_ISDIR(buf.st_mode)) { + fprintf(stderr, "%.24s TokuFT recovery error: this file is supposed to be a directory, but is not: %s\n", ctime(&tnow), env_dir); + rr = ENOTDIR; goto errorexit; + } + } + // scan backwards + scan_state_init(&renv->ss); + tnow = time(NULL); + time_t tlast; + tlast = tnow; + fprintf(stderr, "%.24s TokuFT recovery scanning backward from %" PRIu64 "\n", ctime(&tnow), lastlsn.lsn); + for (unsigned i=0; 1; i++) { + + // get the previous log entry (first time gets the last one) + le = NULL; + r = 
toku_logcursor_prev(logcursor, &le); + if (tokuft_recovery_trace) + recover_trace_le(__FUNCTION__, __LINE__, r, le); + if (r != 0) { + if (r == DB_NOTFOUND) + break; + rr = DB_RUNRECOVERY; + goto errorexit; + } + + // trace progress + if ((i % 1000) == 0) { + tnow = time(NULL); + if (tnow - tlast >= TOKUDB_RECOVERY_PROGRESS_TIME) { + thislsn = toku_log_entry_get_lsn(le); + fprintf(stderr, "%.24s TokuFT recovery scanning backward from %" PRIu64 " at %" PRIu64 " (%s)\n", ctime(&tnow), lastlsn.lsn, thislsn.lsn, recover_state(renv)); + tlast = tnow; + } + } + + // dispatch the log entry handler + assert(renv->ss.ss == BACKWARD_BETWEEN_CHECKPOINT_BEGIN_END || + renv->ss.ss == BACKWARD_NEWER_CHECKPOINT_END); + logtype_dispatch_assign(le, toku_recover_backward_, r, renv); + if (tokuft_recovery_trace) + recover_trace_le(__FUNCTION__, __LINE__, r, le); + if (r != 0) { + if (tokuft_recovery_trace) + fprintf(stderr, "DB_RUNRECOVERY: %s:%d r=%d\n", __FUNCTION__, __LINE__, r); + rr = DB_RUNRECOVERY; + goto errorexit; + } + if (renv->goforward) + break; + } + + // run first callback + if (recover_callback_fx) + recover_callback_fx(recover_callback_args); + + // scan forwards + assert(le); + thislsn = toku_log_entry_get_lsn(le); + tnow = time(NULL); + fprintf(stderr, "%.24s TokuFT recovery starts scanning forward to %" PRIu64 " from %" PRIu64 " left %" PRIu64 " (%s)\n", ctime(&tnow), lastlsn.lsn, thislsn.lsn, lastlsn.lsn - thislsn.lsn, recover_state(renv)); + + for (unsigned i=0; 1; i++) { + + // trace progress + if ((i % 1000) == 0) { + tnow = time(NULL); + if (tnow - tlast >= TOKUDB_RECOVERY_PROGRESS_TIME) { + thislsn = toku_log_entry_get_lsn(le); + fprintf(stderr, "%.24s TokuFT recovery scanning forward to %" PRIu64 " at %" PRIu64 " left %" PRIu64 " (%s)\n", ctime(&tnow), lastlsn.lsn, thislsn.lsn, lastlsn.lsn - thislsn.lsn, recover_state(renv)); + tlast = tnow; + } + } + + // dispatch the log entry handler (first time calls the forward handler for the log entry at the 
turnaround + assert(renv->ss.ss == FORWARD_BETWEEN_CHECKPOINT_BEGIN_END || + renv->ss.ss == FORWARD_NEWER_CHECKPOINT_END); + logtype_dispatch_assign(le, toku_recover_, r, renv); + if (tokuft_recovery_trace) + recover_trace_le(__FUNCTION__, __LINE__, r, le); + if (r != 0) { + if (tokuft_recovery_trace) + fprintf(stderr, "DB_RUNRECOVERY: %s:%d r=%d\n", __FUNCTION__, __LINE__, r); + rr = DB_RUNRECOVERY; + goto errorexit; + } + + // get the next log entry + le = NULL; + r = toku_logcursor_next(logcursor, &le); + if (tokuft_recovery_trace) + recover_trace_le(__FUNCTION__, __LINE__, r, le); + if (r != 0) { + if (r == DB_NOTFOUND) + break; + rr = DB_RUNRECOVERY; + goto errorexit; + } + } + + // verify the final recovery state + assert(renv->ss.ss == FORWARD_NEWER_CHECKPOINT_END); + + r = toku_logcursor_destroy(&logcursor); + assert(r == 0); + + // run second callback + if (recover_callback2_fx) + recover_callback2_fx(recover_callback2_args); + + // restart logging + toku_logger_restart(renv->logger, lastlsn); + + // abort the live transactions + { + uint32_t n = recover_get_num_live_txns(renv); + if (n > 0) { + tnow = time(NULL); + fprintf(stderr, "%.24s TokuFT recovery has %" PRIu32 " live transaction%s\n", ctime(&tnow), n, n > 1 ? "s" : ""); + } + } + recover_abort_all_live_txns(renv); + { + uint32_t n = recover_get_num_live_txns(renv); + if (n > 0) { + tnow = time(NULL); + fprintf(stderr, "%.24s TokuFT recovery has %" PRIu32 " prepared transaction%s\n", ctime(&tnow), n, n > 1 ? "s" : ""); + } + } + + // close the open dictionaries + uint32_t n; + n = file_map_get_num_dictionaries(&renv->fmap); + if (n > 0) { + tnow = time(NULL); + fprintf(stderr, "%.24s TokuFT recovery closing %" PRIu32 " dictionar%s\n", ctime(&tnow), n, n > 1 ? 
"ies" : "y"); + } + file_map_close_dictionaries(&renv->fmap, lastlsn); + + { + // write a recovery log entry + BYTESTRING recover_comment = { static_cast(strlen("recover")), (char *) "recover" }; + toku_log_comment(renv->logger, NULL, true, 0, recover_comment); + } + + // checkpoint + tnow = time(NULL); + fprintf(stderr, "%.24s TokuFT recovery making a checkpoint\n", ctime(&tnow)); + r = toku_checkpoint(renv->cp, renv->logger, NULL, NULL, NULL, NULL, RECOVERY_CHECKPOINT); + assert(r == 0); + tnow = time(NULL); + fprintf(stderr, "%.24s TokuFT recovery done\n", ctime(&tnow)); + + return 0; + + errorexit: + tnow = time(NULL); + fprintf(stderr, "%.24s TokuFT recovery failed %d\n", ctime(&tnow), rr); + + if (logcursor) { + r = toku_logcursor_destroy(&logcursor); + assert(r == 0); + } + + return rr; +} + +int +toku_recover_lock(const char *lock_dir, int *lockfd) { + int e = toku_single_process_lock(lock_dir, "recovery", lockfd); + if (e != 0 && e != ENOENT) { + fprintf(stderr, "Couldn't run recovery because some other process holds the recovery lock\n"); + } + return e; +} + +int +toku_recover_unlock(int lockfd) { + int lockfd_copy = lockfd; + return toku_single_process_unlock(&lockfd_copy); +} + +int tokuft_recover(DB_ENV *env, + prepared_txn_callback_t prepared_txn_callback, + keep_cachetable_callback_t keep_cachetable_callback, + TOKULOGGER logger, + const char *env_dir, const char *log_dir, + ft_compare_func bt_compare, + ft_update_func update_function, + generate_row_for_put_func generate_row_for_put, + generate_row_for_del_func generate_row_for_del, + size_t cachetable_size) { + int r; + int lockfd = -1; + + r = toku_recover_lock(log_dir, &lockfd); + if (r != 0) + return r; + + int rr = 0; + if (tokuft_needs_recovery(log_dir, false)) { + struct recover_env renv; + r = recover_env_init(&renv, + env_dir, + env, + prepared_txn_callback, + keep_cachetable_callback, + logger, + bt_compare, + update_function, + generate_row_for_put, + generate_row_for_del, + 
cachetable_size); + assert(r == 0); + + rr = do_recovery(&renv, env_dir, log_dir); + + recover_env_cleanup(&renv); + } + + r = toku_recover_unlock(lockfd); + if (r != 0) + return r; + + return rr; +} + +// Return 0 if recovery log exists, ENOENT if log is missing +int +tokuft_recover_log_exists(const char * log_dir) { + int r; + TOKULOGCURSOR logcursor; + + r = toku_logcursor_create(&logcursor, log_dir); + if (r == 0) { + int rclose; + r = toku_logcursor_log_exists(logcursor); // return ENOENT if no log + rclose = toku_logcursor_destroy(&logcursor); + assert(rclose == 0); + } + else + r = ENOENT; + + return r; +} + +void toku_recover_set_callback (void (*callback_fx)(void*), void* callback_args) { + recover_callback_fx = callback_fx; + recover_callback_args = callback_args; +} + +void toku_recover_set_callback2 (void (*callback_fx)(void*), void* callback_args) { + recover_callback2_fx = callback_fx; + recover_callback2_args = callback_args; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/recover.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/recover.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger/recover.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger/recover.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,139 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include +#include + +#include "portability/memory.h" +#include "portability/toku_portability.h" + +#include "ft/comparator.h" +#include "ft/ft-ops.h" +#include "util/x1764.h" + +typedef void (*prepared_txn_callback_t)(DB_ENV *env, struct tokutxn *txn); +typedef void (*keep_cachetable_callback_t)(DB_ENV *env, struct cachetable *ct); + +// Run tokuft recovery from the log +// Returns 0 if success +int tokuft_recover(DB_ENV *env, + prepared_txn_callback_t prepared_txn_callback, + keep_cachetable_callback_t keep_cachetable_callback, + struct tokulogger *logger, + const char *env_dir, + const char *log_dir, + ft_compare_func bt_compare, + ft_update_func update_function, + generate_row_for_put_func generate_row_for_put, + generate_row_for_del_func generate_row_for_del, + size_t cachetable_size); + +// Effect: Check the tokuft logs to determine whether or not we need to run recovery. +// If the log is empty or if there is a clean shutdown at the end of the log, then we +// dont need to run recovery. +// Returns: true if we need recovery, otherwise false. 
+int tokuft_needs_recovery(const char *logdir, bool ignore_empty_log); + +// Return 0 if recovery log exists, ENOENT if log is missing +int tokuft_recover_log_exists(const char * log_dir); + +// For test only - set callbacks for recovery testing +void toku_recover_set_callback (void (*)(void*), void*); +void toku_recover_set_callback2 (void (*)(void*), void*); + +extern int tokuft_recovery_trace; + +int toku_recover_lock (const char *lock_dir, int *lockfd); + +int toku_recover_unlock(int lockfd); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger.cc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,1469 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include -#include -#include -#include - -#include "ft.h" -#include "log-internal.h" -#include "txn_manager.h" -#include "rollback_log_node_cache.h" - -#include - -static const int log_format_version=TOKU_LOG_VERSION; - -static int open_logfile (TOKULOGGER logger); -static void logger_write_buffer (TOKULOGGER logger, LSN *fsynced_lsn); -static void delete_logfile(TOKULOGGER logger, long long index, uint32_t version); -static void grab_output(TOKULOGGER logger, LSN *fsynced_lsn); -static void release_output(TOKULOGGER logger, LSN fsynced_lsn); - -static void toku_print_bytes (FILE *outf, uint32_t len, char *data) { - fprintf(outf, "\""); - uint32_t i; - for (i=0; iis_open=false; - result->write_log_files = true; - result->trim_log_files = true; - result->directory=0; - result->remove_finalize_callback = NULL; - // fd is uninitialized on purpose - // ct is uninitialized on purpose - result->lg_max = 100<<20; // 100MB default - // lsn is uninitialized - result->inbuf = (struct logbuf) {0, LOGGER_MIN_BUF_SIZE, (char *) toku_xmalloc(LOGGER_MIN_BUF_SIZE), ZERO_LSN}; - result->outbuf = (struct logbuf) {0, LOGGER_MIN_BUF_SIZE, (char *) toku_xmalloc(LOGGER_MIN_BUF_SIZE), ZERO_LSN}; - // written_lsn is uninitialized - // fsynced_lsn is uninitialized - result->last_completed_checkpoint_lsn = ZERO_LSN; - // next_log_file_number is uninitialized - // n_in_file is uninitialized - result->write_block_size = FT_DEFAULT_NODE_SIZE; // default logging size is the same as the default ft block size - toku_logfilemgr_create(&result->logfilemgr); - *resultp=result; - ml_init(&result->input_lock); - toku_mutex_init(&result->output_condition_lock, NULL); - toku_cond_init(&result->output_condition, NULL); - result->rollback_cachefile = NULL; - result->output_is_available = true; - toku_txn_manager_init(&result->txn_manager); - return 0; -} - -static void fsync_logdir(TOKULOGGER logger) { - toku_fsync_dirfd_without_accounting(logger->dir); -} - -static int open_logdir(TOKULOGGER logger, 
const char *directory) { - if (toku_os_is_absolute_name(directory)) { - logger->directory = toku_strdup(directory); - } else { - char cwdbuf[PATH_MAX]; - char *cwd = getcwd(cwdbuf, PATH_MAX); - if (cwd == NULL) - return -1; - char *MALLOC_N(strlen(cwd) + strlen(directory) + 2, new_log_dir); - if (new_log_dir == NULL) { - return -2; - } - sprintf(new_log_dir, "%s/%s", cwd, directory); - logger->directory = new_log_dir; - } - if (logger->directory==0) return get_error_errno(); - - logger->dir = opendir(logger->directory); - if ( logger->dir == NULL ) return -1; - return 0; -} - -static int close_logdir(TOKULOGGER logger) { - return closedir(logger->dir); -} - -int -toku_logger_open_with_last_xid(const char *directory, TOKULOGGER logger, TXNID last_xid) { - if (logger->is_open) return EINVAL; - - int r; - TXNID last_xid_if_clean_shutdown = TXNID_NONE; - r = toku_logfilemgr_init(logger->logfilemgr, directory, &last_xid_if_clean_shutdown); - if ( r!=0 ) - return r; - logger->lsn = toku_logfilemgr_get_last_lsn(logger->logfilemgr); - logger->written_lsn = logger->lsn; - logger->fsynced_lsn = logger->lsn; - logger->inbuf.max_lsn_in_buf = logger->lsn; - logger->outbuf.max_lsn_in_buf = logger->lsn; - - // open directory, save pointer for fsyncing t:2445 - r = open_logdir(logger, directory); - if (r!=0) return r; - - long long nexti; - r = toku_logger_find_next_unused_log_file(logger->directory, &nexti); - if (r!=0) return r; - - logger->next_log_file_number = nexti; - r = open_logfile(logger); - if (r!=0) return r; - if (last_xid == TXNID_NONE) { - last_xid = last_xid_if_clean_shutdown; - } - toku_txn_manager_set_last_xid_from_logger(logger->txn_manager, last_xid); - - logger->is_open = true; - return 0; -} - -int toku_logger_open (const char *directory, TOKULOGGER logger) { - return toku_logger_open_with_last_xid(directory, logger, TXNID_NONE); -} - -bool toku_logger_rollback_is_open (TOKULOGGER logger) { - return logger->rollback_cachefile != NULL; -} - -#define 
MAX_CACHED_ROLLBACK_NODES 4096 - -void -toku_logger_initialize_rollback_cache(TOKULOGGER logger, FT ft) { - toku_free_unused_blocknums(ft->blocktable, ft->h->root_blocknum); - logger->rollback_cache.init(MAX_CACHED_ROLLBACK_NODES); -} - -int -toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, bool create) { - assert(logger->is_open); - assert(!logger->rollback_cachefile); - - FT_HANDLE t = NULL; // Note, there is no DB associated with this FT. - toku_ft_handle_create(&t); - int r = toku_ft_handle_open(t, toku_product_name_strings.rollback_cachefile, create, create, cachetable, NULL_TXN); - if (r == 0) { - logger->rollback_cachefile = t->ft->cf; - toku_logger_initialize_rollback_cache(logger, t->ft); - - //Verify it is empty - //Must have no data blocks (rollback logs or otherwise). - toku_block_verify_no_data_blocks_except_root(t->ft->blocktable, t->ft->h->root_blocknum); - bool is_empty; - is_empty = toku_ft_is_empty_fast(t); - assert(is_empty); - } else { - toku_ft_handle_close(t); - } - return r; -} - - -// Requires: Rollback cachefile can only be closed immediately after a checkpoint, -// so it will always be clean (!h->dirty) when about to be closed. -// Rollback log can only be closed when there are no open transactions, -// so it will always be empty (no data blocks) when about to be closed. -void toku_logger_close_rollback(TOKULOGGER logger) { - CACHEFILE cf = logger->rollback_cachefile; // stored in logger at rollback cachefile open - if (cf) { - FT_HANDLE ft_to_close; - { //Find "ft_to_close" - logger->rollback_cache.destroy(); - FT CAST_FROM_VOIDP(ft, toku_cachefile_get_userdata(cf)); - //Verify it is safe to close it. - assert(!ft->h->dirty); //Must not be dirty. - toku_free_unused_blocknums(ft->blocktable, ft->h->root_blocknum); - //Must have no data blocks (rollback logs or otherwise). 
- toku_block_verify_no_data_blocks_except_root(ft->blocktable, ft->h->root_blocknum); - assert(!ft->h->dirty); - ft_to_close = toku_ft_get_only_existing_ft_handle(ft); - { - bool is_empty; - is_empty = toku_ft_is_empty_fast(ft_to_close); - assert(is_empty); - } - assert(!ft->h->dirty); // it should not have been dirtied by the toku_ft_is_empty test. - } - - toku_ft_handle_close(ft_to_close); - //Set as dealt with already. - logger->rollback_cachefile = NULL; - } -} - -// No locks held on entry -// No locks held on exit. -// No locks are needed, since you cannot legally close the log concurrently with doing anything else. -// TODO: can't fail -int toku_logger_close(TOKULOGGER *loggerp) { - int r; - TOKULOGGER logger = *loggerp; - if (!logger->is_open) { - goto is_closed; - } - ml_lock(&logger->input_lock); - LSN fsynced_lsn; - grab_output(logger, &fsynced_lsn); - logger_write_buffer(logger, &fsynced_lsn); - if (logger->fd!=-1) { - if ( logger->write_log_files ) { - toku_file_fsync_without_accounting(logger->fd); - } - r = close(logger->fd); - assert(r == 0); - } - r = close_logdir(logger); - assert(r == 0); - logger->fd=-1; - release_output(logger, fsynced_lsn); - -is_closed: - toku_free(logger->inbuf.buf); - toku_free(logger->outbuf.buf); - // before destroying locks they must be left in the unlocked state. 
- ml_destroy(&logger->input_lock); - toku_mutex_destroy(&logger->output_condition_lock); - toku_cond_destroy(&logger->output_condition); - toku_txn_manager_destroy(logger->txn_manager); - if (logger->directory) toku_free(logger->directory); - toku_logfilemgr_destroy(&logger->logfilemgr); - toku_free(logger); - *loggerp=0; - return 0; -} - -void toku_logger_shutdown(TOKULOGGER logger) { - if (logger->is_open) { - TXN_MANAGER mgr = logger->txn_manager; - if (toku_txn_manager_num_live_root_txns(mgr) == 0) { - TXNID last_xid = toku_txn_manager_get_last_xid(mgr); - toku_log_shutdown(logger, NULL, true, 0, last_xid); - } - } -} - -static int close_and_open_logfile (TOKULOGGER logger, LSN *fsynced_lsn) -// Effect: close the current file, and open the next one. -// Entry: This thread has permission to modify the output. -// Exit: This thread has permission to modify the output. -{ - int r; - if (logger->write_log_files) { - toku_file_fsync_without_accounting(logger->fd); - *fsynced_lsn = logger->written_lsn; - toku_logfilemgr_update_last_lsn(logger->logfilemgr, logger->written_lsn); // fixes t:2294 - } - r = close(logger->fd); if (r!=0) return get_error_errno(); - return open_logfile(logger); -} - -static int -max_int (int a, int b) -{ - if (a>b) return a; - return b; -} - -// *********************************************************** -// output mutex/condition manipulation routines -// *********************************************************** - -static void -wait_till_output_available (TOKULOGGER logger) -// Effect: Wait until output becomes available. -// Implementation hint: Use a pthread_cond_wait. 
-// Entry: Holds the output_condition_lock (but not the inlock) -// Exit: Holds the output_condition_lock and logger->output_is_available -// -{ - tokutime_t t0 = toku_time_now(); - while (!logger->output_is_available) { - toku_cond_wait(&logger->output_condition, &logger->output_condition_lock); - } - if (tokutime_to_seconds(toku_time_now() - t0) >= 0.100) { - logger->num_wait_buf_long++; - } -} - -static void -grab_output(TOKULOGGER logger, LSN *fsynced_lsn) -// Effect: Wait until output becomes available and get permission to modify output. -// Entry: Holds no lock (including not holding the input lock, since we never hold both at once). -// Exit: Hold permission to modify output (but none of the locks). -{ - toku_mutex_lock(&logger->output_condition_lock); - wait_till_output_available(logger); - logger->output_is_available = false; - if (fsynced_lsn) { - *fsynced_lsn = logger->fsynced_lsn; - } - toku_mutex_unlock(&logger->output_condition_lock); -} - -static bool -wait_till_output_already_written_or_output_buffer_available (TOKULOGGER logger, LSN lsn, LSN *fsynced_lsn) -// Effect: Wait until either the output is available or the lsn has been written. -// Return true iff the lsn has been written. -// If returning true, then on exit we don't hold output permission. -// If returning false, then on exit we do hold output permission. -// Entry: Hold no locks. -// Exit: Hold the output permission if returns false. -{ - bool result; - toku_mutex_lock(&logger->output_condition_lock); - while (1) { - if (logger->fsynced_lsn.lsn >= lsn.lsn) { // we can look at the fsynced lsn since we have the lock. - result = true; - break; - } - if (logger->output_is_available) { - logger->output_is_available = false; - result = false; - break; - } - // otherwise wait for a good time to look again. 
- toku_cond_wait(&logger->output_condition, &logger->output_condition_lock); - } - *fsynced_lsn = logger->fsynced_lsn; - toku_mutex_unlock(&logger->output_condition_lock); - return result; -} - -static void -release_output (TOKULOGGER logger, LSN fsynced_lsn) -// Effect: Release output permission. -// Entry: Holds output permissions, but no locks. -// Exit: Holds neither locks nor output permission. -{ - toku_mutex_lock(&logger->output_condition_lock); - logger->output_is_available = true; - if (logger->fsynced_lsn.lsn < fsynced_lsn.lsn) { - logger->fsynced_lsn = fsynced_lsn; - } - toku_cond_broadcast(&logger->output_condition); - toku_mutex_unlock(&logger->output_condition_lock); -} - -static void -swap_inbuf_outbuf (TOKULOGGER logger) -// Effect: Swap the inbuf and outbuf -// Entry and exit: Hold the input lock and permission to modify output. -{ - struct logbuf tmp = logger->inbuf; - logger->inbuf = logger->outbuf; - logger->outbuf = tmp; - assert(logger->inbuf.n_in_buf == 0); -} - -static void -write_outbuf_to_logfile (TOKULOGGER logger, LSN *fsynced_lsn) -// Effect: Write the contents of outbuf to logfile. Don't necessarily fsync (but it might, in which case fynced_lsn is updated). -// If the logfile gets too big, open the next one (that's the case where an fsync might happen). -// Entry and exit: Holds permission to modify output (and doesn't let it go, so it's ok to also hold the inlock). -{ - if (logger->outbuf.n_in_buf>0) { - // Write the outbuf to disk, take accounting measurements - tokutime_t io_t0 = toku_time_now(); - toku_os_full_write(logger->fd, logger->outbuf.buf, logger->outbuf.n_in_buf); - tokutime_t io_t1 = toku_time_now(); - logger->num_writes_to_disk++; - logger->bytes_written_to_disk += logger->outbuf.n_in_buf; - logger->time_spent_writing_to_disk += (io_t1 - io_t0); - - assert(logger->outbuf.max_lsn_in_buf.lsn > logger->written_lsn.lsn); // since there is something in the buffer, its LSN must be bigger than what's previously written. 
- logger->written_lsn = logger->outbuf.max_lsn_in_buf; - logger->n_in_file += logger->outbuf.n_in_buf; - logger->outbuf.n_in_buf = 0; - } - // If the file got too big, then open a new file. - if (logger->n_in_file > logger->lg_max) { - int r = close_and_open_logfile(logger, fsynced_lsn); - assert_zero(r); - } -} - -void -toku_logger_make_space_in_inbuf (TOKULOGGER logger, int n_bytes_needed) -// Entry: Holds the inlock -// Exit: Holds the inlock -// Effect: Upon exit, the inlock is held and there are at least n_bytes_needed in the buffer. -// May release the inlock (and then reacquire it), so this is not atomic. -// May obtain the output lock and output permission (but if it does so, it will have released the inlock, since we don't hold both locks at once). -// (But may hold output permission and inlock at the same time.) -// Implementation hint: Makes space in the inbuf, possibly by writing the inbuf to disk or increasing the size of the inbuf. There might not be an fsync. -// Arguments: logger: the logger (side effects) -// n_bytes_needed: how many bytes to make space for. -{ - if (logger->inbuf.n_in_buf + n_bytes_needed <= LOGGER_MIN_BUF_SIZE) { - return; - } - ml_unlock(&logger->input_lock); - LSN fsynced_lsn; - grab_output(logger, &fsynced_lsn); - - ml_lock(&logger->input_lock); - // Some other thread may have written the log out while we didn't have the lock. If we have space now, then be happy. - if (logger->inbuf.n_in_buf + n_bytes_needed <= LOGGER_MIN_BUF_SIZE) { - release_output(logger, fsynced_lsn); - return; - } - if (logger->inbuf.n_in_buf > 0) { - // There isn't enough space, and there is something in the buffer, so write the inbuf. - swap_inbuf_outbuf(logger); - - // Don't release the inlock in this case, because we don't want to get starved. - write_outbuf_to_logfile(logger, &fsynced_lsn); - } - // the inbuf is empty. Make it big enough (just in case it is somehow smaller than a single log entry). 
- if (n_bytes_needed > logger->inbuf.buf_size) { - assert(n_bytes_needed < (1<<30)); // it seems unlikely to work if a logentry gets that big. - int new_size = max_int(logger->inbuf.buf_size * 2, n_bytes_needed); // make it at least twice as big, and big enough for n_bytes - assert(new_size < (1<<30)); - XREALLOC_N(new_size, logger->inbuf.buf); - logger->inbuf.buf_size = new_size; - } - release_output(logger, fsynced_lsn); -} - -void toku_logger_fsync (TOKULOGGER logger) -// Effect: This is the exported fsync used by ydb.c for env_log_flush. Group commit doesn't have to work. -// Entry: Holds no locks -// Exit: Holds no locks -// Implementation note: Acquire the output condition lock, then the output permission, then release the output condition lock, then get the input lock. -// Then release everything. -{ - toku_logger_maybe_fsync(logger, logger->inbuf.max_lsn_in_buf, true, false); -} - -void toku_logger_fsync_if_lsn_not_fsynced (TOKULOGGER logger, LSN lsn) { - if (logger->write_log_files) { - toku_logger_maybe_fsync(logger, lsn, true, false); - } -} - -int toku_logger_is_open(TOKULOGGER logger) { - if (logger==0) return 0; - return logger->is_open; -} - -void toku_logger_set_cachetable (TOKULOGGER logger, CACHETABLE ct) { - logger->ct = ct; -} - -int toku_logger_set_lg_max(TOKULOGGER logger, uint32_t lg_max) { - if (logger==0) return EINVAL; // no logger - if (logger->is_open) return EINVAL; - if (lg_max>(1<<30)) return EINVAL; // too big - logger->lg_max = lg_max; - return 0; -} -int toku_logger_get_lg_max(TOKULOGGER logger, uint32_t *lg_maxp) { - if (logger==0) return EINVAL; // no logger - *lg_maxp = logger->lg_max; - return 0; -} - -int toku_logger_set_lg_bsize(TOKULOGGER logger, uint32_t bsize) { - if (logger==0) return EINVAL; // no logger - if (logger->is_open) return EINVAL; - if (bsize<=0 || bsize>(1<<30)) return EINVAL; - logger->write_block_size = bsize; - return 0; -} - -int toku_logger_find_next_unused_log_file(const char *directory, long long 
*result) -// This is called during logger initialalization, and no locks are required. -{ - DIR *d=opendir(directory); - long long maxf=-1; *result = maxf; - struct dirent *de; - if (d==0) return get_error_errno(); - while ((de=readdir(d))) { - if (de==0) return get_error_errno(); - long long thisl = -1; - if ( is_a_logfile(de->d_name, &thisl) ) { - if ((long long)thisl > maxf) maxf = thisl; - } - } - *result=maxf+1; - int r = closedir(d); - return r; -} - -// TODO: Put this in portability layer when ready -// in: file pathname that may have a dirname prefix -// return: file leaf name -static char * fileleafname(char *pathname) { - const char delimiter = '/'; - char *leafname = strrchr(pathname, delimiter); - if (leafname) - leafname++; - else - leafname = pathname; - return leafname; -} - -static int logfilenamecompare (const void *ap, const void *bp) { - char *a=*(char**)ap; - char *a_leafname = fileleafname(a); - char *b=*(char**)bp; - char * b_leafname = fileleafname(b); - int rval; - bool valid; - uint64_t num_a = 0; // placate compiler - uint64_t num_b = 0; - uint32_t ver_a = 0; - uint32_t ver_b = 0; - valid = is_a_logfile_any_version(a_leafname, &num_a, &ver_a); - invariant(valid); - valid = is_a_logfile_any_version(b_leafname, &num_b, &ver_b); - invariant(valid); - if (ver_a < ver_b) rval = -1; - else if (ver_a > ver_b) rval = +1; - else if (num_a < num_b) rval = -1; - else if (num_a > num_b) rval = +1; - else rval = 0; - return rval; -} - -// Return the log files in sorted order -// Return a null_terminated array of strings, and also return the number of strings in the array. -// Requires: Race conditions must be dealt with by caller. Either call during initialization or grab the output permission. 
-int toku_logger_find_logfiles (const char *directory, char ***resultp, int *n_logfiles) -{ - int result_limit=2; - int n_results=0; - char **MALLOC_N(result_limit, result); - assert(result!= NULL); - struct dirent *de; - DIR *d=opendir(directory); - if (d==0) { - int er = get_error_errno(); - toku_free(result); - return er; - } - int dirnamelen = strlen(directory); - while ((de=readdir(d))) { - uint64_t thisl; - uint32_t version_ignore; - if ( !(is_a_logfile_any_version(de->d_name, &thisl, &version_ignore)) ) continue; //#2424: Skip over files that don't match the exact logfile template - if (n_results+1>=result_limit) { - result_limit*=2; - XREALLOC_N(result_limit, result); - } - int fnamelen = dirnamelen + strlen(de->d_name) + 2; // One for the slash and one for the trailing NUL. - char *XMALLOC_N(fnamelen, fname); - snprintf(fname, fnamelen, "%s/%s", directory, de->d_name); - result[n_results++] = fname; - } - // Return them in increasing order. Set width to allow for newer log file names ("xxx.tokulog13") - // which are one character longer than old log file names ("xxx.tokulog2"). The comparison function - // won't look beyond the terminating NUL, so an extra character in the comparison string doesn't matter. - // Allow room for terminating NUL after "xxx.tokulog13" even if result[0] is of form "xxx.tokulog2." - int width = sizeof(result[0]+2); - qsort(result, n_results, width, logfilenamecompare); - *resultp = result; - *n_logfiles = n_results; - result[n_results]=0; // make a trailing null - return d ? closedir(d) : 0; -} - -static int open_logfile (TOKULOGGER logger) -// Entry and Exit: This thread has permission to modify the output. 
-{ - int fnamelen = strlen(logger->directory)+50; - char fname[fnamelen]; - snprintf(fname, fnamelen, "%s/log%012lld.tokulog%d", logger->directory, logger->next_log_file_number, TOKU_LOG_VERSION); - long long index = logger->next_log_file_number; - if (logger->write_log_files) { - logger->fd = open(fname, O_CREAT+O_WRONLY+O_TRUNC+O_EXCL+O_BINARY, S_IRWXU); - if (logger->fd==-1) { - return get_error_errno(); - } - fsync_logdir(logger); - logger->next_log_file_number++; - } else { - logger->fd = open(DEV_NULL_FILE, O_WRONLY+O_BINARY); - if (logger->fd==-1) { - return get_error_errno(); - } - } - toku_os_full_write(logger->fd, "tokulogg", 8); - int version_l = toku_htonl(log_format_version); //version MUST be in network byte order regardless of disk order - toku_os_full_write(logger->fd, &version_l, 4); - if ( logger->write_log_files ) { - TOKULOGFILEINFO XMALLOC(lf_info); - lf_info->index = index; - lf_info->maxlsn = logger->written_lsn; - lf_info->version = TOKU_LOG_VERSION; - toku_logfilemgr_add_logfile_info(logger->logfilemgr, lf_info); - } - logger->fsynced_lsn = logger->written_lsn; - logger->n_in_file = 12; - return 0; -} - -static void delete_logfile(TOKULOGGER logger, long long index, uint32_t version) -// Entry and Exit: This thread has permission to modify the output. -{ - int fnamelen = strlen(logger->directory)+50; - char fname[fnamelen]; - snprintf(fname, fnamelen, "%s/log%012lld.tokulog%d", logger->directory, index, version); - int r = remove(fname); - invariant_zero(r); -} - -void toku_logger_maybe_trim_log(TOKULOGGER logger, LSN trim_lsn) -// On entry and exit: No logger locks held. -// Acquires and releases output permission. 
-{ - LSN fsynced_lsn; - grab_output(logger, &fsynced_lsn); - TOKULOGFILEMGR lfm = logger->logfilemgr; - int n_logfiles = toku_logfilemgr_num_logfiles(lfm); - - TOKULOGFILEINFO lf_info = NULL; - - if ( logger->write_log_files && logger->trim_log_files) { - while ( n_logfiles > 1 ) { // don't delete current logfile - uint32_t log_version; - lf_info = toku_logfilemgr_get_oldest_logfile_info(lfm); - log_version = lf_info->version; - if ( lf_info->maxlsn.lsn >= trim_lsn.lsn ) { - // file contains an open LSN, can't delete this or any newer log files - break; - } - // need to save copy - toku_logfilemgr_delete_oldest_logfile_info free's the lf_info - long index = lf_info->index; - toku_logfilemgr_delete_oldest_logfile_info(lfm); - n_logfiles--; - delete_logfile(logger, index, log_version); - } - } - release_output(logger, fsynced_lsn); -} - -void toku_logger_write_log_files (TOKULOGGER logger, bool write_log_files) -// Called only during initialization (or just after recovery), so no locks are needed. -{ - logger->write_log_files = write_log_files; -} - -void toku_logger_trim_log_files (TOKULOGGER logger, bool trim_log_files) -// Called only during initialization, so no locks are needed. -{ - logger->trim_log_files = trim_log_files; -} - -bool toku_logger_txns_exist(TOKULOGGER logger) -// Called during close of environment to ensure that transactions don't exist -{ - return toku_txn_manager_txns_exist(logger->txn_manager); -} - - -void toku_logger_maybe_fsync(TOKULOGGER logger, LSN lsn, int do_fsync, bool holds_input_lock) -// Effect: If fsync is nonzero, then make sure that the log is flushed and synced at least up to lsn. -// Entry: Holds input lock iff 'holds_input_lock'. The log entry has already been written to the input buffer. -// Exit: Holds no locks. -// The input lock may be released and then reacquired. Thus this function does not run atomically with respect to other threads. 
-{ - if (holds_input_lock) { - ml_unlock(&logger->input_lock); - } - if (do_fsync) { - // reacquire the locks (acquire output permission first) - LSN fsynced_lsn; - bool already_done = wait_till_output_already_written_or_output_buffer_available(logger, lsn, &fsynced_lsn); - if (already_done) { - return; - } - - // otherwise we now own the output permission, and our lsn isn't outputed. - - ml_lock(&logger->input_lock); - - swap_inbuf_outbuf(logger); - - ml_unlock(&logger->input_lock); // release the input lock now, so other threads can fill the inbuf. (Thus enabling group commit.) - - write_outbuf_to_logfile(logger, &fsynced_lsn); - if (fsynced_lsn.lsn < lsn.lsn) { - // it may have gotten fsynced by the write_outbuf_to_logfile. - toku_file_fsync_without_accounting(logger->fd); - assert(fsynced_lsn.lsn <= logger->written_lsn.lsn); - fsynced_lsn = logger->written_lsn; - } - // the last lsn is only accessed while holding output permission or else when the log file is old. - if (logger->write_log_files) { - toku_logfilemgr_update_last_lsn(logger->logfilemgr, logger->written_lsn); - } - release_output(logger, fsynced_lsn); - } -} - -static void -logger_write_buffer(TOKULOGGER logger, LSN *fsynced_lsn) -// Entry: Holds the input lock and permission to modify output. -// Exit: Holds only the permission to modify output. -// Effect: Write the buffers to the output. If DO_FSYNC is true, then fsync. -// Note: Only called during single-threaded activity from toku_logger_restart, so locks aren't really needed. -{ - swap_inbuf_outbuf(logger); - ml_unlock(&logger->input_lock); - write_outbuf_to_logfile(logger, fsynced_lsn); - if (logger->write_log_files) { - toku_file_fsync_without_accounting(logger->fd); - toku_logfilemgr_update_last_lsn(logger->logfilemgr, logger->written_lsn); // t:2294 - } -} - -int toku_logger_restart(TOKULOGGER logger, LSN lastlsn) -// Entry and exit: Holds no locks (this is called only during single-threaded activity, such as initial start). 
-{ - int r; - - // flush out the log buffer - LSN fsynced_lsn; - grab_output(logger, &fsynced_lsn); - ml_lock(&logger->input_lock); - logger_write_buffer(logger, &fsynced_lsn); - - // close the log file - if ( logger->write_log_files) { // fsyncs don't work to /dev/null - toku_file_fsync_without_accounting(logger->fd); - } - r = close(logger->fd); assert(r == 0); - logger->fd = -1; - - // reset the LSN's to the lastlsn when the logger was opened - logger->lsn = logger->written_lsn = logger->fsynced_lsn = lastlsn; - logger->write_log_files = true; - logger->trim_log_files = true; - - // open a new log file - r = open_logfile(logger); - release_output(logger, fsynced_lsn); - return r; -} - -// fname is the iname -void toku_logger_log_fcreate (TOKUTXN txn, const char *fname, FILENUM filenum, uint32_t mode, - uint32_t treeflags, uint32_t nodesize, uint32_t basementnodesize, - enum toku_compression_method compression_method) { - if (txn) { - BYTESTRING bs_fname = { .len = (uint32_t) strlen(fname), .data = (char *) fname }; - // fsync log on fcreate - toku_log_fcreate (txn->logger, (LSN*)0, 1, txn, toku_txn_get_txnid(txn), filenum, - bs_fname, mode, treeflags, nodesize, basementnodesize, compression_method); - } -} - - -// We only do fdelete on open ft's, so we pass the filenum here -void toku_logger_log_fdelete (TOKUTXN txn, FILENUM filenum) { - if (txn) { - //No fsync. - toku_log_fdelete (txn->logger, (LSN*)0, 0, txn, toku_txn_get_txnid(txn), filenum); - } -} - - - -/* fopen isn't really an action. It's just for bookkeeping. We need to know the filename that goes with a filenum. 
*/ -void toku_logger_log_fopen (TOKUTXN txn, const char * fname, FILENUM filenum, uint32_t treeflags) { - if (txn) { - BYTESTRING bs; - bs.len = strlen(fname); - bs.data = (char*)fname; - toku_log_fopen (txn->logger, (LSN*)0, 0, bs, filenum, treeflags); - } -} - -static int toku_fread_uint8_t_nocrclen (FILE *f, uint8_t *v) { - int vi=fgetc(f); - if (vi==EOF) return -1; - uint8_t vc=(uint8_t)vi; - *v = vc; - return 0; -} - -int toku_fread_uint8_t (FILE *f, uint8_t *v, struct x1764 *mm, uint32_t *len) { - int vi=fgetc(f); - if (vi==EOF) return -1; - uint8_t vc=(uint8_t)vi; - toku_x1764_add(mm, &vc, 1); - (*len)++; - *v = vc; - return 0; -} - -int toku_fread_uint32_t_nocrclen (FILE *f, uint32_t *v) { - uint32_t result; - uint8_t *cp = (uint8_t*)&result; - int r; - r = toku_fread_uint8_t_nocrclen (f, cp+0); if (r!=0) return r; - r = toku_fread_uint8_t_nocrclen (f, cp+1); if (r!=0) return r; - r = toku_fread_uint8_t_nocrclen (f, cp+2); if (r!=0) return r; - r = toku_fread_uint8_t_nocrclen (f, cp+3); if (r!=0) return r; - *v = toku_dtoh32(result); - - return 0; -} -int toku_fread_uint32_t (FILE *f, uint32_t *v, struct x1764 *checksum, uint32_t *len) { - uint32_t result; - uint8_t *cp = (uint8_t*)&result; - int r; - r = toku_fread_uint8_t (f, cp+0, checksum, len); if(r!=0) return r; - r = toku_fread_uint8_t (f, cp+1, checksum, len); if(r!=0) return r; - r = toku_fread_uint8_t (f, cp+2, checksum, len); if(r!=0) return r; - r = toku_fread_uint8_t (f, cp+3, checksum, len); if(r!=0) return r; - *v = toku_dtoh32(result); - return 0; -} - -int toku_fread_uint64_t (FILE *f, uint64_t *v, struct x1764 *checksum, uint32_t *len) { - uint32_t v1,v2; - int r; - r=toku_fread_uint32_t(f, &v1, checksum, len); if (r!=0) return r; - r=toku_fread_uint32_t(f, &v2, checksum, len); if (r!=0) return r; - *v = (((uint64_t)v1)<<32 ) | ((uint64_t)v2); - return 0; -} - -int toku_fread_bool (FILE *f, bool *v, struct x1764 *mm, uint32_t *len) { - uint8_t iv; - int r = toku_fread_uint8_t(f, &iv, mm, 
len); - if (r == 0) { - *v = (iv!=0); - } - return r; -} - -int toku_fread_LSN (FILE *f, LSN *lsn, struct x1764 *checksum, uint32_t *len) { - return toku_fread_uint64_t (f, &lsn->lsn, checksum, len); -} - -int toku_fread_BLOCKNUM (FILE *f, BLOCKNUM *b, struct x1764 *checksum, uint32_t *len) { - return toku_fread_uint64_t (f, (uint64_t*)&b->b, checksum, len); -} - -int toku_fread_FILENUM (FILE *f, FILENUM *filenum, struct x1764 *checksum, uint32_t *len) { - return toku_fread_uint32_t (f, &filenum->fileid, checksum, len); -} - -int toku_fread_TXNID (FILE *f, TXNID *txnid, struct x1764 *checksum, uint32_t *len) { - return toku_fread_uint64_t (f, txnid, checksum, len); -} - -int toku_fread_TXNID_PAIR (FILE *f, TXNID_PAIR *txnid, struct x1764 *checksum, uint32_t *len) { - TXNID parent; - TXNID child; - int r; - r = toku_fread_TXNID(f, &parent, checksum, len); if (r != 0) { return r; } - r = toku_fread_TXNID(f, &child, checksum, len); if (r != 0) { return r; } - txnid->parent_id64 = parent; - txnid->child_id64 = child; - return 0; -} - - -int toku_fread_XIDP (FILE *f, XIDP *xidp, struct x1764 *checksum, uint32_t *len) { - // These reads are verbose because XA defined the fields as "long", but we use 4 bytes, 1 byte and 1 byte respectively. - TOKU_XA_XID *XMALLOC(xid); - { - uint32_t formatID; - int r = toku_fread_uint32_t(f, &formatID, checksum, len); - if (r!=0) return r; - xid->formatID = formatID; - } - { - uint8_t gtrid_length; - int r = toku_fread_uint8_t (f, >rid_length, checksum, len); - if (r!=0) return r; - xid->gtrid_length = gtrid_length; - } - { - uint8_t bqual_length; - int r = toku_fread_uint8_t (f, &bqual_length, checksum, len); - if (r!=0) return r; - xid->bqual_length = bqual_length; - } - for (int i=0; i< xid->gtrid_length + xid->bqual_length; i++) { - uint8_t byte; - int r = toku_fread_uint8_t(f, &byte, checksum, len); - if (r!=0) return r; - xid->data[i] = byte; - } - *xidp = xid; - return 0; -} - -// fills in the bs with malloced data. 
-int toku_fread_BYTESTRING (FILE *f, BYTESTRING *bs, struct x1764 *checksum, uint32_t *len) { - int r=toku_fread_uint32_t(f, (uint32_t*)&bs->len, checksum, len); - if (r!=0) return r; - XMALLOC_N(bs->len, bs->data); - uint32_t i; - for (i=0; ilen; i++) { - r=toku_fread_uint8_t(f, (uint8_t*)&bs->data[i], checksum, len); - if (r!=0) { - toku_free(bs->data); - bs->data=0; - return r; - } - } - return 0; -} - -// fills in the fs with malloced data. -int toku_fread_FILENUMS (FILE *f, FILENUMS *fs, struct x1764 *checksum, uint32_t *len) { - int r=toku_fread_uint32_t(f, (uint32_t*)&fs->num, checksum, len); - if (r!=0) return r; - XMALLOC_N(fs->num, fs->filenums); - uint32_t i; - for (i=0; inum; i++) { - r=toku_fread_FILENUM (f, &fs->filenums[i], checksum, len); - if (r!=0) { - toku_free(fs->filenums); - fs->filenums=0; - return r; - } - } - return 0; -} - -int toku_logprint_LSN (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))) { - LSN v; - int r = toku_fread_LSN(inf, &v, checksum, len); - if (r!=0) return r; - fprintf(outf, " %s=%" PRIu64, fieldname, v.lsn); - return 0; -} - -int toku_logprint_TXNID (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))) { - TXNID v; - int r = toku_fread_TXNID(inf, &v, checksum, len); - if (r!=0) return r; - fprintf(outf, " %s=%" PRIu64, fieldname, v); - return 0; -} - -int toku_logprint_TXNID_PAIR (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))) { - TXNID_PAIR v; - int r = toku_fread_TXNID_PAIR(inf, &v, checksum, len); - if (r!=0) return r; - fprintf(outf, " %s=%" PRIu64 ",%" PRIu64, fieldname, v.parent_id64, v.child_id64); - return 0; -} - -int toku_logprint_XIDP (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))) 
{ - XIDP vp; - int r = toku_fread_XIDP(inf, &vp, checksum, len); - if (r!=0) return r; - fprintf(outf, "%s={formatID=0x%lx gtrid_length=%ld bqual_length=%ld data=", fieldname, vp->formatID, vp->gtrid_length, vp->bqual_length); - toku_print_bytes(outf, vp->gtrid_length + vp->bqual_length, vp->data); - fprintf(outf, "}"); - toku_free(vp); - return 0; -} - -int toku_logprint_uint8_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format) { - uint8_t v; - int r = toku_fread_uint8_t(inf, &v, checksum, len); - if (r!=0) return r; - fprintf(outf, " %s=%d", fieldname, v); - if (format) fprintf(outf, format, v); - else if (v=='\'') fprintf(outf, "('\'')"); - else if (isprint(v)) fprintf(outf, "('%c')", v); - else {}/*nothing*/ - return 0; -} - -int toku_logprint_uint32_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format) { - uint32_t v; - int r = toku_fread_uint32_t(inf, &v, checksum, len); - if (r!=0) return r; - fprintf(outf, " %s=", fieldname); - fprintf(outf, format ? format : "%d", v); - return 0; -} - -int toku_logprint_uint64_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format) { - uint64_t v; - int r = toku_fread_uint64_t(inf, &v, checksum, len); - if (r!=0) return r; - fprintf(outf, " %s=", fieldname); - fprintf(outf, format ? format : "%" PRId64, v); - return 0; -} - -int toku_logprint_bool (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))) { - bool v; - int r = toku_fread_bool(inf, &v, checksum, len); - if (r!=0) return r; - fprintf(outf, " %s=%s", fieldname, v ? 
"true" : "false"); - return 0; - -} - -void toku_print_BYTESTRING (FILE *outf, uint32_t len, char *data) { - fprintf(outf, "{len=%u data=", len); - toku_print_bytes(outf, len, data); - fprintf(outf, "}"); - -} - -int toku_logprint_BYTESTRING (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))) { - BYTESTRING bs; - int r = toku_fread_BYTESTRING(inf, &bs, checksum, len); - if (r!=0) return r; - fprintf(outf, " %s=", fieldname); - toku_print_BYTESTRING(outf, bs.len, bs.data); - toku_free(bs.data); - return 0; -} - -int toku_logprint_BLOCKNUM (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format) { - return toku_logprint_uint64_t(outf, inf, fieldname, checksum, len, format); - -} - -int toku_logprint_FILENUM (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format) { - return toku_logprint_uint32_t(outf, inf, fieldname, checksum, len, format); - -} - -static void -toku_print_FILENUMS (FILE *outf, uint32_t num, FILENUM *filenums) { - fprintf(outf, "{num=%u filenums=\"", num); - uint32_t i; - for (i=0; i0) - fprintf(outf, ","); - fprintf(outf, "0x%" PRIx32, filenums[i].fileid); - } - fprintf(outf, "\"}"); - -} - -int toku_logprint_FILENUMS (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))) { - FILENUMS bs; - int r = toku_fread_FILENUMS(inf, &bs, checksum, len); - if (r!=0) return r; - fprintf(outf, " %s=", fieldname); - toku_print_FILENUMS(outf, bs.num, bs.filenums); - toku_free(bs.filenums); - return 0; -} - -int toku_read_and_print_logmagic (FILE *f, uint32_t *versionp) { - { - char magic[8]; - int r=fread(magic, 1, 8, f); - if (r!=8) { - return DB_BADFORMAT; - } - if (memcmp(magic, "tokulogg", 8)!=0) { - return DB_BADFORMAT; - } - } - { - int version; - int r=fread(&version, 1, 4, f); - if (r!=4) { - return 
DB_BADFORMAT; - } - printf("tokulog v.%u\n", toku_ntohl(version)); - //version MUST be in network order regardless of disk order - *versionp=toku_ntohl(version); - } - return 0; -} - -int toku_read_logmagic (FILE *f, uint32_t *versionp) { - { - char magic[8]; - int r=fread(magic, 1, 8, f); - if (r!=8) { - return DB_BADFORMAT; - } - if (memcmp(magic, "tokulogg", 8)!=0) { - return DB_BADFORMAT; - } - } - { - int version; - int r=fread(&version, 1, 4, f); - if (r!=4) { - return DB_BADFORMAT; - } - *versionp=toku_ntohl(version); - } - return 0; -} - -TXNID_PAIR toku_txn_get_txnid (TOKUTXN txn) { - TXNID_PAIR tp = { .parent_id64 = TXNID_NONE, .child_id64 = TXNID_NONE}; - if (txn==0) return tp; - else return txn->txnid; -} - -LSN toku_logger_last_lsn(TOKULOGGER logger) { - return logger->lsn; -} - -TOKULOGGER toku_txn_logger (TOKUTXN txn) { - return txn ? txn->logger : 0; -} - -void toku_txnid2txn(TOKULOGGER logger, TXNID_PAIR txnid, TOKUTXN *result) { - TOKUTXN root_txn = NULL; - toku_txn_manager_suspend(logger->txn_manager); - toku_txn_manager_id2txn_unlocked(logger->txn_manager, txnid, &root_txn); - if (root_txn == NULL || root_txn->txnid.child_id64 == txnid.child_id64) { - *result = root_txn; - } - else if (root_txn != NULL) { - root_txn->child_manager->suspend(); - root_txn->child_manager->find_tokutxn_by_xid_unlocked(txnid, result); - root_txn->child_manager->resume(); - } - toku_txn_manager_resume(logger->txn_manager); -} - -// Find the earliest LSN in a log. No locks are needed. 
-static int peek_at_log (TOKULOGGER logger, char* filename, LSN *first_lsn) { - int fd = open(filename, O_RDONLY+O_BINARY); - if (fd<0) { - int er = get_error_errno(); - if (logger->write_log_files) printf("couldn't open: %s\n", strerror(er)); - return er; - } - enum { SKIP = 12+1+4 }; // read the 12 byte header, the first message, and the first len - unsigned char header[SKIP+8]; - int r = read(fd, header, SKIP+8); - if (r!=SKIP+8) return 0; // cannot determine that it's archivable, so we'll assume no. If a later-log is archivable is then this one will be too. - - uint64_t lsn; - { - struct rbuf rb; - rb.buf = header+SKIP; - rb.size = 8; - rb.ndone = 0; - lsn = rbuf_ulonglong(&rb); - } - - r=close(fd); - if (r!=0) { return 0; } - - first_lsn->lsn=lsn; - return 0; -} - -// Return a malloc'd array of malloc'd strings which are the filenames that can be archived. -// Output permission are obtained briefly so we can get a list of the log files without conflicting. -int toku_logger_log_archive (TOKULOGGER logger, char ***logs_p, int flags) { - if (flags!=0) return EINVAL; // don't know what to do. - int all_n_logs; - int i; - char **all_logs; - int n_logfiles; - LSN fsynced_lsn; - grab_output(logger, &fsynced_lsn); - int r = toku_logger_find_logfiles (logger->directory, &all_logs, &n_logfiles); - release_output(logger, fsynced_lsn); - if (r!=0) return r; - - for (i=0; all_logs[i]; i++); - all_n_logs=i; - // get them into increasing order - qsort(all_logs, all_n_logs, sizeof(all_logs[0]), logfilenamecompare); - - LSN save_lsn = logger->last_completed_checkpoint_lsn; - - // Now starting at the last one, look for archivable ones. - // Count the total number of bytes, because we have to return a single big array. (That's the BDB interface. Bleah...) 
- LSN earliest_lsn_in_logfile={(unsigned long long)(-1LL)}; - r = peek_at_log(logger, all_logs[all_n_logs-1], &earliest_lsn_in_logfile); // try to find the lsn that's in the most recent log - if (earliest_lsn_in_logfile.lsn <= save_lsn.lsn) { - i=all_n_logs-1; - } else { - for (i=all_n_logs-2; i>=0; i--) { // start at all_n_logs-2 because we never archive the most recent log - r = peek_at_log(logger, all_logs[i], &earliest_lsn_in_logfile); - if (r!=0) continue; // In case of error, just keep going - - if (earliest_lsn_in_logfile.lsn <= save_lsn.lsn) { - break; - } - } - } - - // all log files up to, but but not including, i can be archived. - int n_to_archive=i; - int count_bytes=0; - for (i=0; iparent; -} - -void toku_logger_note_checkpoint(TOKULOGGER logger, LSN lsn) { - logger->last_completed_checkpoint_lsn = lsn; -} - -/////////////////////////////////////////////////////////////////////////////////// -// Engine status -// -// Status is intended for display to humans to help understand system behavior. -// It does not need to be perfectly thread-safe. - -static LOGGER_STATUS_S logger_status; - -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(logger_status, k, c, t, "logger: " l, inc) - -static void -status_init(void) { - // Note, this function initializes the keyname, type, and legend fields. - // Value fields are initialized to zero by compiler. 
- STATUS_INIT(LOGGER_NEXT_LSN, nullptr, UINT64, "next LSN", TOKU_ENGINE_STATUS); - STATUS_INIT(LOGGER_NUM_WRITES, LOGGER_WRITES, UINT64, "writes", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(LOGGER_BYTES_WRITTEN, LOGGER_WRITES_BYTES, UINT64, "writes (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(LOGGER_UNCOMPRESSED_BYTES_WRITTEN, LOGGER_WRITES_UNCOMPRESSED_BYTES, UINT64, "writes (uncompressed bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(LOGGER_TOKUTIME_WRITES, LOGGER_WRITES_SECONDS, TOKUTIME, "writes (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(LOGGER_WAIT_BUF_LONG, LOGGER_WAIT_LONG, UINT64, "count", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - logger_status.initialized = true; -} -#undef STATUS_INIT - -#define STATUS_VALUE(x) logger_status.status[x].value.num - -void -toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS statp) { - if (!logger_status.initialized) - status_init(); - if (logger) { - STATUS_VALUE(LOGGER_NEXT_LSN) = logger->lsn.lsn; - STATUS_VALUE(LOGGER_NUM_WRITES) = logger->num_writes_to_disk; - STATUS_VALUE(LOGGER_BYTES_WRITTEN) = logger->bytes_written_to_disk; - // No compression on logfiles so the uncompressed size is just number of bytes written - STATUS_VALUE(LOGGER_UNCOMPRESSED_BYTES_WRITTEN) = logger->bytes_written_to_disk; - STATUS_VALUE(LOGGER_TOKUTIME_WRITES) = logger->time_spent_writing_to_disk; - STATUS_VALUE(LOGGER_WAIT_BUF_LONG) = logger->num_wait_buf_long; - } - *statp = logger_status; -} - - - -////////////////////////////////////////////////////////////////////////////////////////////////////// -// Used for upgrade: -// if any valid log files exist in log_dir, then -// set *found_any_logs to true and set *version_found to version number of latest log -int -toku_get_version_of_logs_on_disk(const char *log_dir, bool *found_any_logs, uint32_t *version_found) { - bool found = false; - uint32_t highest_version = 0; - int r = 0; - - struct dirent *de; - DIR 
*d=opendir(log_dir); - if (d==NULL) { - r = get_error_errno(); - } - else { - // Examine every file in the directory and find highest version - while ((de=readdir(d))) { - uint32_t this_log_version; - uint64_t this_log_number; - bool is_log = is_a_logfile_any_version(de->d_name, &this_log_number, &this_log_version); - if (is_log) { - if (!found) { // first log file found - found = true; - highest_version = this_log_version; - } - else - highest_version = highest_version > this_log_version ? highest_version : this_log_version; - } - } - int r2 = closedir(d); - if (r==0) r = r2; - } - if (r==0) { - *found_any_logs = found; - if (found) - *version_found = highest_version; - } - return r; -} - -TXN_MANAGER toku_logger_get_txn_manager(TOKULOGGER logger) { - return logger->txn_manager; -} - -#undef STATUS_VALUE diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/logger.h 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/logger.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,264 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_LOGGER_H -#define TOKU_LOGGER_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "fttypes.h" -#include "ft_layout_version.h" - -enum { - TOKU_LOG_VERSION_1 = 1, - TOKU_LOG_VERSION_2 = 2, - //After 2 we linked the log version to the FT_LAYOUT VERSION. 
- //So it went from 2 to 13 (3-12 do not exist) - TOKU_LOG_VERSION = FT_LAYOUT_VERSION, - TOKU_LOG_MIN_SUPPORTED_VERSION = FT_LAYOUT_MIN_SUPPORTED_VERSION, -}; - -int toku_logger_create (TOKULOGGER *resultp); -int toku_logger_open (const char *directory, TOKULOGGER logger); -int toku_logger_open_with_last_xid(const char *directory, TOKULOGGER logger, TXNID last_xid); -void toku_logger_shutdown(TOKULOGGER logger); -int toku_logger_close(TOKULOGGER *loggerp); -void toku_logger_initialize_rollback_cache(TOKULOGGER logger, FT ft); -int toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, bool create); -void toku_logger_close_rollback(TOKULOGGER logger); -bool toku_logger_rollback_is_open (TOKULOGGER); // return true iff the rollback is open. - -void toku_logger_fsync (TOKULOGGER logger); -void toku_logger_fsync_if_lsn_not_fsynced(TOKULOGGER logger, LSN lsn); -int toku_logger_is_open(TOKULOGGER logger); -void toku_logger_set_cachetable (TOKULOGGER logger, CACHETABLE ct); -int toku_logger_set_lg_max(TOKULOGGER logger, uint32_t lg_max); -int toku_logger_get_lg_max(TOKULOGGER logger, uint32_t *lg_maxp); -int toku_logger_set_lg_bsize(TOKULOGGER logger, uint32_t bsize); - -void toku_logger_write_log_files (TOKULOGGER logger, bool write_log_files); -void toku_logger_trim_log_files(TOKULOGGER logger, bool trim_log_files); -bool toku_logger_txns_exist(TOKULOGGER logger); - -// Restart the logger. This function is used by recovery to really start -// logging. -// Effects: Flush the current log buffer, reset the logger's lastlsn, and -// open a new log file. -// Returns: 0 if success -int toku_logger_restart(TOKULOGGER logger, LSN lastlsn); - -// Maybe trim the log entries from the log that are older than the given LSN -// Effect: find all of the log files whose largest LSN is smaller than the -// given LSN and delete them. 
-void toku_logger_maybe_trim_log(TOKULOGGER logger, LSN oldest_open_lsn); - -void toku_logger_log_fcreate(TOKUTXN txn, const char *fname, FILENUM filenum, uint32_t mode, uint32_t flags, uint32_t nodesize, uint32_t basementnodesize, enum toku_compression_method compression_method); -void toku_logger_log_fdelete(TOKUTXN txn, FILENUM filenum); -void toku_logger_log_fopen(TOKUTXN txn, const char * fname, FILENUM filenum, uint32_t treeflags); - -int toku_fread_uint8_t (FILE *f, uint8_t *v, struct x1764 *mm, uint32_t *len); -int toku_fread_uint32_t_nocrclen (FILE *f, uint32_t *v); -int toku_fread_uint32_t (FILE *f, uint32_t *v, struct x1764 *checksum, uint32_t *len); -int toku_fread_uint64_t (FILE *f, uint64_t *v, struct x1764 *checksum, uint32_t *len); -int toku_fread_bool (FILE *f, bool *v, struct x1764 *checksum, uint32_t *len); -int toku_fread_LSN (FILE *f, LSN *lsn, struct x1764 *checksum, uint32_t *len); -int toku_fread_BLOCKNUM (FILE *f, BLOCKNUM *lsn, struct x1764 *checksum, uint32_t *len); -int toku_fread_FILENUM (FILE *f, FILENUM *filenum, struct x1764 *checksum, uint32_t *len); -int toku_fread_TXNID (FILE *f, TXNID *txnid, struct x1764 *checksum, uint32_t *len); -int toku_fread_TXNID_PAIR (FILE *f, TXNID_PAIR *txnid, struct x1764 *checksum, uint32_t *len); -int toku_fread_XIDP (FILE *f, XIDP *xidp, struct x1764 *checksum, uint32_t *len); -int toku_fread_BYTESTRING (FILE *f, BYTESTRING *bs, struct x1764 *checksum, uint32_t *len); -int toku_fread_FILENUMS (FILE *f, FILENUMS *fs, struct x1764 *checksum, uint32_t *len); - -int toku_logprint_LSN (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))); -int toku_logprint_TXNID (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))); -int toku_logprint_TXNID_PAIR (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char 
*format __attribute__((__unused__))); -int toku_logprint_XIDP (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))); -int toku_logprint_uint8_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format); -int toku_logprint_uint32_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format); -int toku_logprint_BLOCKNUM (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format); -int toku_logprint_uint64_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format); -int toku_logprint_bool (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))); -void toku_print_BYTESTRING (FILE *outf, uint32_t len, char *data); -int toku_logprint_BYTESTRING (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))); -int toku_logprint_FILENUM (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format); -int toku_logprint_FILENUMS (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format); -int toku_read_and_print_logmagic (FILE *f, uint32_t *versionp); -int toku_read_logmagic (FILE *f, uint32_t *versionp); - -TXNID_PAIR toku_txn_get_txnid (TOKUTXN txn); -LSN toku_logger_last_lsn(TOKULOGGER logger); -TOKULOGGER toku_txn_logger (TOKUTXN txn); - -void toku_txnid2txn (TOKULOGGER logger, TXNID_PAIR txnid, TOKUTXN *result); - -int toku_logger_log_archive (TOKULOGGER logger, char ***logs_p, int flags); - -TOKUTXN toku_logger_txn_parent (TOKUTXN txn); -void toku_logger_note_checkpoint(TOKULOGGER logger, LSN lsn); - -void toku_logger_make_space_in_inbuf (TOKULOGGER logger, int 
n_bytes_needed); - -int toku_logger_write_inbuf (TOKULOGGER logger); -// Effect: Write the buffered data (from the inbuf) to a file. No fsync, however. -// As a side effect, the inbuf will be made empty. -// Return 0 on success, otherwise return an error number. -// Requires: The inbuf lock is currently held, and the outbuf lock is not held. -// Upon return, the inbuf lock will be held, and the outbuf lock is not held. -// However, no side effects should have been made to the logger. The lock was acquired simply to determine that the buffer will overflow if we try to put something into it. -// The inbuf lock will be released, so the operations before and after this function call will not be atomic. -// Rationale: When the buffer becomes nearly full, call this function so that more can be put in. -// Implementation note: Since the output lock is acquired first, we must release the input lock, and then grab both in the right order. - -void toku_logger_maybe_fsync (TOKULOGGER logger, LSN lsn, int do_fsync, bool holds_input_lock); -// Effect: If fsync is nonzero, then make sure that the log is flushed and synced at least up to lsn. -// Entry: Holds input lock iff 'holds_input_lock'. -// Exit: Holds no locks. - -// Discussion: How does the logger work: -// The logger has two buffers: an inbuf and an outbuf. -// There are two locks, called the inlock, and the outlock. To write, both locks must be held, and the outlock is acquired first. -// Roughly speaking, the inbuf is used to accumulate logged data, and the outbuf is used to write to disk. -// When something is to be logged we do the following: -// acquire the inlock. -// Make sure there is space in the inbuf for the logentry. (We know the size of the logentry in advance): -// if the inbuf doesn't have enough space then -// release the inlock -// acquire the outlock -// acquire the inlock -// it's possible that some other thread made space. 
-// if there still isn't space -// swap the inbuf and the outbuf -// release the inlock -// write the outbuf -// acquire the inlock -// release the outlock -// if the inbuf is still too small, then increase the size of the inbuf -// Increment the LSN and fill the inbuf. -// If fsync is required then -// release the inlock -// acquire the outlock -// acquire the inlock -// if the LSN has been flushed and fsynced (if so we are done. Some other thread did the flush.) -// release the locks -// if the LSN has been flushed but not fsynced up to the LSN: -// release the inlock -// fsync -// release the outlock -// otherwise: -// swap the outbuf and the inbuf -// release the inlock -// write the outbuf -// fsync -// release the outlock - -typedef enum { - LOGGER_NEXT_LSN = 0, - LOGGER_NUM_WRITES, - LOGGER_BYTES_WRITTEN, - LOGGER_UNCOMPRESSED_BYTES_WRITTEN, - LOGGER_TOKUTIME_WRITES, - LOGGER_WAIT_BUF_LONG, - LOGGER_STATUS_NUM_ROWS -} logger_status_entry; - -typedef struct { - bool initialized; - TOKU_ENGINE_STATUS_ROW_S status[LOGGER_STATUS_NUM_ROWS]; -} LOGGER_STATUS_S, *LOGGER_STATUS; - -void toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS s); - -int toku_get_version_of_logs_on_disk(const char *log_dir, bool *found_any_logs, uint32_t *version_found); - -TXN_MANAGER toku_logger_get_txn_manager(TOKULOGGER logger); - -static const TOKULOGGER NULL_logger __attribute__((__unused__)) = NULL; - -#endif /* TOKU_LOGGER_H */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/log.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/log.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/log.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/log.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,134 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_LOGGGER_H -#define TOKU_LOGGGER_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free 
software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. 
- -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
-#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include -#include -#include - -#include "fttypes.h" -#include "memory.h" -#include "logger.h" -#include "rollback.h" -#include "recover.h" -#include "txn.h" - -struct roll_entry; - -static inline int toku_copy_BYTESTRING(BYTESTRING *target, BYTESTRING val) { - target->len = val.len; - target->data = (char *) toku_memdup(val.data, (size_t)val.len); - if (target->data==0) { - return get_error_errno(); - } - return 0; -} -static inline void toku_free_TXNID(TXNID txnid __attribute__((__unused__))) {} -static inline void toku_free_TXNID_PAIR(TXNID_PAIR txnid __attribute__((__unused__))) {} - -static inline void toku_free_LSN(LSN lsn __attribute__((__unused__))) {} -static inline void toku_free_uint64_t(uint64_t u __attribute__((__unused__))) {} -static inline void toku_free_uint32_t(uint32_t u __attribute__((__unused__))) {} -static inline void toku_free_uint8_t(uint8_t u __attribute__((__unused__))) {} -static inline void toku_free_FILENUM(FILENUM u __attribute__((__unused__))) {} -static inline void toku_free_BLOCKNUM(BLOCKNUM u __attribute__((__unused__))) {} -static inline void toku_free_bool(bool u __attribute__((__unused__))) {} -static inline void toku_free_XIDP(XIDP xidp) { toku_free(xidp); } -static inline void toku_free_BYTESTRING(BYTESTRING val) { toku_free(val.data); } -static inline void toku_free_FILENUMS(FILENUMS val) { toku_free(val.filenums); } - -int toku_maybe_upgrade_log (const char *env_dir, const char *log_dir, LSN * lsn_of_clean_shutdown, bool * upgrade_in_progress); -uint64_t toku_log_upgrade_get_footprint(void); - - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/log-internal.h 
mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/log-internal.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/log-internal.h 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/log-internal.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,375 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef LOG_INTERNAL_H -#define LOG_INTERNAL_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include -#include -#include -#include -#include "ft-internal.h" -#include "log.h" -#include "toku_list.h" -#include "logfilemgr.h" -#include "txn.h" -#include "txn_manager.h" -#include "rollback_log_node_cache.h" -#include "txn_child_manager.h" - -#include - -#include -#include - -using namespace toku; -// Locking for the logger -// For most purposes we use the big ydb lock. 
-// To log: grab the buf lock -// If the buf would overflow, then grab the file lock, swap file&buf, release buf lock, write the file, write the entry, release the file lock -// else append to buf & release lock - -#define LOGGER_MIN_BUF_SIZE (1<<24) - -struct mylock { - toku_mutex_t lock; -}; - -static inline void ml_init(struct mylock *l) { - toku_mutex_init(&l->lock, 0); -} -static inline void ml_lock(struct mylock *l) { - toku_mutex_lock(&l->lock); -} -static inline void ml_unlock(struct mylock *l) { - toku_mutex_unlock(&l->lock); -} -static inline void ml_destroy(struct mylock *l) { - toku_mutex_destroy(&l->lock); -} - -struct logbuf { - int n_in_buf; - int buf_size; - char *buf; - LSN max_lsn_in_buf; -}; - -struct tokulogger { - struct mylock input_lock; - - toku_mutex_t output_condition_lock; // if you need both this lock and input_lock, acquire the output_lock first, then input_lock. More typical is to get the output_is_available condition to be false, and then acquire the input_lock. - toku_cond_t output_condition; // - bool output_is_available; // this is part of the predicate for the output condition. It's true if no thread is modifying the output (either doing an fsync or otherwise fiddling with the output). - - bool is_open; - bool write_log_files; - bool trim_log_files; // for test purposes - char *directory; // file system directory - DIR *dir; // descriptor for directory - int fd; - CACHETABLE ct; - int lg_max; // The size of the single file in the log. Default is 100MB in TokuDB - - // To access these, you must have the input lock - LSN lsn; // the next available lsn - struct logbuf inbuf; // data being accumulated for the write - - // To access these, you must have the output condition lock. 
- LSN written_lsn; // the last lsn written - LSN fsynced_lsn; // What is the LSN of the highest fsynced log entry (accessed only while holding the output lock, and updated only when the output lock and output permission are held) - LSN last_completed_checkpoint_lsn; // What is the LSN of the most recent completed checkpoint. - long long next_log_file_number; - struct logbuf outbuf; // data being written to the file - int n_in_file; // The amount of data in the current file - - // To access the logfilemgr you must have the output condition lock. - TOKULOGFILEMGR logfilemgr; - - uint32_t write_block_size; // How big should the blocks be written to various logs? - - uint64_t num_writes_to_disk; // how many times did we write to disk? - uint64_t bytes_written_to_disk; // how many bytes have been written to disk? - tokutime_t time_spent_writing_to_disk; // how much tokutime did we spend writing to disk? - uint64_t num_wait_buf_long; // how many times we waited >= 100ms for the in buf - - void (*remove_finalize_callback) (DICTIONARY_ID, void*); // ydb-level callback to be called when a transaction that ... - void * remove_finalize_callback_extra; // ... deletes a file is committed or when one that creates a file is aborted. - CACHEFILE rollback_cachefile; - rollback_log_node_cache rollback_cache; - TXN_MANAGER txn_manager; -}; - -int toku_logger_find_next_unused_log_file(const char *directory, long long *result); -int toku_logger_find_logfiles (const char *directory, char ***resultp, int *n_logfiles); - -struct txn_roll_info { - // these are number of rollback nodes and rollback entries for this txn. - // - // the current rollback node below has sequence number num_rollback_nodes - 1 - // (because they are numbered 0...num-1). often, the current rollback is - // already set to this block num, which means it exists and is available to - // log some entries. 
if the current rollback is NONE and the number of - // rollback nodes for this transaction is non-zero, then we will use - // the number of rollback nodes to know which sequence number to assign - // to a new one we create - uint64_t num_rollback_nodes; - uint64_t num_rollentries; - uint64_t num_rollentries_processed; - uint64_t rollentry_raw_count; // the total count of every byte in the transaction and all its children. - - // spilled rollback nodes are rollback nodes that were gorged by this - // transaction, retired, and saved in a list. - - // the spilled rollback head is the block number of the first rollback node - // that makes up the rollback log chain - BLOCKNUM spilled_rollback_head; - // the spilled rollback is the block number of the last rollback node that - // makes up the rollback log chain. - BLOCKNUM spilled_rollback_tail; - // the current rollback node block number we may use. if this is ROLLBACK_NONE, - // then we need to create one and set it here before using it. - BLOCKNUM current_rollback; -}; - -struct tokutxn { - // These don't change after create: - - TXNID_PAIR txnid; - - uint64_t snapshot_txnid64; // this is the lsn of the snapshot - const TXN_SNAPSHOT_TYPE snapshot_type; - const bool for_recovery; - const TOKULOGGER logger; - const TOKUTXN parent; - // The child txn is protected by the child_txn_manager lock - // and by the user contract. The user contract states (and is - // enforced at the ydb layer) that a child txn should not be created - // while another child exists. 
The txn_child_manager will protect - // other threads from trying to read this value while another - // thread commits/aborts the child - TOKUTXN child; - // statically allocated child manager, if this - // txn is a root txn, this manager will be used and set to - // child_manager for this transaction and all of its children - txn_child_manager child_manager_s; - // child manager for this transaction, all of its children, - // and all of its ancestors - txn_child_manager* child_manager; - // These don't change but they're created in a way that's hard to make - // strictly const. - DB_TXN *container_db_txn; // reference to DB_TXN that contains this tokutxn - xid_omt_t *live_root_txn_list; // the root txns live when the root ancestor (self if a root) started. - XIDS xids; // Represents the xid list - - TOKUTXN snapshot_next; - TOKUTXN snapshot_prev; - - bool begin_was_logged; - bool declared_read_only; // true if the txn was declared read only when began - // These are not read until a commit, prepare, or abort starts, and - // they're "monotonic" (only go false->true) during operation: - bool do_fsync; - bool force_fsync_on_commit; //This transaction NEEDS an fsync once (if) it commits. (commit means root txn) - - // Not used until commit, prepare, or abort starts: - LSN do_fsync_lsn; - TOKU_XA_XID xa_xid; // for prepared transactions - TXN_PROGRESS_POLL_FUNCTION progress_poll_fun; - void *progress_poll_fun_extra; - - toku_mutex_t txn_lock; - // Protected by the txn lock: - omt open_fts; // a collection of the fts that we touched. Indexed by filenum. 
- struct txn_roll_info roll_info; // Info used to manage rollback entries - - // mutex that protects the transition of the state variable - // the rest of the variables are used by the txn code and - // hot indexing to ensure that when hot indexing is processing a - // leafentry, a TOKUTXN cannot dissappear or change state out from - // underneath it - toku_mutex_t state_lock; - toku_cond_t state_cond; - TOKUTXN_STATE state; - uint32_t num_pin; // number of threads (all hot indexes) that want this - // txn to not transition to commit or abort - uint64_t client_id; -}; - -static inline int -txn_has_current_rollback_log(TOKUTXN txn) { - return txn->roll_info.current_rollback.b != ROLLBACK_NONE.b; -} - -static inline int -txn_has_spilled_rollback_logs(TOKUTXN txn) { - return txn->roll_info.spilled_rollback_tail.b != ROLLBACK_NONE.b; -} - -struct txninfo { - uint64_t rollentry_raw_count; // the total count of every byte in the transaction and all its children. - uint32_t num_fts; - FT *open_fts; - bool force_fsync_on_commit; //This transaction NEEDS an fsync once (if) it commits. 
(commit means root txn) - uint64_t num_rollback_nodes; - uint64_t num_rollentries; - BLOCKNUM spilled_rollback_head; - BLOCKNUM spilled_rollback_tail; - BLOCKNUM current_rollback; -}; - -static inline int toku_logsizeof_uint8_t (uint32_t v __attribute__((__unused__))) { - return 1; -} - -static inline int toku_logsizeof_uint32_t (uint32_t v __attribute__((__unused__))) { - return 4; -} - -static inline int toku_logsizeof_uint64_t (uint32_t v __attribute__((__unused__))) { - return 8; -} - -static inline int toku_logsizeof_bool (uint32_t v __attribute__((__unused__))) { - return 1; -} - -static inline int toku_logsizeof_FILENUM (FILENUM v __attribute__((__unused__))) { - return 4; -} - -static inline int toku_logsizeof_DISKOFF (DISKOFF v __attribute__((__unused__))) { - return 8; -} -static inline int toku_logsizeof_BLOCKNUM (BLOCKNUM v __attribute__((__unused__))) { - return 8; -} - -static inline int toku_logsizeof_LSN (LSN lsn __attribute__((__unused__))) { - return 8; -} - -static inline int toku_logsizeof_TXNID (TXNID txnid __attribute__((__unused__))) { - return 8; -} - -static inline int toku_logsizeof_TXNID_PAIR (TXNID_PAIR txnid __attribute__((__unused__))) { - return 16; -} - -static inline int toku_logsizeof_XIDP (XIDP xid) { - assert(0<=xid->gtrid_length && xid->gtrid_length<=64); - assert(0<=xid->bqual_length && xid->bqual_length<=64); - return xid->gtrid_length - + xid->bqual_length - + 4 // formatID - + 1 // gtrid_length - + 1; // bqual_length -} - -static inline int toku_logsizeof_FILENUMS (FILENUMS fs) { - static const FILENUM f = {0}; //fs could have .num==0 and then we cannot dereference - return 4 + fs.num * toku_logsizeof_FILENUM(f); -} - -static inline int toku_logsizeof_BYTESTRING (BYTESTRING bs) { - return 4+bs.len; -} - -static inline char *fixup_fname(BYTESTRING *f) { - assert(f->len>0); - char *fname = (char*)toku_xmalloc(f->len+1); - memcpy(fname, f->data, f->len); - fname[f->len]=0; - return fname; -} - -#endif diff -Nru 
mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/log_upgrade.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/log_upgrade.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/log_upgrade.cc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/log_upgrade.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,346 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include - -#include "log-internal.h" -#include "logcursor.h" -#include "checkpoint.h" - -static uint64_t footprint = 0; // for debug and accountability - -uint64_t -toku_log_upgrade_get_footprint(void) { - return footprint; -} - -// Footprint concept here is that each function increments a different decimal digit. -// The cumulative total shows the path taken for the upgrade. -// Each function must have a single return for this to work. 
-#define FOOTPRINT(x) function_footprint=(x*footprint_increment) -#define FOOTPRINTSETUP(increment) uint64_t function_footprint = 0; uint64_t footprint_increment=increment; -#define FOOTPRINTCAPTURE footprint+=function_footprint; - - -// return 0 if clean shutdown, TOKUDB_UPGRADE_FAILURE if not clean shutdown -static int -verify_clean_shutdown_of_log_version_current(const char *log_dir, LSN * last_lsn, TXNID *last_xid) { - int rval = TOKUDB_UPGRADE_FAILURE; - TOKULOGCURSOR cursor = NULL; - int r; - FOOTPRINTSETUP(100); - - FOOTPRINT(1); - - r = toku_logcursor_create(&cursor, log_dir); - assert(r == 0); - struct log_entry *le = NULL; - r = toku_logcursor_last(cursor, &le); - if (r == 0) { - FOOTPRINT(2); - if (le->cmd==LT_shutdown) { - LSN lsn = le->u.shutdown.lsn; - if (last_lsn) { - *last_lsn = lsn; - } - if (last_xid) { - *last_xid = le->u.shutdown.last_xid; - } - rval = 0; - } - } - r = toku_logcursor_destroy(&cursor); - assert(r == 0); - FOOTPRINTCAPTURE; - return rval; -} - - -// return 0 if clean shutdown, TOKUDB_UPGRADE_FAILURE if not clean shutdown -static int -verify_clean_shutdown_of_log_version_old(const char *log_dir, LSN * last_lsn, TXNID *last_xid, uint32_t version) { - int rval = TOKUDB_UPGRADE_FAILURE; - int r; - FOOTPRINTSETUP(10); - - FOOTPRINT(1); - - int n_logfiles; - char **logfiles; - r = toku_logger_find_logfiles(log_dir, &logfiles, &n_logfiles); - if (r!=0) return r; - - char *basename; - TOKULOGCURSOR cursor; - struct log_entry *entry; - // Only look at newest log - // basename points to first char after last / in file pathname - basename = strrchr(logfiles[n_logfiles-1], '/') + 1; - uint32_t version_name; - long long index = -1; - r = sscanf(basename, "log%lld.tokulog%u", &index, &version_name); - assert(r==2); // found index and version - invariant(version_name == version); - assert(version>=TOKU_LOG_MIN_SUPPORTED_VERSION); - assert(version< TOKU_LOG_VERSION); //Must be old - // find last LSN - r = toku_logcursor_create_for_file(&cursor, 
log_dir, basename); - if (r != 0) { - goto cleanup_no_logcursor; - } - r = toku_logcursor_last(cursor, &entry); - if (r != 0) { - goto cleanup; - } - FOOTPRINT(2); - //TODO: Remove this special case once FT_LAYOUT_VERSION_19 (and older) are not supported. - if (version <= FT_LAYOUT_VERSION_19) { - if (entry->cmd==LT_shutdown_up_to_19) { - LSN lsn = entry->u.shutdown_up_to_19.lsn; - if (last_lsn) { - *last_lsn = lsn; - } - if (last_xid) { - // Use lsn as last_xid. - *last_xid = lsn.lsn; - } - rval = 0; - } - } - else if (entry->cmd==LT_shutdown) { - LSN lsn = entry->u.shutdown.lsn; - if (last_lsn) { - *last_lsn = lsn; - } - if (last_xid) { - *last_xid = entry->u.shutdown.last_xid; - } - rval = 0; - } -cleanup: - r = toku_logcursor_destroy(&cursor); - assert(r == 0); -cleanup_no_logcursor: - for(int i=0;i TOKU_LOG_VERSION) - r = TOKUDB_DICTIONARY_TOO_NEW; - else if (version_of_logs_on_disk < TOKU_LOG_MIN_SUPPORTED_VERSION) - r = TOKUDB_DICTIONARY_TOO_OLD; - else if (version_of_logs_on_disk == TOKU_LOG_VERSION) - r = 0; //Logs are up to date - else { - FOOTPRINT(4); - LSN last_lsn = ZERO_LSN; - TXNID last_xid = TXNID_NONE; - r = verify_clean_shutdown_of_log_version(log_dir, version_of_logs_on_disk, &last_lsn, &last_xid); - if (r != 0) { - goto cleanup; - } - FOOTPRINT(5); - *lsn_of_clean_shutdown = last_lsn; - *upgrade_in_progress = true; - r = upgrade_log(env_dir, log_dir, last_lsn, last_xid); - } -cleanup: - { - //Clean up - int rc; - rc = toku_recover_unlock(lockfd); - if (r==0) r = rc; - } -cleanup_no_lock: - FOOTPRINTCAPTURE; - return r; -} - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/minicron.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/minicron.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/minicron.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/minicron.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,248 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: 
ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. 
- -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
-#ident "$Id$" - -#include -#include -#include - -#include "toku_assert.h" -#include "fttypes.h" -#include "minicron.h" - -static void -toku_gettime (toku_timespec_t *a) { - struct timeval tv; - gettimeofday(&tv, 0); - a->tv_sec = tv.tv_sec; - a->tv_nsec = tv.tv_usec * 1000LL; -} - - -static int -timespec_compare (toku_timespec_t *a, toku_timespec_t *b) { - if (a->tv_sec > b->tv_sec) return 1; - if (a->tv_sec < b->tv_sec) return -1; - if (a->tv_nsec > b->tv_nsec) return 1; - if (a->tv_nsec < b->tv_nsec) return -1; - return 0; -} - -// Implementation notes: -// When calling do_shutdown or change_period, the mutex is obtained, the variables in the minicron struct are modified, and -// the condition variable is signalled. Possibly the minicron thread will miss the signal. To avoid this problem, whenever -// the minicron thread acquires the mutex, it must check to see what the variables say to do (e.g., should it shut down?). - -static void* -minicron_do (void *pv) -{ - struct minicron *CAST_FROM_VOIDP(p, pv); - toku_mutex_lock(&p->mutex); - while (1) { - if (p->do_shutdown) { - toku_mutex_unlock(&p->mutex); - return 0; - } - if (p->period_in_ms == 0) { - // if we aren't supposed to do it then just do an untimed wait. - toku_cond_wait(&p->condvar, &p->mutex); - } - else if (p->period_in_ms <= 1000) { - toku_mutex_unlock(&p->mutex); - usleep(p->period_in_ms * 1000); - toku_mutex_lock(&p->mutex); - } - else { - // Recompute the wakeup time every time (instead of once per call to f) in case the period changges. 
- toku_timespec_t wakeup_at = p->time_of_last_call_to_f; - wakeup_at.tv_sec += (p->period_in_ms/1000); - wakeup_at.tv_nsec += (p->period_in_ms % 1000) * 1000000; - toku_timespec_t now; - toku_gettime(&now); - int compare = timespec_compare(&wakeup_at, &now); - // if the time to wakeup has yet to come, then we sleep - // otherwise, we continue - if (compare > 0) { - int r = toku_cond_timedwait(&p->condvar, &p->mutex, &wakeup_at); - if (r!=0 && r!=ETIMEDOUT) fprintf(stderr, "%s:%d r=%d (%s)", __FILE__, __LINE__, r, strerror(r)); - assert(r==0 || r==ETIMEDOUT); - } - } - // Now we woke up, and we should figure out what to do - if (p->do_shutdown) { - toku_mutex_unlock(&p->mutex); - return 0; - } - if (p->period_in_ms > 1000) { - toku_timespec_t now; - toku_gettime(&now); - toku_timespec_t time_to_call = p->time_of_last_call_to_f; - time_to_call.tv_sec += p->period_in_ms/1000; - time_to_call.tv_nsec += (p->period_in_ms % 1000) * 1000000; - int compare = timespec_compare(&time_to_call, &now); - if (compare <= 0) { - toku_gettime(&p->time_of_last_call_to_f); // the measured period includes the time to make the call. 
- toku_mutex_unlock(&p->mutex); - int r = p->f(p->arg); - assert(r==0); - toku_mutex_lock(&p->mutex); - - } - } - else if (p->period_in_ms != 0) { - toku_mutex_unlock(&p->mutex); - int r = p->f(p->arg); - assert(r==0); - toku_mutex_lock(&p->mutex); - } - } -} - -int -toku_minicron_setup(struct minicron *p, uint32_t period_in_ms, int(*f)(void *), void *arg) -{ - p->f = f; - p->arg = arg; - toku_gettime(&p->time_of_last_call_to_f); - //printf("now=%.6f", p->time_of_last_call_to_f.tv_sec + p->time_of_last_call_to_f.tv_nsec*1e-9); - p->period_in_ms = period_in_ms; - p->do_shutdown = false; - toku_mutex_init(&p->mutex, 0); - toku_cond_init (&p->condvar, 0); - return toku_pthread_create(&p->thread, 0, minicron_do, p); -} - -void -toku_minicron_change_period(struct minicron *p, uint32_t new_period) -{ - toku_mutex_lock(&p->mutex); - p->period_in_ms = new_period; - toku_cond_signal(&p->condvar); - toku_mutex_unlock(&p->mutex); -} - -/* unlocked function for use by engine status which takes no locks */ -uint32_t -toku_minicron_get_period_in_seconds_unlocked(struct minicron *p) -{ - uint32_t retval = p->period_in_ms/1000; - return retval; -} - -/* unlocked function for use by engine status which takes no locks */ -uint32_t -toku_minicron_get_period_in_ms_unlocked(struct minicron *p) -{ - uint32_t retval = p->period_in_ms; - return retval; -} - -int -toku_minicron_shutdown(struct minicron *p) { - toku_mutex_lock(&p->mutex); - assert(!p->do_shutdown); - p->do_shutdown = true; - //printf("%s:%d signalling\n", __FILE__, __LINE__); - toku_cond_signal(&p->condvar); - toku_mutex_unlock(&p->mutex); - void *returned_value; - //printf("%s:%d joining\n", __FILE__, __LINE__); - int r = toku_pthread_join(p->thread, &returned_value); - if (r!=0) fprintf(stderr, "%s:%d r=%d (%s)\n", __FILE__, __LINE__, r, strerror(r)); - assert(r==0); assert(returned_value==0); - toku_cond_destroy(&p->condvar); - toku_mutex_destroy(&p->mutex); - //printf("%s:%d shutdowned\n", __FILE__, __LINE__); - return 
0; -} - -bool -toku_minicron_has_been_shutdown(struct minicron *p) { - return p->do_shutdown; -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/minicron.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/minicron.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/minicron.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/minicron.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,132 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "$Id$" - -#ifndef TOKU_MINICRON_H -#define TOKU_MINICRON_H - -#include -#include -#include "fttypes.h" - - -// Specification: -// A minicron is a miniature cron job for executing a job periodically inside a pthread. -// To create a minicron, -// 1) allocate a "struct minicron" somewhere. -// Rationale: This struct can be stored inside another struct (such as the cachetable), avoiding a malloc/free pair. -// 2) call toku_minicron_setup, specifying a period (in milliseconds), a function, and some arguments. -// If the period is positive then the function is called periodically (with the period specified) -// Note: The period is measured from when the previous call to f finishes to when the new call starts. -// Thus, if the period is 5 minutes, and it takes 8 minutes to run f, then the actual periodicity is 13 minutes. -// Rationale: If f always takes longer than f to run, then it will get "behind". This module makes getting behind explicit. -// 3) When finished, call toku_minicron_shutdown. -// 4) If you want to change the period, then call toku_minicron_change_period. The time since f finished is applied to the new period -// and the call is rescheduled. (If the time since f finished is more than the new period, then f is called immediately). 
- -struct minicron { - toku_pthread_t thread; - toku_timespec_t time_of_last_call_to_f; - toku_mutex_t mutex; - toku_cond_t condvar; - int (*f)(void*); - void *arg; - uint32_t period_in_ms; - bool do_shutdown; -}; - -int toku_minicron_setup (struct minicron *s, uint32_t period_in_ms, int(*f)(void *), void *arg); -void toku_minicron_change_period(struct minicron *p, uint32_t new_period); -uint32_t toku_minicron_get_period_in_seconds_unlocked(struct minicron *p); -uint32_t toku_minicron_get_period_in_ms_unlocked(struct minicron *p); -int toku_minicron_shutdown(struct minicron *p); -bool toku_minicron_has_been_shutdown(struct minicron *p); - - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/msg_buffer.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/msg_buffer.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/msg_buffer.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/msg_buffer.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,318 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +#include "ft/msg_buffer.h" +#include "util/dbt.h" + +void message_buffer::create() { + _num_entries = 0; + _memory = nullptr; + _memory_size = 0; + _memory_used = 0; +} + +void message_buffer::clone(message_buffer *src) { + _num_entries = src->_num_entries; + _memory_used = src->_memory_used; + _memory_size = src->_memory_size; + XMALLOC_N(_memory_size, _memory); + memcpy(_memory, src->_memory, _memory_size); +} + +void message_buffer::destroy() { + if (_memory != nullptr) { + toku_free(_memory); + } +} + +void message_buffer::deserialize_from_rbuf(struct rbuf *rb, + int32_t **fresh_offsets, int32_t *nfresh, + int32_t **stale_offsets, int32_t *nstale, + int32_t **broadcast_offsets, int32_t *nbroadcast) { + // read the number of messages in this buffer + int n_in_this_buffer = rbuf_int(rb); + if (fresh_offsets != nullptr) { + XMALLOC_N(n_in_this_buffer, *fresh_offsets); + } + if (stale_offsets != nullptr) { + XMALLOC_N(n_in_this_buffer, *stale_offsets); + } + if (broadcast_offsets != nullptr) { + XMALLOC_N(n_in_this_buffer, *broadcast_offsets); + } + + _resize(rb->size + 64); // rb->size is a good hint for how big the buffer will be + + // deserialize each message individually, noting whether it was fresh + // and putting its buffer offset in the appropriate offsets array + for (int i = 0; i < n_in_this_buffer; i++) { + XIDS xids; + bool is_fresh; + const ft_msg msg = ft_msg::deserialize_from_rbuf(rb, &xids, &is_fresh); + + int32_t *dest; + if (ft_msg_type_applies_once(msg.type())) { + if (is_fresh) { + dest = fresh_offsets ? *fresh_offsets + (*nfresh)++ : nullptr; + } else { + dest = stale_offsets ? *stale_offsets + (*nstale)++ : nullptr; + } + } else { + invariant(ft_msg_type_applies_all(msg.type()) || ft_msg_type_does_nothing(msg.type())); + dest = broadcast_offsets ? 
*broadcast_offsets + (*nbroadcast)++ : nullptr; + } + + enqueue(msg, is_fresh, dest); + toku_xids_destroy(&xids); + } + + invariant(_num_entries == n_in_this_buffer); +} + +MSN message_buffer::deserialize_from_rbuf_v13(struct rbuf *rb, + MSN *highest_unused_msn_for_upgrade, + int32_t **fresh_offsets, int32_t *nfresh, + int32_t **broadcast_offsets, int32_t *nbroadcast) { + // read the number of messages in this buffer + int n_in_this_buffer = rbuf_int(rb); + if (fresh_offsets != nullptr) { + XMALLOC_N(n_in_this_buffer, *fresh_offsets); + } + if (broadcast_offsets != nullptr) { + XMALLOC_N(n_in_this_buffer, *broadcast_offsets); + } + + // Atomically decrement the header's MSN count by the number + // of messages in the buffer. + MSN highest_msn_in_this_buffer = { + .msn = toku_sync_sub_and_fetch(&highest_unused_msn_for_upgrade->msn, n_in_this_buffer) + }; + + // Create the message buffers from the deserialized buffer. + for (int i = 0; i < n_in_this_buffer; i++) { + XIDS xids; + // There were no stale messages at this version, so call it fresh. + const bool is_fresh = true; + + // Increment our MSN, the last message should have the + // newest/highest MSN. See above for a full explanation. + highest_msn_in_this_buffer.msn++; + const ft_msg msg = ft_msg::deserialize_from_rbuf_v13(rb, highest_msn_in_this_buffer, &xids); + + int32_t *dest; + if (ft_msg_type_applies_once(msg.type())) { + dest = fresh_offsets ? *fresh_offsets + (*nfresh)++ : nullptr; + } else { + invariant(ft_msg_type_applies_all(msg.type()) || ft_msg_type_does_nothing(msg.type())); + dest = broadcast_offsets ? 
*broadcast_offsets + (*nbroadcast)++ : nullptr; + } + + enqueue(msg, is_fresh, dest); + toku_xids_destroy(&xids); + } + + return highest_msn_in_this_buffer; +} + +void message_buffer::_resize(size_t new_size) { + XREALLOC_N(new_size, _memory); + _memory_size = new_size; +} + +static int next_power_of_two (int n) { + int r = 4096; + while (r < n) { + r*=2; + assert(r>0); + } + return r; +} + +struct message_buffer::buffer_entry *message_buffer::get_buffer_entry(int32_t offset) const { + return (struct buffer_entry *) (_memory + offset); +} + +void message_buffer::enqueue(const ft_msg &msg, bool is_fresh, int32_t *offset) { + int need_space_here = msg_memsize_in_buffer(msg); + int need_space_total = _memory_used + need_space_here; + if (_memory == nullptr || need_space_total > _memory_size) { + // resize the buffer to the next power of 2 greater than the needed space + int next_2 = next_power_of_two(need_space_total); + _resize(next_2); + } + uint32_t keylen = msg.kdbt()->size; + uint32_t datalen = msg.vdbt()->size; + struct buffer_entry *entry = get_buffer_entry(_memory_used); + entry->type = (unsigned char) msg.type(); + entry->msn = msg.msn(); + toku_xids_cpy(&entry->xids_s, msg.xids()); + entry->is_fresh = is_fresh; + unsigned char *e_key = toku_xids_get_end_of_array(&entry->xids_s); + entry->keylen = keylen; + memcpy(e_key, msg.kdbt()->data, keylen); + entry->vallen = datalen; + memcpy(e_key + keylen, msg.vdbt()->data, datalen); + if (offset) { + *offset = _memory_used; + } + _num_entries++; + _memory_used += need_space_here; +} + +void message_buffer::set_freshness(int32_t offset, bool is_fresh) { + struct buffer_entry *entry = get_buffer_entry(offset); + entry->is_fresh = is_fresh; +} + +bool message_buffer::get_freshness(int32_t offset) const { + struct buffer_entry *entry = get_buffer_entry(offset); + return entry->is_fresh; +} + +ft_msg message_buffer::get_message(int32_t offset, DBT *keydbt, DBT *valdbt) const { + struct buffer_entry *entry = 
get_buffer_entry(offset); + uint32_t keylen = entry->keylen; + uint32_t vallen = entry->vallen; + enum ft_msg_type type = (enum ft_msg_type) entry->type; + MSN msn = entry->msn; + const XIDS xids = (XIDS) &entry->xids_s; + const void *key = toku_xids_get_end_of_array(xids); + const void *val = (uint8_t *) key + entry->keylen; + return ft_msg(toku_fill_dbt(keydbt, key, keylen), toku_fill_dbt(valdbt, val, vallen), type, msn, xids); +} + +void message_buffer::get_message_key_msn(int32_t offset, DBT *key, MSN *msn) const { + struct buffer_entry *entry = get_buffer_entry(offset); + if (key != nullptr) { + toku_fill_dbt(key, toku_xids_get_end_of_array((XIDS) &entry->xids_s), entry->keylen); + } + if (msn != nullptr) { + *msn = entry->msn; + } +} + +int message_buffer::num_entries() const { + return _num_entries; +} + +size_t message_buffer::buffer_size_in_use() const { + return _memory_used; +} + +size_t message_buffer::memory_size_in_use() const { + return sizeof(*this) + _memory_used; +} + +size_t message_buffer::memory_footprint() const { + return sizeof(*this) + toku_memory_footprint(_memory, _memory_used); +} + +bool message_buffer::equals(message_buffer *other) const { + return (_memory_used == other->_memory_used && + memcmp(_memory, other->_memory, _memory_used) == 0); +} + +void message_buffer::serialize_to_wbuf(struct wbuf *wb) const { + wbuf_nocrc_int(wb, _num_entries); + struct msg_serialize_fn { + struct wbuf *wb; + msg_serialize_fn(struct wbuf *w) : wb(w) { } + int operator()(const ft_msg &msg, bool is_fresh) { + msg.serialize_to_wbuf(wb, is_fresh); + return 0; + } + } serialize_fn(wb); + iterate(serialize_fn); +} + +size_t message_buffer::msg_memsize_in_buffer(const ft_msg &msg) { + const uint32_t keylen = msg.kdbt()->size; + const uint32_t datalen = msg.vdbt()->size; + const size_t xidslen = toku_xids_get_size(msg.xids()); + return sizeof(struct buffer_entry) + keylen + datalen + xidslen - sizeof(XIDS_S); +} diff -Nru 
mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/msg_buffer.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/msg_buffer.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/msg_buffer.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/msg_buffer.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,181 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#include "ft/msg.h" +#include "ft/txn/xids.h" +#include "util/dbt.h" + +class message_buffer { +public: + void create(); + + void clone(message_buffer *dst); + + void destroy(); + + // effect: deserializes a message buffer from the given rbuf + // returns: *fresh_offsets (etc) malloc'd to be num_entries large and + // populated with *nfresh (etc) offsets in the message buffer + // requires: if fresh_offsets (etc) != nullptr, then nfresh != nullptr + void deserialize_from_rbuf(struct rbuf *rb, + int32_t **fresh_offsets, int32_t *nfresh, + int32_t **stale_offsets, int32_t *nstale, + int32_t **broadcast_offsets, int32_t *nbroadcast); + + // effect: deserializes a message buffer whose messages are at version 13/14 + // returns: similar to deserialize_from_rbuf(), excpet there are no stale messages + // and each message is assigned a sequential value from *highest_unused_msn_for_upgrade, + // which is modified as needed using toku_sync_fech_and_sub() + // returns: the highest MSN assigned to any message in this buffer + // requires: similar to deserialize_from_rbuf(), and highest_unused_msn_for_upgrade != nullptr + MSN deserialize_from_rbuf_v13(struct rbuf *rb, + MSN *highest_unused_msn_for_upgrade, + int32_t **fresh_offsets, int32_t *nfresh, + int32_t **broadcast_offsets, int32_t *nbroadcast); + + void enqueue(const ft_msg &msg, bool is_fresh, 
int32_t *offset); + + void set_freshness(int32_t offset, bool is_fresh); + + bool get_freshness(int32_t offset) const; + + ft_msg get_message(int32_t offset, DBT *keydbt, DBT *valdbt) const; + + void get_message_key_msn(int32_t offset, DBT *key, MSN *msn) const; + + int num_entries() const; + + size_t buffer_size_in_use() const; + + size_t memory_size_in_use() const; + + size_t memory_footprint() const; + + template + int iterate(F &fn) const { + for (int32_t offset = 0; offset < _memory_used; ) { + DBT k, v; + const ft_msg msg = get_message(offset, &k, &v); + bool is_fresh = get_freshness(offset); + int r = fn(msg, is_fresh); + if (r != 0) { + return r; + } + offset += msg_memsize_in_buffer(msg); + } + return 0; + } + + bool equals(message_buffer *other) const; + + void serialize_to_wbuf(struct wbuf *wb) const; + + static size_t msg_memsize_in_buffer(const ft_msg &msg); + +private: + void _resize(size_t new_size); + + // If this isn't packged, the compiler aligns the xids array and we waste a lot of space + struct __attribute__((__packed__)) buffer_entry { + unsigned int keylen; + unsigned int vallen; + unsigned char type; + bool is_fresh; + MSN msn; + XIDS_S xids_s; + }; + + struct buffer_entry *get_buffer_entry(int32_t offset) const; + + int _num_entries; + char *_memory; // An array of bytes into which buffer entries are embedded. + int _memory_size; // How big is _memory + int _memory_used; // How many bytes are in use? 
+}; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/msg.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/msg.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/msg.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/msg.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,171 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." + +#include "portability/toku_portability.h" + +#include "ft/msg.h" +#include "ft/txn/xids.h" +#include "util/dbt.h" + +ft_msg::ft_msg(const DBT *key, const DBT *val, enum ft_msg_type t, MSN m, XIDS x) : + _key(key ? *key : toku_empty_dbt()), + _val(val ? *val : toku_empty_dbt()), + _type(t), _msn(m), _xids(x) { +} + +ft_msg ft_msg::deserialize_from_rbuf(struct rbuf *rb, XIDS *x, bool *is_fresh) { + const void *keyp, *valp; + uint32_t keylen, vallen; + enum ft_msg_type t = (enum ft_msg_type) rbuf_char(rb); + *is_fresh = rbuf_char(rb); + MSN m = rbuf_MSN(rb); + toku_xids_create_from_buffer(rb, x); + rbuf_bytes(rb, &keyp, &keylen); + rbuf_bytes(rb, &valp, &vallen); + + DBT k, v; + return ft_msg(toku_fill_dbt(&k, keyp, keylen), toku_fill_dbt(&v, valp, vallen), t, m, *x); +} + +ft_msg ft_msg::deserialize_from_rbuf_v13(struct rbuf *rb, MSN m, XIDS *x) { + const void *keyp, *valp; + uint32_t keylen, vallen; + enum ft_msg_type t = (enum ft_msg_type) rbuf_char(rb); + toku_xids_create_from_buffer(rb, x); + rbuf_bytes(rb, &keyp, &keylen); + rbuf_bytes(rb, &valp, &vallen); + + DBT k, v; + return ft_msg(toku_fill_dbt(&k, keyp, keylen), toku_fill_dbt(&v, valp, vallen), t, m, *x); +} + +const DBT *ft_msg::kdbt() const { + return &_key; +} + +const DBT *ft_msg::vdbt() const { + return &_val; +} + +enum 
ft_msg_type ft_msg::type() const { + return _type; +} + +MSN ft_msg::msn() const { + return _msn; +} + +XIDS ft_msg::xids() const { + return _xids; +} + +size_t ft_msg::total_size() const { + // Must store two 4-byte lengths + static const size_t key_val_overhead = 8; + + // 1 byte type, 1 byte freshness, then 8 byte MSN + static const size_t msg_overhead = 2 + sizeof(MSN); + + static const size_t total_overhead = key_val_overhead + msg_overhead; + + const size_t keyval_size = _key.size + _val.size; + const size_t xids_size = toku_xids_get_serialize_size(xids()); + return total_overhead + keyval_size + xids_size; +} + +void ft_msg::serialize_to_wbuf(struct wbuf *wb, bool is_fresh) const { + wbuf_nocrc_char(wb, (unsigned char) _type); + wbuf_nocrc_char(wb, (unsigned char) is_fresh); + wbuf_MSN(wb, _msn); + wbuf_nocrc_xids(wb, _xids); + wbuf_nocrc_bytes(wb, _key.data, _key.size); + wbuf_nocrc_bytes(wb, _val.data, _val.size); +} + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/msg.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/msg.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/msg.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/msg.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,246 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* The purpose of this file is to provide access to the ft_msg, + * which is the ephemeral version of the messages that lives in + * a message buffer. 
+ */ + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. 
+ +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#include + +#include "portability/toku_assert.h" +#include "portability/toku_stdint.h" + +#include "ft/txn/xids.h" + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
+#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +// Message Sequence Number (MSN) +typedef struct __toku_msn { uint64_t msn; } MSN; + +// dummy used for message construction, to be filled in when msg is applied to tree +static const MSN ZERO_MSN = { .msn = 0 }; + +// first 2^62 values reserved for messages created before Dr. No (for upgrade) +static const MSN MIN_MSN = { .msn = 1ULL << 62 }; +static const MSN MAX_MSN = { .msn = UINT64_MAX }; + +/* tree command types */ +enum ft_msg_type { + FT_NONE = 0, + FT_INSERT = 1, + FT_DELETE_ANY = 2, // Delete any matching key. This used to be called FT_DELETE. + //FT_DELETE_BOTH = 3, + FT_ABORT_ANY = 4, // Abort any commands on any matching key. + //FT_ABORT_BOTH = 5, // Abort commands that match both the key and the value + FT_COMMIT_ANY = 6, + //FT_COMMIT_BOTH = 7, + FT_COMMIT_BROADCAST_ALL = 8, // Broadcast to all leafentries, (commit all transactions). + FT_COMMIT_BROADCAST_TXN = 9, // Broadcast to all leafentries, (commit specific transaction). + FT_ABORT_BROADCAST_TXN = 10, // Broadcast to all leafentries, (commit specific transaction). 
+ FT_INSERT_NO_OVERWRITE = 11, + FT_OPTIMIZE = 12, // Broadcast + FT_OPTIMIZE_FOR_UPGRADE = 13, // same as FT_OPTIMIZE, but record version number in leafnode + FT_UPDATE = 14, + FT_UPDATE_BROADCAST_ALL = 15 +}; + +static inline bool +ft_msg_type_applies_once(enum ft_msg_type type) +{ + bool ret_val; + switch (type) { + case FT_INSERT_NO_OVERWRITE: + case FT_INSERT: + case FT_DELETE_ANY: + case FT_ABORT_ANY: + case FT_COMMIT_ANY: + case FT_UPDATE: + ret_val = true; + break; + case FT_COMMIT_BROADCAST_ALL: + case FT_COMMIT_BROADCAST_TXN: + case FT_ABORT_BROADCAST_TXN: + case FT_OPTIMIZE: + case FT_OPTIMIZE_FOR_UPGRADE: + case FT_UPDATE_BROADCAST_ALL: + case FT_NONE: + ret_val = false; + break; + default: + assert(false); + } + return ret_val; +} + +static inline bool +ft_msg_type_applies_all(enum ft_msg_type type) +{ + bool ret_val; + switch (type) { + case FT_NONE: + case FT_INSERT_NO_OVERWRITE: + case FT_INSERT: + case FT_DELETE_ANY: + case FT_ABORT_ANY: + case FT_COMMIT_ANY: + case FT_UPDATE: + ret_val = false; + break; + case FT_COMMIT_BROADCAST_ALL: + case FT_COMMIT_BROADCAST_TXN: + case FT_ABORT_BROADCAST_TXN: + case FT_OPTIMIZE: + case FT_OPTIMIZE_FOR_UPGRADE: + case FT_UPDATE_BROADCAST_ALL: + ret_val = true; + break; + default: + assert(false); + } + return ret_val; +} + +static inline bool +ft_msg_type_does_nothing(enum ft_msg_type type) +{ + return (type == FT_NONE); +} + +class ft_msg { +public: + ft_msg(const DBT *key, const DBT *val, enum ft_msg_type t, MSN m, XIDS x); + + enum ft_msg_type type() const; + + MSN msn() const; + + XIDS xids() const; + + const DBT *kdbt() const; + + const DBT *vdbt() const; + + size_t total_size() const; + + void serialize_to_wbuf(struct wbuf *wb, bool is_fresh) const; + + // deserialization goes through a static factory function so the ft msg + // API stays completely const and there's no default constructor + static ft_msg deserialize_from_rbuf(struct rbuf *rb, XIDS *xids, bool *is_fresh); + + // Version 13/14 messages did 
not have an msn - so `m' is the MSN + // that will be assigned to the message that gets deserialized. + static ft_msg deserialize_from_rbuf_v13(struct rbuf *rb, MSN m, XIDS *xids); + +private: + const DBT _key; + const DBT _val; + enum ft_msg_type _type; + MSN _msn; + XIDS _xids; +}; + +// For serialize / deserialize + +#include "ft/serialize/wbuf.h" + +static inline void wbuf_MSN(struct wbuf *wb, MSN msn) { + wbuf_ulonglong(wb, msn.msn); +} + +#include "ft/serialize/rbuf.h" + +static inline MSN rbuf_MSN(struct rbuf *rb) { + MSN msn = { .msn = rbuf_ulonglong(rb) }; + return msn; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/node.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/node.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/node.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/node.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,1982 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include + +#include "ft/ft.h" +#include "ft/ft-internal.h" +#include "ft/serialize/ft_node-serialize.h" +#include "ft/node.h" +#include "ft/serialize/rbuf.h" +#include "ft/serialize/wbuf.h" +#include "util/scoped_malloc.h" +#include "util/sort.h" + +// Effect: Fill in N as an empty ftnode. 
+// TODO: Rename toku_ftnode_create +void toku_initialize_empty_ftnode(FTNODE n, BLOCKNUM blocknum, int height, int num_children, int layout_version, unsigned int flags) { + paranoid_invariant(layout_version != 0); + paranoid_invariant(height >= 0); + + n->max_msn_applied_to_node_on_disk = ZERO_MSN; // correct value for root node, harmless for others + n->flags = flags; + n->blocknum = blocknum; + n->layout_version = layout_version; + n->layout_version_original = layout_version; + n->layout_version_read_from_disk = layout_version; + n->height = height; + n->pivotkeys.create_empty(); + n->bp = 0; + n->n_children = num_children; + n->oldest_referenced_xid_known = TXNID_NONE; + + if (num_children > 0) { + XMALLOC_N(num_children, n->bp); + for (int i = 0; i < num_children; i++) { + BP_BLOCKNUM(n,i).b=0; + BP_STATE(n,i) = PT_INVALID; + BP_WORKDONE(n,i) = 0; + BP_INIT_TOUCHED_CLOCK(n, i); + set_BNULL(n,i); + if (height > 0) { + set_BNC(n, i, toku_create_empty_nl()); + } else { + set_BLB(n, i, toku_create_empty_bn()); + } + } + } + n->dirty = 1; // special case exception, it's okay to mark as dirty because the basements are empty + + toku_ft_status_note_ftnode(height, true); +} + +// destroys the internals of the ftnode, but it does not free the values +// that are stored +// this is common functionality for toku_ftnode_free and rebalance_ftnode_leaf +// MUST NOT do anything besides free the structures that have been allocated +void toku_destroy_ftnode_internals(FTNODE node) { + node->pivotkeys.destroy(); + for (int i = 0; i < node->n_children; i++) { + if (BP_STATE(node,i) == PT_AVAIL) { + if (node->height > 0) { + destroy_nonleaf_childinfo(BNC(node,i)); + } else { + destroy_basement_node(BLB(node, i)); + } + } else if (BP_STATE(node,i) == PT_COMPRESSED) { + SUB_BLOCK sb = BSB(node,i); + toku_free(sb->compressed_ptr); + toku_free(sb); + } else { + paranoid_invariant(is_BNULL(node, i)); + } + set_BNULL(node, i); + } + toku_free(node->bp); + node->bp = NULL; +} + +/* Frees 
a node, including all the stuff in the hash table. */ +void toku_ftnode_free(FTNODE *nodep) { + FTNODE node = *nodep; + toku_ft_status_note_ftnode(node->height, false); + toku_destroy_ftnode_internals(node); + toku_free(node); + *nodep = nullptr; +} + +void toku_ftnode_update_disk_stats(FTNODE ftnode, FT ft, bool for_checkpoint) { + STAT64INFO_S deltas = ZEROSTATS; + // capture deltas before rebalancing basements for serialization + deltas = toku_get_and_clear_basement_stats(ftnode); + // locking not necessary here with respect to checkpointing + // in Clayface (because of the pending lock and cachetable lock + // in toku_cachetable_begin_checkpoint) + // essentially, if we are dealing with a for_checkpoint + // parameter in a function that is called by the flush_callback, + // then the cachetable needs to ensure that this is called in a safe + // manner that does not interfere with the beginning + // of a checkpoint, which it does with the cachetable lock + // and pending lock + toku_ft_update_stats(&ft->h->on_disk_stats, deltas); + if (for_checkpoint) { + toku_ft_update_stats(&ft->checkpoint_header->on_disk_stats, deltas); + } +} + +void toku_ftnode_clone_partitions(FTNODE node, FTNODE cloned_node) { + for (int i = 0; i < node->n_children; i++) { + BP_BLOCKNUM(cloned_node,i) = BP_BLOCKNUM(node,i); + paranoid_invariant(BP_STATE(node,i) == PT_AVAIL); + BP_STATE(cloned_node,i) = PT_AVAIL; + BP_WORKDONE(cloned_node, i) = BP_WORKDONE(node, i); + if (node->height == 0) { + set_BLB(cloned_node, i, toku_clone_bn(BLB(node,i))); + } else { + set_BNC(cloned_node, i, toku_clone_nl(BNC(node,i))); + } + } +} + +void toku_evict_bn_from_memory(FTNODE node, int childnum, FT ft) { + // free the basement node + assert(!node->dirty); + BASEMENTNODE bn = BLB(node, childnum); + toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta); + destroy_basement_node(bn); + set_BNULL(node, childnum); + BP_STATE(node, childnum) = PT_ON_DISK; +} + +BASEMENTNODE toku_detach_bn(FTNODE node, 
int childnum) { + assert(BP_STATE(node, childnum) == PT_AVAIL); + BASEMENTNODE bn = BLB(node, childnum); + set_BNULL(node, childnum); + BP_STATE(node, childnum) = PT_ON_DISK; + return bn; +} + +// +// Orthopush +// + +struct store_msg_buffer_offset_extra { + int32_t *offsets; + int i; +}; + +int store_msg_buffer_offset(const int32_t &offset, const uint32_t UU(idx), struct store_msg_buffer_offset_extra *const extra) __attribute__((nonnull(3))); +int store_msg_buffer_offset(const int32_t &offset, const uint32_t UU(idx), struct store_msg_buffer_offset_extra *const extra) +{ + extra->offsets[extra->i] = offset; + extra->i++; + return 0; +} + +/** + * Given pointers to offsets within a message buffer where we can find messages, + * figure out the MSN of each message, and compare those MSNs. Returns 1, + * 0, or -1 if a is larger than, equal to, or smaller than b. + */ +int msg_buffer_offset_msn_cmp(message_buffer &msg_buffer, const int32_t &ao, const int32_t &bo); +int msg_buffer_offset_msn_cmp(message_buffer &msg_buffer, const int32_t &ao, const int32_t &bo) +{ + MSN amsn, bmsn; + msg_buffer.get_message_key_msn(ao, nullptr, &amsn); + msg_buffer.get_message_key_msn(bo, nullptr, &bmsn); + if (amsn.msn > bmsn.msn) { + return +1; + } + if (amsn.msn < bmsn.msn) { + return -1; + } + return 0; +} + +/** + * Given a message buffer and and offset, apply the message with toku_ft_bn_apply_msg, or discard it, + * based on its MSN and the MSN of the basement node. + */ +static void +do_bn_apply_msg(FT_HANDLE ft_handle, BASEMENTNODE bn, message_buffer *msg_buffer, int32_t offset, + txn_gc_info *gc_info, uint64_t *workdone, STAT64INFO stats_to_update) { + DBT k, v; + ft_msg msg = msg_buffer->get_message(offset, &k, &v); + + // The messages are being iterated over in (key,msn) order or just in + // msn order, so all the messages for one key, from one buffer, are in + // ascending msn order. So it's ok that we don't update the basement + // node's msn until the end. 
+ if (msg.msn().msn > bn->max_msn_applied.msn) { + toku_ft_bn_apply_msg( + ft_handle->ft->cmp, + ft_handle->ft->update_fun, + bn, + msg, + gc_info, + workdone, + stats_to_update + ); + } else { + toku_ft_status_note_msn_discard(); + } + + // We must always mark message as stale since it has been marked + // (using omt::iterate_and_mark_range) + // It is possible to call do_bn_apply_msg even when it won't apply the message because + // the node containing it could have been evicted and brought back in. + msg_buffer->set_freshness(offset, false); +} + + +struct iterate_do_bn_apply_msg_extra { + FT_HANDLE t; + BASEMENTNODE bn; + NONLEAF_CHILDINFO bnc; + txn_gc_info *gc_info; + uint64_t *workdone; + STAT64INFO stats_to_update; +}; + +int iterate_do_bn_apply_msg(const int32_t &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_msg_extra *const e) __attribute__((nonnull(3))); +int iterate_do_bn_apply_msg(const int32_t &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_msg_extra *const e) +{ + do_bn_apply_msg(e->t, e->bn, &e->bnc->msg_buffer, offset, e->gc_info, e->workdone, e->stats_to_update); + return 0; +} + +/** + * Given the bounds of the basement node to which we will apply messages, + * find the indexes within message_tree which contain the range of + * relevant messages. + * + * The message tree contains offsets into the buffer, where messages are + * found. The pivot_bounds are the lower bound exclusive and upper bound + * inclusive, because they come from pivot keys in the tree. We want OMT + * indices, which must have the lower bound be inclusive and the upper + * bound exclusive. We will get these by telling omt::find to look + * for something strictly bigger than each of our pivot bounds. + * + * Outputs the OMT indices in lbi (lower bound inclusive) and ube (upper + * bound exclusive). 
+ */ +template +static void +find_bounds_within_message_tree( + const toku::comparator &cmp, + const find_bounds_omt_t &message_tree, /// tree holding message buffer offsets, in which we want to look for indices + message_buffer *msg_buffer, /// message buffer in which messages are found + const pivot_bounds &bounds, /// key bounds within the basement node we're applying messages to + uint32_t *lbi, /// (output) "lower bound inclusive" (index into message_tree) + uint32_t *ube /// (output) "upper bound exclusive" (index into message_tree) + ) +{ + int r = 0; + + if (!toku_dbt_is_empty(bounds.lbe())) { + // By setting msn to MAX_MSN and by using direction of +1, we will + // get the first message greater than (in (key, msn) order) any + // message (with any msn) with the key lower_bound_exclusive. + // This will be a message we want to try applying, so it is the + // "lower bound inclusive" within the message_tree. + struct toku_msg_buffer_key_msn_heaviside_extra lbi_extra(cmp, msg_buffer, bounds.lbe(), MAX_MSN); + int32_t found_lb; + r = message_tree.template find(lbi_extra, +1, &found_lb, lbi); + if (r == DB_NOTFOUND) { + // There is no relevant data (the lower bound is bigger than + // any message in this tree), so we have no range and we're + // done. + *lbi = 0; + *ube = 0; + return; + } + if (!toku_dbt_is_empty(bounds.ubi())) { + // Check if what we found for lbi is greater than the upper + // bound inclusive that we have. If so, there are no relevant + // messages between these bounds. + const DBT *ubi = bounds.ubi(); + const int32_t offset = found_lb; + DBT found_lbidbt; + msg_buffer->get_message_key_msn(offset, &found_lbidbt, nullptr); + int c = cmp(&found_lbidbt, ubi); + // These DBTs really are both inclusive bounds, so we need + // strict inequality in order to determine that there's + // nothing between them. If they're equal, then we actually + // need to apply the message pointed to by lbi, and also + // anything with the same key but a bigger msn. 
+ if (c > 0) { + *lbi = 0; + *ube = 0; + return; + } + } + } else { + // No lower bound given, it's negative infinity, so we start at + // the first message in the OMT. + *lbi = 0; + } + if (!toku_dbt_is_empty(bounds.ubi())) { + // Again, we use an msn of MAX_MSN and a direction of +1 to get + // the first thing bigger than the upper_bound_inclusive key. + // This is therefore the smallest thing we don't want to apply, + // and omt::iterate_on_range will not examine it. + struct toku_msg_buffer_key_msn_heaviside_extra ube_extra(cmp, msg_buffer, bounds.ubi(), MAX_MSN); + r = message_tree.template find(ube_extra, +1, nullptr, ube); + if (r == DB_NOTFOUND) { + // Couldn't find anything in the buffer bigger than our key, + // so we need to look at everything up to the end of + // message_tree. + *ube = message_tree.size(); + } + } else { + // No upper bound given, it's positive infinity, so we need to go + // through the end of the OMT. + *ube = message_tree.size(); + } +} + +/** + * For each message in the ancestor's buffer (determined by childnum) that + * is key-wise between lower_bound_exclusive and upper_bound_inclusive, + * apply the message to the basement node. We treat the bounds as minus + * or plus infinity respectively if they are NULL. Do not mark the node + * as dirty (preserve previous state of 'dirty' bit). 
+ */ +static void +bnc_apply_messages_to_basement_node( + FT_HANDLE t, // used for comparison function + BASEMENTNODE bn, // where to apply messages + FTNODE ancestor, // the ancestor node where we can find messages to apply + int childnum, // which child buffer of ancestor contains messages we want + const pivot_bounds &bounds, // contains pivot key bounds of this basement node + txn_gc_info *gc_info, + bool* msgs_applied + ) +{ + int r; + NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum); + + // Determine the offsets in the message trees between which we need to + // apply messages from this buffer + STAT64INFO_S stats_delta = {0,0}; + uint64_t workdone_this_ancestor = 0; + + uint32_t stale_lbi, stale_ube; + if (!bn->stale_ancestor_messages_applied) { + find_bounds_within_message_tree(t->ft->cmp, bnc->stale_message_tree, &bnc->msg_buffer, bounds, &stale_lbi, &stale_ube); + } else { + stale_lbi = 0; + stale_ube = 0; + } + uint32_t fresh_lbi, fresh_ube; + find_bounds_within_message_tree(t->ft->cmp, bnc->fresh_message_tree, &bnc->msg_buffer, bounds, &fresh_lbi, &fresh_ube); + + // We now know where all the messages we must apply are, so one of the + // following 4 cases will do the application, depending on which of + // the lists contains relevant messages: + // + // 1. broadcast messages and anything else, or a mix of fresh and stale + // 2. only fresh messages + // 3. only stale messages + if (bnc->broadcast_list.size() > 0 || + (stale_lbi != stale_ube && fresh_lbi != fresh_ube)) { + // We have messages in multiple trees, so we grab all + // the relevant messages' offsets and sort them by MSN, then apply + // them in MSN order. 
+ const int buffer_size = ((stale_ube - stale_lbi) + (fresh_ube - fresh_lbi) + bnc->broadcast_list.size()); + toku::scoped_malloc offsets_buf(buffer_size * sizeof(int32_t)); + int32_t *offsets = reinterpret_cast(offsets_buf.get()); + struct store_msg_buffer_offset_extra sfo_extra = { .offsets = offsets, .i = 0 }; + + // Populate offsets array with offsets to stale messages + r = bnc->stale_message_tree.iterate_on_range(stale_lbi, stale_ube, &sfo_extra); + assert_zero(r); + + // Then store fresh offsets, and mark them to be moved to stale later. + r = bnc->fresh_message_tree.iterate_and_mark_range(fresh_lbi, fresh_ube, &sfo_extra); + assert_zero(r); + + // Store offsets of all broadcast messages. + r = bnc->broadcast_list.iterate(&sfo_extra); + assert_zero(r); + invariant(sfo_extra.i == buffer_size); + + // Sort by MSN. + toku::sort::mergesort_r(offsets, buffer_size, bnc->msg_buffer); + + // Apply the messages in MSN order. + for (int i = 0; i < buffer_size; ++i) { + *msgs_applied = true; + do_bn_apply_msg(t, bn, &bnc->msg_buffer, offsets[i], gc_info, &workdone_this_ancestor, &stats_delta); + } + } else if (stale_lbi == stale_ube) { + // No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later. + struct iterate_do_bn_apply_msg_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta }; + if (fresh_ube - fresh_lbi > 0) *msgs_applied = true; + r = bnc->fresh_message_tree.iterate_and_mark_range(fresh_lbi, fresh_ube, &iter_extra); + assert_zero(r); + } else { + invariant(fresh_lbi == fresh_ube); + // No fresh messages to apply, we just apply stale messages. 
+ + if (stale_ube - stale_lbi > 0) *msgs_applied = true; + struct iterate_do_bn_apply_msg_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta }; + + r = bnc->stale_message_tree.iterate_on_range(stale_lbi, stale_ube, &iter_extra); + assert_zero(r); + } + // + // update stats + // + if (workdone_this_ancestor > 0) { + (void) toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum), workdone_this_ancestor); + } + if (stats_delta.numbytes || stats_delta.numrows) { + toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta); + } +} + +static void +apply_ancestors_messages_to_bn( + FT_HANDLE t, + FTNODE node, + int childnum, + ANCESTORS ancestors, + const pivot_bounds &bounds, + txn_gc_info *gc_info, + bool* msgs_applied + ) +{ + BASEMENTNODE curr_bn = BLB(node, childnum); + const pivot_bounds curr_bounds = bounds.next_bounds(node, childnum); + for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) { + if (curr_ancestors->node->max_msn_applied_to_node_on_disk.msn > curr_bn->max_msn_applied.msn) { + paranoid_invariant(BP_STATE(curr_ancestors->node, curr_ancestors->childnum) == PT_AVAIL); + bnc_apply_messages_to_basement_node( + t, + curr_bn, + curr_ancestors->node, + curr_ancestors->childnum, + curr_bounds, + gc_info, + msgs_applied + ); + // We don't want to check this ancestor node again if the + // next time we query it, the msn hasn't changed. + curr_bn->max_msn_applied = curr_ancestors->node->max_msn_applied_to_node_on_disk; + } + } + // At this point, we know all the stale messages above this + // basement node have been applied, and any new messages will be + // fresh, so we don't need to look at stale messages for this + // basement node, unless it gets evicted (and this field becomes + // false when it's read in again). 
+ curr_bn->stale_ancestor_messages_applied = true; +} + +void +toku_apply_ancestors_messages_to_node ( + FT_HANDLE t, + FTNODE node, + ANCESTORS ancestors, + const pivot_bounds &bounds, + bool* msgs_applied, + int child_to_read + ) +// Effect: +// Bring a leaf node up-to-date according to all the messages in the ancestors. +// If the leaf node is already up-to-date then do nothing. +// If the leaf node is not already up-to-date, then record the work done +// for that leaf in each ancestor. +// Requires: +// This is being called when pinning a leaf node for the query path. +// The entire root-to-leaf path is pinned and appears in the ancestors list. +{ + VERIFY_NODE(t, node); + paranoid_invariant(node->height == 0); + + TXN_MANAGER txn_manager = toku_ft_get_txn_manager(t); + txn_manager_state txn_state_for_gc(txn_manager); + + TXNID oldest_referenced_xid_for_simple_gc = toku_ft_get_oldest_referenced_xid_estimate(t); + txn_gc_info gc_info(&txn_state_for_gc, + oldest_referenced_xid_for_simple_gc, + node->oldest_referenced_xid_known, + true); + if (!node->dirty && child_to_read >= 0) { + paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); + apply_ancestors_messages_to_bn( + t, + node, + child_to_read, + ancestors, + bounds, + &gc_info, + msgs_applied + ); + } + else { + // know we are a leaf node + // An important invariant: + // We MUST bring every available basement node for a dirty node up to date. + // flushing on the cleaner thread depends on this. This invariant + // allows the cleaner thread to just pick an internal node and flush it + // as opposed to being forced to start from the root. 
+ for (int i = 0; i < node->n_children; i++) { + if (BP_STATE(node, i) != PT_AVAIL) { continue; } + apply_ancestors_messages_to_bn( + t, + node, + i, + ancestors, + bounds, + &gc_info, + msgs_applied + ); + } + } + VERIFY_NODE(t, node); +} + +static bool bn_needs_ancestors_messages( + FT ft, + FTNODE node, + int childnum, + const pivot_bounds &bounds, + ANCESTORS ancestors, + MSN* max_msn_applied + ) +{ + BASEMENTNODE bn = BLB(node, childnum); + const pivot_bounds curr_bounds = bounds.next_bounds(node, childnum); + bool needs_ancestors_messages = false; + for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) { + if (curr_ancestors->node->max_msn_applied_to_node_on_disk.msn > bn->max_msn_applied.msn) { + paranoid_invariant(BP_STATE(curr_ancestors->node, curr_ancestors->childnum) == PT_AVAIL); + NONLEAF_CHILDINFO bnc = BNC(curr_ancestors->node, curr_ancestors->childnum); + if (bnc->broadcast_list.size() > 0) { + needs_ancestors_messages = true; + goto cleanup; + } + if (!bn->stale_ancestor_messages_applied) { + uint32_t stale_lbi, stale_ube; + find_bounds_within_message_tree(ft->cmp, + bnc->stale_message_tree, + &bnc->msg_buffer, + curr_bounds, + &stale_lbi, + &stale_ube); + if (stale_lbi < stale_ube) { + needs_ancestors_messages = true; + goto cleanup; + } + } + uint32_t fresh_lbi, fresh_ube; + find_bounds_within_message_tree(ft->cmp, + bnc->fresh_message_tree, + &bnc->msg_buffer, + curr_bounds, + &fresh_lbi, + &fresh_ube); + if (fresh_lbi < fresh_ube) { + needs_ancestors_messages = true; + goto cleanup; + } + if (curr_ancestors->node->max_msn_applied_to_node_on_disk.msn > max_msn_applied->msn) { + max_msn_applied->msn = curr_ancestors->node->max_msn_applied_to_node_on_disk.msn; + } + } + } +cleanup: + return needs_ancestors_messages; +} + +bool toku_ft_leaf_needs_ancestors_messages( + FT ft, + FTNODE node, + ANCESTORS ancestors, + const pivot_bounds &bounds, + MSN *const max_msn_in_path, + int child_to_read + ) +// 
Effect: Determine whether there are messages in a node's ancestors +// which must be applied to it. These messages are in the correct +// keyrange for any available basement nodes, and are in nodes with the +// correct max_msn_applied_to_node_on_disk. +// Notes: +// This is an approximate query. +// Output: +// max_msn_in_path: max of "max_msn_applied_to_node_on_disk" over +// ancestors. This is used later to update basement nodes' +// max_msn_applied values in case we don't do the full algorithm. +// Returns: +// true if there may be some such messages +// false only if there are definitely no such messages +// Rationale: +// When we pin a node with a read lock, we want to quickly determine if +// we should exchange it for a write lock in preparation for applying +// messages. If there are no messages, we don't need the write lock. +{ + paranoid_invariant(node->height == 0); + bool needs_ancestors_messages = false; + // child_to_read may be -1 in test cases + if (!node->dirty && child_to_read >= 0) { + paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); + needs_ancestors_messages = bn_needs_ancestors_messages( + ft, + node, + child_to_read, + bounds, + ancestors, + max_msn_in_path + ); + } + else { + for (int i = 0; i < node->n_children; ++i) { + if (BP_STATE(node, i) != PT_AVAIL) { continue; } + needs_ancestors_messages = bn_needs_ancestors_messages( + ft, + node, + i, + bounds, + ancestors, + max_msn_in_path + ); + if (needs_ancestors_messages) { + goto cleanup; + } + } + } +cleanup: + return needs_ancestors_messages; +} + +void toku_ft_bn_update_max_msn(FTNODE node, MSN max_msn_applied, int child_to_read) { + invariant(node->height == 0); + if (!node->dirty && child_to_read >= 0) { + paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); + BASEMENTNODE bn = BLB(node, child_to_read); + if (max_msn_applied.msn > bn->max_msn_applied.msn) { + // see comment below + (void) toku_sync_val_compare_and_swap(&bn->max_msn_applied.msn, 
bn->max_msn_applied.msn, max_msn_applied.msn); + } + } + else { + for (int i = 0; i < node->n_children; ++i) { + if (BP_STATE(node, i) != PT_AVAIL) { continue; } + BASEMENTNODE bn = BLB(node, i); + if (max_msn_applied.msn > bn->max_msn_applied.msn) { + // This function runs in a shared access context, so to silence tools + // like DRD, we use a CAS and ignore the result. + // Any threads trying to update these basement nodes should be + // updating them to the same thing (since they all have a read lock on + // the same root-to-leaf path) so this is safe. + (void) toku_sync_val_compare_and_swap(&bn->max_msn_applied.msn, bn->max_msn_applied.msn, max_msn_applied.msn); + } + } + } +} + +struct copy_to_stale_extra { + FT ft; + NONLEAF_CHILDINFO bnc; +}; + +int copy_to_stale(const int32_t &offset, const uint32_t UU(idx), struct copy_to_stale_extra *const extra) __attribute__((nonnull(3))); +int copy_to_stale(const int32_t &offset, const uint32_t UU(idx), struct copy_to_stale_extra *const extra) +{ + MSN msn; + DBT key; + extra->bnc->msg_buffer.get_message_key_msn(offset, &key, &msn); + struct toku_msg_buffer_key_msn_heaviside_extra heaviside_extra(extra->ft->cmp, &extra->bnc->msg_buffer, &key, msn); + int r = extra->bnc->stale_message_tree.insert(offset, heaviside_extra, nullptr); + invariant_zero(r); + return 0; +} + +void toku_ft_bnc_move_messages_to_stale(FT ft, NONLEAF_CHILDINFO bnc) { + struct copy_to_stale_extra cts_extra = { .ft = ft, .bnc = bnc }; + int r = bnc->fresh_message_tree.iterate_over_marked(&cts_extra); + invariant_zero(r); + bnc->fresh_message_tree.delete_all_marked(); +} + +void toku_move_ftnode_messages_to_stale(FT ft, FTNODE node) { + invariant(node->height > 0); + for (int i = 0; i < node->n_children; ++i) { + if (BP_STATE(node, i) != PT_AVAIL) { + continue; + } + NONLEAF_CHILDINFO bnc = BNC(node, i); + // We can't delete things out of the fresh tree inside the above + // procedures because we're still looking at the fresh tree. 
Instead + // we have to move messages after we're done looking at it. + toku_ft_bnc_move_messages_to_stale(ft, bnc); + } +} + +// +// Balance // Availibility // Size + +struct rebalance_array_info { + uint32_t offset; + LEAFENTRY *le_array; + uint32_t *key_sizes_array; + const void **key_ptr_array; + static int fn(const void* key, const uint32_t keylen, const LEAFENTRY &le, + const uint32_t idx, struct rebalance_array_info *const ai) { + ai->le_array[idx+ai->offset] = le; + ai->key_sizes_array[idx+ai->offset] = keylen; + ai->key_ptr_array[idx+ai->offset] = key; + return 0; + } +}; + +// There must still be at least one child +// Requires that all messages in buffers above have been applied. +// Because all messages above have been applied, setting msn of all new basements +// to max msn of existing basements is correct. (There cannot be any messages in +// buffers above that still need to be applied.) +void toku_ftnode_leaf_rebalance(FTNODE node, unsigned int basementnodesize) { + + assert(node->height == 0); + assert(node->dirty); + + uint32_t num_orig_basements = node->n_children; + // Count number of leaf entries in this leaf (num_le). + uint32_t num_le = 0; + for (uint32_t i = 0; i < num_orig_basements; i++) { + num_le += BLB_DATA(node, i)->num_klpairs(); + } + + uint32_t num_alloc = num_le ? num_le : 1; // simplify logic below by always having at least one entry per array + + // Create an array of OMTVALUE's that store all the pointers to all the data. + // Each element in leafpointers is a pointer to a leaf. 
+ toku::scoped_malloc leafpointers_buf(sizeof(LEAFENTRY) * num_alloc); + LEAFENTRY *leafpointers = reinterpret_cast(leafpointers_buf.get()); + leafpointers[0] = NULL; + + toku::scoped_malloc key_pointers_buf(sizeof(void *) * num_alloc); + const void **key_pointers = reinterpret_cast(key_pointers_buf.get()); + key_pointers[0] = NULL; + + toku::scoped_malloc key_sizes_buf(sizeof(uint32_t) * num_alloc); + uint32_t *key_sizes = reinterpret_cast(key_sizes_buf.get()); + + // Capture pointers to old mempools' buffers (so they can be destroyed) + toku::scoped_malloc old_bns_buf(sizeof(BASEMENTNODE) * num_orig_basements); + BASEMENTNODE *old_bns = reinterpret_cast(old_bns_buf.get()); + old_bns[0] = NULL; + + uint32_t curr_le = 0; + for (uint32_t i = 0; i < num_orig_basements; i++) { + bn_data* bd = BLB_DATA(node, i); + struct rebalance_array_info ai {.offset = curr_le, .le_array = leafpointers, .key_sizes_array = key_sizes, .key_ptr_array = key_pointers }; + bd->iterate(&ai); + curr_le += bd->num_klpairs(); + } + + // Create an array that will store indexes of new pivots. + // Each element in new_pivots is the index of a pivot key. + // (Allocating num_le of them is overkill, but num_le is an upper bound.) + toku::scoped_malloc new_pivots_buf(sizeof(uint32_t) * num_alloc); + uint32_t *new_pivots = reinterpret_cast(new_pivots_buf.get()); + new_pivots[0] = 0; + + // Each element in le_sizes is the size of the leafentry pointed to by leafpointers. + toku::scoped_malloc le_sizes_buf(sizeof(size_t) * num_alloc); + size_t *le_sizes = reinterpret_cast(le_sizes_buf.get()); + le_sizes[0] = 0; + + // Create an array that will store the size of each basement. + // This is the sum of the leaf sizes of all the leaves in that basement. + // We don't know how many basements there will be, so we use num_le as the upper bound. 
+ + // Sum of all le sizes in a single basement + toku::scoped_calloc bn_le_sizes_buf(sizeof(size_t) * num_alloc); + size_t *bn_le_sizes = reinterpret_cast(bn_le_sizes_buf.get()); + + // Sum of all key sizes in a single basement + toku::scoped_calloc bn_key_sizes_buf(sizeof(size_t) * num_alloc); + size_t *bn_key_sizes = reinterpret_cast(bn_key_sizes_buf.get()); + + // TODO 4050: All these arrays should be combined into a single array of some bn_info struct (pivot, msize, num_les). + // Each entry is the number of leafentries in this basement. (Again, num_le is overkill upper baound.) + toku::scoped_malloc num_les_this_bn_buf(sizeof(uint32_t) * num_alloc); + uint32_t *num_les_this_bn = reinterpret_cast(num_les_this_bn_buf.get()); + num_les_this_bn[0] = 0; + + // Figure out the new pivots. + // We need the index of each pivot, and for each basement we need + // the number of leaves and the sum of the sizes of the leaves (memory requirement for basement). + uint32_t curr_pivot = 0; + uint32_t num_le_in_curr_bn = 0; + uint32_t bn_size_so_far = 0; + for (uint32_t i = 0; i < num_le; i++) { + uint32_t curr_le_size = leafentry_disksize((LEAFENTRY) leafpointers[i]); + le_sizes[i] = curr_le_size; + if ((bn_size_so_far + curr_le_size + sizeof(uint32_t) + key_sizes[i] > basementnodesize) && (num_le_in_curr_bn != 0)) { + // cap off the current basement node to end with the element before i + new_pivots[curr_pivot] = i-1; + curr_pivot++; + num_le_in_curr_bn = 0; + bn_size_so_far = 0; + } + num_le_in_curr_bn++; + num_les_this_bn[curr_pivot] = num_le_in_curr_bn; + bn_le_sizes[curr_pivot] += curr_le_size; + bn_key_sizes[curr_pivot] += sizeof(uint32_t) + key_sizes[i]; // uint32_t le_offset + bn_size_so_far += curr_le_size + sizeof(uint32_t) + key_sizes[i]; + } + // curr_pivot is now the total number of pivot keys in the leaf node + int num_pivots = curr_pivot; + int num_children = num_pivots + 1; + + // now we need to fill in the new basement nodes and pivots + + // TODO: (Zardosht) 
this is an ugly thing right now + // Need to figure out how to properly deal with seqinsert. + // I am not happy with how this is being + // handled with basement nodes + uint32_t tmp_seqinsert = BLB_SEQINSERT(node, num_orig_basements - 1); + + // choose the max msn applied to any basement as the max msn applied to all new basements + MSN max_msn = ZERO_MSN; + for (uint32_t i = 0; i < num_orig_basements; i++) { + MSN curr_msn = BLB_MAX_MSN_APPLIED(node,i); + max_msn = (curr_msn.msn > max_msn.msn) ? curr_msn : max_msn; + } + // remove the basement node in the node, we've saved a copy + for (uint32_t i = 0; i < num_orig_basements; i++) { + // save a reference to the old basement nodes + // we will need them to ensure that the memory + // stays intact + old_bns[i] = toku_detach_bn(node, i); + } + // Now destroy the old basements, but do not destroy leaves + toku_destroy_ftnode_internals(node); + + // now reallocate pieces and start filling them in + invariant(num_children > 0); + + node->n_children = num_children; + XCALLOC_N(num_children, node->bp); // allocate pointers to basements (bp) + for (int i = 0; i < num_children; i++) { + set_BLB(node, i, toku_create_empty_bn()); // allocate empty basements and set bp pointers + } + + // now we start to fill in the data + + // first the pivots + toku::scoped_malloc pivotkeys_buf(num_pivots * sizeof(DBT)); + DBT *pivotkeys = reinterpret_cast(pivotkeys_buf.get()); + for (int i = 0; i < num_pivots; i++) { + uint32_t size = key_sizes[new_pivots[i]]; + const void *key = key_pointers[new_pivots[i]]; + toku_fill_dbt(&pivotkeys[i], key, size); + } + node->pivotkeys.create_from_dbts(pivotkeys, num_pivots); + + uint32_t baseindex_this_bn = 0; + // now the basement nodes + for (int i = 0; i < num_children; i++) { + // put back seqinsert + BLB_SEQINSERT(node, i) = tmp_seqinsert; + + // create start (inclusive) and end (exclusive) boundaries for data of basement node + uint32_t curr_start = (i==0) ? 
0 : new_pivots[i-1]+1; // index of first leaf in basement + uint32_t curr_end = (i==num_pivots) ? num_le : new_pivots[i]+1; // index of first leaf in next basement + uint32_t num_in_bn = curr_end - curr_start; // number of leaves in this basement + + // create indexes for new basement + invariant(baseindex_this_bn == curr_start); + uint32_t num_les_to_copy = num_les_this_bn[i]; + invariant(num_les_to_copy == num_in_bn); + + bn_data* bd = BLB_DATA(node, i); + bd->set_contents_as_clone_of_sorted_array( + num_les_to_copy, + &key_pointers[baseindex_this_bn], + &key_sizes[baseindex_this_bn], + &leafpointers[baseindex_this_bn], + &le_sizes[baseindex_this_bn], + bn_key_sizes[i], // Total key sizes + bn_le_sizes[i] // total le sizes + ); + + BP_STATE(node,i) = PT_AVAIL; + BP_TOUCH_CLOCK(node,i); + BLB_MAX_MSN_APPLIED(node,i) = max_msn; + baseindex_this_bn += num_les_to_copy; // set to index of next bn + } + node->max_msn_applied_to_node_on_disk = max_msn; + + // destroy buffers of old mempools + for (uint32_t i = 0; i < num_orig_basements; i++) { + destroy_basement_node(old_bns[i]); + } +} + +bool toku_ftnode_fully_in_memory(FTNODE node) { + for (int i = 0; i < node->n_children; i++) { + if (BP_STATE(node,i) != PT_AVAIL) { + return false; + } + } + return true; +} + +void toku_ftnode_assert_fully_in_memory(FTNODE UU(node)) { + paranoid_invariant(toku_ftnode_fully_in_memory(node)); +} + +uint32_t toku_ftnode_leaf_num_entries(FTNODE node) { + toku_ftnode_assert_fully_in_memory(node); + uint32_t num_entries = 0; + for (int i = 0; i < node->n_children; i++) { + num_entries += BLB_DATA(node, i)->num_klpairs(); + } + return num_entries; +} + +enum reactivity toku_ftnode_get_leaf_reactivity(FTNODE node, uint32_t nodesize) { + enum reactivity re = RE_STABLE; + toku_ftnode_assert_fully_in_memory(node); + paranoid_invariant(node->height==0); + unsigned int size = toku_serialize_ftnode_size(node); + if (size > nodesize && toku_ftnode_leaf_num_entries(node) > 1) { + re = RE_FISSIBLE; 
+ } else if ((size*4) < nodesize && !BLB_SEQINSERT(node, node->n_children-1)) { + re = RE_FUSIBLE; + } + return re; +} + +enum reactivity toku_ftnode_get_nonleaf_reactivity(FTNODE node, unsigned int fanout) { + paranoid_invariant(node->height > 0); + int n_children = node->n_children; + if (n_children > (int) fanout) { + return RE_FISSIBLE; + } + if (n_children * 4 < (int) fanout) { + return RE_FUSIBLE; + } + return RE_STABLE; +} + +enum reactivity toku_ftnode_get_reactivity(FT ft, FTNODE node) { + toku_ftnode_assert_fully_in_memory(node); + if (node->height == 0) { + return toku_ftnode_get_leaf_reactivity(node, ft->h->nodesize); + } else { + return toku_ftnode_get_nonleaf_reactivity(node, ft->h->fanout); + } +} + +unsigned int toku_bnc_nbytesinbuf(NONLEAF_CHILDINFO bnc) { + return bnc->msg_buffer.buffer_size_in_use(); +} + +// Return true if the size of the buffers plus the amount of work done is large enough. +// Return false if there is nothing to be flushed (the buffers empty). +bool toku_ftnode_nonleaf_is_gorged(FTNODE node, uint32_t nodesize) { + uint64_t size = toku_serialize_ftnode_size(node); + + bool buffers_are_empty = true; + toku_ftnode_assert_fully_in_memory(node); + // + // the nonleaf node is gorged if the following holds true: + // - the buffers are non-empty + // - the total workdone by the buffers PLUS the size of the buffers + // is greater than nodesize (which as of Maxwell should be + // 4MB) + // + paranoid_invariant(node->height > 0); + for (int child = 0; child < node->n_children; ++child) { + size += BP_WORKDONE(node, child); + } + for (int child = 0; child < node->n_children; ++child) { + if (toku_bnc_nbytesinbuf(BNC(node, child)) > 0) { + buffers_are_empty = false; + break; + } + } + return ((size > nodesize) + && + (!buffers_are_empty)); +} + +int toku_bnc_n_entries(NONLEAF_CHILDINFO bnc) { + return bnc->msg_buffer.num_entries(); +} + +// how much memory does this child buffer consume? 
+long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc) { + return (sizeof(*bnc) + + bnc->msg_buffer.memory_footprint() + + bnc->fresh_message_tree.memory_size() + + bnc->stale_message_tree.memory_size() + + bnc->broadcast_list.memory_size()); +} + +// how much memory in this child buffer holds useful data? +// originally created solely for use by test program(s). +long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc) { + return (sizeof(*bnc) + + bnc->msg_buffer.memory_size_in_use() + + bnc->fresh_message_tree.memory_size() + + bnc->stale_message_tree.memory_size() + + bnc->broadcast_list.memory_size()); +} + +// +// Garbage collection +// Message injection +// Message application +// + +// Used only by test programs: append a child node to a parent node +void toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey) { + int childnum = node->n_children; + node->n_children++; + REALLOC_N(node->n_children, node->bp); + BP_BLOCKNUM(node,childnum) = child->blocknum; + BP_STATE(node,childnum) = PT_AVAIL; + BP_WORKDONE(node, childnum) = 0; + set_BNC(node, childnum, toku_create_empty_nl()); + if (pivotkey) { + invariant(childnum > 0); + node->pivotkeys.insert_at(pivotkey, childnum - 1); + } + node->dirty = 1; +} + +void +toku_ft_bn_apply_msg_once ( + BASEMENTNODE bn, + const ft_msg &msg, + uint32_t idx, + uint32_t le_keylen, + LEAFENTRY le, + txn_gc_info *gc_info, + uint64_t *workdone, + STAT64INFO stats_to_update + ) +// Effect: Apply msg to leafentry (msn is ignored) +// Calculate work done by message on leafentry and add it to caller's workdone counter. 
+// idx is the location where it goes +// le is old leafentry +{ + size_t newsize=0, oldsize=0, workdone_this_le=0; + LEAFENTRY new_le=0; + int64_t numbytes_delta = 0; // how many bytes of user data (not including overhead) were added or deleted from this row + int64_t numrows_delta = 0; // will be +1 or -1 or 0 (if row was added or deleted or not) + uint32_t key_storage_size = msg.kdbt()->size + sizeof(uint32_t); + if (le) { + oldsize = leafentry_memsize(le) + key_storage_size; + } + + // toku_le_apply_msg() may call bn_data::mempool_malloc_and_update_dmt() to allocate more space. + // That means le is guaranteed to not cause a sigsegv but it may point to a mempool that is + // no longer in use. We'll have to release the old mempool later. + toku_le_apply_msg( + msg, + le, + &bn->data_buffer, + idx, + le_keylen, + gc_info, + &new_le, + &numbytes_delta + ); + // at this point, we cannot trust cmd->u.id.key to be valid. + // The dmt may have realloced its mempool and freed the one containing key. + + newsize = new_le ? (leafentry_memsize(new_le) + + key_storage_size) : 0; + if (le && new_le) { + workdone_this_le = (oldsize > newsize ? oldsize : newsize); // work done is max of le size before and after message application + + } else { // we did not just replace a row, so ... + if (le) { + // ... we just deleted a row ... + workdone_this_le = oldsize; + numrows_delta = -1; + } + if (new_le) { + // ... 
or we just added a row + workdone_this_le = newsize; + numrows_delta = 1; + } + } + if (workdone) { // test programs may call with NULL + *workdone += workdone_this_le; + } + + // now update stat64 statistics + bn->stat64_delta.numrows += numrows_delta; + bn->stat64_delta.numbytes += numbytes_delta; + // the only reason stats_to_update may be null is for tests + if (stats_to_update) { + stats_to_update->numrows += numrows_delta; + stats_to_update->numbytes += numbytes_delta; + } + +} + +static const uint32_t setval_tag = 0xee0ccb99; // this was gotten by doing "cat /dev/random|head -c4|od -x" to get a random number. We want to make sure that the user actually passes us the setval_extra_s that we passed in. +struct setval_extra_s { + uint32_t tag; + bool did_set_val; + int setval_r; // any error code that setval_fun wants to return goes here. + // need arguments for toku_ft_bn_apply_msg_once + BASEMENTNODE bn; + MSN msn; // captured from original message, not currently used + XIDS xids; + const DBT *key; + uint32_t idx; + uint32_t le_keylen; + LEAFENTRY le; + txn_gc_info *gc_info; + uint64_t * workdone; // set by toku_ft_bn_apply_msg_once() + STAT64INFO stats_to_update; +}; + +/* + * If new_val == NULL, we send a delete message instead of an insert. + * This happens here instead of in do_delete() for consistency. + * setval_fun() is called from handlerton, passing in svextra_v + * from setval_extra_s input arg to ft->update_fun(). + */ +static void setval_fun (const DBT *new_val, void *svextra_v) { + struct setval_extra_s *CAST_FROM_VOIDP(svextra, svextra_v); + paranoid_invariant(svextra->tag==setval_tag); + paranoid_invariant(!svextra->did_set_val); + svextra->did_set_val = true; + + { + // can't leave scope until toku_ft_bn_apply_msg_once if + // this is a delete + DBT val; + ft_msg msg(svextra->key, + new_val ? new_val : toku_init_dbt(&val), + new_val ? 
FT_INSERT : FT_DELETE_ANY, + svextra->msn, svextra->xids); + toku_ft_bn_apply_msg_once(svextra->bn, msg, + svextra->idx, svextra->le_keylen, svextra->le, + svextra->gc_info, + svextra->workdone, svextra->stats_to_update); + svextra->setval_r = 0; + } +} + +// We are already past the msn filter (in toku_ft_bn_apply_msg(), which calls do_update()), +// so capturing the msn in the setval_extra_s is not strictly required. The alternative +// would be to put a dummy msn in the messages created by setval_fun(), but preserving +// the original msn seems cleaner and it preserves accountability at a lower layer. +static int do_update(ft_update_func update_fun, const DESCRIPTOR_S *desc, BASEMENTNODE bn, const ft_msg &msg, uint32_t idx, + LEAFENTRY le, + void* keydata, + uint32_t keylen, + txn_gc_info *gc_info, + uint64_t * workdone, + STAT64INFO stats_to_update) { + LEAFENTRY le_for_update; + DBT key; + const DBT *keyp; + const DBT *update_function_extra; + DBT vdbt; + const DBT *vdbtp; + + // the location of data depends whether this is a regular or + // broadcast update + if (msg.type() == FT_UPDATE) { + // key is passed in with command (should be same as from le) + // update function extra is passed in with command + keyp = msg.kdbt(); + update_function_extra = msg.vdbt(); + } else { + invariant(msg.type() == FT_UPDATE_BROADCAST_ALL); + // key is not passed in with broadcast, it comes from le + // update function extra is passed in with command + paranoid_invariant(le); // for broadcast updates, we just hit all leafentries + // so this cannot be null + paranoid_invariant(keydata); + paranoid_invariant(keylen); + paranoid_invariant(msg.kdbt()->size == 0); + keyp = toku_fill_dbt(&key, keydata, keylen); + update_function_extra = msg.vdbt(); + } + toku_ft_status_note_update(msg.type() == FT_UPDATE_BROADCAST_ALL); + + if (le && !le_latest_is_del(le)) { + // if the latest val exists, use it, and we'll use the leafentry later + uint32_t vallen; + void *valp = 
le_latest_val_and_len(le, &vallen); + vdbtp = toku_fill_dbt(&vdbt, valp, vallen); + } else { + // otherwise, the val and leafentry are both going to be null + vdbtp = NULL; + } + le_for_update = le; + + struct setval_extra_s setval_extra = {setval_tag, false, 0, bn, msg.msn(), msg.xids(), + keyp, idx, keylen, le_for_update, gc_info, + workdone, stats_to_update}; + // call handlerton's ft->update_fun(), which passes setval_extra to setval_fun() + FAKE_DB(db, desc); + int r = update_fun( + &db, + keyp, + vdbtp, + update_function_extra, + setval_fun, &setval_extra + ); + + if (r == 0) { r = setval_extra.setval_r; } + return r; +} + +// Should be renamed as something like "apply_msg_to_basement()." +void +toku_ft_bn_apply_msg ( + const toku::comparator &cmp, + ft_update_func update_fun, + BASEMENTNODE bn, + const ft_msg &msg, + txn_gc_info *gc_info, + uint64_t *workdone, + STAT64INFO stats_to_update + ) +// Effect: +// Put a msg into a leaf. +// Calculate work done by message on leafnode and add it to caller's workdone counter. +// The leaf could end up "too big" or "too small". The caller must fix that up. 
+{ + LEAFENTRY storeddata; + void* key = NULL; + uint32_t keylen = 0; + + uint32_t num_klpairs; + int r; + struct toku_msg_leafval_heaviside_extra be(cmp, msg.kdbt()); + + unsigned int doing_seqinsert = bn->seqinsert; + bn->seqinsert = 0; + + switch (msg.type()) { + case FT_INSERT_NO_OVERWRITE: + case FT_INSERT: { + uint32_t idx; + if (doing_seqinsert) { + idx = bn->data_buffer.num_klpairs(); + DBT kdbt; + r = bn->data_buffer.fetch_key_and_len(idx-1, &kdbt.size, &kdbt.data); + if (r != 0) goto fz; + int c = toku_msg_leafval_heaviside(kdbt, be); + if (c >= 0) goto fz; + r = DB_NOTFOUND; + } else { + fz: + r = bn->data_buffer.find_zero( + be, + &storeddata, + &key, + &keylen, + &idx + ); + } + if (r==DB_NOTFOUND) { + storeddata = 0; + } else { + assert_zero(r); + } + toku_ft_bn_apply_msg_once(bn, msg, idx, keylen, storeddata, gc_info, workdone, stats_to_update); + + // if the insertion point is within a window of the right edge of + // the leaf then it is sequential + // window = min(32, number of leaf entries/16) + { + uint32_t s = bn->data_buffer.num_klpairs(); + uint32_t w = s / 16; + if (w == 0) w = 1; + if (w > 32) w = 32; + + // within the window? 
+ if (s - idx <= w) + bn->seqinsert = doing_seqinsert + 1; + } + break; + } + case FT_DELETE_ANY: + case FT_ABORT_ANY: + case FT_COMMIT_ANY: { + uint32_t idx; + // Apply to all the matches + + r = bn->data_buffer.find_zero( + be, + &storeddata, + &key, + &keylen, + &idx + ); + if (r == DB_NOTFOUND) break; + assert_zero(r); + toku_ft_bn_apply_msg_once(bn, msg, idx, keylen, storeddata, gc_info, workdone, stats_to_update); + + break; + } + case FT_OPTIMIZE_FOR_UPGRADE: + // fall through so that optimize_for_upgrade performs rest of the optimize logic + case FT_COMMIT_BROADCAST_ALL: + case FT_OPTIMIZE: + // Apply to all leafentries + num_klpairs = bn->data_buffer.num_klpairs(); + for (uint32_t idx = 0; idx < num_klpairs; ) { + void* curr_keyp = NULL; + uint32_t curr_keylen = 0; + r = bn->data_buffer.fetch_klpair(idx, &storeddata, &curr_keylen, &curr_keyp); + assert_zero(r); + int deleted = 0; + if (!le_is_clean(storeddata)) { //If already clean, nothing to do. + // message application code needs a key in order to determine how much + // work was done by this message. since this is a broadcast message, + // we have to create a new message whose key is the current le's key. + DBT curr_keydbt; + ft_msg curr_msg(toku_fill_dbt(&curr_keydbt, curr_keyp, curr_keylen), + msg.vdbt(), msg.type(), msg.msn(), msg.xids()); + toku_ft_bn_apply_msg_once(bn, curr_msg, idx, curr_keylen, storeddata, gc_info, workdone, stats_to_update); + // at this point, we cannot trust msg.kdbt to be valid. + uint32_t new_dmt_size = bn->data_buffer.num_klpairs(); + if (new_dmt_size != num_klpairs) { + paranoid_invariant(new_dmt_size + 1 == num_klpairs); + //Item was deleted. 
+ deleted = 1; + } + } + if (deleted) + num_klpairs--; + else + idx++; + } + paranoid_invariant(bn->data_buffer.num_klpairs() == num_klpairs); + + break; + case FT_COMMIT_BROADCAST_TXN: + case FT_ABORT_BROADCAST_TXN: + // Apply to all leafentries if txn is represented + num_klpairs = bn->data_buffer.num_klpairs(); + for (uint32_t idx = 0; idx < num_klpairs; ) { + void* curr_keyp = NULL; + uint32_t curr_keylen = 0; + r = bn->data_buffer.fetch_klpair(idx, &storeddata, &curr_keylen, &curr_keyp); + assert_zero(r); + int deleted = 0; + if (le_has_xids(storeddata, msg.xids())) { + // message application code needs a key in order to determine how much + // work was done by this message. since this is a broadcast message, + // we have to create a new message whose key is the current le's key. + DBT curr_keydbt; + ft_msg curr_msg(toku_fill_dbt(&curr_keydbt, curr_keyp, curr_keylen), + msg.vdbt(), msg.type(), msg.msn(), msg.xids()); + toku_ft_bn_apply_msg_once(bn, curr_msg, idx, curr_keylen, storeddata, gc_info, workdone, stats_to_update); + uint32_t new_dmt_size = bn->data_buffer.num_klpairs(); + if (new_dmt_size != num_klpairs) { + paranoid_invariant(new_dmt_size + 1 == num_klpairs); + //Item was deleted. 
+ deleted = 1; + } + } + if (deleted) + num_klpairs--; + else + idx++; + } + paranoid_invariant(bn->data_buffer.num_klpairs() == num_klpairs); + + break; + case FT_UPDATE: { + uint32_t idx; + r = bn->data_buffer.find_zero( + be, + &storeddata, + &key, + &keylen, + &idx + ); + if (r==DB_NOTFOUND) { + { + //Point to msg's copy of the key so we don't worry about le being freed + //TODO: 46 MAYBE Get rid of this when le_apply message memory is better handled + key = msg.kdbt()->data; + keylen = msg.kdbt()->size; + } + r = do_update(update_fun, cmp.get_descriptor(), bn, msg, idx, NULL, NULL, 0, gc_info, workdone, stats_to_update); + } else if (r==0) { + r = do_update(update_fun, cmp.get_descriptor(), bn, msg, idx, storeddata, key, keylen, gc_info, workdone, stats_to_update); + } // otherwise, a worse error, just return it + break; + } + case FT_UPDATE_BROADCAST_ALL: { + // apply to all leafentries. + uint32_t idx = 0; + uint32_t num_leafentries_before; + while (idx < (num_leafentries_before = bn->data_buffer.num_klpairs())) { + void* curr_key = nullptr; + uint32_t curr_keylen = 0; + r = bn->data_buffer.fetch_klpair(idx, &storeddata, &curr_keylen, &curr_key); + assert_zero(r); + + //TODO: 46 replace this with something better than cloning key + // TODO: (Zardosht) This may be unnecessary now, due to how the key + // is handled in the bndata. Investigate and determine + char clone_mem[curr_keylen]; // only lasts one loop, alloca would overflow (end of function) + memcpy((void*)clone_mem, curr_key, curr_keylen); + curr_key = (void*)clone_mem; + + // This is broken below. Have a compilation error checked + // in as a reminder + r = do_update(update_fun, cmp.get_descriptor(), bn, msg, idx, storeddata, curr_key, curr_keylen, gc_info, workdone, stats_to_update); + assert_zero(r); + + if (num_leafentries_before == bn->data_buffer.num_klpairs()) { + // we didn't delete something, so increment the index. 
+ idx++; + } + } + break; + } + case FT_NONE: break; // don't do anything + } + + return; +} + +static inline int +key_msn_cmp(const DBT *a, const DBT *b, const MSN amsn, const MSN bmsn, const toku::comparator &cmp) { + int r = cmp(a, b); + if (r == 0) { + if (amsn.msn > bmsn.msn) { + r = +1; + } else if (amsn.msn < bmsn.msn) { + r = -1; + } else { + r = 0; + } + } + return r; +} + +int toku_msg_buffer_key_msn_heaviside(const int32_t &offset, const struct toku_msg_buffer_key_msn_heaviside_extra &extra) { + MSN query_msn; + DBT query_key; + extra.msg_buffer->get_message_key_msn(offset, &query_key, &query_msn); + return key_msn_cmp(&query_key, extra.key, query_msn, extra.msn, extra.cmp); +} + +int toku_msg_buffer_key_msn_cmp(const struct toku_msg_buffer_key_msn_cmp_extra &extra, const int32_t &ao, const int32_t &bo) { + MSN amsn, bmsn; + DBT akey, bkey; + extra.msg_buffer->get_message_key_msn(ao, &akey, &amsn); + extra.msg_buffer->get_message_key_msn(bo, &bkey, &bmsn); + return key_msn_cmp(&akey, &bkey, amsn, bmsn, extra.cmp); +} + +// Effect: Enqueue the message represented by the parameters into the +// bnc's buffer, and put it in either the fresh or stale message tree, +// or the broadcast list. 
+static void bnc_insert_msg(NONLEAF_CHILDINFO bnc, const ft_msg &msg, bool is_fresh, const toku::comparator &cmp) { + int r = 0; + int32_t offset; + bnc->msg_buffer.enqueue(msg, is_fresh, &offset); + enum ft_msg_type type = msg.type(); + if (ft_msg_type_applies_once(type)) { + DBT key; + toku_fill_dbt(&key, msg.kdbt()->data, msg.kdbt()->size); + struct toku_msg_buffer_key_msn_heaviside_extra extra(cmp, &bnc->msg_buffer, &key, msg.msn()); + if (is_fresh) { + r = bnc->fresh_message_tree.insert(offset, extra, nullptr); + assert_zero(r); + } else { + r = bnc->stale_message_tree.insert(offset, extra, nullptr); + assert_zero(r); + } + } else { + invariant(ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)); + const uint32_t idx = bnc->broadcast_list.size(); + r = bnc->broadcast_list.insert_at(offset, idx); + assert_zero(r); + } +} + +// This is only exported for tests. +void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, uint32_t keylen, const void *data, uint32_t datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const toku::comparator &cmp) +{ + DBT k, v; + ft_msg msg(toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, data, datalen), type, msn, xids); + bnc_insert_msg(bnc, msg, is_fresh, cmp); +} + +// append a msg to a nonleaf node's child buffer +static void ft_append_msg_to_child_buffer(const toku::comparator &cmp, FTNODE node, + int childnum, const ft_msg &msg, bool is_fresh) { + paranoid_invariant(BP_STATE(node,childnum) == PT_AVAIL); + bnc_insert_msg(BNC(node, childnum), msg, is_fresh, cmp); + node->dirty = 1; +} + +// This is only exported for tests. 
+void toku_ft_append_to_child_buffer(const toku::comparator &cmp, FTNODE node, int childnum, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const DBT *key, const DBT *val) { + ft_msg msg(key, val, type, msn, xids); + ft_append_msg_to_child_buffer(cmp, node, childnum, msg, is_fresh); +} + +static void ft_nonleaf_msg_once_to_child(const toku::comparator &cmp, FTNODE node, int target_childnum, const ft_msg &msg, bool is_fresh, size_t flow_deltas[]) +// Previously we had passive aggressive promotion, but that causes a lot of I/O a the checkpoint. So now we are just putting it in the buffer here. +// Also we don't worry about the node getting overfull here. It's the caller's problem. +{ + unsigned int childnum = (target_childnum >= 0 + ? target_childnum + : toku_ftnode_which_child(node, msg.kdbt(), cmp)); + ft_append_msg_to_child_buffer(cmp, node, childnum, msg, is_fresh); + NONLEAF_CHILDINFO bnc = BNC(node, childnum); + bnc->flow[0] += flow_deltas[0]; + bnc->flow[1] += flow_deltas[1]; +} + +// TODO: Remove me, I'm boring. +static int ft_compare_pivot(const toku::comparator &cmp, const DBT *key, const DBT *pivot) { + return cmp(key, pivot); +} + +/* Find the leftmost child that may contain the key. + * If the key exists it will be in the child whose number + * is the return value of this function. 
+ */ +int toku_ftnode_which_child(FTNODE node, const DBT *k, const toku::comparator &cmp) { + // a funny case of no pivots + if (node->n_children <= 1) return 0; + + DBT pivot; + + // check the last key to optimize seq insertions + int n = node->n_children-1; + int c = ft_compare_pivot(cmp, k, node->pivotkeys.fill_pivot(n - 1, &pivot)); + if (c > 0) return n; + + // binary search the pivots + int lo = 0; + int hi = n-1; // skip the last one, we checked it above + int mi; + while (lo < hi) { + mi = (lo + hi) / 2; + c = ft_compare_pivot(cmp, k, node->pivotkeys.fill_pivot(mi, &pivot)); + if (c > 0) { + lo = mi+1; + continue; + } + if (c < 0) { + hi = mi; + continue; + } + return mi; + } + return lo; +} + +// Used for HOT. +int toku_ftnode_hot_next_child(FTNODE node, const DBT *k, const toku::comparator &cmp) { + DBT pivot; + int low = 0; + int hi = node->n_children - 1; + int mi; + while (low < hi) { + mi = (low + hi) / 2; + int r = ft_compare_pivot(cmp, k, node->pivotkeys.fill_pivot(mi, &pivot)); + if (r > 0) { + low = mi + 1; + } else if (r < 0) { + hi = mi; + } else { + // if they were exactly equal, then we want the sub-tree under + // the next pivot. + return mi + 1; + } + } + invariant(low == hi); + return low; +} + +void toku_ftnode_save_ct_pair(CACHEKEY UU(key), void *value_data, PAIR p) { + FTNODE CAST_FROM_VOIDP(node, value_data); + node->ct_pair = p; +} + +static void +ft_nonleaf_msg_all(const toku::comparator &cmp, FTNODE node, const ft_msg &msg, bool is_fresh, size_t flow_deltas[]) +// Effect: Put the message into a nonleaf node. We put it into all children, possibly causing the children to become reactive. +// We don't do the splitting and merging. That's up to the caller after doing all the puts it wants to do. +// The re_array[i] gets set to the reactivity of any modified child i. (And there may be several such children.) 
+{ + for (int i = 0; i < node->n_children; i++) { + ft_nonleaf_msg_once_to_child(cmp, node, i, msg, is_fresh, flow_deltas); + } +} + +static void +ft_nonleaf_put_msg(const toku::comparator &cmp, FTNODE node, int target_childnum, const ft_msg &msg, bool is_fresh, size_t flow_deltas[]) +// Effect: Put the message into a nonleaf node. We may put it into a child, possibly causing the child to become reactive. +// We don't do the splitting and merging. That's up to the caller after doing all the puts it wants to do. +// The re_array[i] gets set to the reactivity of any modified child i. (And there may be several such children.) +// +{ + + // + // see comments in toku_ft_leaf_apply_msg + // to understand why we handle setting + // node->max_msn_applied_to_node_on_disk here, + // and don't do it in toku_ftnode_put_msg + // + MSN msg_msn = msg.msn(); + invariant(msg_msn.msn > node->max_msn_applied_to_node_on_disk.msn); + node->max_msn_applied_to_node_on_disk = msg_msn; + + if (ft_msg_type_applies_once(msg.type())) { + ft_nonleaf_msg_once_to_child(cmp, node, target_childnum, msg, is_fresh, flow_deltas); + } else if (ft_msg_type_applies_all(msg.type())) { + ft_nonleaf_msg_all(cmp, node, msg, is_fresh, flow_deltas); + } else { + paranoid_invariant(ft_msg_type_does_nothing(msg.type())); + } +} + +// Garbage collect one leaf entry. +static void +ft_basement_node_gc_once(BASEMENTNODE bn, + uint32_t index, + void* keyp, + uint32_t keylen, + LEAFENTRY leaf_entry, + txn_gc_info *gc_info, + STAT64INFO_S * delta) +{ + paranoid_invariant(leaf_entry); + + // Don't run garbage collection on non-mvcc leaf entries. + if (leaf_entry->type != LE_MVCC) { + goto exit; + } + + // Don't run garbage collection if this leafentry decides it's not worth it. + if (!toku_le_worth_running_garbage_collection(leaf_entry, gc_info)) { + goto exit; + } + + LEAFENTRY new_leaf_entry; + new_leaf_entry = NULL; + + // The mempool doesn't free itself. 
When it allocates new memory, + // this pointer will be set to the older memory that must now be + // freed. + void * maybe_free; + maybe_free = NULL; + + // These will represent the number of bytes and rows changed as + // part of the garbage collection. + int64_t numbytes_delta; + int64_t numrows_delta; + toku_le_garbage_collect(leaf_entry, + &bn->data_buffer, + index, + keyp, + keylen, + gc_info, + &new_leaf_entry, + &numbytes_delta); + + numrows_delta = 0; + if (new_leaf_entry) { + numrows_delta = 0; + } else { + numrows_delta = -1; + } + + // If we created a new mempool buffer we must free the + // old/original buffer. + if (maybe_free) { + toku_free(maybe_free); + } + + // Update stats. + bn->stat64_delta.numrows += numrows_delta; + bn->stat64_delta.numbytes += numbytes_delta; + delta->numrows += numrows_delta; + delta->numbytes += numbytes_delta; + +exit: + return; +} + +// Garbage collect all leaf entries for a given basement node. +static void +basement_node_gc_all_les(BASEMENTNODE bn, + txn_gc_info *gc_info, + STAT64INFO_S * delta) +{ + int r = 0; + uint32_t index = 0; + uint32_t num_leafentries_before; + while (index < (num_leafentries_before = bn->data_buffer.num_klpairs())) { + void* keyp = NULL; + uint32_t keylen = 0; + LEAFENTRY leaf_entry; + r = bn->data_buffer.fetch_klpair(index, &leaf_entry, &keylen, &keyp); + assert_zero(r); + ft_basement_node_gc_once( + bn, + index, + keyp, + keylen, + leaf_entry, + gc_info, + delta + ); + // Check if the leaf entry was deleted or not. + if (num_leafentries_before == bn->data_buffer.num_klpairs()) { + ++index; + } + } +} + +// Garbage collect all leaf entires in all basement nodes. +static void +ft_leaf_gc_all_les(FT ft, FTNODE node, txn_gc_info *gc_info) +{ + toku_ftnode_assert_fully_in_memory(node); + paranoid_invariant_zero(node->height); + // Loop through each leaf entry, garbage collecting as we go. + for (int i = 0; i < node->n_children; ++i) { + // Perform the garbage collection. 
+ BASEMENTNODE bn = BLB(node, i); + STAT64INFO_S delta; + delta.numrows = 0; + delta.numbytes = 0; + basement_node_gc_all_les(bn, gc_info, &delta); + toku_ft_update_stats(&ft->in_memory_stats, delta); + } +} + +void toku_ftnode_leaf_run_gc(FT ft, FTNODE node) { + TOKULOGGER logger = toku_cachefile_logger(ft->cf); + if (logger) { + TXN_MANAGER txn_manager = toku_logger_get_txn_manager(logger); + txn_manager_state txn_state_for_gc(txn_manager); + txn_state_for_gc.init(); + TXNID oldest_referenced_xid_for_simple_gc = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager); + + // Perform full garbage collection. + // + // - txn_state_for_gc + // a fresh snapshot of the transaction system. + // - oldest_referenced_xid_for_simple_gc + // the oldest xid in any live list as of right now - suitible for simple gc + // - node->oldest_referenced_xid_known + // the last known oldest referenced xid for this node and any unapplied messages. + // it is a lower bound on the actual oldest referenced xid - but becasue there + // may be abort messages above us, we need to be careful to only use this value + // for implicit promotion (as opposed to the oldest referenced xid for simple gc) + // + // The node has its own oldest referenced xid because it must be careful not to implicitly promote + // provisional entries for transactions that are no longer live, but may have abort messages + // somewhere above us in the tree. + txn_gc_info gc_info(&txn_state_for_gc, + oldest_referenced_xid_for_simple_gc, + node->oldest_referenced_xid_known, + true); + ft_leaf_gc_all_les(ft, node, &gc_info); + } +} + +void +toku_ftnode_put_msg ( + const toku::comparator &cmp, + ft_update_func update_fun, + FTNODE node, + int target_childnum, + const ft_msg &msg, + bool is_fresh, + txn_gc_info *gc_info, + size_t flow_deltas[], + STAT64INFO stats_to_update + ) +// Effect: Push message into the subtree rooted at NODE. 
+// If NODE is a leaf, then +// put message into leaf, applying it to the leafentries +// If NODE is a nonleaf, then push the message into the message buffer(s) of the relevent child(ren). +// The node may become overfull. That's not our problem. +{ + toku_ftnode_assert_fully_in_memory(node); + // + // see comments in toku_ft_leaf_apply_msg + // to understand why we don't handle setting + // node->max_msn_applied_to_node_on_disk here, + // and instead defer to these functions + // + if (node->height==0) { + toku_ft_leaf_apply_msg(cmp, update_fun, node, target_childnum, msg, gc_info, nullptr, stats_to_update); + } else { + ft_nonleaf_put_msg(cmp, node, target_childnum, msg, is_fresh, flow_deltas); + } +} + +// Effect: applies the message to the leaf if the appropriate basement node is in memory. +// This function is called during message injection and/or flushing, so the entire +// node MUST be in memory. +void toku_ft_leaf_apply_msg( + const toku::comparator &cmp, + ft_update_func update_fun, + FTNODE node, + int target_childnum, // which child to inject to, or -1 if unknown + const ft_msg &msg, + txn_gc_info *gc_info, + uint64_t *workdone, + STAT64INFO stats_to_update + ) +{ + VERIFY_NODE(t, node); + toku_ftnode_assert_fully_in_memory(node); + + // + // Because toku_ft_leaf_apply_msg is called with the intent of permanently + // applying a message to a leaf node (meaning the message is permanently applied + // and will be purged from the system after this call, as opposed to + // toku_apply_ancestors_messages_to_node, which applies a message + // for a query, but the message may still reside in the system and + // be reapplied later), we mark the node as dirty and + // take the opportunity to update node->max_msn_applied_to_node_on_disk. 
+ // + node->dirty = 1; + + // + // we cannot blindly update node->max_msn_applied_to_node_on_disk, + // we must check to see if the msn is greater that the one already stored, + // because the message may have already been applied earlier (via + // toku_apply_ancestors_messages_to_node) to answer a query + // + // This is why we handle node->max_msn_applied_to_node_on_disk both here + // and in ft_nonleaf_put_msg, as opposed to in one location, toku_ftnode_put_msg. + // + MSN msg_msn = msg.msn(); + if (msg_msn.msn > node->max_msn_applied_to_node_on_disk.msn) { + node->max_msn_applied_to_node_on_disk = msg_msn; + } + + if (ft_msg_type_applies_once(msg.type())) { + unsigned int childnum = (target_childnum >= 0 + ? target_childnum + : toku_ftnode_which_child(node, msg.kdbt(), cmp)); + BASEMENTNODE bn = BLB(node, childnum); + if (msg.msn().msn > bn->max_msn_applied.msn) { + bn->max_msn_applied = msg.msn(); + toku_ft_bn_apply_msg(cmp, + update_fun, + bn, + msg, + gc_info, + workdone, + stats_to_update); + } else { + toku_ft_status_note_msn_discard(); + } + } + else if (ft_msg_type_applies_all(msg.type())) { + for (int childnum=0; childnumn_children; childnum++) { + if (msg.msn().msn > BLB(node, childnum)->max_msn_applied.msn) { + BLB(node, childnum)->max_msn_applied = msg.msn(); + toku_ft_bn_apply_msg(cmp, + update_fun, + BLB(node, childnum), + msg, + gc_info, + workdone, + stats_to_update); + } else { + toku_ft_status_note_msn_discard(); + } + } + } + else if (!ft_msg_type_does_nothing(msg.type())) { + invariant(ft_msg_type_does_nothing(msg.type())); + } + VERIFY_NODE(t, node); +} + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/node.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/node.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/node.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/node.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,588 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// 
vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. 
+ +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#include "ft/bndata.h" +#include "ft/comparator.h" +#include "ft/ft.h" +#include "ft/msg_buffer.h" + +/* Pivot keys. + * Child 0's keys are <= pivotkeys[0]. + * Child 1's keys are <= pivotkeys[1]. 
+ * Child 1's keys are > pivotkeys[0]. + * etc + */ +class ftnode_pivot_keys { +public: + // effect: create an empty set of pivot keys + void create_empty(); + + // effect: create pivot keys by copying the given DBT array + void create_from_dbts(const DBT *keys, int n); + + // effect: create pivot keys as a clone of an existing set of pivotkeys + void create_from_pivot_keys(const ftnode_pivot_keys &pivotkeys); + + void destroy(); + + // effect: deserialize pivot keys previously serialized by serialize_to_wbuf() + void deserialize_from_rbuf(struct rbuf *rb, int n); + + // returns: unowned DBT representing the i'th pivot key + DBT get_pivot(int i) const; + + // effect: fills a DBT with the i'th pivot key + // returns: the given dbt + DBT *fill_pivot(int i, DBT *dbt) const; + + // effect: insert a pivot into the i'th position, shifting others to the right + void insert_at(const DBT *key, int i); + + // effect: append pivotkeys to the end of our own pivot keys + void append(const ftnode_pivot_keys &pivotkeys); + + // effect: replace the pivot at the i'th position + void replace_at(const DBT *key, int i); + + // effect: removes the i'th pivot key, shifting others to the left + void delete_at(int i); + + // effect: split the pivot keys, removing all pivots at position greater + // than or equal to `i' and storing them in *other + // requires: *other is empty (size == 0) + void split_at(int i, ftnode_pivot_keys *other); + + // effect: serialize pivot keys to a wbuf + // requires: wbuf has at least ftnode_pivot_keys::total_size() bytes available + void serialize_to_wbuf(struct wbuf *wb) const; + + int num_pivots() const; + + // return: the total size of this data structure + size_t total_size() const; + + // return: the sum of the keys sizes of each pivot (for serialization) + size_t serialized_size() const; + +private: + inline size_t _align4(size_t x) const { + return roundup_to_multiple(4, x); + } + + // effect: create pivot keys, in fixed key format, by copying the 
given key array + void _create_from_fixed_keys(const char *fixedkeys, size_t fixed_keylen, int n); + + char *_fixed_key(int i) const { + return &_fixed_keys[i * _fixed_keylen_aligned]; + } + + bool _fixed_format() const { + return _fixed_keys != nullptr; + } + + void sanity_check() const; + + void _insert_at_dbt(const DBT *key, int i); + void _append_dbt(const ftnode_pivot_keys &pivotkeys); + void _replace_at_dbt(const DBT *key, int i); + void _delete_at_dbt(int i); + void _split_at_dbt(int i, ftnode_pivot_keys *other); + + void _insert_at_fixed(const DBT *key, int i); + void _append_fixed(const ftnode_pivot_keys &pivotkeys); + void _replace_at_fixed(const DBT *key, int i); + void _delete_at_fixed(int i); + void _split_at_fixed(int i, ftnode_pivot_keys *other); + + // adds/destroys keys at a certain index (in dbt format), + // maintaining _total_size, but not _num_pivots + void _add_key_dbt(const DBT *key, int i); + void _destroy_key_dbt(int i); + + // conversions to and from packed key array format + void _convert_to_dbt_format(); + void _convert_to_fixed_format(); + + // If every key is _fixed_keylen long, then _fixed_key is a + // packed array of keys.. + char *_fixed_keys; + // The actual length of the fixed key + size_t _fixed_keylen; + // The aligned length that we use for fixed key storage + size_t _fixed_keylen_aligned; + + // ..otherwise _fixed_keys is null and we store an array of dbts, + // each representing a key. this is simpler but less cache-efficient. + DBT *_dbt_keys; + + int _num_pivots; + size_t _total_size; +}; + +// TODO: class me up +struct ftnode { + MSN max_msn_applied_to_node_on_disk; // max_msn_applied that will be written to disk + unsigned int flags; + BLOCKNUM blocknum; // Which block number is this node? + int layout_version; // What version of the data structure? 
+ int layout_version_original; // different (<) from layout_version if upgraded from a previous version (useful for debugging) + int layout_version_read_from_disk; // transient, not serialized to disk, (useful for debugging) + uint32_t build_id; // build_id (svn rev number) of software that wrote this node to disk + int height; /* height is always >= 0. 0 for leaf, >0 for nonleaf. */ + int dirty; + uint32_t fullhash; + + // for internal nodes, if n_children==fanout+1 then the tree needs to be rebalanced. + // for leaf nodes, represents number of basement nodes + int n_children; + ftnode_pivot_keys pivotkeys; + + // What's the oldest referenced xid that this node knows about? The real oldest + // referenced xid might be younger, but this is our best estimate. We use it + // as a heuristic to transition provisional mvcc entries from provisional to + // committed (from implicity committed to really committed). + // + // A better heuristic would be the oldest live txnid, but we use this since it + // still works well most of the time, and its readily available on the inject + // code path. 
+ TXNID oldest_referenced_xid_known; + + // array of size n_children, consisting of ftnode partitions + // each one is associated with a child + // for internal nodes, the ith partition corresponds to the ith message buffer + // for leaf nodes, the ith partition corresponds to the ith basement node + struct ftnode_partition *bp; + struct ctpair *ct_pair; +}; +typedef struct ftnode *FTNODE; + +// data of an available partition of a leaf ftnode +struct ftnode_leaf_basement_node { + bn_data data_buffer; + unsigned int seqinsert; // number of sequential inserts to this leaf + MSN max_msn_applied; // max message sequence number applied + bool stale_ancestor_messages_applied; + STAT64INFO_S stat64_delta; // change in stat64 counters since basement was last written to disk +}; +typedef struct ftnode_leaf_basement_node *BASEMENTNODE; + +enum pt_state { // declare this to be packed so that when used below it will only take 1 byte. + PT_INVALID = 0, + PT_ON_DISK = 1, + PT_COMPRESSED = 2, + PT_AVAIL = 3}; + +enum ftnode_child_tag { + BCT_INVALID = 0, + BCT_NULL, + BCT_SUBBLOCK, + BCT_LEAF, + BCT_NONLEAF +}; + +typedef toku::omt off_omt_t; +typedef toku::omt marked_off_omt_t; + +// data of an available partition of a nonleaf ftnode +struct ftnode_nonleaf_childinfo { + message_buffer msg_buffer; + off_omt_t broadcast_list; + marked_off_omt_t fresh_message_tree; + off_omt_t stale_message_tree; + uint64_t flow[2]; // current and last checkpoint +}; +typedef struct ftnode_nonleaf_childinfo *NONLEAF_CHILDINFO; + +typedef struct ftnode_child_pointer { + union { + struct sub_block *subblock; + struct ftnode_nonleaf_childinfo *nonleaf; + struct ftnode_leaf_basement_node *leaf; + } u; + enum ftnode_child_tag tag; +} FTNODE_CHILD_POINTER; + +struct ftnode_disk_data { + // + // stores the offset to the beginning of the partition on disk from the ftnode, and the length, needed to read a partition off of disk + // the value is only meaningful if the node is clean. 
If the node is dirty, then the value is meaningless + // The START is the distance from the end of the compressed node_info data, to the beginning of the compressed partition + // The SIZE is the size of the compressed partition. + // Rationale: We cannot store the size from the beginning of the node since we don't know how big the header will be. + // However, later when we are doing aligned writes, we won't be able to store the size from the end since we want things to align. + uint32_t start; + uint32_t size; +}; +typedef struct ftnode_disk_data *FTNODE_DISK_DATA; + +// TODO: Turn these into functions instead of macros +#define BP_START(node_dd,i) ((node_dd)[i].start) +#define BP_SIZE(node_dd,i) ((node_dd)[i].size) + +// a ftnode partition, associated with a child of a node +struct ftnode_partition { + // the following three variables are used for nonleaf nodes + // for leaf nodes, they are meaningless + BLOCKNUM blocknum; // blocknum of child + + // How many bytes worth of work was performed by messages in each buffer. + uint64_t workdone; + + // + // pointer to the partition. Depending on the state, they may be different things + // if state == PT_INVALID, then the node was just initialized and ptr == NULL + // if state == PT_ON_DISK, then ptr == NULL + // if state == PT_COMPRESSED, then ptr points to a struct sub_block* + // if state == PT_AVAIL, then ptr is: + // a struct ftnode_nonleaf_childinfo for internal nodes, + // a struct ftnode_leaf_basement_node for leaf nodes + // + struct ftnode_child_pointer ptr; + // + // at any time, the partitions may be in one of the following three states (stored in pt_state): + // PT_INVALID - means that the partition was just initialized + // PT_ON_DISK - means that the partition is not in memory and needs to be read from disk. To use, must read off disk and decompress + // PT_COMPRESSED - means that the partition is compressed in memory. 
To use, must decompress + // PT_AVAIL - means the partition is decompressed and in memory + // + enum pt_state state; // make this an enum to make debugging easier. + + // clock count used to for pe_callback to determine if a node should be evicted or not + // for now, saturating the count at 1 + uint8_t clock_count; +}; + +// +// TODO: Fix all these names +// Organize declarations +// Fix widespread parameter ordering inconsistencies +// +BASEMENTNODE toku_create_empty_bn(void); +BASEMENTNODE toku_create_empty_bn_no_buffer(void); // create a basement node with a null buffer. +NONLEAF_CHILDINFO toku_clone_nl(NONLEAF_CHILDINFO orig_childinfo); +BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn); +NONLEAF_CHILDINFO toku_create_empty_nl(void); +void destroy_basement_node (BASEMENTNODE bn); +void destroy_nonleaf_childinfo (NONLEAF_CHILDINFO nl); +void toku_destroy_ftnode_internals(FTNODE node); +void toku_ftnode_free (FTNODE *node); +bool toku_ftnode_fully_in_memory(FTNODE node); +void toku_ftnode_assert_fully_in_memory(FTNODE node); +void toku_evict_bn_from_memory(FTNODE node, int childnum, FT ft); +BASEMENTNODE toku_detach_bn(FTNODE node, int childnum); +void toku_ftnode_update_disk_stats(FTNODE ftnode, FT ft, bool for_checkpoint); +void toku_ftnode_clone_partitions(FTNODE node, FTNODE cloned_node); + +void toku_initialize_empty_ftnode(FTNODE node, BLOCKNUM blocknum, int height, int num_children, + int layout_version, unsigned int flags); + +int toku_ftnode_which_child(FTNODE node, const DBT *k, const toku::comparator &cmp); +void toku_ftnode_save_ct_pair(CACHEKEY key, void *value_data, PAIR p); + +// +// TODO: put the heaviside functions into their respective 'struct .*extra;' namespaces +// +struct toku_msg_buffer_key_msn_heaviside_extra { + const toku::comparator &cmp; + message_buffer *msg_buffer; + const DBT *key; + MSN msn; + toku_msg_buffer_key_msn_heaviside_extra(const toku::comparator &c, message_buffer *mb, const DBT *k, MSN m) : + cmp(c), msg_buffer(mb), 
key(k), msn(m) { + } +}; +int toku_msg_buffer_key_msn_heaviside(const int32_t &v, const struct toku_msg_buffer_key_msn_heaviside_extra &extra); + +struct toku_msg_buffer_key_msn_cmp_extra { + const toku::comparator &cmp; + message_buffer *msg_buffer; + toku_msg_buffer_key_msn_cmp_extra(const toku::comparator &c, message_buffer *mb) : + cmp(c), msg_buffer(mb) { + } +}; +int toku_msg_buffer_key_msn_cmp(const struct toku_msg_buffer_key_msn_cmp_extra &extrap, const int &a, const int &b); + +struct toku_msg_leafval_heaviside_extra { + const toku::comparator &cmp; + DBT const *const key; + toku_msg_leafval_heaviside_extra(const toku::comparator &c, const DBT *k) : + cmp(c), key(k) { + } +}; +int toku_msg_leafval_heaviside(DBT const &kdbt, const struct toku_msg_leafval_heaviside_extra &be); + +unsigned int toku_bnc_nbytesinbuf(NONLEAF_CHILDINFO bnc); +int toku_bnc_n_entries(NONLEAF_CHILDINFO bnc); +long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc); +long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc); +void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, uint32_t keylen, const void *data, uint32_t datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const toku::comparator &cmp); +void toku_bnc_empty(NONLEAF_CHILDINFO bnc); +void toku_bnc_flush_to_child(FT ft, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID parent_oldest_referenced_xid_known); +bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) __attribute__((const, nonnull)); + +bool toku_ftnode_nonleaf_is_gorged(FTNODE node, uint32_t nodesize); +uint32_t toku_ftnode_leaf_num_entries(FTNODE node); +void toku_ftnode_leaf_rebalance(FTNODE node, unsigned int basementnodesize); + +void toku_ftnode_leaf_run_gc(FT ft, FTNODE node); + +enum reactivity { + RE_STABLE, + RE_FUSIBLE, + RE_FISSIBLE +}; + +enum reactivity toku_ftnode_get_reactivity(FT ft, FTNODE node); +enum reactivity toku_ftnode_get_nonleaf_reactivity(FTNODE node, unsigned int fanout); +enum reactivity toku_ftnode_get_leaf_reactivity(FTNODE 
node, uint32_t nodesize); + +/** + * Finds the next child for HOT to flush to, given that everything up to + * and including k has been flattened. + * + * If k falls between pivots in node, then we return the childnum where k + * lies. + * + * If k is equal to some pivot, then we return the next (to the right) + * childnum. + */ +int toku_ftnode_hot_next_child(FTNODE node, const DBT *k, const toku::comparator &cmp); + +void toku_ftnode_put_msg(const toku::comparator &cmp, ft_update_func update_fun, + FTNODE node, int target_childnum, + const ft_msg &msg, bool is_fresh, txn_gc_info *gc_info, + size_t flow_deltas[], STAT64INFO stats_to_update); + +void toku_ft_bn_apply_msg_once(BASEMENTNODE bn, const ft_msg &msg, uint32_t idx, + uint32_t le_keylen, LEAFENTRY le, txn_gc_info *gc_info, + uint64_t *workdonep, STAT64INFO stats_to_update); + +void toku_ft_bn_apply_msg(const toku::comparator &cmp, ft_update_func update_fun, + BASEMENTNODE bn, const ft_msg &msg, txn_gc_info *gc_info, + uint64_t *workdone, STAT64INFO stats_to_update); + +void toku_ft_leaf_apply_msg(const toku::comparator &cmp, ft_update_func update_fun, + FTNODE node, int target_childnum, + const ft_msg &msg, txn_gc_info *gc_info, + uint64_t *workdone, STAT64INFO stats_to_update); + +// +// Message management for orthopush +// + +struct ancestors { + // This is the root node if next is NULL (since the root has no ancestors) + FTNODE node; + // Which buffer holds messages destined to the node whose ancestors this list represents. + int childnum; + struct ancestors *next; +}; +typedef struct ancestors *ANCESTORS; + +void toku_ft_bnc_move_messages_to_stale(FT ft, NONLEAF_CHILDINFO bnc); + +void toku_move_ftnode_messages_to_stale(FT ft, FTNODE node); + +// TODO: Should ft_handle just be FT? 
+class pivot_bounds; +void toku_apply_ancestors_messages_to_node(FT_HANDLE t, FTNODE node, ANCESTORS ancestors, + const pivot_bounds &bounds, + bool *msgs_applied, int child_to_read); + +bool toku_ft_leaf_needs_ancestors_messages(FT ft, FTNODE node, ANCESTORS ancestors, + const pivot_bounds &bounds, + MSN *const max_msn_in_path, int child_to_read); + +void toku_ft_bn_update_max_msn(FTNODE node, MSN max_msn_applied, int child_to_read); + +struct ft_search; +int toku_ft_search_which_child(const toku::comparator &cmp, FTNODE node, ft_search *search); + +// +// internal node inline functions +// TODO: Turn the macros into real functions +// + +static inline void set_BNULL(FTNODE node, int i) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + node->bp[i].ptr.tag = BCT_NULL; +} + +static inline bool is_BNULL (FTNODE node, int i) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + return node->bp[i].ptr.tag == BCT_NULL; +} + +static inline NONLEAF_CHILDINFO BNC(FTNODE node, int i) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + FTNODE_CHILD_POINTER p = node->bp[i].ptr; + paranoid_invariant(p.tag==BCT_NONLEAF); + return p.u.nonleaf; +} + +static inline void set_BNC(FTNODE node, int i, NONLEAF_CHILDINFO nl) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + FTNODE_CHILD_POINTER *p = &node->bp[i].ptr; + p->tag = BCT_NONLEAF; + p->u.nonleaf = nl; +} + +static inline BASEMENTNODE BLB(FTNODE node, int i) { + paranoid_invariant(i >= 0); + // The optimizer really doesn't like it when we compare + // i to n_children as signed integers. So we assert that + // n_children is in fact positive before doing a comparison + // on the values forcibly cast to unsigned ints. 
+ paranoid_invariant(node->n_children > 0); + paranoid_invariant((unsigned) i < (unsigned) node->n_children); + FTNODE_CHILD_POINTER p = node->bp[i].ptr; + paranoid_invariant(p.tag==BCT_LEAF); + return p.u.leaf; +} + +static inline void set_BLB(FTNODE node, int i, BASEMENTNODE bn) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + FTNODE_CHILD_POINTER *p = &node->bp[i].ptr; + p->tag = BCT_LEAF; + p->u.leaf = bn; +} + +static inline struct sub_block *BSB(FTNODE node, int i) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + FTNODE_CHILD_POINTER p = node->bp[i].ptr; + paranoid_invariant(p.tag==BCT_SUBBLOCK); + return p.u.subblock; +} + +static inline void set_BSB(FTNODE node, int i, struct sub_block *sb) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + FTNODE_CHILD_POINTER *p = &node->bp[i].ptr; + p->tag = BCT_SUBBLOCK; + p->u.subblock = sb; +} + +// ftnode partition macros +// BP stands for ftnode_partition +#define BP_BLOCKNUM(node,i) ((node)->bp[i].blocknum) +#define BP_STATE(node,i) ((node)->bp[i].state) +#define BP_WORKDONE(node, i)((node)->bp[i].workdone) + +// +// macros for managing a node's clock +// Should be managed by ft-ops.c, NOT by serialize/deserialize +// + +// +// BP_TOUCH_CLOCK uses a compare and swap because multiple threads +// that have a read lock on an internal node may try to touch the clock +// simultaneously +// +#define BP_TOUCH_CLOCK(node, i) ((node)->bp[i].clock_count = 1) +#define BP_SWEEP_CLOCK(node, i) ((node)->bp[i].clock_count = 0) +#define BP_SHOULD_EVICT(node, i) ((node)->bp[i].clock_count == 0) +// not crazy about having these two here, one is for the case where we create new +// nodes, such as in splits and creating new roots, and the other is for when +// we are deserializing a node and not all bp's are touched +#define BP_INIT_TOUCHED_CLOCK(node, i) ((node)->bp[i].clock_count = 1) +#define BP_INIT_UNTOUCHED_CLOCK(node, i) 
((node)->bp[i].clock_count = 0) + +// ftnode leaf basementnode macros, +#define BLB_MAX_MSN_APPLIED(node,i) (BLB(node,i)->max_msn_applied) +#define BLB_MAX_DSN_APPLIED(node,i) (BLB(node,i)->max_dsn_applied) +#define BLB_DATA(node,i) (&(BLB(node,i)->data_buffer)) +#define BLB_NBYTESINDATA(node,i) (BLB_DATA(node,i)->get_disk_size()) +#define BLB_SEQINSERT(node,i) (BLB(node,i)->seqinsert) diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/pivotkeys.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/pivotkeys.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/pivotkeys.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/pivotkeys.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,493 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. 
+ +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include + +#include + +#include "portability/memory.h" + +#include "ft/node.h" +#include "ft/serialize/rbuf.h" +#include "ft/serialize/wbuf.h" + +void ftnode_pivot_keys::create_empty() { + _num_pivots = 0; + _total_size = 0; + _fixed_keys = nullptr; + _fixed_keylen = 0; + _fixed_keylen_aligned = 0; + _dbt_keys = nullptr; +} + +void ftnode_pivot_keys::create_from_dbts(const DBT *keys, int n) { + create_empty(); + _num_pivots = n; + + // see if every key has the same length + bool keys_same_size = true; + for (int i = 1; i < _num_pivots; i++) { + if (keys[i].size != keys[i - 1].size) { + keys_same_size = false; + break; + } + } + + if (keys_same_size && _num_pivots > 0) { + // if so, store pivots in a tightly packed array of fixed length keys + _fixed_keylen = keys[0].size; + _fixed_keylen_aligned = _align4(_fixed_keylen); + _total_size = _fixed_keylen_aligned * _num_pivots; + XMALLOC_N_ALIGNED(64, _total_size, _fixed_keys); + for (int i = 0; i < _num_pivots; i++) { + invariant(keys[i].size == _fixed_keylen); + memcpy(_fixed_key(i), keys[i].data, _fixed_keylen); + } + } else { + // otherwise we'll just store the pivots in an array of dbts + XMALLOC_N_ALIGNED(64, _num_pivots, _dbt_keys); + for (int i = 0; i < _num_pivots; i++) { + size_t size = keys[i].size; + toku_memdup_dbt(&_dbt_keys[i], keys[i].data, size); + _total_size += size; + } + } + + sanity_check(); +} + +void ftnode_pivot_keys::_create_from_fixed_keys(const char *fixedkeys, size_t fixed_keylen, int n) { + create_empty(); + _num_pivots = n; + _fixed_keylen = fixed_keylen; + _fixed_keylen_aligned = _align4(fixed_keylen); + _total_size = _fixed_keylen_aligned * _num_pivots; + XMEMDUP_N(_fixed_keys, fixedkeys, _total_size); +} + +// effect: create pivot keys as a clone of an existing set of pivotkeys +void ftnode_pivot_keys::create_from_pivot_keys(const ftnode_pivot_keys &pivotkeys) { + if (pivotkeys._fixed_format()) { + _create_from_fixed_keys(pivotkeys._fixed_keys, pivotkeys._fixed_keylen, 
pivotkeys._num_pivots); + } else { + create_from_dbts(pivotkeys._dbt_keys, pivotkeys._num_pivots); + } + + sanity_check(); +} + +void ftnode_pivot_keys::destroy() { + if (_dbt_keys != nullptr) { + for (int i = 0; i < _num_pivots; i++) { + toku_destroy_dbt(&_dbt_keys[i]); + } + toku_free(_dbt_keys); + _dbt_keys = nullptr; + } + if (_fixed_keys != nullptr) { + toku_free(_fixed_keys); + _fixed_keys = nullptr; + } + _fixed_keylen = 0; + _fixed_keylen_aligned = 0; + _num_pivots = 0; + _total_size = 0; +} + +void ftnode_pivot_keys::_convert_to_fixed_format() { + invariant(!_fixed_format()); + + // convert to a tightly packed array of fixed length keys + _fixed_keylen = _dbt_keys[0].size; + _fixed_keylen_aligned = _align4(_fixed_keylen); + _total_size = _fixed_keylen_aligned * _num_pivots; + XMALLOC_N_ALIGNED(64, _total_size, _fixed_keys); + for (int i = 0; i < _num_pivots; i++) { + invariant(_dbt_keys[i].size == _fixed_keylen); + memcpy(_fixed_key(i), _dbt_keys[i].data, _fixed_keylen); + } + + // destroy the dbt array format + for (int i = 0; i < _num_pivots; i++) { + toku_destroy_dbt(&_dbt_keys[i]); + } + toku_free(_dbt_keys); + _dbt_keys = nullptr; + + invariant(_fixed_format()); + sanity_check(); +} + +void ftnode_pivot_keys::_convert_to_dbt_format() { + invariant(_fixed_format()); + + // convert to an aray of dbts + REALLOC_N_ALIGNED(64, _num_pivots, _dbt_keys); + for (int i = 0; i < _num_pivots; i++) { + toku_memdup_dbt(&_dbt_keys[i], _fixed_key(i), _fixed_keylen); + } + // pivots sizes are not aligned up dbt format + _total_size = _num_pivots * _fixed_keylen; + + // destroy the fixed key format + toku_free(_fixed_keys); + _fixed_keys = nullptr; + _fixed_keylen = 0; + _fixed_keylen_aligned = 0; + + invariant(!_fixed_format()); + sanity_check(); +} + +void ftnode_pivot_keys::deserialize_from_rbuf(struct rbuf *rb, int n) { + _num_pivots = n; + _total_size = 0; + _fixed_keys = nullptr; + _fixed_keylen = 0; + _dbt_keys = nullptr; + + XMALLOC_N_ALIGNED(64, _num_pivots, 
_dbt_keys); + bool keys_same_size = true; + for (int i = 0; i < _num_pivots; i++) { + const void *pivotkeyptr; + uint32_t size; + rbuf_bytes(rb, &pivotkeyptr, &size); + toku_memdup_dbt(&_dbt_keys[i], pivotkeyptr, size); + _total_size += size; + if (i > 0 && keys_same_size && _dbt_keys[i].size != _dbt_keys[i - 1].size) { + // not all keys are the same size, we'll stick to the dbt array format + keys_same_size = false; + } + } + + if (keys_same_size && _num_pivots > 0) { + _convert_to_fixed_format(); + } + + sanity_check(); +} + +DBT ftnode_pivot_keys::get_pivot(int i) const { + paranoid_invariant(i < _num_pivots); + if (_fixed_format()) { + paranoid_invariant(i * _fixed_keylen_aligned < _total_size); + DBT dbt; + toku_fill_dbt(&dbt, _fixed_key(i), _fixed_keylen); + return dbt; + } else { + return _dbt_keys[i]; + } +} + +DBT *ftnode_pivot_keys::fill_pivot(int i, DBT *dbt) const { + paranoid_invariant(i < _num_pivots); + if (_fixed_format()) { + toku_fill_dbt(dbt, _fixed_key(i), _fixed_keylen); + } else { + toku_copyref_dbt(dbt, _dbt_keys[i]); + } + return dbt; +} + +void ftnode_pivot_keys::_add_key_dbt(const DBT *key, int i) { + toku_clone_dbt(&_dbt_keys[i], *key); + _total_size += _dbt_keys[i].size; +} + +void ftnode_pivot_keys::_destroy_key_dbt(int i) { + invariant(_total_size >= _dbt_keys[i].size); + _total_size -= _dbt_keys[i].size; + toku_destroy_dbt(&_dbt_keys[i]); +} + +void ftnode_pivot_keys::_insert_at_dbt(const DBT *key, int i) { + // make space for a new pivot, slide existing keys to the right + REALLOC_N_ALIGNED(64, _num_pivots + 1, _dbt_keys); + memmove(&_dbt_keys[i + 1], &_dbt_keys[i], (_num_pivots - i) * sizeof(DBT)); + _add_key_dbt(key, i); +} + +void ftnode_pivot_keys::_insert_at_fixed(const DBT *key, int i) { + REALLOC_N_ALIGNED(64, (_num_pivots + 1) * _fixed_keylen_aligned, _fixed_keys); + // TODO: This is not going to be valgrind-safe, because we do not initialize the space + // between _fixed_keylen and _fixed_keylen_aligned (but we probably 
should) + memmove(_fixed_key(i + 1), _fixed_key(i), (_num_pivots - i) * _fixed_keylen_aligned); + memcpy(_fixed_key(i), key->data, _fixed_keylen); + _total_size += _fixed_keylen_aligned; +} + +void ftnode_pivot_keys::insert_at(const DBT *key, int i) { + invariant(i <= _num_pivots); // it's ok to insert at the end, so we check <= n + + // if the new key doesn't have the same size, we can't be in fixed format + if (_fixed_format() && key->size != _fixed_keylen) { + _convert_to_dbt_format(); + } + + if (_fixed_format()) { + _insert_at_fixed(key, i); + } else { + _insert_at_dbt(key, i); + } + _num_pivots++; + + invariant(total_size() > 0); +} + +void ftnode_pivot_keys::_append_dbt(const ftnode_pivot_keys &pivotkeys) { + REALLOC_N_ALIGNED(64, _num_pivots + pivotkeys._num_pivots, _dbt_keys); + bool other_fixed = pivotkeys._fixed_format(); + for (int i = 0; i < pivotkeys._num_pivots; i++) { + size_t size = other_fixed ? pivotkeys._fixed_keylen : + pivotkeys._dbt_keys[i].size; + toku_memdup_dbt(&_dbt_keys[_num_pivots + i], + other_fixed ? 
pivotkeys._fixed_key(i) : + pivotkeys._dbt_keys[i].data, + size); + _total_size += size; + } +} + +void ftnode_pivot_keys::_append_fixed(const ftnode_pivot_keys &pivotkeys) { + if (pivotkeys._fixed_format() && pivotkeys._fixed_keylen == _fixed_keylen) { + // other pivotkeys have the same fixed keylen + REALLOC_N_ALIGNED(64, (_num_pivots + pivotkeys._num_pivots) * _fixed_keylen_aligned, _fixed_keys); + memcpy(_fixed_key(_num_pivots), pivotkeys._fixed_keys, pivotkeys._total_size); + _total_size += pivotkeys._total_size; + } else { + // must convert to dbt format, other pivotkeys have different length'd keys + _convert_to_dbt_format(); + _append_dbt(pivotkeys); + } +} + +void ftnode_pivot_keys::append(const ftnode_pivot_keys &pivotkeys) { + if (_fixed_format()) { + _append_fixed(pivotkeys); + } else { + _append_dbt(pivotkeys); + } + _num_pivots += pivotkeys._num_pivots; + + sanity_check(); +} + +void ftnode_pivot_keys::_replace_at_dbt(const DBT *key, int i) { + _destroy_key_dbt(i); + _add_key_dbt(key, i); +} + +void ftnode_pivot_keys::_replace_at_fixed(const DBT *key, int i) { + if (key->size == _fixed_keylen) { + memcpy(_fixed_key(i), key->data, _fixed_keylen); + } else { + // must convert to dbt format, replacement key has different length + _convert_to_dbt_format(); + _replace_at_dbt(key, i); + } +} + +void ftnode_pivot_keys::replace_at(const DBT *key, int i) { + if (i < _num_pivots) { + if (_fixed_format()) { + _replace_at_fixed(key, i); + } else { + _replace_at_dbt(key, i); + } + } else { + invariant(i == _num_pivots); // appending to the end is ok + insert_at(key, i); + } + invariant(total_size() > 0); +} + +void ftnode_pivot_keys::_delete_at_fixed(int i) { + memmove(_fixed_key(i), _fixed_key(i + 1), (_num_pivots - 1 - i) * _fixed_keylen_aligned); + _total_size -= _fixed_keylen_aligned; +} + +void ftnode_pivot_keys::_delete_at_dbt(int i) { + // slide over existing keys, then shrink down to size + _destroy_key_dbt(i); + memmove(&_dbt_keys[i], &_dbt_keys[i + 1], 
(_num_pivots - 1 - i) * sizeof(DBT)); + REALLOC_N_ALIGNED(64, _num_pivots - 1, _dbt_keys); +} + +void ftnode_pivot_keys::delete_at(int i) { + invariant(i < _num_pivots); + + if (_fixed_format()) { + _delete_at_fixed(i); + } else { + _delete_at_dbt(i); + } + + _num_pivots--; +} + +void ftnode_pivot_keys::_split_at_fixed(int i, ftnode_pivot_keys *other) { + // recreate the other set of pivots from index >= i + other->_create_from_fixed_keys(_fixed_key(i), _fixed_keylen, _num_pivots - i); + + // shrink down to size + _total_size = i * _fixed_keylen_aligned; + REALLOC_N_ALIGNED(64, _total_size, _fixed_keys); +} + +void ftnode_pivot_keys::_split_at_dbt(int i, ftnode_pivot_keys *other) { + // recreate the other set of pivots from index >= i + other->create_from_dbts(&_dbt_keys[i], _num_pivots - i); + + // destroy everything greater, shrink down to size + for (int k = i; k < _num_pivots; k++) { + _destroy_key_dbt(k); + } + REALLOC_N_ALIGNED(64, i, _dbt_keys); +} + +void ftnode_pivot_keys::split_at(int i, ftnode_pivot_keys *other) { + if (i < _num_pivots) { + if (_fixed_format()) { + _split_at_fixed(i, other); + } else { + _split_at_dbt(i, other); + } + _num_pivots = i; + } + + sanity_check(); +} + +void ftnode_pivot_keys::serialize_to_wbuf(struct wbuf *wb) const { + bool fixed = _fixed_format(); + size_t written = 0; + for (int i = 0; i < _num_pivots; i++) { + size_t size = fixed ? _fixed_keylen : _dbt_keys[i].size; + invariant(size); + wbuf_nocrc_bytes(wb, fixed ? 
_fixed_key(i) : _dbt_keys[i].data, size); + written += size; + } + invariant(written == serialized_size()); +} + +int ftnode_pivot_keys::num_pivots() const { + // if we have fixed size keys, the number of pivots should be consistent + paranoid_invariant(_fixed_keys == nullptr || (_total_size == _fixed_keylen_aligned * _num_pivots)); + return _num_pivots; +} + +size_t ftnode_pivot_keys::total_size() const { + // if we have fixed size keys, the total size should be consistent + paranoid_invariant(_fixed_keys == nullptr || (_total_size == _fixed_keylen_aligned * _num_pivots)); + return _total_size; +} + +size_t ftnode_pivot_keys::serialized_size() const { + // we only return the size that will be used when serialized, so we calculate based + // on the fixed keylen and not the aligned keylen. + return _fixed_format() ? _num_pivots * _fixed_keylen : _total_size; +} + +void ftnode_pivot_keys::sanity_check() const { + if (_fixed_format()) { + invariant(_dbt_keys == nullptr); + invariant(_fixed_keylen_aligned == _align4(_fixed_keylen)); + invariant(_num_pivots * _fixed_keylen <= _total_size); + invariant(_num_pivots * _fixed_keylen_aligned == _total_size); + } else { + invariant(_num_pivots == 0 || _dbt_keys != nullptr); + size_t size = 0; + for (int i = 0; i < _num_pivots; i++) { + size += _dbt_keys[i].size; + } + invariant(size == _total_size); + } +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/pqueue.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/pqueue.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/pqueue.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/pqueue.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,233 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public 
License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. 
- - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include -#include "toku_os.h" -#include "ft-internal.h" -#include "ftloader-internal.h" -#include "pqueue.h" - -#define pqueue_left(i) ((i) << 1) -#define pqueue_right(i) (((i) << 1) + 1) -#define pqueue_parent(i) ((i) >> 1) - -int pqueue_init(pqueue_t **result, size_t n, int which_db, DB *db, ft_compare_func compare, struct error_callback_s *err_callback) -{ - pqueue_t *MALLOC(q); - if (!q) { - return get_error_errno(); - } - - /* Need to allocate n+1 elements since element 0 isn't used. */ - MALLOC_N(n + 1, q->d); - if (!q->d) { - int r = get_error_errno(); - toku_free(q); - return r; - } - q->size = 1; - q->avail = q->step = (n+1); /* see comment above about n+1 */ - - q->which_db = which_db; - q->db = db; - q->compare = compare; - q->dup_error = 0; - - q->error_callback = err_callback; - - *result = q; - return 0; -} - -void pqueue_free(pqueue_t *q) -{ - toku_free(q->d); - toku_free(q); -} - - -size_t pqueue_size(pqueue_t *q) -{ - /* queue element 0 exists but doesn't count since it isn't used. 
*/ - return (q->size - 1); -} - -static int pqueue_compare(pqueue_t *q, DBT *next_key, DBT *next_val, DBT *curr_key) -{ - int r = q->compare(q->db, next_key, curr_key); - if ( r == 0 ) { // duplicate key : next_key == curr_key - q->dup_error = 1; - if (q->error_callback) - ft_loader_set_error_and_callback(q->error_callback, DB_KEYEXIST, q->db, q->which_db, next_key, next_val); - } - return ( r > -1 ); -} - -static void pqueue_bubble_up(pqueue_t *q, size_t i) -{ - size_t parent_node; - pqueue_node_t *moving_node = q->d[i]; - DBT *moving_key = moving_node->key; - - for (parent_node = pqueue_parent(i); - ((i > 1) && pqueue_compare(q, q->d[parent_node]->key, q->d[parent_node]->val, moving_key)); - i = parent_node, parent_node = pqueue_parent(i)) - { - q->d[i] = q->d[parent_node]; - } - - q->d[i] = moving_node; -} - - -static size_t pqueue_maxchild(pqueue_t *q, size_t i) -{ - size_t child_node = pqueue_left(i); - - if (child_node >= q->size) - return 0; - - if ((child_node+1) < q->size && - pqueue_compare(q, q->d[child_node]->key, q->d[child_node]->val, q->d[child_node+1]->key)) - child_node++; /* use right child instead of left */ - - return child_node; -} - - -static void pqueue_percolate_down(pqueue_t *q, size_t i) -{ - size_t child_node; - pqueue_node_t *moving_node = q->d[i]; - DBT *moving_key = moving_node->key; - DBT *moving_val = moving_node->val; - - while ((child_node = pqueue_maxchild(q, i)) && - pqueue_compare(q, moving_key, moving_val, q->d[child_node]->key)) - { - q->d[i] = q->d[child_node]; - i = child_node; - } - - q->d[i] = moving_node; -} - - -int pqueue_insert(pqueue_t *q, pqueue_node_t *d) -{ - size_t i; - - if (!q) return 1; - if (q->size >= q->avail) return 1; - - /* insert item */ - i = q->size++; - q->d[i] = d; - pqueue_bubble_up(q, i); - - if ( q->dup_error ) return DB_KEYEXIST; - return 0; -} - -int pqueue_pop(pqueue_t *q, pqueue_node_t **d) -{ - if (!q || q->size == 1) { - *d = NULL; - return 0; - } - - *d = q->d[1]; - q->d[1] = 
q->d[--q->size]; - pqueue_percolate_down(q, 1); - - if ( q->dup_error ) return DB_KEYEXIST; - return 0; -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/pqueue.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/pqueue.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/pqueue.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/pqueue.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,126 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_PQUEUE_H -#define TOKU_PQUEUE_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- - -typedef struct ft_pqueue_node_t -{ - DBT *key; - DBT *val; - int i; -} pqueue_node_t; - -typedef struct ft_pqueue_t -{ - size_t size; - size_t avail; - size_t step; - - int which_db; - DB *db; // needed for compare function - ft_compare_func compare; - pqueue_node_t **d; - int dup_error; - - struct error_callback_s *error_callback; - -} pqueue_t; - -int pqueue_init(pqueue_t **result, size_t n, int which_db, DB *db, ft_compare_func compare, struct error_callback_s *err_callback); -void pqueue_free(pqueue_t *q); -size_t pqueue_size(pqueue_t *q); -int pqueue_insert(pqueue_t *q, pqueue_node_t *d); -int pqueue_pop(pqueue_t *q, pqueue_node_t **d); - - -#endif //TOKU_PQUEUE_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/queue.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/queue.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/queue.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/queue.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,232 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include -#include "toku_os.h" -#include -#include -#include "queue.h" -#include "memory.h" -#include - -struct qitem; - -struct qitem { - void *item; - struct qitem *next; - uint64_t weight; -}; - -struct queue { - uint64_t contents_weight; // how much stuff is in there? - uint64_t weight_limit; // Block enqueueing when the contents gets to be bigger than the weight. 
- struct qitem *head, *tail; - - bool eof; - - toku_mutex_t mutex; - toku_cond_t cond; -}; - -// Representation invariant: -// q->contents_weight is the sum of the weights of everything in the queue. -// q->weight_limit is the limit on the weight before we block. -// q->head is the oldest thing in the queue. q->tail is the newest. (If nothing is in the queue then both are NULL) -// If q->head is not null: -// q->head->item is the oldest item. -// q->head->weight is the weight of that item. -// q->head->next is the next youngest thing. -// q->eof indicates that the producer has said "that's all". -// q->mutex and q->cond are used as condition variables. - - -int queue_create (QUEUE *q, uint64_t weight_limit) -{ - QUEUE CALLOC(result); - if (result==NULL) return get_error_errno(); - result->contents_weight = 0; - result->weight_limit = weight_limit; - result->head = NULL; - result->tail = NULL; - result->eof = false; - toku_mutex_init(&result->mutex, NULL); - toku_cond_init(&result->cond, NULL); - *q = result; - return 0; -} - -int queue_destroy (QUEUE q) -{ - if (q->head) return EINVAL; - assert(q->contents_weight==0); - toku_mutex_destroy(&q->mutex); - toku_cond_destroy(&q->cond); - toku_free(q); - return 0; -} - -int queue_enq (QUEUE q, void *item, uint64_t weight, uint64_t *total_weight_after_enq) -{ - toku_mutex_lock(&q->mutex); - assert(!q->eof); - // Go ahead and put it in, even if it's too much. - struct qitem *MALLOC(qi); - if (qi==NULL) { - int r = get_error_errno(); - toku_mutex_unlock(&q->mutex); - return r; - } - q->contents_weight += weight; - qi->item = item; - qi->weight = weight; - qi->next = NULL; - if (q->tail) { - q->tail->next = qi; - } else { - assert(q->head==NULL); - q->head = qi; - } - q->tail = qi; - // Wake up the consumer. - toku_cond_signal(&q->cond); - // Now block if there's too much stuff in there. - while (q->weight_limit < q->contents_weight) { - toku_cond_wait(&q->cond, &q->mutex); - } - // we are allowed to return. 
- if (total_weight_after_enq) { - *total_weight_after_enq = q->contents_weight; - } - toku_mutex_unlock(&q->mutex); - return 0; -} - -int queue_eof (QUEUE q) -{ - toku_mutex_lock(&q->mutex); - assert(!q->eof); - q->eof = true; - toku_cond_signal(&q->cond); - toku_mutex_unlock(&q->mutex); - return 0; -} - -int queue_deq (QUEUE q, void **item, uint64_t *weight, uint64_t *total_weight_after_deq) -{ - toku_mutex_lock(&q->mutex); - int result; - while (q->head==NULL && !q->eof) { - toku_cond_wait(&q->cond, &q->mutex); - } - if (q->head==NULL) { - assert(q->eof); - result = EOF; - } else { - struct qitem *head = q->head; - q->contents_weight -= head->weight; - *item = head->item; - if (weight) - *weight = head->weight; - if (total_weight_after_deq) - *total_weight_after_deq = q->contents_weight; - q->head = head->next; - toku_free(head); - if (q->head==NULL) { - q->tail = NULL; - } - // wake up the producer, since we decreased the contents_weight. - toku_cond_signal(&q->cond); - // Successful result. 
- result = 0; - } - toku_mutex_unlock(&q->mutex); - return result; -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/queue.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/queue.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/queue.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/queue.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,140 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_QUEUE_H -#define TOKU_QUEUE_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "fttypes.h" - -// The abstraction: -// -// queue.h implements a queue suitable for a producer-consumer relationship between two pthreads. -// The enqueue/dequeue operation is fairly heavyweight (involving pthread condition variables) so it may be useful -// to enqueue large chunks rather than small chunks. -// It probably won't work right to have two consumer threads. -// -// Every item inserted into the queue has a weight. If the weight -// gets too big, then the queue blocks on trying to insert more items. -// The weight can be used to limit the total number of items in the -// queue (weight of each item=1) or the total memory consumed by queue -// items (weight of each item is its size). Or the weight's could all be -// zero for an unlimited queue. - -typedef struct queue *QUEUE; - -int queue_create (QUEUE *q, uint64_t weight_limit); -// Effect: Create a queue with a given weight limit. The queue is initially empty. 
- -int queue_enq (QUEUE q, void *item, uint64_t weight, uint64_t *total_weight_after_enq); -// Effect: Insert ITEM of weight WEIGHT into queue. If the resulting contents weight too much then block (don't return) until the total weight is low enough. -// If total_weight_after_enq!=NULL then return the current weight of the items in the queue (after finishing blocking on overweight, and after enqueueing the item). -// If successful return 0. -// If an error occurs, return the error number, and the state of the queue is undefined. The item may have been enqueued or not, and in fact the queue may be badly corrupted if the condition variables go awry. If it's just a matter of out-of-memory, then the queue is probably OK. -// Requires: There is only a single consumer. (We wake up the consumer using a pthread_cond_signal (which is suitable only for single consumers.) - -int queue_eof (QUEUE q); -// Effect: Inform the queue that no more values will be inserted. After all the values that have been inserted are dequeued, further dequeue operations will return EOF. -// Returns 0 on success. On failure, things are pretty bad (likely to be some sort of mutex failure). - -int queue_deq (QUEUE q, void **item, uint64_t *weight, uint64_t *total_weight_after_deq); -// Effect: Wait until the queue becomes nonempty. Then dequeue and return the oldest item. The item and its weight are returned in *ITEM. -// If weight!=NULL then return the item's weight in *weight. -// If total_weight_after_deq!=NULL then return the current weight of the items in the queue (after dequeuing the item). -// Return 0 if an item is returned. -// Return EOF is we no more items will be returned. -// Usage note: The queue should be destroyed only after any consumers will no longer look at it (for example, they saw EOF). - -int queue_destroy (QUEUE q); -// Effect: Destroy the queue. 
-// Requires: The queue must be empty and no consumer should try to dequeue after this (one way to do this is to make sure the consumer saw EOF). -// Returns 0 on success. If the queue is not empty, returns EINVAL. Other errors are likely to be bad (some sort of mutex or condvar failure). - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/quicklz.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/quicklz.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/quicklz.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/quicklz.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,939 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -// Fast data compression library -// Copyright (C) 2006-2011 Lasse Mikkel Reinhold -// lar@quicklz.com -// -// QuickLZ can be used for free under the GPL 1, 2 or 3 license (where anything -// released into public must be open source) or under a commercial license if such -// has been acquired (see http://www.quicklz.com/order.html). The commercial license -// does not cover derived or ported versions created by third parties under GPL. 
- -// 1.5.0 final - -#include "quicklz.h" - -#if QLZ_VERSION_MAJOR != 1 || QLZ_VERSION_MINOR != 5 || QLZ_VERSION_REVISION != 0 - #error quicklz.c and quicklz.h have different versions -#endif - -#if (defined(__X86__) || defined(__i386__) || defined(i386) || defined(_M_IX86) || defined(__386__) || defined(__x86_64__) || defined(_M_X64)) - #define X86X64 -#endif - -#define MINOFFSET 2 -#define UNCONDITIONAL_MATCHLEN 6 -#define UNCOMPRESSED_END 4 -#define CWORD_LEN 4 - -#if QLZ_COMPRESSION_LEVEL == 1 && defined QLZ_PTR_64 && QLZ_STREAMING_BUFFER == 0 - #define OFFSET_BASE source - #define CAST (ui32)(size_t) -#else - #define OFFSET_BASE 0 - #define CAST -#endif - -int qlz_get_setting(int setting) -{ - switch (setting) - { - case 0: return QLZ_COMPRESSION_LEVEL; - case 1: return sizeof(qlz_state_compress); - case 2: return sizeof(qlz_state_decompress); - case 3: return QLZ_STREAMING_BUFFER; -#ifdef QLZ_MEMORY_SAFE - case 6: return 1; -#else - case 6: return 0; -#endif - case 7: return QLZ_VERSION_MAJOR; - case 8: return QLZ_VERSION_MINOR; - case 9: return QLZ_VERSION_REVISION; - } - return -1; -} - -#if QLZ_COMPRESSION_LEVEL == 1 -static int same(const unsigned char *src, size_t n) -{ - while(n > 0 && *(src + n) == *src) - n--; - return n == 0 ? 
1 : 0; -} -#endif - -static void reset_table_compress(qlz_state_compress *state) -{ - int i; - for(i = 0; i < QLZ_HASH_VALUES; i++) - { -#if QLZ_COMPRESSION_LEVEL == 1 - state->hash[i].offset = 0; -#else - state->hash_counter[i] = 0; - state->hash[i].offset[0] = 0; -#endif - } -} - -static void reset_table_decompress(qlz_state_decompress *state) -{ - (void)state; -#if QLZ_COMPRESSION_LEVEL == 2 - for(int i = 0; i < QLZ_HASH_VALUES; i++) - { - state->hash_counter[i] = 0; - } -#endif -} - -static __inline ui32 hash_func(ui32 i) -{ -#if QLZ_COMPRESSION_LEVEL == 2 - return ((i >> 9) ^ (i >> 13) ^ i) & (QLZ_HASH_VALUES - 1); -#else - return ((i >> 12) ^ i) & (QLZ_HASH_VALUES - 1); -#endif -} - -static __inline ui32 fast_read(void const *src, ui32 bytes) -{ -#ifndef X86X64 - unsigned char *p = (unsigned char*)src; - switch (bytes) - { - case 4: - return(*p | *(p + 1) << 8 | *(p + 2) << 16 | *(p + 3) << 24); - case 3: - return(*p | *(p + 1) << 8 | *(p + 2) << 16); - case 2: - return(*p | *(p + 1) << 8); - case 1: - return(*p); - } - return 0; -#else - if (bytes >= 1 && bytes <= 4) - return *((ui32*)src); - else - return 0; -#endif -} - -static __inline ui32 hashat(const unsigned char *src) -{ - ui32 fetch, hash; - fetch = fast_read(src, 3); - hash = hash_func(fetch); - return hash; -} - -static __inline void fast_write(ui32 f, void *dst, size_t bytes) -{ -#ifndef X86X64 - unsigned char *p = (unsigned char*)dst; - - switch (bytes) - { - case 4: - *p = (unsigned char)f; - *(p + 1) = (unsigned char)(f >> 8); - *(p + 2) = (unsigned char)(f >> 16); - *(p + 3) = (unsigned char)(f >> 24); - return; - case 3: - *p = (unsigned char)f; - *(p + 1) = (unsigned char)(f >> 8); - *(p + 2) = (unsigned char)(f >> 16); - return; - case 2: - *p = (unsigned char)f; - *(p + 1) = (unsigned char)(f >> 8); - return; - case 1: - *p = (unsigned char)f; - return; - } -#else - switch (bytes) - { - case 4: - *((ui32*)dst) = f; - return; - case 3: - *((ui32*)dst) = f; - return; - case 2: - *((ui16 
*)dst) = (ui16)f; - return; - case 1: - *((unsigned char*)dst) = (unsigned char)f; - return; - } -#endif -} - - -size_t qlz_size_decompressed(const char *source) -{ - ui32 n, r; - n = (((*source) & 2) == 2) ? 4 : 1; - r = fast_read(source + 1 + n, n); - r = r & (0xffffffff >> ((4 - n)*8)); - return r; -} - -size_t qlz_size_compressed(const char *source) -{ - ui32 n, r; - n = (((*source) & 2) == 2) ? 4 : 1; - r = fast_read(source + 1, n); - r = r & (0xffffffff >> ((4 - n)*8)); - return r; -} - -static -size_t qlz_size_header(const char *source) -{ - size_t n = 2*((((*source) & 2) == 2) ? 4 : 1) + 1; - return n; -} - - -static __inline void memcpy_up(unsigned char *dst, const unsigned char *src, ui32 n) -{ - // Caution if modifying memcpy_up! Overlap of dst and src must be special handled. -#ifndef X86X64 - unsigned char *end = dst + n; - while(dst < end) - { - *dst = *src; - dst++; - src++; - } -#else - ui32 f = 0; - do - { - *(ui32 *)(dst + f) = *(ui32 *)(src + f); - f += MINOFFSET + 1; - } - while (f < n); -#endif -} - -__attribute__((unused)) -static __inline void update_hash(qlz_state_decompress *state, const unsigned char *s) -{ -#if QLZ_COMPRESSION_LEVEL == 1 - ui32 hash; - hash = hashat(s); - state->hash[hash].offset = s; - state->hash_counter[hash] = 1; -#elif QLZ_COMPRESSION_LEVEL == 2 - ui32 hash; - unsigned char c; - hash = hashat(s); - c = state->hash_counter[hash]; - state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = s; - c++; - state->hash_counter[hash] = c; -#endif - (void)state; - (void)s; -} - -#if QLZ_COMPRESSION_LEVEL <= 2 -static void update_hash_upto(qlz_state_decompress *state, unsigned char **lh, const unsigned char *max) -{ - while(*lh < max) - { - (*lh)++; - update_hash(state, *lh); - } -} -#endif - -static size_t qlz_compress_core(const unsigned char *source, unsigned char *destination, size_t size, qlz_state_compress *state) -{ - const unsigned char *last_byte = source + size - 1; - const unsigned char *src = source; - unsigned char 
*cword_ptr = destination; - unsigned char *dst = destination + CWORD_LEN; - ui32 cword_val = 1U << 31; - const unsigned char *last_matchstart = last_byte - UNCONDITIONAL_MATCHLEN - UNCOMPRESSED_END; - ui32 fetch = 0; - unsigned int lits = 0; - - (void) lits; - - if(src <= last_matchstart) - fetch = fast_read(src, 3); - - while(src <= last_matchstart) - { - if ((cword_val & 1) == 1) - { - // store uncompressed if compression ratio is too low - if (src > source + (size >> 1) && dst - destination > src - source - ((src - source) >> 5)) - return 0; - - fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN); - - cword_ptr = dst; - dst += CWORD_LEN; - cword_val = 1U << 31; - fetch = fast_read(src, 3); - } -#if QLZ_COMPRESSION_LEVEL == 1 - { - const unsigned char *o; - ui32 hash, cached; - - hash = hash_func(fetch); - cached = fetch ^ state->hash[hash].cache; - state->hash[hash].cache = fetch; - - o = state->hash[hash].offset + OFFSET_BASE; - state->hash[hash].offset = CAST(src - OFFSET_BASE); - -#ifdef X86X64 - if ((cached & 0xffffff) == 0 && o != OFFSET_BASE && (src - o > MINOFFSET || (src == o + 1 && lits >= 3 && src > source + 3 && same(src - 3, 6)))) - { - if(cached != 0) - { -#else - if (cached == 0 && o != OFFSET_BASE && (src - o > MINOFFSET || (src == o + 1 && lits >= 3 && src > source + 3 && same(src - 3, 6)))) - { - if (*(o + 3) != *(src + 3)) - { -#endif - hash <<= 4; - cword_val = (cword_val >> 1) | (1U << 31); - fast_write((3 - 2) | hash, dst, 2); - src += 3; - dst += 2; - } - else - { - const unsigned char *old_src = src; - size_t matchlen; - hash <<= 4; - - cword_val = (cword_val >> 1) | (1U << 31); - src += 4; - - if(*(o + (src - old_src)) == *src) - { - src++; - if(*(o + (src - old_src)) == *src) - { - size_t q = last_byte - UNCOMPRESSED_END - (src - 5) + 1; - size_t remaining = q > 255 ? 
255 : q; - src++; - while(*(o + (src - old_src)) == *src && (size_t)(src - old_src) < remaining) - src++; - } - } - - matchlen = src - old_src; - if (matchlen < 18) - { - fast_write((ui32)(matchlen - 2) | hash, dst, 2); - dst += 2; - } - else - { - fast_write((ui32)(matchlen << 16) | hash, dst, 3); - dst += 3; - } - } - fetch = fast_read(src, 3); - lits = 0; - } - else - { - lits++; - *dst = *src; - src++; - dst++; - cword_val = (cword_val >> 1); -#ifdef X86X64 - fetch = fast_read(src, 3); -#else - fetch = (fetch >> 8 & 0xffff) | (*(src + 2) << 16); -#endif - } - } -#elif QLZ_COMPRESSION_LEVEL >= 2 - { - const unsigned char *o, *offset2; - ui32 hash, matchlen, k, m, best_k = 0; - unsigned char c; - size_t remaining = (last_byte - UNCOMPRESSED_END - src + 1) > 255 ? 255 : (last_byte - UNCOMPRESSED_END - src + 1); - (void)best_k; - - - //hash = hashat(src); - fetch = fast_read(src, 3); - hash = hash_func(fetch); - - c = state->hash_counter[hash]; - - offset2 = state->hash[hash].offset[0]; - if(offset2 < src - MINOFFSET && c > 0 && ((fast_read(offset2, 3) ^ fetch) & 0xffffff) == 0) - { - matchlen = 3; - if(*(offset2 + matchlen) == *(src + matchlen)) - { - matchlen = 4; - while(*(offset2 + matchlen) == *(src + matchlen) && matchlen < remaining) - matchlen++; - } - } - else - matchlen = 0; - for(k = 1; k < QLZ_POINTERS && c > k; k++) - { - o = state->hash[hash].offset[k]; -#if QLZ_COMPRESSION_LEVEL == 3 - if(((fast_read(o, 3) ^ fetch) & 0xffffff) == 0 && o < src - MINOFFSET) -#elif QLZ_COMPRESSION_LEVEL == 2 - if(*(src + matchlen) == *(o + matchlen) && ((fast_read(o, 3) ^ fetch) & 0xffffff) == 0 && o < src - MINOFFSET) -#endif - { - m = 3; - while(*(o + m) == *(src + m) && m < remaining) - m++; -#if QLZ_COMPRESSION_LEVEL == 3 - if ((m > matchlen) || (m == matchlen && o > offset2)) -#elif QLZ_COMPRESSION_LEVEL == 2 - if (m > matchlen) -#endif - { - offset2 = o; - matchlen = m; - best_k = k; - } - } - } - o = offset2; - state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = 
src; - c++; - state->hash_counter[hash] = c; - -#if QLZ_COMPRESSION_LEVEL == 3 - if(matchlen > 2 && src - o < 131071) - { - ui32 u; - size_t offset = src - o; - - for(u = 1; u < matchlen; u++) - { - hash = hashat(src + u); - c = state->hash_counter[hash]++; - state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src + u; - } - - cword_val = (cword_val >> 1) | (1U << 31); - src += matchlen; - - if(matchlen == 3 && offset <= 63) - { - *dst = (unsigned char)(offset << 2); - dst++; - } - else if (matchlen == 3 && offset <= 16383) - { - ui32 f = (ui32)((offset << 2) | 1); - fast_write(f, dst, 2); - dst += 2; - } - else if (matchlen <= 18 && offset <= 1023) - { - ui32 f = ((matchlen - 3) << 2) | ((ui32)offset << 6) | 2; - fast_write(f, dst, 2); - dst += 2; - } - - else if(matchlen <= 33) - { - ui32 f = ((matchlen - 2) << 2) | ((ui32)offset << 7) | 3; - fast_write(f, dst, 3); - dst += 3; - } - else - { - ui32 f = ((matchlen - 3) << 7) | ((ui32)offset << 15) | 3; - fast_write(f, dst, 4); - dst += 4; - } - } - else - { - *dst = *src; - src++; - dst++; - cword_val = (cword_val >> 1); - } -#elif QLZ_COMPRESSION_LEVEL == 2 - - if(matchlen > 2) - { - cword_val = (cword_val >> 1) | (1U << 31); - src += matchlen; - - if (matchlen < 10) - { - ui32 f = best_k | ((matchlen - 2) << 2) | (hash << 5); - fast_write(f, dst, 2); - dst += 2; - } - else - { - ui32 f = best_k | (matchlen << 16) | (hash << 5); - fast_write(f, dst, 3); - dst += 3; - } - } - else - { - *dst = *src; - src++; - dst++; - cword_val = (cword_val >> 1); - } -#endif - } -#endif - } - while (src <= last_byte) - { - if ((cword_val & 1) == 1) - { - fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN); - cword_ptr = dst; - dst += CWORD_LEN; - cword_val = 1U << 31; - } -#if QLZ_COMPRESSION_LEVEL < 3 - if (src <= last_byte - 3) - { -#if QLZ_COMPRESSION_LEVEL == 1 - ui32 hash, fetchv; - fetchv = fast_read(src, 3); - hash = hash_func(fetch); - state->hash[hash].offset = CAST(src - OFFSET_BASE); - 
state->hash[hash].cache = fetchv; -#elif QLZ_COMPRESSION_LEVEL == 2 - ui32 hash; - unsigned char c; - hash = hashat(src); - c = state->hash_counter[hash]; - state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src; - c++; - state->hash_counter[hash] = c; -#endif - } -#endif - *dst = *src; - src++; - dst++; - cword_val = (cword_val >> 1); - } - - while((cword_val & 1) != 1) - cword_val = (cword_val >> 1); - - fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN); - - // min. size must be 9 bytes so that the qlz_size functions can take 9 bytes as argument - return dst - destination < 9 ? 9 : dst - destination; -} - -static size_t qlz_decompress_core(const unsigned char *source, unsigned char *destination, size_t size, qlz_state_decompress *state, const unsigned char *history) -{ - const unsigned char *src = source + qlz_size_header((const char *)source); - unsigned char *dst = destination; - const unsigned char *last_destination_byte = destination + size - 1; - ui32 cword_val = 1; - const unsigned char *last_matchstart = last_destination_byte - UNCONDITIONAL_MATCHLEN - UNCOMPRESSED_END; - unsigned char *last_hashed = destination - 1; - const unsigned char *last_source_byte = source + qlz_size_compressed((const char *)source) - 1; - static const ui32 bitlut[16] = {4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0}; - - (void) last_source_byte; - (void) last_hashed; - (void) state; - (void) history; - - for(;;) - { - ui32 fetch; - - if (cword_val == 1) - { -#ifdef QLZ_MEMORY_SAFE - if(src + CWORD_LEN - 1 > last_source_byte) - return 0; -#endif - cword_val = fast_read(src, CWORD_LEN); - src += CWORD_LEN; - } - -#ifdef QLZ_MEMORY_SAFE - if(src + 4 - 1 > last_source_byte) - return 0; -#endif - - fetch = fast_read(src, 4); - - if ((cword_val & 1) == 1) - { - ui32 matchlen; - const unsigned char *offset2; - -#if QLZ_COMPRESSION_LEVEL == 1 - ui32 hash; - cword_val = cword_val >> 1; - hash = (fetch >> 4) & 0xfff; - offset2 = (const unsigned char 
*)(size_t)state->hash[hash].offset; - - if((fetch & 0xf) != 0) - { - matchlen = (fetch & 0xf) + 2; - src += 2; - } - else - { - matchlen = *(src + 2); - src += 3; - } - -#elif QLZ_COMPRESSION_LEVEL == 2 - ui32 hash; - unsigned char c; - cword_val = cword_val >> 1; - hash = (fetch >> 5) & 0x7ff; - c = (unsigned char)(fetch & 0x3); - offset2 = state->hash[hash].offset[c]; - - if((fetch & (28)) != 0) - { - matchlen = ((fetch >> 2) & 0x7) + 2; - src += 2; - } - else - { - matchlen = *(src + 2); - src += 3; - } - -#elif QLZ_COMPRESSION_LEVEL == 3 - ui32 offset; - cword_val = cword_val >> 1; - if ((fetch & 3) == 0) - { - offset = (fetch & 0xff) >> 2; - matchlen = 3; - src++; - } - else if ((fetch & 2) == 0) - { - offset = (fetch & 0xffff) >> 2; - matchlen = 3; - src += 2; - } - else if ((fetch & 1) == 0) - { - offset = (fetch & 0xffff) >> 6; - matchlen = ((fetch >> 2) & 15) + 3; - src += 2; - } - else if ((fetch & 127) != 3) - { - offset = (fetch >> 7) & 0x1ffff; - matchlen = ((fetch >> 2) & 0x1f) + 2; - src += 3; - } - else - { - offset = (fetch >> 15); - matchlen = ((fetch >> 7) & 255) + 3; - src += 4; - } - - offset2 = dst - offset; -#endif - -#ifdef QLZ_MEMORY_SAFE - if(offset2 < history || offset2 > dst - MINOFFSET - 1) - return 0; - - if(matchlen > (ui32)(last_destination_byte - dst - UNCOMPRESSED_END + 1)) - return 0; -#endif - - memcpy_up(dst, offset2, matchlen); - dst += matchlen; - -#if QLZ_COMPRESSION_LEVEL <= 2 - update_hash_upto(state, &last_hashed, dst - matchlen); - last_hashed = dst - 1; -#endif - } - else - { - if (dst < last_matchstart) - { - unsigned int n = bitlut[cword_val & 0xf]; -#ifdef X86X64 - *(ui32 *)dst = *(ui32 *)src; -#else - memcpy_up(dst, src, 4); -#endif - cword_val = cword_val >> n; - dst += n; - src += n; -#if QLZ_COMPRESSION_LEVEL <= 2 - update_hash_upto(state, &last_hashed, dst - 3); -#endif - } - else - { - while(dst <= last_destination_byte) - { - if (cword_val == 1) - { - src += CWORD_LEN; - cword_val = 1U << 31; - } -#ifdef 
QLZ_MEMORY_SAFE - if(src >= last_source_byte + 1) - return 0; -#endif - *dst = *src; - dst++; - src++; - cword_val = cword_val >> 1; - } - -#if QLZ_COMPRESSION_LEVEL <= 2 - update_hash_upto(state, &last_hashed, last_destination_byte - 3); // todo, use constant -#endif - return size; - } - - } - } -} - -size_t qlz_compress(const void *source, char *destination, size_t size, qlz_state_compress *state) -{ - size_t r; - ui32 compressed; - size_t base; - - if(size == 0 || size > 0xffffffff - 400) - return 0; - - if(size < 216) - base = 3; - else - base = 9; - -#if QLZ_STREAMING_BUFFER > 0 - if (state->stream_counter + size - 1 >= QLZ_STREAMING_BUFFER) -#endif - { - reset_table_compress(state); - r = base + qlz_compress_core((const unsigned char *)source, (unsigned char*)destination + base, size, state); -#if QLZ_STREAMING_BUFFER > 0 - reset_table_compress(state); -#endif - if(r == base) - { - memcpy(destination + base, source, size); - r = size + base; - compressed = 0; - } - else - { - compressed = 1; - } - state->stream_counter = 0; - } -#if QLZ_STREAMING_BUFFER > 0 - else - { - unsigned char *src = state->stream_buffer + state->stream_counter; - - memcpy(src, source, size); - r = base + qlz_compress_core(src, (unsigned char*)destination + base, size, state); - - if(r == base) - { - memcpy(destination + base, src, size); - r = size + base; - compressed = 0; - reset_table_compress(state); - } - else - { - compressed = 1; - } - state->stream_counter += size; - } -#endif - if(base == 3) - { - *destination = (unsigned char)(0 | compressed); - *(destination + 1) = (unsigned char)r; - *(destination + 2) = (unsigned char)size; - } - else - { - *destination = (unsigned char)(2 | compressed); - fast_write((ui32)r, destination + 1, 4); - fast_write((ui32)size, destination + 5, 4); - } - - *destination |= (QLZ_COMPRESSION_LEVEL << 2); - *destination |= (1 << 6); - *destination |= ((QLZ_STREAMING_BUFFER == 0 ? 0 : (QLZ_STREAMING_BUFFER == 100000 ? 
1 : (QLZ_STREAMING_BUFFER == 1000000 ? 2 : 3))) << 4); - -// 76543210 -// 01SSLLHC - - return r; -} - -size_t qlz_decompress(const char *source, void *destination, qlz_state_decompress *state) -{ - size_t dsiz = qlz_size_decompressed(source); - -#if QLZ_STREAMING_BUFFER > 0 - if (state->stream_counter + qlz_size_decompressed(source) - 1 >= QLZ_STREAMING_BUFFER) -#endif - { - if((*source & 1) == 1) - { - reset_table_decompress(state); - dsiz = qlz_decompress_core((const unsigned char *)source, (unsigned char *)destination, dsiz, state, (const unsigned char *)destination); - } - else - { - memcpy(destination, source + qlz_size_header(source), dsiz); - } - state->stream_counter = 0; - reset_table_decompress(state); - } -#if QLZ_STREAMING_BUFFER > 0 - else - { - unsigned char *dst = state->stream_buffer + state->stream_counter; - if((*source & 1) == 1) - { - dsiz = qlz_decompress_core((const unsigned char *)source, dst, dsiz, state, (const unsigned char *)state->stream_buffer); - } - else - { - memcpy(dst, source + qlz_size_header(source), dsiz); - reset_table_decompress(state); - } - memcpy(destination, dst, dsiz); - state->stream_counter += dsiz; - } -#endif - return dsiz; -} - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/quicklz.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/quicklz.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/quicklz.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/quicklz.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,233 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - 
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. 
- - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#ifndef QLZ_HEADER -#define QLZ_HEADER - -// Fast data compression library -// Copyright (C) 2006-2011 Lasse Mikkel Reinhold -// lar@quicklz.com -// -// QuickLZ can be used for free under the GPL 1, 2 or 3 license (where anything -// released into public must be open source) or under a commercial license if such -// has been acquired (see http://www.quicklz.com/order.html). The commercial license -// does not cover derived or ported versions created by third parties under GPL. - -// You can edit following user settings. Data must be decompressed with the same -// setting of QLZ_COMPRESSION_LEVEL and QLZ_STREAMING_BUFFER as it was compressed -// (see manual). If QLZ_STREAMING_BUFFER > 0, scratch buffers must be initially -// zeroed out (see manual). First #ifndef makes it possible to define settings from -// the outside like the compiler command line. - -// 1.5.0 final - -#ifndef QLZ_COMPRESSION_LEVEL - //#define QLZ_COMPRESSION_LEVEL 1 - //#define QLZ_COMPRESSION_LEVEL 2 - #define QLZ_COMPRESSION_LEVEL 3 - - #define QLZ_STREAMING_BUFFER 0 - //#define QLZ_STREAMING_BUFFER 100000 - //#define QLZ_STREAMING_BUFFER 1000000 - - //#define QLZ_MEMORY_SAFE -#endif - -#define QLZ_VERSION_MAJOR 1 -#define QLZ_VERSION_MINOR 5 -#define QLZ_VERSION_REVISION 0 - -// Using size_t, memset() and memcpy() -#include - -// Verify compression level -#if QLZ_COMPRESSION_LEVEL != 1 && QLZ_COMPRESSION_LEVEL != 2 && QLZ_COMPRESSION_LEVEL != 3 -#error QLZ_COMPRESSION_LEVEL must be 1, 2 or 3 -#endif - -typedef unsigned int ui32; -typedef unsigned short int ui16; - -// Decrease QLZ_POINTERS for level 3 to increase compression speed. Do not touch any other values! -#if QLZ_COMPRESSION_LEVEL == 1 -#define QLZ_POINTERS 1 -#define QLZ_HASH_VALUES 4096 -#elif QLZ_COMPRESSION_LEVEL == 2 -#define QLZ_POINTERS 4 -#define QLZ_HASH_VALUES 2048 -#elif QLZ_COMPRESSION_LEVEL == 3 -#define QLZ_POINTERS 16 -#define QLZ_HASH_VALUES 4096 -#endif - -// Detect if pointer size is 64-bit. 
It's not fatal if some 64-bit target is not detected because this is only for adding an optional 64-bit optimization. -#if defined _LP64 || defined __LP64__ || defined __64BIT__ || _ADDR64 || defined _WIN64 || defined __arch64__ || __WORDSIZE == 64 || (defined __sparc && defined __sparcv9) || defined __x86_64 || defined __amd64 || defined __x86_64__ || defined _M_X64 || defined _M_IA64 || defined __ia64 || defined __IA64__ - #define QLZ_PTR_64 -#endif - -// hash entry -typedef struct -{ -#if QLZ_COMPRESSION_LEVEL == 1 - ui32 cache; -#if defined QLZ_PTR_64 && QLZ_STREAMING_BUFFER == 0 - unsigned int offset; -#else - const unsigned char *offset; -#endif -#else - const unsigned char *offset[QLZ_POINTERS]; -#endif - -} qlz_hash_compress; - -typedef struct -{ -#if QLZ_COMPRESSION_LEVEL == 1 - const unsigned char *offset; -#else - const unsigned char *offset[QLZ_POINTERS]; -#endif -} qlz_hash_decompress; - - -// states -typedef struct -{ - #if QLZ_STREAMING_BUFFER > 0 - unsigned char stream_buffer[QLZ_STREAMING_BUFFER]; - #endif - size_t stream_counter; - qlz_hash_compress hash[QLZ_HASH_VALUES]; - unsigned char hash_counter[QLZ_HASH_VALUES]; -} qlz_state_compress; - - -#if QLZ_COMPRESSION_LEVEL == 1 || QLZ_COMPRESSION_LEVEL == 2 - typedef struct - { -#if QLZ_STREAMING_BUFFER > 0 - unsigned char stream_buffer[QLZ_STREAMING_BUFFER]; -#endif - qlz_hash_decompress hash[QLZ_HASH_VALUES]; - unsigned char hash_counter[QLZ_HASH_VALUES]; - size_t stream_counter; - } qlz_state_decompress; -#elif QLZ_COMPRESSION_LEVEL == 3 - typedef struct - { -#if QLZ_STREAMING_BUFFER > 0 - unsigned char stream_buffer[QLZ_STREAMING_BUFFER]; -#endif -#if QLZ_COMPRESSION_LEVEL <= 2 - qlz_hash_decompress hash[QLZ_HASH_VALUES]; -#endif - size_t stream_counter; - } qlz_state_decompress; -#endif - - -#if defined (__cplusplus) -extern "C" { -#endif - -// Public functions of QuickLZ -size_t qlz_size_decompressed(const char *source); -size_t qlz_size_compressed(const char *source); -size_t 
qlz_compress(const void *source, char *destination, size_t size, qlz_state_compress *state); -size_t qlz_decompress(const char *source, void *destination, qlz_state_decompress *state); -int qlz_get_setting(int setting); - -#if defined (__cplusplus) -} -#endif - -#endif - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/rbuf.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/rbuf.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/rbuf.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/rbuf.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,280 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef RBUF_H -#define RBUF_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include -#include "toku_assert.h" -#include "fttypes.h" -#include "memory.h" -#include - -#include - -struct rbuf { - unsigned char *buf; - unsigned int size; - unsigned int ndone; -}; -#define RBUF_INITIALIZER ((struct rbuf){.buf = NULL, .size=0, .ndone=0}) - -static inline void rbuf_init(struct rbuf *r, unsigned char *buf, unsigned int size) { - r->buf = buf; - r->size = size; - r->ndone = 0; -} - -static inline unsigned int rbuf_get_roffset(struct rbuf *r) { - return r->ndone; -} - -static inline unsigned char rbuf_char (struct rbuf *r) { - assert(r->ndonesize); - return r->buf[r->ndone++]; -} - -static inline void rbuf_ma_uint8_t (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), uint8_t *num) { - *num = rbuf_char(r); -} - -static inline void rbuf_ma_bool (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), bool *b) { - uint8_t n = rbuf_char(r); - *b = (n!=0); -} - -//Read an int that MUST be in network order regardless of disk order -static unsigned int rbuf_network_int (struct rbuf *r) __attribute__((__unused__)); -static unsigned int rbuf_network_int (struct rbuf *r) { - assert(r->ndone+4 <= r->size); - uint32_t result = toku_ntohl(*(uint32_t*)(r->buf+r->ndone)); // This only works on machines where unaligned loads are OK. - r->ndone+=4; - return result; -} - -static unsigned int rbuf_int (struct rbuf *r) { -#if 1 - assert(r->ndone+4 <= r->size); - uint32_t result = toku_dtoh32(*(uint32_t*)(r->buf+r->ndone)); // This only works on machines where unaligned loads are OK. - r->ndone+=4; - return result; -#else - unsigned char c0 = rbuf_char(r); - unsigned char c1 = rbuf_char(r); - unsigned char c2 = rbuf_char(r); - unsigned char c3 = rbuf_char(r); - return ((c0<<24)| - (c1<<16)| - (c2<<8)| - (c3<<0)); -#endif -} - -static inline void rbuf_literal_bytes (struct rbuf *r, bytevec *bytes, unsigned int n_bytes) { - *bytes = &r->buf[r->ndone]; - r->ndone+=n_bytes; - assert(r->ndone<=r->size); -} - -/* Return a pointer into the middle of the buffer. 
*/ -static inline void rbuf_bytes (struct rbuf *r, bytevec *bytes, unsigned int *n_bytes) -{ - *n_bytes = rbuf_int(r); - rbuf_literal_bytes(r, bytes, *n_bytes); -} - -static inline unsigned long long rbuf_ulonglong (struct rbuf *r) { - unsigned i0 = rbuf_int(r); - unsigned i1 = rbuf_int(r); - return ((unsigned long long)(i0)<<32) | ((unsigned long long)(i1)); -} - -static inline signed long long rbuf_longlong (struct rbuf *r) { - return (signed long long)rbuf_ulonglong(r); -} - -static inline DISKOFF rbuf_diskoff (struct rbuf *r) { - return rbuf_ulonglong(r); -} - -static inline LSN rbuf_lsn (struct rbuf *r) { - LSN lsn = {rbuf_ulonglong(r)}; - return lsn; -} - -static inline MSN rbuf_msn (struct rbuf *r) { - MSN msn = {rbuf_ulonglong(r)}; - return msn; -} - -static inline BLOCKNUM rbuf_blocknum (struct rbuf *r) { - BLOCKNUM result = make_blocknum(rbuf_longlong(r)); - return result; -} -static inline void rbuf_ma_BLOCKNUM (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), BLOCKNUM *blocknum) { - *blocknum = rbuf_blocknum(r); -} - -static inline void rbuf_ma_uint32_t (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), uint32_t *num) { - *num = rbuf_int(r); -} - -static inline void rbuf_ma_uint64_t (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), uint64_t *num) { - *num = rbuf_ulonglong(r); -} - - -static inline void rbuf_TXNID (struct rbuf *r, TXNID *txnid) { - *txnid = rbuf_ulonglong(r); -} - -static inline void rbuf_TXNID_PAIR (struct rbuf *r, TXNID_PAIR *txnid) { - txnid->parent_id64 = rbuf_ulonglong(r); - txnid->child_id64 = rbuf_ulonglong(r); -} - -static inline void rbuf_ma_TXNID (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), TXNID *txnid) { - rbuf_TXNID(r, txnid); -} - -static inline void rbuf_ma_TXNID_PAIR (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), TXNID_PAIR *txnid) { - rbuf_TXNID_PAIR(r, txnid); -} - -static inline void rbuf_FILENUM (struct rbuf *r, FILENUM *filenum) { - filenum->fileid = rbuf_int(r); -} 
-static inline void rbuf_ma_FILENUM (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), FILENUM *filenum) { - rbuf_FILENUM(r, filenum); -} - -// 2954 -// Don't try to use the same space, malloc it -static inline void rbuf_FILENUMS(struct rbuf *r, FILENUMS *filenums) { - filenums->num = rbuf_int(r); - filenums->filenums = (FILENUM *) toku_malloc( filenums->num * sizeof(FILENUM) ); - assert(filenums->filenums != NULL); - for (uint32_t i=0; i < filenums->num; i++) { - rbuf_FILENUM(r, &(filenums->filenums[i])); - } -} - -// 2954 -static inline void rbuf_ma_FILENUMS (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), FILENUMS *filenums) { - rbuf_ma_uint32_t(r, ma, &(filenums->num)); - filenums->filenums = (FILENUM *) toku_memarena_malloc(ma, filenums->num * sizeof(FILENUM) ); - assert(filenums->filenums != NULL); - for (uint32_t i=0; i < filenums->num; i++) { - rbuf_ma_FILENUM(r, ma, &(filenums->filenums[i])); - } -} - -// Don't try to use the same space, malloc it -static inline void rbuf_BYTESTRING (struct rbuf *r, BYTESTRING *bs) { - bs->len = rbuf_int(r); - uint32_t newndone = r->ndone + bs->len; - assert(newndone <= r->size); - bs->data = (char *) toku_memdup(&r->buf[r->ndone], (size_t)bs->len); - assert(bs->data); - r->ndone = newndone; -} - -static inline void rbuf_ma_BYTESTRING (struct rbuf *r, MEMARENA ma, BYTESTRING *bs) { - bs->len = rbuf_int(r); - uint32_t newndone = r->ndone + bs->len; - assert(newndone <= r->size); - bs->data = (char *) toku_memarena_memdup(ma, &r->buf[r->ndone], (size_t)bs->len); - assert(bs->data); - r->ndone = newndone; -} - - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/recover.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/recover.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/recover.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/recover.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,1678 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil 
-*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 
8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
-#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include - -#include "ft.h" -#include "log-internal.h" -#include "logcursor.h" -#include "cachetable.h" -#include "checkpoint.h" -#include "txn_manager.h" - -#include - -int tokudb_recovery_trace = 0; // turn on recovery tracing, default off. - -//#define DO_VERIFY_COUNTS -#ifdef DO_VERIFY_COUNTS -#define VERIFY_COUNTS(n) toku_verify_or_set_counts(n, false) -#else -#define VERIFY_COUNTS(n) ((void)0) -#endif - -// time in seconds between recovery progress reports -#define TOKUDB_RECOVERY_PROGRESS_TIME 15 - -enum ss { - BACKWARD_NEWER_CHECKPOINT_END = 1, - BACKWARD_BETWEEN_CHECKPOINT_BEGIN_END, - FORWARD_BETWEEN_CHECKPOINT_BEGIN_END, - FORWARD_NEWER_CHECKPOINT_END, -}; - -struct scan_state { - enum ss ss; - LSN checkpoint_begin_lsn; - LSN checkpoint_end_lsn; - uint64_t checkpoint_end_timestamp; - uint64_t checkpoint_begin_timestamp; - uint32_t checkpoint_num_fassociate; - uint32_t checkpoint_num_xstillopen; - TXNID last_xid; -}; - -static const char *scan_state_strings[] = { - "?", "bw_newer", "bw_between", "fw_between", "fw_newer", -}; - -static void scan_state_init(struct scan_state *ss) { - ss->ss = BACKWARD_NEWER_CHECKPOINT_END; - ss->checkpoint_begin_lsn = ZERO_LSN; - ss->checkpoint_end_lsn = ZERO_LSN; - ss->checkpoint_num_fassociate = 0; - ss->checkpoint_num_xstillopen = 0; - ss->last_xid = 0; -} - -static const char *scan_state_string(struct scan_state *ss) { - assert(BACKWARD_NEWER_CHECKPOINT_END <= ss->ss && ss->ss <= FORWARD_NEWER_CHECKPOINT_END); - return scan_state_strings[ss->ss]; -} - -// File map tuple -struct file_map_tuple { - FILENUM filenum; - FT_HANDLE ft_handle; // NULL ft_handle means it's a rollback file. 
- char *iname; - struct __toku_db fake_db; -}; - -static void file_map_tuple_init(struct file_map_tuple *tuple, FILENUM filenum, FT_HANDLE ft_handle, char *iname) { - tuple->filenum = filenum; - tuple->ft_handle = ft_handle; - tuple->iname = iname; - // use a fake DB for comparisons, using the ft's cmp descriptor - memset(&tuple->fake_db, 0, sizeof(tuple->fake_db)); - tuple->fake_db.cmp_descriptor = &tuple->ft_handle->ft->cmp_descriptor; - tuple->fake_db.descriptor = &tuple->ft_handle->ft->descriptor; -} - -static void file_map_tuple_destroy(struct file_map_tuple *tuple) { - if (tuple->iname) { - toku_free(tuple->iname); - tuple->iname = NULL; - } -} - -// Map filenum to ft_handle -struct file_map { - toku::omt *filenums; -}; - -// The recovery environment -struct recover_env { - DB_ENV *env; - prepared_txn_callback_t prepared_txn_callback; // at the end of recovery, all the prepared txns are passed back to the ydb layer to make them into valid transactions. - keep_cachetable_callback_t keep_cachetable_callback; // after recovery, store the cachetable into the environment. - CACHETABLE ct; - TOKULOGGER logger; - CHECKPOINTER cp; - ft_compare_func bt_compare; - ft_update_func update_function; - generate_row_for_put_func generate_row_for_put; - generate_row_for_del_func generate_row_for_del; - DBT_ARRAY dest_keys; - DBT_ARRAY dest_vals; - struct scan_state ss; - struct file_map fmap; - bool goforward; - bool destroy_logger_at_end; // If true then destroy the logger when we are done. 
If false then set the logger into write-files mode when we are done with recovery.*/ -}; -typedef struct recover_env *RECOVER_ENV; - - -static void file_map_init(struct file_map *fmap) { - XMALLOC(fmap->filenums); - fmap->filenums->create(); -} - -static void file_map_destroy(struct file_map *fmap) { - fmap->filenums->destroy(); - toku_free(fmap->filenums); - fmap->filenums = nullptr; -} - -static uint32_t file_map_get_num_dictionaries(struct file_map *fmap) { - return fmap->filenums->size(); -} - -static void file_map_close_dictionaries(struct file_map *fmap, LSN oplsn) { - int r; - - while (1) { - uint32_t n = fmap->filenums->size(); - if (n == 0) { - break; - } - struct file_map_tuple *tuple; - r = fmap->filenums->fetch(n - 1, &tuple); - assert(r == 0); - r = fmap->filenums->delete_at(n - 1); - assert(r == 0); - assert(tuple->ft_handle); - // Logging is on again, but we must pass the right LSN into close. - if (tuple->ft_handle) { // it's a DB, not a rollback file - toku_ft_handle_close_recovery(tuple->ft_handle, oplsn); - } - file_map_tuple_destroy(tuple); - toku_free(tuple); - } -} - -static int file_map_h(struct file_map_tuple *const &a, const FILENUM &b) { - if (a->filenum.fileid < b.fileid) { - return -1; - } else if (a->filenum.fileid > b.fileid) { - return 1; - } else { - return 0; - } -} - -static int file_map_insert (struct file_map *fmap, FILENUM fnum, FT_HANDLE ft_handle, char *iname) { - struct file_map_tuple *XMALLOC(tuple); - file_map_tuple_init(tuple, fnum, ft_handle, iname); - int r = fmap->filenums->insert(tuple, fnum, nullptr); - return r; -} - -static void file_map_remove(struct file_map *fmap, FILENUM fnum) { - uint32_t idx; - struct file_map_tuple *tuple; - int r = fmap->filenums->find_zero(fnum, &tuple, &idx); - if (r == 0) { - r = fmap->filenums->delete_at(idx); - file_map_tuple_destroy(tuple); - toku_free(tuple); - } -} - -// Look up file info: given FILENUM, return file_map_tuple (or DB_NOTFOUND) -static int file_map_find(struct file_map 
*fmap, FILENUM fnum, struct file_map_tuple **file_map_tuple) { - uint32_t idx; - struct file_map_tuple *tuple; - int r = fmap->filenums->find_zero(fnum, &tuple, &idx); - if (r == 0) { - assert(tuple->filenum.fileid == fnum.fileid); - *file_map_tuple = tuple; - } else { - assert(r == DB_NOTFOUND); - } - return r; -} - -static int recover_env_init (RECOVER_ENV renv, - const char *env_dir, - DB_ENV *env, - prepared_txn_callback_t prepared_txn_callback, - keep_cachetable_callback_t keep_cachetable_callback, - TOKULOGGER logger, - ft_compare_func bt_compare, - ft_update_func update_function, - generate_row_for_put_func generate_row_for_put, - generate_row_for_del_func generate_row_for_del, - size_t cachetable_size) { - int r = 0; - - // If we are passed a logger use it, otherwise create one. - renv->destroy_logger_at_end = logger==NULL; - if (logger) { - renv->logger = logger; - } else { - r = toku_logger_create(&renv->logger); - assert(r == 0); - } - toku_logger_write_log_files(renv->logger, false); - toku_cachetable_create(&renv->ct, cachetable_size ? 
cachetable_size : 1<<25, (LSN){0}, renv->logger); - toku_cachetable_set_env_dir(renv->ct, env_dir); - if (keep_cachetable_callback) keep_cachetable_callback(env, renv->ct); - toku_logger_set_cachetable(renv->logger, renv->ct); - renv->env = env; - renv->prepared_txn_callback = prepared_txn_callback; - renv->keep_cachetable_callback = keep_cachetable_callback; - renv->bt_compare = bt_compare; - renv->update_function = update_function; - renv->generate_row_for_put = generate_row_for_put; - renv->generate_row_for_del = generate_row_for_del; - file_map_init(&renv->fmap); - renv->goforward = false; - renv->cp = toku_cachetable_get_checkpointer(renv->ct); - toku_dbt_array_init(&renv->dest_keys, 1); - toku_dbt_array_init(&renv->dest_vals, 1); - if (tokudb_recovery_trace) - fprintf(stderr, "%s:%d\n", __FUNCTION__, __LINE__); - return r; -} - -static void recover_env_cleanup (RECOVER_ENV renv) { - int r; - - invariant_zero(renv->fmap.filenums->size()); - file_map_destroy(&renv->fmap); - - if (renv->destroy_logger_at_end) { - toku_logger_close_rollback(renv->logger); - r = toku_logger_close(&renv->logger); - assert(r == 0); - } else { - toku_logger_write_log_files(renv->logger, true); - } - - if (renv->keep_cachetable_callback) { - renv->ct = NULL; - } else { - toku_cachetable_close(&renv->ct); - } - toku_dbt_array_destroy(&renv->dest_keys); - toku_dbt_array_destroy(&renv->dest_vals); - - if (tokudb_recovery_trace) - fprintf(stderr, "%s:%d\n", __FUNCTION__, __LINE__); -} - -static const char *recover_state(RECOVER_ENV renv) { - return scan_state_string(&renv->ss); -} - -// Open the file if it is not already open. If it is already open, then do nothing. 
-static int internal_recover_fopen_or_fcreate (RECOVER_ENV renv, bool must_create, int UU(mode), BYTESTRING *bs_iname, FILENUM filenum, uint32_t treeflags, - TOKUTXN txn, uint32_t nodesize, uint32_t basementnodesize, enum toku_compression_method compression_method, LSN max_acceptable_lsn) { - int r = 0; - FT_HANDLE ft_handle = NULL; - char *iname = fixup_fname(bs_iname); - - toku_ft_handle_create(&ft_handle); - toku_ft_set_flags(ft_handle, treeflags); - - if (nodesize != 0) { - toku_ft_handle_set_nodesize(ft_handle, nodesize); - } - - if (basementnodesize != 0) { - toku_ft_handle_set_basementnodesize(ft_handle, basementnodesize); - } - - if (compression_method != TOKU_DEFAULT_COMPRESSION_METHOD) { - toku_ft_handle_set_compression_method(ft_handle, compression_method); - } - - // set the key compare functions - if (!(treeflags & TOKU_DB_KEYCMP_BUILTIN) && renv->bt_compare) { - toku_ft_set_bt_compare(ft_handle, renv->bt_compare); - } - - if (renv->update_function) { - toku_ft_set_update(ft_handle, renv->update_function); - } - - // TODO mode (FUTURE FEATURE) - //mode = mode; - - r = toku_ft_handle_open_recovery(ft_handle, iname, must_create, must_create, renv->ct, txn, filenum, max_acceptable_lsn); - if (r != 0) { - //Note: If ft_handle_open fails, then close_ft will NOT write a header to disk. - //No need to provide lsn, so use the regular toku_ft_handle_close function - toku_ft_handle_close(ft_handle); - toku_free(iname); - if (r == ENOENT) //Not an error to simply be missing. 
- r = 0; - return r; - } - - file_map_insert(&renv->fmap, filenum, ft_handle, iname); - return 0; -} - -static int toku_recover_begin_checkpoint (struct logtype_begin_checkpoint *l, RECOVER_ENV renv) { - int r; - TXN_MANAGER mgr = toku_logger_get_txn_manager(renv->logger); - switch (renv->ss.ss) { - case FORWARD_BETWEEN_CHECKPOINT_BEGIN_END: - assert(l->lsn.lsn == renv->ss.checkpoint_begin_lsn.lsn); - invariant(renv->ss.last_xid == TXNID_NONE); - renv->ss.last_xid = l->last_xid; - toku_txn_manager_set_last_xid_from_recovered_checkpoint(mgr, l->last_xid); - - r = 0; - break; - case FORWARD_NEWER_CHECKPOINT_END: - assert(l->lsn.lsn > renv->ss.checkpoint_end_lsn.lsn); - // Verify last_xid is no older than the previous begin - invariant(l->last_xid >= renv->ss.last_xid); - // Verify last_xid is no older than the newest txn - invariant(l->last_xid >= toku_txn_manager_get_last_xid(mgr)); - - r = 0; // ignore it (log only has a begin checkpoint) - break; - default: - fprintf(stderr, "Tokudb recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss); - abort(); - break; - } - return r; -} - -static int toku_recover_backward_begin_checkpoint (struct logtype_begin_checkpoint *l, RECOVER_ENV renv) { - int r; - time_t tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery bw_begin_checkpoint at %" PRIu64 " timestamp %" PRIu64 " (%s)\n", ctime(&tnow), l->lsn.lsn, l->timestamp, recover_state(renv)); - switch (renv->ss.ss) { - case BACKWARD_NEWER_CHECKPOINT_END: - // incomplete checkpoint, nothing to do - r = 0; - break; - case BACKWARD_BETWEEN_CHECKPOINT_BEGIN_END: - assert(l->lsn.lsn == renv->ss.checkpoint_begin_lsn.lsn); - renv->ss.ss = FORWARD_BETWEEN_CHECKPOINT_BEGIN_END; - renv->ss.checkpoint_begin_timestamp = l->timestamp; - renv->goforward = true; - tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery turning around at begin checkpoint %" PRIu64 " time %" PRIu64 "\n", - ctime(&tnow), l->lsn.lsn, - renv->ss.checkpoint_end_timestamp - 
renv->ss.checkpoint_begin_timestamp); - r = 0; - break; - default: - fprintf(stderr, "Tokudb recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss); - abort(); - break; - } - return r; -} - -static int toku_recover_end_checkpoint (struct logtype_end_checkpoint *l, RECOVER_ENV renv) { - int r; - switch (renv->ss.ss) { - case FORWARD_BETWEEN_CHECKPOINT_BEGIN_END: - assert(l->lsn_begin_checkpoint.lsn == renv->ss.checkpoint_begin_lsn.lsn); - assert(l->lsn.lsn == renv->ss.checkpoint_end_lsn.lsn); - assert(l->num_fassociate_entries == renv->ss.checkpoint_num_fassociate); - assert(l->num_xstillopen_entries == renv->ss.checkpoint_num_xstillopen); - renv->ss.ss = FORWARD_NEWER_CHECKPOINT_END; - r = 0; - break; - case FORWARD_NEWER_CHECKPOINT_END: - assert(0); - return 0; - default: - assert(0); - return 0; - } - return r; -} - -static int toku_recover_backward_end_checkpoint (struct logtype_end_checkpoint *l, RECOVER_ENV renv) { - time_t tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery bw_end_checkpoint at %" PRIu64 " timestamp %" PRIu64 " xid %" PRIu64 " (%s)\n", ctime(&tnow), l->lsn.lsn, l->timestamp, l->lsn_begin_checkpoint.lsn, recover_state(renv)); - switch (renv->ss.ss) { - case BACKWARD_NEWER_CHECKPOINT_END: - renv->ss.ss = BACKWARD_BETWEEN_CHECKPOINT_BEGIN_END; - renv->ss.checkpoint_begin_lsn.lsn = l->lsn_begin_checkpoint.lsn; - renv->ss.checkpoint_end_lsn.lsn = l->lsn.lsn; - renv->ss.checkpoint_end_timestamp = l->timestamp; - return 0; - case BACKWARD_BETWEEN_CHECKPOINT_BEGIN_END: - fprintf(stderr, "Tokudb recovery %s:%d Should not see two end_checkpoint log entries without an intervening begin_checkpoint\n", __FILE__, __LINE__); - abort(); - default: - break; - } - fprintf(stderr, "Tokudb recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss); - abort(); -} - -static int toku_recover_fassociate (struct logtype_fassociate *l, RECOVER_ENV renv) { - struct file_map_tuple *tuple = NULL; - int r = 
file_map_find(&renv->fmap, l->filenum, &tuple); - char *fname = fixup_fname(&l->iname); - switch (renv->ss.ss) { - case FORWARD_BETWEEN_CHECKPOINT_BEGIN_END: - renv->ss.checkpoint_num_fassociate++; - assert(r==DB_NOTFOUND); //Not open - // Open it if it exists. - // If rollback file, specify which checkpointed version of file we need (not just the latest) - // because we cannot use a rollback log that is later than the last complete checkpoint. See #3113. - { - bool rollback_file = (0==strcmp(fname, toku_product_name_strings.rollback_cachefile)); - LSN max_acceptable_lsn = MAX_LSN; - if (rollback_file) { - max_acceptable_lsn = renv->ss.checkpoint_begin_lsn; - FT_HANDLE t; - toku_ft_handle_create(&t); - r = toku_ft_handle_open_recovery(t, toku_product_name_strings.rollback_cachefile, false, false, renv->ct, (TOKUTXN)NULL, l->filenum, max_acceptable_lsn); - renv->logger->rollback_cachefile = t->ft->cf; - toku_logger_initialize_rollback_cache(renv->logger, t->ft); - } else { - r = internal_recover_fopen_or_fcreate(renv, false, 0, &l->iname, l->filenum, l->treeflags, NULL, 0, 0, TOKU_DEFAULT_COMPRESSION_METHOD, max_acceptable_lsn); - assert(r==0); - } - } - // try to open the file again and if we get it, restore - // the unlink on close bit. 
- int ret; - ret = file_map_find(&renv->fmap, l->filenum, &tuple); - if (ret == 0 && l->unlink_on_close) { - toku_cachefile_unlink_on_close(tuple->ft_handle->ft->cf); - } - break; - case FORWARD_NEWER_CHECKPOINT_END: - if (r == 0) { //IF it is open - // assert that the filenum maps to the correct iname - assert(strcmp(fname, tuple->iname) == 0); - } - r = 0; - break; - default: - assert(0); - return 0; - } - toku_free(fname); - - return r; -} - -static int toku_recover_backward_fassociate (struct logtype_fassociate *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -static int -recover_transaction(TOKUTXN *txnp, TXNID_PAIR xid, TXNID_PAIR parentxid, TOKULOGGER logger) { - int r; - - // lookup the parent - TOKUTXN parent = NULL; - if (!txn_pair_is_none(parentxid)) { - toku_txnid2txn(logger, parentxid, &parent); - assert(parent!=NULL); - } - else { - invariant(xid.child_id64 == TXNID_NONE); - } - - // create a transaction and bind it to the transaction id - TOKUTXN txn = NULL; - { - //Verify it does not yet exist. - toku_txnid2txn(logger, xid, &txn); - assert(txn==NULL); - } - r = toku_txn_begin_with_xid( - parent, - &txn, - logger, - xid, - TXN_SNAPSHOT_NONE, - NULL, - true, // for_recovery - false // read_only - ); - assert(r == 0); - // We only know about it because it was logged. Restore the log bit. - // Logging is 'off' but it will still set the bit. 
- toku_maybe_log_begin_txn_for_write_operation(txn); - if (txnp) *txnp = txn; - return 0; -} - -static int recover_xstillopen_internal (TOKUTXN *txnp, - LSN UU(lsn), - TXNID_PAIR xid, - TXNID_PAIR parentxid, - uint64_t rollentry_raw_count, - FILENUMS open_filenums, - bool force_fsync_on_commit, - uint64_t num_rollback_nodes, - uint64_t num_rollentries, - BLOCKNUM spilled_rollback_head, - BLOCKNUM spilled_rollback_tail, - BLOCKNUM current_rollback, - uint32_t UU(crc), - uint32_t UU(len), - RECOVER_ENV renv) { - int r; - *txnp = NULL; - switch (renv->ss.ss) { - case FORWARD_BETWEEN_CHECKPOINT_BEGIN_END: { - renv->ss.checkpoint_num_xstillopen++; - invariant(renv->ss.last_xid != TXNID_NONE); - invariant(xid.parent_id64 <= renv->ss.last_xid); - TOKUTXN txn = NULL; - { //Create the transaction. - r = recover_transaction(&txn, xid, parentxid, renv->logger); - assert(r==0); - assert(txn!=NULL); - *txnp = txn; - } - { //Recover rest of transaction. -#define COPY_TO_INFO(field) .field = field - struct txninfo info = { - COPY_TO_INFO(rollentry_raw_count), - .num_fts = 0, //Set afterwards - .open_fts = NULL, //Set afterwards - COPY_TO_INFO(force_fsync_on_commit), - COPY_TO_INFO(num_rollback_nodes), - COPY_TO_INFO(num_rollentries), - COPY_TO_INFO(spilled_rollback_head), - COPY_TO_INFO(spilled_rollback_tail), - COPY_TO_INFO(current_rollback) - }; -#undef COPY_TO_INFO - //Generate open_fts - FT array[open_filenums.num]; //Allocate maximum possible requirement - info.open_fts = array; - uint32_t i; - for (i = 0; i < open_filenums.num; i++) { - //open_filenums.filenums[] - struct file_map_tuple *tuple = NULL; - r = file_map_find(&renv->fmap, open_filenums.filenums[i], &tuple); - if (r==0) { - info.open_fts[info.num_fts++] = tuple->ft_handle->ft; - } - else { - assert(r==DB_NOTFOUND); - } - } - r = toku_txn_load_txninfo(txn, &info); - assert(r==0); - } - break; - } - case FORWARD_NEWER_CHECKPOINT_END: { - // assert that the transaction exists - TOKUTXN txn = NULL; - 
toku_txnid2txn(renv->logger, xid, &txn); - r = 0; - *txnp = txn; - break; - } - default: - assert(0); - return 0; - } - return r; -} - -static int toku_recover_xstillopen (struct logtype_xstillopen *l, RECOVER_ENV renv) { - TOKUTXN txn; - return recover_xstillopen_internal (&txn, - l->lsn, - l->xid, - l->parentxid, - l->rollentry_raw_count, - l->open_filenums, - l->force_fsync_on_commit, - l->num_rollback_nodes, - l->num_rollentries, - l->spilled_rollback_head, - l->spilled_rollback_tail, - l->current_rollback, - l->crc, - l->len, - renv); -} - -static int toku_recover_xstillopenprepared (struct logtype_xstillopenprepared *l, RECOVER_ENV renv) { - TOKUTXN txn; - int r = recover_xstillopen_internal (&txn, - l->lsn, - l->xid, - TXNID_PAIR_NONE, - l->rollentry_raw_count, - l->open_filenums, - l->force_fsync_on_commit, - l->num_rollback_nodes, - l->num_rollentries, - l->spilled_rollback_head, - l->spilled_rollback_tail, - l->current_rollback, - l->crc, - l->len, - renv); - if (r != 0) { - goto exit; - } - switch (renv->ss.ss) { - case FORWARD_BETWEEN_CHECKPOINT_BEGIN_END: { - toku_txn_prepare_txn(txn, l->xa_xid); - break; - } - case FORWARD_NEWER_CHECKPOINT_END: { - assert(txn->state == TOKUTXN_PREPARING); - break; - } - default: { - assert(0); - } - } -exit: - return r; -} - -static int toku_recover_backward_xstillopen (struct logtype_xstillopen *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} -static int toku_recover_backward_xstillopenprepared (struct logtype_xstillopenprepared *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -static int toku_recover_xbegin (struct logtype_xbegin *l, RECOVER_ENV renv) { - int r; - r = recover_transaction(NULL, l->xid, l->parentxid, renv->logger); - return r; -} - -static int toku_recover_backward_xbegin (struct logtype_xbegin *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -static int toku_recover_xcommit (struct logtype_xcommit *l, RECOVER_ENV renv) { - // find the transaction by transaction id 
- TOKUTXN txn = NULL; - toku_txnid2txn(renv->logger, l->xid, &txn); - assert(txn!=NULL); - - // commit the transaction - int r = toku_txn_commit_with_lsn(txn, true, l->lsn, - NULL, NULL); - assert(r == 0); - - // close the transaction - toku_txn_close_txn(txn); - - return 0; -} - -static int toku_recover_backward_xcommit (struct logtype_xcommit *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -static int toku_recover_xprepare (struct logtype_xprepare *l, RECOVER_ENV renv) { - // find the transaction by transaction id - TOKUTXN txn = NULL; - toku_txnid2txn(renv->logger, l->xid, &txn); - assert(txn!=NULL); - - // Save the transaction - toku_txn_prepare_txn(txn, l->xa_xid); - - return 0; -} - -static int toku_recover_backward_xprepare (struct logtype_xprepare *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - - - -static int toku_recover_xabort (struct logtype_xabort *l, RECOVER_ENV renv) { - int r; - - // find the transaction by transaction id - TOKUTXN txn = NULL; - toku_txnid2txn(renv->logger, l->xid, &txn); - assert(txn!=NULL); - - // abort the transaction - r = toku_txn_abort_with_lsn(txn, l->lsn, NULL, NULL); - assert(r == 0); - - // close the transaction - toku_txn_close_txn(txn); - - return 0; -} - -static int toku_recover_backward_xabort (struct logtype_xabort *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -// fcreate is like fopen except that the file must be created. -static int toku_recover_fcreate (struct logtype_fcreate *l, RECOVER_ENV renv) { - int r; - - TOKUTXN txn = NULL; - toku_txnid2txn(renv->logger, l->xid, &txn); - - // assert that filenum is closed - struct file_map_tuple *tuple = NULL; - r = file_map_find(&renv->fmap, l->filenum, &tuple); - assert(r==DB_NOTFOUND); - - assert(txn!=NULL); - - //unlink if it exists (recreate from scratch). 
- char *iname = fixup_fname(&l->iname); - char *iname_in_cwd = toku_cachetable_get_fname_in_cwd(renv->ct, iname); - r = unlink(iname_in_cwd); - if (r != 0) { - int er = get_error_errno(); - if (er != ENOENT) { - fprintf(stderr, "Tokudb recovery %s:%d unlink %s %d\n", __FUNCTION__, __LINE__, iname, er); - toku_free(iname); - return r; - } - } - assert(0!=strcmp(iname, toku_product_name_strings.rollback_cachefile)); //Creation of rollback cachefile never gets logged. - toku_free(iname_in_cwd); - toku_free(iname); - - bool must_create = true; - r = internal_recover_fopen_or_fcreate(renv, must_create, l->mode, &l->iname, l->filenum, l->treeflags, txn, l->nodesize, l->basementnodesize, (enum toku_compression_method) l->compression_method, MAX_LSN); - return r; -} - -static int toku_recover_backward_fcreate (struct logtype_fcreate *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - - - -static int toku_recover_fopen (struct logtype_fopen *l, RECOVER_ENV renv) { - int r; - - // assert that filenum is closed - struct file_map_tuple *tuple = NULL; - r = file_map_find(&renv->fmap, l->filenum, &tuple); - assert(r==DB_NOTFOUND); - - bool must_create = false; - TOKUTXN txn = NULL; - char *fname = fixup_fname(&l->iname); - - assert(0!=strcmp(fname, toku_product_name_strings.rollback_cachefile)); //Rollback cachefile can be opened only via fassociate. 
- r = internal_recover_fopen_or_fcreate(renv, must_create, 0, &l->iname, l->filenum, l->treeflags, txn, 0, 0, TOKU_DEFAULT_COMPRESSION_METHOD, MAX_LSN); - - toku_free(fname); - return r; -} - -static int toku_recover_backward_fopen (struct logtype_fopen *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -static int toku_recover_change_fdescriptor (struct logtype_change_fdescriptor *l, RECOVER_ENV renv) { - int r; - struct file_map_tuple *tuple = NULL; - r = file_map_find(&renv->fmap, l->filenum, &tuple); - if (r==0) { - TOKUTXN txn = NULL; - //Maybe do the descriptor (lsn filter) - toku_txnid2txn(renv->logger, l->xid, &txn); - DBT old_descriptor, new_descriptor; - toku_fill_dbt( - &old_descriptor, - l->old_descriptor.data, - l->old_descriptor.len - ); - toku_fill_dbt( - &new_descriptor, - l->new_descriptor.data, - l->new_descriptor.len - ); - toku_ft_change_descriptor( - tuple->ft_handle, - &old_descriptor, - &new_descriptor, - false, - txn, - l->update_cmp_descriptor - ); - } - return 0; -} - -static int toku_recover_backward_change_fdescriptor (struct logtype_change_fdescriptor *UU(l), RECOVER_ENV UU(renv)) { - return 0; -} - - -// if file referred to in l is open, close it -static int toku_recover_fclose (struct logtype_fclose *l, RECOVER_ENV renv) { - struct file_map_tuple *tuple = NULL; - int r = file_map_find(&renv->fmap, l->filenum, &tuple); - if (r == 0) { // if file is open - char *iname = fixup_fname(&l->iname); - assert(strcmp(tuple->iname, iname) == 0); // verify that file_map has same iname as log entry - - if (0!=strcmp(iname, toku_product_name_strings.rollback_cachefile)) { - //Rollback cachefile is closed manually at end of recovery, not here - toku_ft_handle_close_recovery(tuple->ft_handle, l->lsn); - } - file_map_remove(&renv->fmap, l->filenum); - toku_free(iname); - } - return 0; -} - -static int toku_recover_backward_fclose (struct logtype_fclose *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -// fdelete is a 
transactional file delete. -static int toku_recover_fdelete (struct logtype_fdelete *l, RECOVER_ENV renv) { - TOKUTXN txn = NULL; - toku_txnid2txn(renv->logger, l->xid, &txn); - assert(txn != NULL); - - // if the forward scan in recovery found this file and opened it, we - // need to mark the txn to remove the ft on commit. if the file was - // not found and not opened, we don't need to do anything - the ft - // is already gone, so we're happy. - struct file_map_tuple *tuple; - int r = file_map_find(&renv->fmap, l->filenum, &tuple); - if (r == 0) { - toku_ft_unlink_on_commit(tuple->ft_handle, txn); - } - return 0; -} - -static int toku_recover_backward_fdelete (struct logtype_fdelete *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -static int toku_recover_enq_insert (struct logtype_enq_insert *l, RECOVER_ENV renv) { - int r; - TOKUTXN txn = NULL; - toku_txnid2txn(renv->logger, l->xid, &txn); - assert(txn!=NULL); - struct file_map_tuple *tuple = NULL; - r = file_map_find(&renv->fmap, l->filenum, &tuple); - if (r==0) { - //Maybe do the insertion if we found the cachefile. - DBT keydbt, valdbt; - toku_fill_dbt(&keydbt, l->key.data, l->key.len); - toku_fill_dbt(&valdbt, l->value.data, l->value.len); - toku_ft_maybe_insert(tuple->ft_handle, &keydbt, &valdbt, txn, true, l->lsn, false, FT_INSERT); - toku_txn_maybe_note_ft(txn, tuple->ft_handle->ft); - } - return 0; -} - -static int toku_recover_backward_enq_insert (struct logtype_enq_insert *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -static int toku_recover_enq_insert_no_overwrite (struct logtype_enq_insert_no_overwrite *l, RECOVER_ENV renv) { - int r; - TOKUTXN txn = NULL; - toku_txnid2txn(renv->logger, l->xid, &txn); - assert(txn!=NULL); - struct file_map_tuple *tuple = NULL; - r = file_map_find(&renv->fmap, l->filenum, &tuple); - if (r==0) { - //Maybe do the insertion if we found the cachefile. 
- DBT keydbt, valdbt; - toku_fill_dbt(&keydbt, l->key.data, l->key.len); - toku_fill_dbt(&valdbt, l->value.data, l->value.len); - toku_ft_maybe_insert(tuple->ft_handle, &keydbt, &valdbt, txn, true, l->lsn, false, FT_INSERT_NO_OVERWRITE); - } - return 0; -} - -static int toku_recover_backward_enq_insert_no_overwrite (struct logtype_enq_insert_no_overwrite *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -static int toku_recover_enq_delete_any (struct logtype_enq_delete_any *l, RECOVER_ENV renv) { - int r; - TOKUTXN txn = NULL; - toku_txnid2txn(renv->logger, l->xid, &txn); - assert(txn!=NULL); - struct file_map_tuple *tuple = NULL; - r = file_map_find(&renv->fmap, l->filenum, &tuple); - if (r==0) { - //Maybe do the deletion if we found the cachefile. - DBT keydbt; - toku_fill_dbt(&keydbt, l->key.data, l->key.len); - toku_ft_maybe_delete(tuple->ft_handle, &keydbt, txn, true, l->lsn, false); - } - return 0; -} - -static int toku_recover_backward_enq_delete_any (struct logtype_enq_delete_any *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -static int toku_recover_enq_insert_multiple (struct logtype_enq_insert_multiple *l, RECOVER_ENV renv) { - int r; - TOKUTXN txn = NULL; - toku_txnid2txn(renv->logger, l->xid, &txn); - assert(txn!=NULL); - DB *src_db = NULL; - bool do_inserts = true; - { - struct file_map_tuple *tuple = NULL; - r = file_map_find(&renv->fmap, l->src_filenum, &tuple); - if (l->src_filenum.fileid == FILENUM_NONE.fileid) - assert(r==DB_NOTFOUND); - else { - if (r == 0) - src_db = &tuple->fake_db; - else - do_inserts = false; // src file was probably deleted, #3129 - } - } - - if (do_inserts) { - DBT src_key, src_val; - - toku_fill_dbt(&src_key, l->src_key.data, l->src_key.len); - toku_fill_dbt(&src_val, l->src_val.data, l->src_val.len); - - for (uint32_t file = 0; file < l->dest_filenums.num; file++) { - struct file_map_tuple *tuple = NULL; - r = file_map_find(&renv->fmap, l->dest_filenums.filenums[file], &tuple); - if (r==0) { - 
// We found the cachefile. (maybe) Do the insert. - DB *db = &tuple->fake_db; - - DBT_ARRAY key_array; - DBT_ARRAY val_array; - if (db != src_db) { - r = renv->generate_row_for_put(db, src_db, &renv->dest_keys, &renv->dest_vals, &src_key, &src_val); - assert(r==0); - invariant(renv->dest_keys.size <= renv->dest_keys.capacity); - invariant(renv->dest_vals.size <= renv->dest_vals.capacity); - invariant(renv->dest_keys.size == renv->dest_vals.size); - key_array = renv->dest_keys; - val_array = renv->dest_vals; - } else { - key_array.size = key_array.capacity = 1; - key_array.dbts = &src_key; - - val_array.size = val_array.capacity = 1; - val_array.dbts = &src_val; - } - for (uint32_t i = 0; i < key_array.size; i++) { - toku_ft_maybe_insert(tuple->ft_handle, &key_array.dbts[i], &val_array.dbts[i], txn, true, l->lsn, false, FT_INSERT); - } - } - } - } - - return 0; -} - -static int toku_recover_backward_enq_insert_multiple (struct logtype_enq_insert_multiple *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -static int toku_recover_enq_delete_multiple (struct logtype_enq_delete_multiple *l, RECOVER_ENV renv) { - int r; - TOKUTXN txn = NULL; - toku_txnid2txn(renv->logger, l->xid, &txn); - assert(txn!=NULL); - DB *src_db = NULL; - bool do_deletes = true; - { - struct file_map_tuple *tuple = NULL; - r = file_map_find(&renv->fmap, l->src_filenum, &tuple); - if (l->src_filenum.fileid == FILENUM_NONE.fileid) - assert(r==DB_NOTFOUND); - else { - if (r == 0) { - src_db = &tuple->fake_db; - } else { - do_deletes = false; // src file was probably deleted, #3129 - } - } - } - - if (do_deletes) { - DBT src_key, src_val; - toku_fill_dbt(&src_key, l->src_key.data, l->src_key.len); - toku_fill_dbt(&src_val, l->src_val.data, l->src_val.len); - - for (uint32_t file = 0; file < l->dest_filenums.num; file++) { - struct file_map_tuple *tuple = NULL; - r = file_map_find(&renv->fmap, l->dest_filenums.filenums[file], &tuple); - if (r==0) { - // We found the cachefile. 
(maybe) Do the delete. - DB *db = &tuple->fake_db; - - DBT_ARRAY key_array; - if (db != src_db) { - r = renv->generate_row_for_del(db, src_db, &renv->dest_keys, &src_key, &src_val); - assert(r==0); - invariant(renv->dest_keys.size <= renv->dest_keys.capacity); - key_array = renv->dest_keys; - } else { - key_array.size = key_array.capacity = 1; - key_array.dbts = &src_key; - } - for (uint32_t i = 0; i < key_array.size; i++) { - toku_ft_maybe_delete(tuple->ft_handle, &key_array.dbts[i], txn, true, l->lsn, false); - } - } - } - } - - return 0; -} - -static int toku_recover_backward_enq_delete_multiple (struct logtype_enq_delete_multiple *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -static int toku_recover_enq_update(struct logtype_enq_update *l, RECOVER_ENV renv) { - int r; - TOKUTXN txn = NULL; - toku_txnid2txn(renv->logger, l->xid, &txn); - assert(txn != NULL); - struct file_map_tuple *tuple = NULL; - r = file_map_find(&renv->fmap, l->filenum, &tuple); - if (r == 0) { - // Maybe do the update if we found the cachefile. - DBT key, extra; - toku_fill_dbt(&key, l->key.data, l->key.len); - toku_fill_dbt(&extra, l->extra.data, l->extra.len); - toku_ft_maybe_update(tuple->ft_handle, &key, &extra, txn, true, l->lsn, false); - } - return 0; -} - -static int toku_recover_enq_updatebroadcast(struct logtype_enq_updatebroadcast *l, RECOVER_ENV renv) { - int r; - TOKUTXN txn = NULL; - toku_txnid2txn(renv->logger, l->xid, &txn); - assert(txn != NULL); - struct file_map_tuple *tuple = NULL; - r = file_map_find(&renv->fmap, l->filenum, &tuple); - if (r == 0) { - // Maybe do the update broadcast if we found the cachefile. 
- DBT extra; - toku_fill_dbt(&extra, l->extra.data, l->extra.len); - toku_ft_maybe_update_broadcast(tuple->ft_handle, &extra, txn, true, - l->lsn, false, l->is_resetting_op); - } - return 0; -} - -static int toku_recover_backward_enq_update(struct logtype_enq_update *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -static int toku_recover_backward_enq_updatebroadcast(struct logtype_enq_updatebroadcast *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -static int toku_recover_comment (struct logtype_comment *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -static int toku_recover_backward_comment (struct logtype_comment *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -static int toku_recover_shutdown_up_to_19 (struct logtype_shutdown_up_to_19 *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -static int toku_recover_backward_shutdown_up_to_19 (struct logtype_shutdown_up_to_19 *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -static int toku_recover_shutdown (struct logtype_shutdown *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -static int toku_recover_backward_shutdown (struct logtype_shutdown *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -static int toku_recover_load(struct logtype_load *UU(l), RECOVER_ENV UU(renv)) { - TOKUTXN txn = NULL; - toku_txnid2txn(renv->logger, l->xid, &txn); - assert(txn!=NULL); - char *new_iname = fixup_fname(&l->new_iname); - - toku_ft_load_recovery(txn, l->old_filenum, new_iname, 0, 0, (LSN*)NULL); - - toku_free(new_iname); - return 0; -} - -static int toku_recover_backward_load(struct logtype_load *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -// #2954 -static int toku_recover_hot_index(struct logtype_hot_index *UU(l), RECOVER_ENV UU(renv)) { - TOKUTXN txn = NULL; - toku_txnid2txn(renv->logger, l->xid, &txn); - assert(txn!=NULL); - // just make an entry in the rollback log - // - set do_log = 0 -> don't write to 
recovery log - toku_ft_hot_index_recovery(txn, l->hot_index_filenums, 0, 0, (LSN*)NULL); - return 0; -} - -// #2954 -static int toku_recover_backward_hot_index(struct logtype_hot_index *UU(l), RECOVER_ENV UU(renv)) { - // nothing - return 0; -} - -// Effects: If there are no log files, or if there is a clean "shutdown" at -// the end of the log, then we don't need recovery to run. -// Returns: true if we need recovery, otherwise false. -int tokudb_needs_recovery(const char *log_dir, bool ignore_log_empty) { - int needs_recovery; - int r; - TOKULOGCURSOR logcursor = NULL; - - r = toku_logcursor_create(&logcursor, log_dir); - if (r != 0) { - needs_recovery = true; goto exit; - } - - struct log_entry *le; - le = NULL; - r = toku_logcursor_last(logcursor, &le); - if (r == 0) { - needs_recovery = le->cmd != LT_shutdown; - } - else { - needs_recovery = !(r == DB_NOTFOUND && ignore_log_empty); - } - exit: - if (logcursor) { - r = toku_logcursor_destroy(&logcursor); - assert(r == 0); - } - return needs_recovery; -} - -static uint32_t recover_get_num_live_txns(RECOVER_ENV renv) { - return toku_txn_manager_num_live_root_txns(renv->logger->txn_manager); -} - -static int is_txn_unprepared(TOKUTXN txn, void* extra) { - TOKUTXN* ptxn = (TOKUTXN *)extra; - if (txn->state != TOKUTXN_PREPARING) { - *ptxn = txn; - return -1; // return -1 to get iterator to return - } - return 0; -} - - -static int find_an_unprepared_txn (RECOVER_ENV renv, TOKUTXN *txnp) { - TOKUTXN txn = nullptr; - int r = toku_txn_manager_iter_over_live_root_txns( - renv->logger->txn_manager, - is_txn_unprepared, - &txn - ); - assert(r == 0 || r == -1); - if (txn != nullptr) { - *txnp = txn; - return 0; - } - return DB_NOTFOUND; -} - -static int call_prepare_txn_callback_iter(TOKUTXN txn, void* extra) { - RECOVER_ENV* renv = (RECOVER_ENV *)extra; - invariant(txn->state == TOKUTXN_PREPARING); - invariant(txn->child == NULL); - (*renv)->prepared_txn_callback((*renv)->env, txn); - return 0; -} - -static void 
recover_abort_live_txn(TOKUTXN txn) { - // recursively abort all children first - if (txn->child != NULL) { - recover_abort_live_txn(txn->child); - } - // sanity check that the recursive call successfully NULLs out txn->child - invariant(txn->child == NULL); - // abort the transaction - int r = toku_txn_abort_txn(txn, NULL, NULL); - assert(r == 0); - - // close the transaction - toku_txn_close_txn(txn); -} - -// abort all of the remaining live transactions in descending transaction id order -static void recover_abort_all_live_txns(RECOVER_ENV renv) { - while (1) { - TOKUTXN txn; - int r = find_an_unprepared_txn(renv, &txn); - if (r==0) { - recover_abort_live_txn(txn); - } else if (r==DB_NOTFOUND) { - break; - } else { - abort(); - } - } - - // Now we have only prepared txns. These prepared txns don't have full DB_TXNs in them, so we need to make some. - int r = toku_txn_manager_iter_over_live_root_txns( - renv->logger->txn_manager, - call_prepare_txn_callback_iter, - &renv - ); - assert_zero(r); -} - -static void recover_trace_le(const char *f, int l, int r, struct log_entry *le) { - if (le) { - LSN thislsn = toku_log_entry_get_lsn(le); - fprintf(stderr, "%s:%d r=%d cmd=%c lsn=%" PRIu64 "\n", f, l, r, le->cmd, thislsn.lsn); - } else - fprintf(stderr, "%s:%d r=%d cmd=?\n", f, l, r); -} - -// For test purposes only. 
-static void (*recover_callback_fx)(void*) = NULL; -static void * recover_callback_args = NULL; -static void (*recover_callback2_fx)(void*) = NULL; -static void * recover_callback2_args = NULL; - - -static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_dir) { - int r; - int rr = 0; - TOKULOGCURSOR logcursor = NULL; - struct log_entry *le = NULL; - - time_t tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery starting in env %s\n", ctime(&tnow), env_dir); - - char org_wd[1000]; - { - char *wd=getcwd(org_wd, sizeof(org_wd)); - assert(wd!=0); - } - - r = toku_logger_open(log_dir, renv->logger); - assert(r == 0); - - // grab the last LSN so that it can be restored when the log is restarted - LSN lastlsn = toku_logger_last_lsn(renv->logger); - LSN thislsn; - - // there must be at least one log entry - r = toku_logcursor_create(&logcursor, log_dir); - assert(r == 0); - - r = toku_logcursor_last(logcursor, &le); - if (r != 0) { - if (tokudb_recovery_trace) - fprintf(stderr, "RUNRECOVERY: %s:%d r=%d\n", __FUNCTION__, __LINE__, r); - rr = DB_RUNRECOVERY; goto errorexit; - } - - r = toku_logcursor_destroy(&logcursor); - assert(r == 0); - - r = toku_logcursor_create(&logcursor, log_dir); - assert(r == 0); - - { - toku_struct_stat buf; - if (toku_stat(env_dir, &buf)!=0) { - rr = get_error_errno(); - fprintf(stderr, "%.24s Tokudb recovery error: directory does not exist: %s\n", ctime(&tnow), env_dir); - goto errorexit; - } else if (!S_ISDIR(buf.st_mode)) { - fprintf(stderr, "%.24s Tokudb recovery error: this file is supposed to be a directory, but is not: %s\n", ctime(&tnow), env_dir); - rr = ENOTDIR; goto errorexit; - } - } - // scan backwards - scan_state_init(&renv->ss); - tnow = time(NULL); - time_t tlast; - tlast = tnow; - fprintf(stderr, "%.24s Tokudb recovery scanning backward from %" PRIu64 "\n", ctime(&tnow), lastlsn.lsn); - for (unsigned i=0; 1; i++) { - - // get the previous log entry (first time gets the last one) - le = NULL; - r = 
toku_logcursor_prev(logcursor, &le); - if (tokudb_recovery_trace) - recover_trace_le(__FUNCTION__, __LINE__, r, le); - if (r != 0) { - if (r == DB_NOTFOUND) - break; - rr = DB_RUNRECOVERY; - goto errorexit; - } - - // trace progress - if ((i % 1000) == 0) { - tnow = time(NULL); - if (tnow - tlast >= TOKUDB_RECOVERY_PROGRESS_TIME) { - thislsn = toku_log_entry_get_lsn(le); - fprintf(stderr, "%.24s Tokudb recovery scanning backward from %" PRIu64 " at %" PRIu64 " (%s)\n", ctime(&tnow), lastlsn.lsn, thislsn.lsn, recover_state(renv)); - tlast = tnow; - } - } - - // dispatch the log entry handler - assert(renv->ss.ss == BACKWARD_BETWEEN_CHECKPOINT_BEGIN_END || - renv->ss.ss == BACKWARD_NEWER_CHECKPOINT_END); - logtype_dispatch_assign(le, toku_recover_backward_, r, renv); - if (tokudb_recovery_trace) - recover_trace_le(__FUNCTION__, __LINE__, r, le); - if (r != 0) { - if (tokudb_recovery_trace) - fprintf(stderr, "DB_RUNRECOVERY: %s:%d r=%d\n", __FUNCTION__, __LINE__, r); - rr = DB_RUNRECOVERY; - goto errorexit; - } - if (renv->goforward) - break; - } - - // run first callback - if (recover_callback_fx) - recover_callback_fx(recover_callback_args); - - // scan forwards - assert(le); - thislsn = toku_log_entry_get_lsn(le); - tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery starts scanning forward to %" PRIu64 " from %" PRIu64 " left %" PRIu64 " (%s)\n", ctime(&tnow), lastlsn.lsn, thislsn.lsn, lastlsn.lsn - thislsn.lsn, recover_state(renv)); - - for (unsigned i=0; 1; i++) { - - // trace progress - if ((i % 1000) == 0) { - tnow = time(NULL); - if (tnow - tlast >= TOKUDB_RECOVERY_PROGRESS_TIME) { - thislsn = toku_log_entry_get_lsn(le); - fprintf(stderr, "%.24s Tokudb recovery scanning forward to %" PRIu64 " at %" PRIu64 " left %" PRIu64 " (%s)\n", ctime(&tnow), lastlsn.lsn, thislsn.lsn, lastlsn.lsn - thislsn.lsn, recover_state(renv)); - tlast = tnow; - } - } - - // dispatch the log entry handler (first time calls the forward handler for the log entry at the 
turnaround - assert(renv->ss.ss == FORWARD_BETWEEN_CHECKPOINT_BEGIN_END || - renv->ss.ss == FORWARD_NEWER_CHECKPOINT_END); - logtype_dispatch_assign(le, toku_recover_, r, renv); - if (tokudb_recovery_trace) - recover_trace_le(__FUNCTION__, __LINE__, r, le); - if (r != 0) { - if (tokudb_recovery_trace) - fprintf(stderr, "DB_RUNRECOVERY: %s:%d r=%d\n", __FUNCTION__, __LINE__, r); - rr = DB_RUNRECOVERY; - goto errorexit; - } - - // get the next log entry - le = NULL; - r = toku_logcursor_next(logcursor, &le); - if (tokudb_recovery_trace) - recover_trace_le(__FUNCTION__, __LINE__, r, le); - if (r != 0) { - if (r == DB_NOTFOUND) - break; - rr = DB_RUNRECOVERY; - goto errorexit; - } - } - - // verify the final recovery state - assert(renv->ss.ss == FORWARD_NEWER_CHECKPOINT_END); - - r = toku_logcursor_destroy(&logcursor); - assert(r == 0); - - // run second callback - if (recover_callback2_fx) - recover_callback2_fx(recover_callback2_args); - - // restart logging - toku_logger_restart(renv->logger, lastlsn); - - // abort the live transactions - { - uint32_t n = recover_get_num_live_txns(renv); - if (n > 0) { - tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery has %" PRIu32 " live transaction%s\n", ctime(&tnow), n, n > 1 ? "s" : ""); - } - } - recover_abort_all_live_txns(renv); - { - uint32_t n = recover_get_num_live_txns(renv); - if (n > 0) { - tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery has %" PRIu32 " prepared transaction%s\n", ctime(&tnow), n, n > 1 ? "s" : ""); - } - } - - // close the open dictionaries - uint32_t n; - n = file_map_get_num_dictionaries(&renv->fmap); - if (n > 0) { - tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery closing %" PRIu32 " dictionar%s\n", ctime(&tnow), n, n > 1 ? 
"ies" : "y"); - } - file_map_close_dictionaries(&renv->fmap, lastlsn); - - { - // write a recovery log entry - BYTESTRING recover_comment = { static_cast(strlen("recover")), (char *) "recover" }; - toku_log_comment(renv->logger, NULL, true, 0, recover_comment); - } - - // checkpoint - tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery making a checkpoint\n", ctime(&tnow)); - r = toku_checkpoint(renv->cp, renv->logger, NULL, NULL, NULL, NULL, RECOVERY_CHECKPOINT); - assert(r == 0); - tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery done\n", ctime(&tnow)); - - return 0; - - errorexit: - tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery failed %d\n", ctime(&tnow), rr); - - if (logcursor) { - r = toku_logcursor_destroy(&logcursor); - assert(r == 0); - } - - return rr; -} - -int -toku_recover_lock(const char *lock_dir, int *lockfd) { - int e = toku_single_process_lock(lock_dir, "recovery", lockfd); - if (e != 0 && e != ENOENT) { - fprintf(stderr, "Couldn't run recovery because some other process holds the recovery lock\n"); - } - return e; -} - -int -toku_recover_unlock(int lockfd) { - int lockfd_copy = lockfd; - return toku_single_process_unlock(&lockfd_copy); -} - -int tokudb_recover(DB_ENV *env, - prepared_txn_callback_t prepared_txn_callback, - keep_cachetable_callback_t keep_cachetable_callback, - TOKULOGGER logger, - const char *env_dir, const char *log_dir, - ft_compare_func bt_compare, - ft_update_func update_function, - generate_row_for_put_func generate_row_for_put, - generate_row_for_del_func generate_row_for_del, - size_t cachetable_size) { - int r; - int lockfd = -1; - - r = toku_recover_lock(log_dir, &lockfd); - if (r != 0) - return r; - - int rr = 0; - if (tokudb_needs_recovery(log_dir, false)) { - struct recover_env renv; - r = recover_env_init(&renv, - env_dir, - env, - prepared_txn_callback, - keep_cachetable_callback, - logger, - bt_compare, - update_function, - generate_row_for_put, - generate_row_for_del, - 
cachetable_size); - assert(r == 0); - - rr = do_recovery(&renv, env_dir, log_dir); - - recover_env_cleanup(&renv); - } - - r = toku_recover_unlock(lockfd); - if (r != 0) - return r; - - return rr; -} - -// Return 0 if recovery log exists, ENOENT if log is missing -int -tokudb_recover_log_exists(const char * log_dir) { - int r; - TOKULOGCURSOR logcursor; - - r = toku_logcursor_create(&logcursor, log_dir); - if (r == 0) { - int rclose; - r = toku_logcursor_log_exists(logcursor); // return ENOENT if no log - rclose = toku_logcursor_destroy(&logcursor); - assert(rclose == 0); - } - else - r = ENOENT; - - return r; -} - -void toku_recover_set_callback (void (*callback_fx)(void*), void* callback_args) { - recover_callback_fx = callback_fx; - recover_callback_args = callback_args; -} - -void toku_recover_set_callback2 (void (*callback_fx)(void*), void* callback_args) { - recover_callback2_fx = callback_fx; - recover_callback2_args = callback_args; -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/recover.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/recover.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/recover.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/recover.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,143 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKURECOVER_H -#define TOKURECOVER_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include -#include - -#include -#include - -#include "fttypes.h" -#include "memory.h" - -typedef void (*prepared_txn_callback_t)(DB_ENV*, TOKUTXN); -typedef void (*keep_cachetable_callback_t)(DB_ENV*, CACHETABLE); - -// Run tokudb recovery from the log -// Returns 0 if success -int tokudb_recover (DB_ENV *env, - prepared_txn_callback_t prepared_txn_callback, - keep_cachetable_callback_t keep_cachetable_callback, - TOKULOGGER logger, - const char *env_dir, const char *log_dir, - ft_compare_func bt_compare, - ft_update_func update_function, - generate_row_for_put_func generate_row_for_put, - generate_row_for_del_func generate_row_for_del, - size_t cachetable_size); - -// Effect: Check the tokudb logs to determine whether or not we need to run recovery. -// If the log is empty or if there is a clean shutdown at the end of the log, then we -// dont need to run recovery. -// Returns: true if we need recovery, otherwise false. -int tokudb_needs_recovery(const char *logdir, bool ignore_empty_log); - -// Return 0 if recovery log exists, ENOENT if log is missing -int tokudb_recover_log_exists(const char * log_dir); - -// For test only - set callbacks for recovery testing -void toku_recover_set_callback (void (*)(void*), void*); -void toku_recover_set_callback2 (void (*)(void*), void*); - -extern int tokudb_recovery_trace; - -int toku_recover_lock (const char *lock_dir, int *lockfd); - -int toku_recover_unlock(int lockfd); - -static const prepared_txn_callback_t NULL_prepared_txn_callback __attribute__((__unused__)) = NULL; -static const keep_cachetable_callback_t NULL_keep_cachetable_callback __attribute__((__unused__)) = NULL; - - -#endif // TOKURECOVER_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/rollback-apply.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/rollback-apply.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/rollback-apply.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/rollback-apply.cc 1970-01-01 
00:00:00.000000000 +0000 @@ -1,305 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. 
- -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
-#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "fttypes.h" -#include "log-internal.h" -#include "rollback-apply.h" - -static void -poll_txn_progress_function(TOKUTXN txn, uint8_t is_commit, uint8_t stall_for_checkpoint) { - if (txn->progress_poll_fun) { - TOKU_TXN_PROGRESS_S progress = { - .entries_total = txn->roll_info.num_rollentries, - .entries_processed = txn->roll_info.num_rollentries_processed, - .is_commit = is_commit, - .stalled_on_checkpoint = stall_for_checkpoint}; - txn->progress_poll_fun(&progress, txn->progress_poll_fun_extra); - } -} - -int toku_commit_rollback_item (TOKUTXN txn, struct roll_entry *item, LSN lsn) { - int r=0; - rolltype_dispatch_assign(item, toku_commit_, r, txn, lsn); - txn->roll_info.num_rollentries_processed++; - if (txn->roll_info.num_rollentries_processed % 1024 == 0) { - poll_txn_progress_function(txn, true, false); - } - return r; -} - -int toku_abort_rollback_item (TOKUTXN txn, struct roll_entry *item, LSN lsn) { - int r=0; - rolltype_dispatch_assign(item, toku_rollback_, r, txn, lsn); - txn->roll_info.num_rollentries_processed++; - if (txn->roll_info.num_rollentries_processed % 1024 == 0) { - poll_txn_progress_function(txn, false, false); - } - return r; -} - -int -note_ft_used_in_txns_parent(const FT &ft, uint32_t UU(index), TOKUTXN const child); -int -note_ft_used_in_txns_parent(const FT &ft, uint32_t UU(index), TOKUTXN const child) { - TOKUTXN parent = child->parent; - toku_txn_maybe_note_ft(parent, ft); - return 0; -} - -static int -apply_txn(TOKUTXN txn, LSN lsn, apply_rollback_item func) { - int r = 0; - // do the commit/abort calls and free everything - // we do the commit/abort calls in reverse order too. 
- struct roll_entry *item; - //printf("%s:%d abort\n", __FILE__, __LINE__); - - BLOCKNUM next_log = ROLLBACK_NONE; - - bool is_current = false; - if (txn_has_current_rollback_log(txn)) { - next_log = txn->roll_info.current_rollback; - is_current = true; - } - else if (txn_has_spilled_rollback_logs(txn)) { - next_log = txn->roll_info.spilled_rollback_tail; - } - - uint64_t last_sequence = txn->roll_info.num_rollback_nodes; - bool found_head = false; - while (next_log.b != ROLLBACK_NONE.b) { - ROLLBACK_LOG_NODE log; - //pin log - toku_get_and_pin_rollback_log(txn, next_log, &log); - toku_rollback_verify_contents(log, txn->txnid, last_sequence - 1); - - toku_maybe_prefetch_previous_rollback_log(txn, log); - - last_sequence = log->sequence; - if (func) { - while ((item=log->newest_logentry)) { - log->newest_logentry = item->prev; - r = func(txn, item, lsn); - if (r!=0) return r; - } - } - if (next_log.b == txn->roll_info.spilled_rollback_head.b) { - assert(!found_head); - found_head = true; - assert(log->sequence == 0); - } - next_log = log->previous; - { - //Clean up transaction structure to prevent - //toku_txn_close from double-freeing - if (is_current) { - txn->roll_info.current_rollback = ROLLBACK_NONE; - is_current = false; - } - else { - txn->roll_info.spilled_rollback_tail = next_log; - } - if (found_head) { - assert(next_log.b == ROLLBACK_NONE.b); - txn->roll_info.spilled_rollback_head = next_log; - } - } - bool give_back = false; - // each txn tries to give back at most one rollback log node - // to the cache. - if (next_log.b == ROLLBACK_NONE.b) { - give_back = txn->logger->rollback_cache.give_rollback_log_node( - txn, - log - ); - } - if (!give_back) { - toku_rollback_log_unpin_and_remove(txn, log); - } - } - return r; -} - -//Commit each entry in the rollback log. -//If the transaction has a parent, it just promotes its information to its parent. 
-int toku_rollback_commit(TOKUTXN txn, LSN lsn) { - int r=0; - if (txn->parent!=0) { - // First we must put a rollinclude entry into the parent if we spilled - - if (txn_has_spilled_rollback_logs(txn)) { - uint64_t num_nodes = txn->roll_info.num_rollback_nodes; - if (txn_has_current_rollback_log(txn)) { - num_nodes--; //Don't count the in-progress rollback log. - } - toku_logger_save_rollback_rollinclude(txn->parent, txn->txnid, num_nodes, - txn->roll_info.spilled_rollback_head, - txn->roll_info.spilled_rollback_tail); - //Remove ownership from child. - txn->roll_info.spilled_rollback_head = ROLLBACK_NONE; - txn->roll_info.spilled_rollback_tail = ROLLBACK_NONE; - } - // if we're commiting a child rollback, put its entries into the parent - // by pinning both child and parent and then linking the child log entry - // list to the end of the parent log entry list. - if (txn_has_current_rollback_log(txn)) { - //Pin parent log - toku_txn_lock(txn->parent); - ROLLBACK_LOG_NODE parent_log; - toku_get_and_pin_rollback_log_for_new_entry(txn->parent, &parent_log); - - //Pin child log - ROLLBACK_LOG_NODE child_log; - toku_get_and_pin_rollback_log(txn, txn->roll_info.current_rollback, &child_log); - toku_rollback_verify_contents(child_log, txn->txnid, txn->roll_info.num_rollback_nodes - 1); - - // Append the list to the front of the parent. - if (child_log->oldest_logentry) { - // There are some entries, so link them in. 
- child_log->oldest_logentry->prev = parent_log->newest_logentry; - if (!parent_log->oldest_logentry) { - parent_log->oldest_logentry = child_log->oldest_logentry; - } - parent_log->newest_logentry = child_log->newest_logentry; - parent_log->rollentry_resident_bytecount += child_log->rollentry_resident_bytecount; - txn->parent->roll_info.rollentry_raw_count += txn->roll_info.rollentry_raw_count; - child_log->rollentry_resident_bytecount = 0; - } - if (parent_log->oldest_logentry==NULL) { - parent_log->oldest_logentry = child_log->oldest_logentry; - } - child_log->newest_logentry = child_log->oldest_logentry = 0; - // Put all the memarena data into the parent. - if (toku_memarena_total_size_in_use(child_log->rollentry_arena) > 0) { - // If there are no bytes to move, then just leave things alone, and let the memory be reclaimed on txn is closed. - toku_memarena_move_buffers(parent_log->rollentry_arena, child_log->rollentry_arena); - } - // each txn tries to give back at most one rollback log node - // to the cache. All other rollback log nodes for this child - // transaction are included in the parent's rollback log, - // so this is the only node we can give back to the cache - bool give_back = txn->logger->rollback_cache.give_rollback_log_node( - txn, - child_log - ); - if (!give_back) { - toku_rollback_log_unpin_and_remove(txn, child_log); - } - txn->roll_info.current_rollback = ROLLBACK_NONE; - - toku_maybe_spill_rollbacks(txn->parent, parent_log); - toku_rollback_log_unpin(txn->parent, parent_log); - assert(r == 0); - toku_txn_unlock(txn->parent); - } - - // Note the open FTs, the omts must be merged - r = txn->open_fts.iterate(txn); - assert(r==0); - - //If this transaction needs an fsync (if it commits) - //save that in the parent. Since the commit really happens in the root txn. 
- txn->parent->force_fsync_on_commit |= txn->force_fsync_on_commit; - txn->parent->roll_info.num_rollentries += txn->roll_info.num_rollentries; - } else { - r = apply_txn(txn, lsn, toku_commit_rollback_item); - assert(r==0); - } - - return r; -} - -int toku_rollback_abort(TOKUTXN txn, LSN lsn) { - int r; - r = apply_txn(txn, lsn, toku_abort_rollback_item); - assert(r==0); - return r; -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/rollback-apply.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/rollback-apply.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/rollback-apply.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/rollback-apply.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,104 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef ROLLBACK_APPLY_H -#define ROLLBACK_APPLY_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- - -typedef int(*apply_rollback_item)(TOKUTXN txn, struct roll_entry *item, LSN lsn); -int toku_commit_rollback_item (TOKUTXN txn, struct roll_entry *item, LSN lsn); -int toku_abort_rollback_item (TOKUTXN txn, struct roll_entry *item, LSN lsn); - -int toku_rollback_commit(TOKUTXN txn, LSN lsn); -int toku_rollback_abort(TOKUTXN txn, LSN lsn); - - -#endif // ROLLBACK_APPLY_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/rollback.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/rollback.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/rollback.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/rollback.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,386 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. 
- Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include - -#include "ft.h" -#include "log-internal.h" -#include "rollback-ct-callbacks.h" - -static void rollback_unpin_remove_callback(CACHEKEY* cachekey, bool for_checkpoint, void* extra) { - FT CAST_FROM_VOIDP(h, extra); - toku_free_blocknum( - h->blocktable, - cachekey, - h, - for_checkpoint - ); -} - -void toku_rollback_log_unpin_and_remove(TOKUTXN txn, ROLLBACK_LOG_NODE log) { - int r; - CACHEFILE cf = txn->logger->rollback_cachefile; - FT CAST_FROM_VOIDP(h, toku_cachefile_get_userdata(cf)); - r = toku_cachetable_unpin_and_remove (cf, log->ct_pair, rollback_unpin_remove_callback, h); - assert(r == 0); -} - -int -toku_find_xid_by_xid (const TXNID &xid, const TXNID &xidfind) { - if (xidxidfind) return +1; - return 0; -} - -void *toku_malloc_in_rollback(ROLLBACK_LOG_NODE log, size_t size) { - return toku_memarena_malloc(log->rollentry_arena, size); -} - -void *toku_memdup_in_rollback(ROLLBACK_LOG_NODE log, const void *v, size_t len) { - void *r=toku_malloc_in_rollback(log, len); - memcpy(r,v,len); - return r; -} - -static inline PAIR_ATTR make_rollback_pair_attr(long size) { - PAIR_ATTR result={ - .size = size, - .nonleaf_size = 0, - .leaf_size = 0, - .rollback_size = size, - .cache_pressure_size = 0, - .is_valid = true - }; - return result; -} - -PAIR_ATTR -rollback_memory_size(ROLLBACK_LOG_NODE log) { - size_t size = sizeof(*log); - if (log->rollentry_arena) { - size += toku_memarena_total_footprint(log->rollentry_arena); - } - return make_rollback_pair_attr(size); -} - -static void toku_rollback_node_save_ct_pair(CACHEKEY UU(key), void *value_data, PAIR p) { - ROLLBACK_LOG_NODE CAST_FROM_VOIDP(log, value_data); - log->ct_pair = p; -} - -// -// initializes an empty rollback log node -// Does not touch the blocknum, that is the -// responsibility of the caller -// -void rollback_empty_log_init(ROLLBACK_LOG_NODE log) { - // Having a txnid set to TXNID_NONE is how we determine if the - // rollback log node is empty or in use. 
- log->txnid.parent_id64 = TXNID_NONE; - log->txnid.child_id64 = TXNID_NONE; - - log->layout_version = FT_LAYOUT_VERSION; - log->layout_version_original = FT_LAYOUT_VERSION; - log->layout_version_read_from_disk = FT_LAYOUT_VERSION; - log->dirty = true; - log->sequence = 0; - log->previous = make_blocknum(0); - log->oldest_logentry = NULL; - log->newest_logentry = NULL; - log->rollentry_arena = NULL; - log->rollentry_resident_bytecount = 0; -} - - - -static void rollback_initialize_for_txn( - ROLLBACK_LOG_NODE log, - TOKUTXN txn, - BLOCKNUM previous - ) -{ - log->txnid = txn->txnid; - log->sequence = txn->roll_info.num_rollback_nodes++; - log->previous = previous; - log->oldest_logentry = NULL; - log->newest_logentry = NULL; - log->rollentry_arena = toku_memarena_create(); - log->rollentry_resident_bytecount = 0; - log->dirty = true; -} - -void make_rollback_log_empty(ROLLBACK_LOG_NODE log) { - toku_memarena_destroy(&log->rollentry_arena); - rollback_empty_log_init(log); -} - -// create and pin a new rollback log node. 
chain it to the other rollback nodes -// by providing a previous blocknum and assigning the new rollback log -// node the next sequence number -static void rollback_log_create ( - TOKUTXN txn, - BLOCKNUM previous, - ROLLBACK_LOG_NODE *result - ) -{ - ROLLBACK_LOG_NODE XMALLOC(log); - rollback_empty_log_init(log); - - CACHEFILE cf = txn->logger->rollback_cachefile; - FT CAST_FROM_VOIDP(ft, toku_cachefile_get_userdata(cf)); - rollback_initialize_for_txn(log, txn, previous); - toku_allocate_blocknum(ft->blocktable, &log->blocknum, ft); - const uint32_t hash = toku_cachetable_hash(ft->cf, log->blocknum); - *result = log; - toku_cachetable_put(cf, log->blocknum, hash, - log, rollback_memory_size(log), - get_write_callbacks_for_rollback_log(ft), - toku_rollback_node_save_ct_pair); - txn->roll_info.current_rollback = log->blocknum; -} - -void toku_rollback_log_unpin(TOKUTXN txn, ROLLBACK_LOG_NODE log) { - int r; - CACHEFILE cf = txn->logger->rollback_cachefile; - r = toku_cachetable_unpin( - cf, - log->ct_pair, - (enum cachetable_dirty)log->dirty, - rollback_memory_size(log) - ); - assert(r == 0); -} - -//Requires: log is pinned -// log is current -//After: -// Maybe there is no current after (if it spilled) -void toku_maybe_spill_rollbacks(TOKUTXN txn, ROLLBACK_LOG_NODE log) { - if (log->rollentry_resident_bytecount > txn->logger->write_block_size) { - assert(log->blocknum.b == txn->roll_info.current_rollback.b); - //spill - if (!txn_has_spilled_rollback_logs(txn)) { - //First spilled. Copy to head. - txn->roll_info.spilled_rollback_head = txn->roll_info.current_rollback; - } - //Unconditionally copy to tail. Old tail does not need to be cached anymore. 
- txn->roll_info.spilled_rollback_tail = txn->roll_info.current_rollback; - - txn->roll_info.current_rollback = ROLLBACK_NONE; - } -} - -int find_filenum (const FT &h, const FT &hfind); -int find_filenum (const FT &h, const FT &hfind) { - FILENUM fnum = toku_cachefile_filenum(h->cf); - FILENUM fnumfind = toku_cachefile_filenum(hfind->cf); - if (fnum.fileidfnumfind.fileid) return +1; - return 0; -} - -//Notify a transaction that it has touched an ft. -void toku_txn_maybe_note_ft (TOKUTXN txn, FT ft) { - toku_txn_lock(txn); - FT ftv; - uint32_t idx; - int r = txn->open_fts.find_zero(ft, &ftv, &idx); - if (r == 0) { - // already there - assert(ftv == ft); - goto exit; - } - r = txn->open_fts.insert_at(ft, idx); - assert_zero(r); - // TODO(leif): if there's anything that locks the reflock and then - // the txn lock, this may deadlock, because it grabs the reflock. - toku_ft_add_txn_ref(ft); -exit: - toku_txn_unlock(txn); -} - -// Return the number of bytes that went into the rollback data structure (the uncompressed count if there is compression) -int toku_logger_txn_rollback_stats(TOKUTXN txn, struct txn_stat *txn_stat) -{ - toku_txn_lock(txn); - txn_stat->rollback_raw_count = txn->roll_info.rollentry_raw_count; - txn_stat->rollback_num_entries = txn->roll_info.num_rollentries; - toku_txn_unlock(txn); - return 0; -} - -void toku_maybe_prefetch_previous_rollback_log(TOKUTXN txn, ROLLBACK_LOG_NODE log) { - //Currently processing 'log'. Prefetch the next (previous) log node. 
- - BLOCKNUM name = log->previous; - int r = 0; - if (name.b != ROLLBACK_NONE.b) { - CACHEFILE cf = txn->logger->rollback_cachefile; - uint32_t hash = toku_cachetable_hash(cf, name); - FT CAST_FROM_VOIDP(h, toku_cachefile_get_userdata(cf)); - bool doing_prefetch = false; - r = toku_cachefile_prefetch(cf, name, hash, - get_write_callbacks_for_rollback_log(h), - toku_rollback_fetch_callback, - toku_rollback_pf_req_callback, - toku_rollback_pf_callback, - h, - &doing_prefetch); - assert(r == 0); - } -} - -void toku_rollback_verify_contents(ROLLBACK_LOG_NODE log, - TXNID_PAIR txnid, uint64_t sequence) -{ - assert(log->txnid.parent_id64 == txnid.parent_id64); - assert(log->txnid.child_id64 == txnid.child_id64); - assert(log->sequence == sequence); -} - -void toku_get_and_pin_rollback_log(TOKUTXN txn, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *log) { - void * value; - CACHEFILE cf = txn->logger->rollback_cachefile; - FT CAST_FROM_VOIDP(h, toku_cachefile_get_userdata(cf)); - uint32_t hash = toku_cachetable_hash(cf, blocknum); - int r = toku_cachetable_get_and_pin_with_dep_pairs(cf, blocknum, hash, - &value, NULL, - get_write_callbacks_for_rollback_log(h), - toku_rollback_fetch_callback, - toku_rollback_pf_req_callback, - toku_rollback_pf_callback, - PL_WRITE_CHEAP, // lock_type - h, - 0, NULL, NULL - ); - assert(r == 0); - ROLLBACK_LOG_NODE CAST_FROM_VOIDP(pinned_log, value); - assert(pinned_log->blocknum.b == blocknum.b); - *log = pinned_log; -} - -void toku_get_and_pin_rollback_log_for_new_entry (TOKUTXN txn, ROLLBACK_LOG_NODE *log) { - ROLLBACK_LOG_NODE pinned_log = NULL; - invariant(txn->state == TOKUTXN_LIVE || txn->state == TOKUTXN_PREPARING); // hot indexing may call this function for prepared transactions - if (txn_has_current_rollback_log(txn)) { - toku_get_and_pin_rollback_log(txn, txn->roll_info.current_rollback, &pinned_log); - toku_rollback_verify_contents(pinned_log, txn->txnid, txn->roll_info.num_rollback_nodes - 1); - } else { - // For each transaction, we try 
to acquire the first rollback log - // from the rollback log node cache, so that we avoid - // putting something new into the cachetable. However, - // if transaction has spilled rollbacks, that means we - // have already done a lot of work for this transaction, - // and subsequent rollback log nodes are created - // and put into the cachetable. The idea is for - // transactions that don't do a lot of work to (hopefully) - // get a rollback log node from a cache, as opposed to - // taking the more expensive route of creating a new one. - if (!txn_has_spilled_rollback_logs(txn)) { - txn->logger->rollback_cache.get_rollback_log_node(txn, &pinned_log); - if (pinned_log != NULL) { - rollback_initialize_for_txn( - pinned_log, - txn, - txn->roll_info.spilled_rollback_tail - ); - txn->roll_info.current_rollback = pinned_log->blocknum; - } - } - if (pinned_log == NULL) { - rollback_log_create(txn, txn->roll_info.spilled_rollback_tail, &pinned_log); - } - } - assert(pinned_log->txnid.parent_id64 == txn->txnid.parent_id64); - assert(pinned_log->txnid.child_id64 == txn->txnid.child_id64); - assert(pinned_log->blocknum.b != ROLLBACK_NONE.b); - *log = pinned_log; -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/rollback-ct-callbacks.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/rollback-ct-callbacks.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/rollback-ct-callbacks.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/rollback-ct-callbacks.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,313 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of 
source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. 
- - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include -#include - -#include "ft-internal.h" -#include "fttypes.h" -#include "rollback.h" -#include "rollback-ct-callbacks.h" - -#include - -// Address used as a sentinel. Otherwise unused. -static struct serialized_rollback_log_node cloned_rollback; - -// Cleanup the rollback memory -static void -rollback_log_destroy(ROLLBACK_LOG_NODE log) { - make_rollback_log_empty(log); - toku_free(log); -} - -// flush an ununused log to disk, by allocating a size 0 blocknum in -// the blocktable -static void -toku_rollback_flush_unused_log( - ROLLBACK_LOG_NODE log, - BLOCKNUM logname, - int fd, - FT ft, - bool write_me, - bool keep_me, - bool for_checkpoint, - bool is_clone - ) -{ - if (write_me) { - DISKOFF offset; - toku_blocknum_realloc_on_disk(ft->blocktable, logname, 0, &offset, - ft, fd, for_checkpoint); - } - if (!keep_me && !is_clone) { - toku_free(log); - } -} - -// flush a used log to disk by serializing and writing the node out -static void -toku_rollback_flush_used_log ( - ROLLBACK_LOG_NODE log, - SERIALIZED_ROLLBACK_LOG_NODE serialized, - int fd, - FT ft, - bool write_me, - bool keep_me, - bool for_checkpoint, - bool is_clone - ) -{ - - if (write_me) { - int r = toku_serialize_rollback_log_to(fd, log, serialized, is_clone, ft, for_checkpoint); - assert(r == 0); - } - if (!keep_me) { - if (is_clone) { - toku_serialized_rollback_log_destroy(serialized); - } - else { - rollback_log_destroy(log); - } - } -} - -// Write something out. Keep trying even if partial writes occur. -// On error: Return negative with errno set. -// On success return nbytes. 
-void toku_rollback_flush_callback ( - CACHEFILE UU(cachefile), - int fd, - BLOCKNUM logname, - void *rollback_v, - void** UU(disk_data), - void *extraargs, - PAIR_ATTR size, - PAIR_ATTR* new_size, - bool write_me, - bool keep_me, - bool for_checkpoint, - bool is_clone - ) -{ - ROLLBACK_LOG_NODE log = nullptr; - SERIALIZED_ROLLBACK_LOG_NODE serialized = nullptr; - bool is_unused = false; - if (is_clone) { - is_unused = (rollback_v == &cloned_rollback); - CAST_FROM_VOIDP(serialized, rollback_v); - } - else { - CAST_FROM_VOIDP(log, rollback_v); - is_unused = rollback_log_is_unused(log); - } - *new_size = size; - FT ft; - CAST_FROM_VOIDP(ft, extraargs); - if (is_unused) { - toku_rollback_flush_unused_log( - log, - logname, - fd, - ft, - write_me, - keep_me, - for_checkpoint, - is_clone - ); - } - else { - toku_rollback_flush_used_log( - log, - serialized, - fd, - ft, - write_me, - keep_me, - for_checkpoint, - is_clone - ); - } -} - -int toku_rollback_fetch_callback (CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM logname, uint32_t fullhash UU(), - void **rollback_pv, void** UU(disk_data), PAIR_ATTR *sizep, int * UU(dirtyp), void *extraargs) { - int r; - FT CAST_FROM_VOIDP(h, extraargs); - assert(h->cf == cachefile); - ROLLBACK_LOG_NODE *result = (ROLLBACK_LOG_NODE*)rollback_pv; - r = toku_deserialize_rollback_log_from(fd, logname, result, h); - if (r==0) { - (*result)->ct_pair = p; - *sizep = rollback_memory_size(*result); - } - return r; -} - -void toku_rollback_pe_est_callback( - void* rollback_v, - void* UU(disk_data), - long* bytes_freed_estimate, - enum partial_eviction_cost *cost, - void* UU(write_extraargs) - ) -{ - assert(rollback_v != NULL); - *bytes_freed_estimate = 0; - *cost = PE_CHEAP; -} - -// callback for partially evicting a cachetable entry -int toku_rollback_pe_callback ( - void *rollback_v, - PAIR_ATTR old_attr, - void* UU(extraargs), - void (*finalize)(PAIR_ATTR new_attr, void * extra), - void *finalize_extra - ) -{ - assert(rollback_v != NULL); - 
finalize(old_attr, finalize_extra); - return 0; -} - -// partial fetch is never required for a rollback log node -bool toku_rollback_pf_req_callback(void* UU(ftnode_pv), void* UU(read_extraargs)) { - return false; -} - -// a rollback node should never be partial fetched, -// because we always say it is not required. -// (pf req callback always returns false) -int toku_rollback_pf_callback(void* UU(ftnode_pv), void* UU(disk_data), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* UU(sizep)) { - assert(false); - return 0; -} - -// the cleaner thread should never choose a rollback node for cleaning -int toku_rollback_cleaner_callback ( - void* UU(ftnode_pv), - BLOCKNUM UU(blocknum), - uint32_t UU(fullhash), - void* UU(extraargs) - ) -{ - assert(false); - return 0; -} - -void toku_rollback_clone_callback( - void* value_data, - void** cloned_value_data, - long* clone_size, - PAIR_ATTR* new_attr, - bool UU(for_checkpoint), - void* UU(write_extraargs) - ) -{ - ROLLBACK_LOG_NODE CAST_FROM_VOIDP(log, value_data); - SERIALIZED_ROLLBACK_LOG_NODE serialized = nullptr; - if (!rollback_log_is_unused(log)) { - XMALLOC(serialized); - toku_serialize_rollback_log_to_memory_uncompressed(log, serialized); - *cloned_value_data = serialized; - *clone_size = sizeof(struct serialized_rollback_log_node) + serialized->len; - } - else { - *cloned_value_data = &cloned_rollback; - *clone_size = sizeof(cloned_rollback); - } - // clear the dirty bit, because the node has been cloned - log->dirty = 0; - new_attr->is_valid = false; -} - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/rollback-ct-callbacks.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/rollback-ct-callbacks.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/rollback-ct-callbacks.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/rollback-ct-callbacks.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,139 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: 
ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef ROLLBACK_CT_CALLBACKS_H -#define ROLLBACK_CT_CALLBACKS_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. 
- This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
-#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - - -#include "cachetable.h" -#include "fttypes.h" - -void toku_rollback_flush_callback(CACHEFILE cachefile, int fd, BLOCKNUM logname, void *rollback_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool UU(is_clone)); -int toku_rollback_fetch_callback(CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM logname, uint32_t fullhash, void **rollback_pv, void** UU(disk_data), PAIR_ATTR *sizep, int * UU(dirtyp), void *extraargs); -void toku_rollback_pe_est_callback( - void* rollback_v, - void* UU(disk_data), - long* bytes_freed_estimate, - enum partial_eviction_cost *cost, - void* UU(write_extraargs) - ); -int toku_rollback_pe_callback ( - void *rollback_v, - PAIR_ATTR old_attr, - void* UU(extraargs), - void (*finalize)(PAIR_ATTR new_attr, void * extra), - void *finalize_extra - ); -bool toku_rollback_pf_req_callback(void* UU(ftnode_pv), void* UU(read_extraargs)) ; -int toku_rollback_pf_callback(void* UU(ftnode_pv), void* UU(disk_data), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* UU(sizep)); -void toku_rollback_clone_callback(void* value_data, void** cloned_value_data, long* clone_size, PAIR_ATTR* new_attr, bool for_checkpoint, void* write_extraargs); - -int toku_rollback_cleaner_callback ( - void* UU(ftnode_pv), - BLOCKNUM UU(blocknum), - uint32_t UU(fullhash), - void* UU(extraargs) - ); - -static inline CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_rollback_log(FT h) { - CACHETABLE_WRITE_CALLBACK wc; - wc.flush_callback = toku_rollback_flush_callback; - wc.pe_est_callback = toku_rollback_pe_est_callback; - wc.pe_callback = toku_rollback_pe_callback; - wc.cleaner_callback = 
toku_rollback_cleaner_callback; - wc.clone_callback = toku_rollback_clone_callback; - wc.checkpoint_complete_callback = nullptr; - wc.write_extraargs = h; - return wc; -} - - -#endif // ROLLBACK_CT_CALLBACKS_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/rollback.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/rollback.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/rollback.h 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/rollback.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,195 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_ROLLBACK_H -#define TOKU_ROLLBACK_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include -#include "sub_block.h" - -void toku_poll_txn_progress_function(TOKUTXN txn, uint8_t is_commit, uint8_t stall_for_checkpoint); - -// these functions assert internally that they succeed - -// get a rollback node this txn may use for a new entry. if there -// is a current rollback node to use, pin it, otherwise create one. 
-void toku_get_and_pin_rollback_log_for_new_entry(TOKUTXN txn, ROLLBACK_LOG_NODE *log); - -// get a specific rollback by blocknum -void toku_get_and_pin_rollback_log(TOKUTXN txn, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *log); - -// unpin a rollback node from the cachetable -void toku_rollback_log_unpin(TOKUTXN txn, ROLLBACK_LOG_NODE log); - -// assert that the given log's txnid and sequence match the ones given -void toku_rollback_verify_contents(ROLLBACK_LOG_NODE log, TXNID_PAIR txnid, uint64_t sequence); - -// if there is a previous rollback log for the given log node, prefetch it -void toku_maybe_prefetch_previous_rollback_log(TOKUTXN txn, ROLLBACK_LOG_NODE log); - -// unpin and rmove a rollback log from the cachetable -void toku_rollback_log_unpin_and_remove(TOKUTXN txn, ROLLBACK_LOG_NODE log); - -void *toku_malloc_in_rollback(ROLLBACK_LOG_NODE log, size_t size); -void *toku_memdup_in_rollback(ROLLBACK_LOG_NODE log, const void *v, size_t len); - -// given a transaction and a log node, and if the log is too full, -// set the current rollback log to ROLLBACK_NONE and move the current -// node onto the tail of the rollback node chain. further insertions -// into the rollback log for this transaction will force the creation -// of a new rollback log. -// -// this never unpins the rollback log if a spill occurs. the caller -// is responsible for ensuring the given rollback node is unpinned -// if necessary. -void toku_maybe_spill_rollbacks(TOKUTXN txn, ROLLBACK_LOG_NODE log); - -void toku_txn_maybe_note_ft (TOKUTXN txn, FT h); -int toku_logger_txn_rollback_stats(TOKUTXN txn, struct txn_stat *txn_stat); - -int toku_find_xid_by_xid (const TXNID &xid, const TXNID &xidfind); - -PAIR_ATTR rollback_memory_size(ROLLBACK_LOG_NODE log); - -// A high-level rollback log is made up of a chain of rollback log nodes. -// Each rollback log node is represented (separately) in the cachetable by -// this structure. 
Each portion of the rollback log chain has a block num -// and a hash to identify it. -struct rollback_log_node { - int layout_version; - int layout_version_original; - int layout_version_read_from_disk; - uint32_t build_id; // build_id (svn rev number) of software that wrote this node to disk - int dirty; - // to which transaction does this node belong? - TXNID_PAIR txnid; - // sequentially, where in the rollback log chain is this node? - // the sequence is between 0 and totalnodes-1 - uint64_t sequence; - BLOCKNUM blocknum; // on which block does this node live? - // which block number is the previous in the chain of rollback nodes - // that make up this rollback log? - BLOCKNUM previous; - struct roll_entry *oldest_logentry; - struct roll_entry *newest_logentry; - MEMARENA rollentry_arena; - size_t rollentry_resident_bytecount; // How many bytes for the rollentries that are stored in main memory. - PAIR ct_pair; -}; - -struct serialized_rollback_log_node { - char *data; - uint32_t len; - int n_sub_blocks; - BLOCKNUM blocknum; - struct sub_block sub_block[max_sub_blocks]; -}; - -static inline void -toku_static_serialized_rollback_log_destroy(SERIALIZED_ROLLBACK_LOG_NODE log) { - toku_free(log->data); -} - -static inline void -toku_serialized_rollback_log_destroy(SERIALIZED_ROLLBACK_LOG_NODE log) { - toku_static_serialized_rollback_log_destroy(log); - toku_free(log); -} - -void rollback_empty_log_init(ROLLBACK_LOG_NODE log); -void make_rollback_log_empty(ROLLBACK_LOG_NODE log); - -static inline bool rollback_log_is_unused(ROLLBACK_LOG_NODE log) { - return (log->txnid.parent_id64 == TXNID_NONE); -} - - -#endif // TOKU_ROLLBACK_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/rollback_log_node_cache.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/rollback_log_node_cache.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/rollback_log_node_cache.cc 2014-08-03 12:00:34.000000000 +0000 +++ 
mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/rollback_log_node_cache.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,160 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 
11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. 
-*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include -#include - -#include "rollback_log_node_cache.h" - -void rollback_log_node_cache::init (uint32_t max_num_avail_nodes) { - XMALLOC_N(max_num_avail_nodes, m_avail_blocknums); - m_max_num_avail = max_num_avail_nodes; - m_first = 0; - m_num_avail = 0; - toku_pthread_mutexattr_t attr; - toku_mutexattr_init(&attr); - toku_mutexattr_settype(&attr, TOKU_MUTEX_ADAPTIVE); - toku_mutex_init(&m_mutex, &attr); - toku_mutexattr_destroy(&attr); -} - -void rollback_log_node_cache::destroy() { - toku_mutex_destroy(&m_mutex); - toku_free(m_avail_blocknums); -} - -// returns true if rollback log node was successfully added, -// false otherwise -bool rollback_log_node_cache::give_rollback_log_node(TOKUTXN txn, ROLLBACK_LOG_NODE log){ - bool retval = false; - toku_mutex_lock(&m_mutex); - if (m_num_avail < m_max_num_avail) { - retval = true; - uint32_t index = m_first + m_num_avail; - if (index >= m_max_num_avail) { - index -= m_max_num_avail; - } - m_avail_blocknums[index].b = log->blocknum.b; - m_num_avail++; - } - toku_mutex_unlock(&m_mutex); - // - // now unpin the rollback log node - // - if (retval) { - make_rollback_log_empty(log); - toku_rollback_log_unpin(txn, log); - } - return retval; -} - -// if a rollback log node is available, will set log to it, -// otherwise, will set log to NULL and caller is on his own -// for getting a rollback log node -void rollback_log_node_cache::get_rollback_log_node(TOKUTXN txn, ROLLBACK_LOG_NODE* log){ - BLOCKNUM b = ROLLBACK_NONE; - toku_mutex_lock(&m_mutex); - if (m_num_avail > 0) { - b.b = m_avail_blocknums[m_first].b; - m_num_avail--; - if 
(++m_first >= m_max_num_avail) { - m_first = 0; - } - } - toku_mutex_unlock(&m_mutex); - if (b.b != ROLLBACK_NONE.b) { - toku_get_and_pin_rollback_log(txn, b, log); - invariant(rollback_log_is_unused(*log)); - } else { - *log = NULL; - } -} - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/rollback_log_node_cache.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/rollback_log_node_cache.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/rollback_log_node_cache.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/rollback_log_node_cache.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,119 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_ROLLBACK_LOG_NODE_CACHE_H -#define TOKU_ROLLBACK_LOG_NODE_CACHE_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. 
- -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include "rollback.h" - -class rollback_log_node_cache { -public: - void init (uint32_t max_num_avail_nodes); - void destroy(); - // returns true if rollback log node was successfully added, - // false otherwise - bool give_rollback_log_node(TOKUTXN txn, ROLLBACK_LOG_NODE log); - // if a rollback log node is available, will set log to it, - // otherwise, will set log to NULL and caller is on his own - // for getting a rollback log node - void get_rollback_log_node(TOKUTXN txn, ROLLBACK_LOG_NODE* log); - -private: - BLOCKNUM* m_avail_blocknums; - uint32_t m_first; - uint32_t m_num_avail; - uint32_t m_max_num_avail; - toku_mutex_t m_mutex; -}; - -ENSURE_POD(rollback_log_node_cache); - -#endif // TOKU_ROLLBACK_LOG_NODE_CACHE_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/roll.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/roll.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/roll.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/roll.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,636 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -/* rollback and rollforward routines. 
*/ - -#include - -#include "ft.h" -#include "ft-ops.h" -#include "log-internal.h" -//#include "txn_manager.h" -#include "xids.h" -#include "rollback-apply.h" - -// functionality provided by roll.c is exposed by an autogenerated -// header file, logheader.h -// -// this (poorly) explains the absense of "roll.h" - -// these flags control whether or not we send commit messages for -// various operations - -// When a transaction is committed, should we send a FT_COMMIT message -// for each FT_INSERT message sent earlier by the transaction? -#define TOKU_DO_COMMIT_CMD_INSERT 0 - -// When a transaction is committed, should we send a FT_COMMIT message -// for each FT_DELETE_ANY message sent earlier by the transaction? -#define TOKU_DO_COMMIT_CMD_DELETE 1 - -// When a transaction is committed, should we send a FT_COMMIT message -// for each FT_UPDATE message sent earlier by the transaction? -#define TOKU_DO_COMMIT_CMD_UPDATE 0 - -int -toku_commit_fdelete (FILENUM filenum, - TOKUTXN txn, - LSN UU(oplsn)) //oplsn is the lsn of the commit -{ - int r; - CACHEFILE cf; - CACHETABLE ct = txn->logger->ct; - - // Try to get the cachefile for this filenum. A missing file on recovery - // is not an error, but a missing file outside of recovery is. - r = toku_cachefile_of_filenum(ct, filenum, &cf); - if (r == ENOENT) { - assert(txn->for_recovery); - r = 0; - goto done; - } - assert_zero(r); - - // bug fix for #4718 - // bug was introduced in with fix for #3590 - // Before Maxwell (and fix for #3590), - // the recovery log was fsynced after the xcommit was loged but - // before we processed rollback entries and before we released - // the row locks (in the lock tree). Due to performance concerns, - // the fsync was moved to after the release of row locks, which comes - // after processing rollback entries. As a result, we may be unlinking a file - // here as part of a transactoin that may abort if we do not fsync the log. - // So, we fsync the log here. 
- if (txn->logger) { - toku_logger_fsync_if_lsn_not_fsynced(txn->logger, txn->do_fsync_lsn); - } - - // Mark the cachefile as unlink on close. There are two ways for close - // to be eventually called on the cachefile: - // - // - when this txn completes, it will release a reference on the - // ft and close it, UNLESS it was pinned by checkpoint - // - if the cf was pinned by checkpoint, an unpin will release the - // final reference and call close. it must be the final reference - // since this txn has exclusive access to dictionary (by the - // directory row lock for its dname) and we would not get this - // far if there were other live handles. - toku_cachefile_unlink_on_close(cf); -done: - return r; -} - -int -toku_rollback_fdelete (FILENUM UU(filenum), - TOKUTXN UU(txn), - LSN UU(oplsn)) //oplsn is the lsn of the abort -{ - //Rolling back an fdelete is an no-op. - return 0; -} - -int -toku_commit_fcreate (FILENUM UU(filenum), - BYTESTRING UU(bs_fname), - TOKUTXN UU(txn), - LSN UU(oplsn)) -{ - return 0; -} - -int -toku_rollback_fcreate (FILENUM filenum, - BYTESTRING UU(bs_fname), - TOKUTXN txn, - LSN UU(oplsn)) -{ - int r; - CACHEFILE cf; - CACHETABLE ct = txn->logger->ct; - - // Try to get the cachefile for this filenum. A missing file on recovery - // is not an error, but a missing file outside of recovery is. - r = toku_cachefile_of_filenum(ct, filenum, &cf); - if (r == ENOENT) { - r = 0; - goto done; - } - assert_zero(r); - - // Mark the cachefile as unlink on close. There are two ways for close - // to be eventually called on the cachefile: - // - // - when this txn completes, it will release a reference on the - // ft and close it, UNLESS it was pinned by checkpoint - // - if the cf was pinned by checkpoint, an unpin will release the - // final reference and call close. 
it must be the final reference - // since this txn has exclusive access to dictionary (by the - // directory row lock for its dname) and we would not get this - // far if there were other live handles. - toku_cachefile_unlink_on_close(cf); -done: - return 0; -} - -int find_ft_from_filenum (const FT &h, const FILENUM &filenum); -int find_ft_from_filenum (const FT &h, const FILENUM &filenum) { - FILENUM thisfnum = toku_cachefile_filenum(h->cf); - if (thisfnum.fileidfilenum.fileid) return +1; - return 0; -} - -// Input arg reset_root_xid_that_created true means that this operation has changed the definition of this dictionary. -// (Example use is for schema change committed with txn that inserted cmdupdatebroadcast message.) -// The oplsn argument is ZERO_LSN for normal operation. When this function is called for recovery, it has the LSN of -// the operation (insert, delete, update, etc). -static int do_insertion (enum ft_msg_type type, FILENUM filenum, BYTESTRING key, BYTESTRING *data, TOKUTXN txn, LSN oplsn, - bool reset_root_xid_that_created) { - int r = 0; - //printf("%s:%d committing insert %s %s\n", __FILE__, __LINE__, key.data, data.data); - FT h; - h = NULL; - r = txn->open_fts.find_zero(filenum, &h, NULL); - if (r == DB_NOTFOUND) { - assert(txn->for_recovery); - r = 0; - goto done; - } - assert(r==0); - - if (oplsn.lsn != 0) { // if we are executing the recovery algorithm - LSN treelsn = toku_ft_checkpoint_lsn(h); - if (oplsn.lsn <= treelsn.lsn) { // if operation was already applied to tree ... - r = 0; // ... do not apply it again. - goto done; - } - } - - DBT key_dbt,data_dbt; - XIDS xids; - xids = toku_txn_get_xids(txn); - { - FT_MSG_S ftmsg = { type, ZERO_MSN, xids, - .u = { .id = { (key.len > 0) - ? toku_fill_dbt(&key_dbt, key.data, key.len) - : toku_init_dbt(&key_dbt), - data - ? 
toku_fill_dbt(&data_dbt, data->data, data->len) - : toku_init_dbt(&data_dbt) } } }; - - TXN_MANAGER txn_manager = toku_logger_get_txn_manager(txn->logger); - txn_manager_state txn_state_for_gc(txn_manager); - - TXNID oldest_referenced_xid_estimate = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager); - txn_gc_info gc_info(&txn_state_for_gc, - oldest_referenced_xid_estimate, - // no messages above us, we can implicitly promote uxrs based on this xid - oldest_referenced_xid_estimate, - !txn->for_recovery); - toku_ft_root_put_msg(h, &ftmsg, &gc_info); - if (reset_root_xid_that_created) { - TXNID new_root_xid_that_created = xids_get_outermost_xid(xids); - toku_reset_root_xid_that_created(h, new_root_xid_that_created); - } - } -done: - return r; -} - - -static int do_nothing_with_filenum(TOKUTXN UU(txn), FILENUM UU(filenum)) { - return 0; -} - - -int toku_commit_cmdinsert (FILENUM filenum, BYTESTRING UU(key), TOKUTXN txn, LSN UU(oplsn)) { -#if TOKU_DO_COMMIT_CMD_INSERT - return do_insertion (FT_COMMIT_ANY, filenum, key, 0, txn, oplsn, false); -#else - return do_nothing_with_filenum(txn, filenum); -#endif -} - -int -toku_rollback_cmdinsert (FILENUM filenum, - BYTESTRING key, - TOKUTXN txn, - LSN oplsn) -{ - return do_insertion (FT_ABORT_ANY, filenum, key, 0, txn, oplsn, false); -} - -int -toku_commit_cmdupdate(FILENUM filenum, - BYTESTRING UU(key), - TOKUTXN txn, - LSN UU(oplsn)) -{ -#if TOKU_DO_COMMIT_CMD_UPDATE - return do_insertion(FT_COMMIT_ANY, filenum, key, 0, txn, oplsn, false); -#else - return do_nothing_with_filenum(txn, filenum); -#endif -} - -int -toku_rollback_cmdupdate(FILENUM filenum, - BYTESTRING key, - TOKUTXN txn, - LSN oplsn) -{ - return do_insertion(FT_ABORT_ANY, filenum, key, 0, txn, oplsn, false); -} - -int -toku_commit_cmdupdatebroadcast(FILENUM filenum, - bool is_resetting_op, - TOKUTXN txn, - LSN oplsn) -{ - // if is_resetting_op, reset root_xid_that_created in - // relevant ft. - bool reset_root_xid_that_created = (is_resetting_op ? 
true : false); - const enum ft_msg_type msg_type = (is_resetting_op - ? FT_COMMIT_BROADCAST_ALL - : FT_COMMIT_BROADCAST_TXN); - BYTESTRING nullkey = { 0, NULL }; - return do_insertion(msg_type, filenum, nullkey, 0, txn, oplsn, reset_root_xid_that_created); -} - -int -toku_rollback_cmdupdatebroadcast(FILENUM filenum, - bool UU(is_resetting_op), - TOKUTXN txn, - LSN oplsn) -{ - BYTESTRING nullkey = { 0, NULL }; - return do_insertion(FT_ABORT_BROADCAST_TXN, filenum, nullkey, 0, txn, oplsn, false); -} - -int -toku_commit_cmddelete (FILENUM filenum, - BYTESTRING key, - TOKUTXN txn, - LSN oplsn) -{ -#if TOKU_DO_COMMIT_CMD_DELETE - return do_insertion (FT_COMMIT_ANY, filenum, key, 0, txn, oplsn, false); -#else - key = key; oplsn = oplsn; - return do_nothing_with_filenum(txn, filenum); -#endif -} - -int -toku_rollback_cmddelete (FILENUM filenum, - BYTESTRING key, - TOKUTXN txn, - LSN oplsn) -{ - return do_insertion (FT_ABORT_ANY, filenum, key, 0, txn, oplsn, false); -} - -static int -toku_apply_rollinclude (TXNID_PAIR xid, - uint64_t num_nodes, - BLOCKNUM spilled_head, - BLOCKNUM spilled_tail, - TOKUTXN txn, - LSN oplsn, - apply_rollback_item func) { - int r = 0; - struct roll_entry *item; - - BLOCKNUM next_log = spilled_tail; - uint64_t last_sequence = num_nodes; - - bool found_head = false; - assert(next_log.b != ROLLBACK_NONE.b); - while (next_log.b != ROLLBACK_NONE.b) { - //pin log - ROLLBACK_LOG_NODE log; - toku_get_and_pin_rollback_log(txn, next_log, &log); - toku_rollback_verify_contents(log, xid, last_sequence - 1); - last_sequence = log->sequence; - - toku_maybe_prefetch_previous_rollback_log(txn, log); - - while ((item=log->newest_logentry)) { - log->newest_logentry = item->prev; - r = func(txn, item, oplsn); - if (r!=0) return r; - } - if (next_log.b == spilled_head.b) { - assert(!found_head); - found_head = true; - assert(log->sequence == 0); - } - next_log = log->previous; - { - //Clean up transaction structure to prevent - //toku_txn_close from double-freeing 
- spilled_tail = next_log; - if (found_head) { - assert(next_log.b == ROLLBACK_NONE.b); - spilled_head = next_log; - } - } - toku_rollback_log_unpin_and_remove(txn, log); - } - return r; -} - -int -toku_commit_rollinclude (TXNID_PAIR xid, - uint64_t num_nodes, - BLOCKNUM spilled_head, - BLOCKNUM spilled_tail, - TOKUTXN txn, - LSN oplsn) { - int r; - r = toku_apply_rollinclude(xid, num_nodes, - spilled_head, - spilled_tail, - txn, oplsn, - toku_commit_rollback_item); - return r; -} - -int -toku_rollback_rollinclude (TXNID_PAIR xid, - uint64_t num_nodes, - BLOCKNUM spilled_head, - BLOCKNUM spilled_tail, - TOKUTXN txn, - LSN oplsn) { - int r; - r = toku_apply_rollinclude(xid, num_nodes, - spilled_head, - spilled_tail, - txn, oplsn, - toku_abort_rollback_item); - return r; -} - -int -toku_commit_load (FILENUM old_filenum, - BYTESTRING UU(new_iname), - TOKUTXN txn, - LSN UU(oplsn)) -{ - int r; - CACHEFILE old_cf; - CACHETABLE ct = txn->logger->ct; - - // To commit a dictionary load, we delete the old file - // - // Try to get the cachefile for the old filenum. A missing file on recovery - // is not an error, but a missing file outside of recovery is. - r = toku_cachefile_of_filenum(ct, old_filenum, &old_cf); - if (r == ENOENT) { - invariant(txn->for_recovery); - r = 0; - goto done; - } - lazy_assert(r == 0); - - // bug fix for #4718 - // bug was introduced in with fix for #3590 - // Before Maxwell (and fix for #3590), - // the recovery log was fsynced after the xcommit was loged but - // before we processed rollback entries and before we released - // the row locks (in the lock tree). Due to performance concerns, - // the fsync was moved to after the release of row locks, which comes - // after processing rollback entries. As a result, we may be unlinking a file - // here as part of a transactoin that may abort if we do not fsync the log. - // So, we fsync the log here. 
- if (txn->logger) { - toku_logger_fsync_if_lsn_not_fsynced(txn->logger, txn->do_fsync_lsn); - } - - // TODO: Zardosht - // Explain why this condition is valid, because I forget. - if (!toku_cachefile_is_unlink_on_close(old_cf)) { - toku_cachefile_unlink_on_close(old_cf); - } -done: - return r; -} - -int -toku_rollback_load (FILENUM UU(old_filenum), - BYTESTRING new_iname, - TOKUTXN txn, - LSN UU(oplsn)) -{ - int r; - CACHEFILE new_cf; - CACHETABLE ct = txn->logger->ct; - - // To rollback a dictionary load, we delete the new file. - // Try to get the cachefile for the new fname. - char *fname_in_env = fixup_fname(&new_iname); - r = toku_cachefile_of_iname_in_env(ct, fname_in_env, &new_cf); - if (r == ENOENT) { - // It's possible the new iname was never created, so just try to - // unlink it if it's there and ignore the error if it's not. - char *fname_in_cwd = toku_cachetable_get_fname_in_cwd(ct, fname_in_env); - r = unlink(fname_in_cwd); - assert(r == 0 || get_error_errno() == ENOENT); - toku_free(fname_in_cwd); - r = 0; - } else { - assert_zero(r); - toku_cachefile_unlink_on_close(new_cf); - } - toku_free(fname_in_env); - return r; -} - -//2954 -int -toku_commit_hot_index (FILENUMS UU(hot_index_filenums), - TOKUTXN UU(txn), - LSN UU(oplsn)) -{ - // nothing - return 0; -} - -int -toku_rollback_hot_index (FILENUMS UU(hot_index_filenums), - TOKUTXN UU(txn), - LSN UU(oplsn)) -{ - return 0; -} - -int -toku_commit_dictionary_redirect (FILENUM UU(old_filenum), - FILENUM UU(new_filenum), - TOKUTXN UU(txn), - LSN UU(oplsn)) //oplsn is the lsn of the commit -{ - //Redirect only has meaning during normal operation (NOT during recovery). - if (!txn->for_recovery) { - //NO-OP - } - return 0; -} - -int -toku_rollback_dictionary_redirect (FILENUM old_filenum, - FILENUM new_filenum, - TOKUTXN txn, - LSN UU(oplsn)) //oplsn is the lsn of the abort -{ - int r = 0; - //Redirect only has meaning during normal operation (NOT during recovery). 
- if (!txn->for_recovery) { - CACHEFILE new_cf = NULL; - r = toku_cachefile_of_filenum(txn->logger->ct, new_filenum, &new_cf); - assert(r == 0); - FT CAST_FROM_VOIDP(new_h, toku_cachefile_get_userdata(new_cf)); - - CACHEFILE old_cf = NULL; - r = toku_cachefile_of_filenum(txn->logger->ct, old_filenum, &old_cf); - assert(r == 0); - FT CAST_FROM_VOIDP(old_h, toku_cachefile_get_userdata(old_cf)); - - //Redirect back from new to old. - r = toku_dictionary_redirect_abort(old_h, new_h, txn); - assert(r==0); - } - return r; -} - -int -toku_commit_change_fdescriptor(FILENUM filenum, - BYTESTRING UU(old_descriptor), - TOKUTXN txn, - LSN UU(oplsn)) -{ - return do_nothing_with_filenum(txn, filenum); -} - -int -toku_rollback_change_fdescriptor(FILENUM filenum, - BYTESTRING old_descriptor, - TOKUTXN txn, - LSN UU(oplsn)) -{ - CACHEFILE cf; - int r; - r = toku_cachefile_of_filenum(txn->logger->ct, filenum, &cf); - if (r == ENOENT) { //Missing file on recovered transaction is not an error - assert(txn->for_recovery); - r = 0; - goto done; - } - // file must be open, because the txn that created it opened it and - // noted it, - assert(r == 0); - - FT ft; - ft = NULL; - r = txn->open_fts.find_zero(filenum, &ft, NULL); - assert(r == 0); - - DESCRIPTOR_S d; - toku_fill_dbt(&d.dbt, old_descriptor.data, old_descriptor.len); - toku_ft_update_descriptor(ft, &d); -done: - return r; -} - - - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/block_allocator.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/block_allocator.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/block_allocator.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/block_allocator.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,513 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can 
redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. 
+ + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+#ident "$Id$" + +#include + +#include + +#include "portability/memory.h" +#include "portability/toku_assert.h" +#include "portability/toku_stdint.h" +#include "portability/toku_stdlib.h" + +#include "ft/serialize/block_allocator.h" +#include "ft/serialize/block_allocator_strategy.h" + +#if TOKU_DEBUG_PARANOID +#define VALIDATE() validate() +#else +#define VALIDATE() +#endif + +static FILE *ba_trace_file = nullptr; + +void block_allocator::maybe_initialize_trace(void) { + const char *ba_trace_path = getenv("TOKU_BA_TRACE_PATH"); + if (ba_trace_path != nullptr) { + ba_trace_file = toku_os_fopen(ba_trace_path, "w"); + if (ba_trace_file == nullptr) { + fprintf(stderr, "tokuft: error: block allocator trace path found in environment (%s), " + "but it could not be opened for writing (errno %d)\n", + ba_trace_path, get_maybe_error_errno()); + } else { + fprintf(stderr, "tokuft: block allocator tracing enabled, path: %s\n", ba_trace_path); + } + } +} + +void block_allocator::maybe_close_trace() { + if (ba_trace_file != nullptr) { + int r = toku_os_fclose(ba_trace_file); + if (r != 0) { + fprintf(stderr, "tokuft: error: block allocator trace file did not close properly (r %d, errno %d)\n", + r, get_maybe_error_errno()); + } else { + fprintf(stderr, "tokuft: block allocator tracing finished, file closed successfully\n"); + } + } +} + +void block_allocator::_create_internal(uint64_t reserve_at_beginning, uint64_t alignment) { + // the alignment must be at least 512 and aligned with 512 to work with direct I/O + assert(alignment >= 512 && (alignment % 512) == 0); + + _reserve_at_beginning = reserve_at_beginning; + _alignment = alignment; + _n_blocks = 0; + _blocks_array_size = 1; + XMALLOC_N(_blocks_array_size, _blocks_array); + _n_bytes_in_use = reserve_at_beginning; + _strategy = BA_STRATEGY_FIRST_FIT; + + memset(&_trace_lock, 0, sizeof(toku_mutex_t)); + toku_mutex_init(&_trace_lock, nullptr); + + VALIDATE(); +} + +void block_allocator::create(uint64_t reserve_at_beginning, 
uint64_t alignment) { + _create_internal(reserve_at_beginning, alignment); + _trace_create(); +} + +void block_allocator::destroy() { + toku_free(_blocks_array); + _trace_destroy(); + toku_mutex_destroy(&_trace_lock); +} + +void block_allocator::set_strategy(enum allocation_strategy strategy) { + _strategy = strategy; +} + +void block_allocator::grow_blocks_array_by(uint64_t n_to_add) { + if (_n_blocks + n_to_add > _blocks_array_size) { + uint64_t new_size = _n_blocks + n_to_add; + uint64_t at_least = _blocks_array_size * 2; + if (at_least > new_size) { + new_size = at_least; + } + _blocks_array_size = new_size; + XREALLOC_N(_blocks_array_size, _blocks_array); + } +} + +void block_allocator::grow_blocks_array() { + grow_blocks_array_by(1); +} + +void block_allocator::create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment, + struct blockpair *pairs, uint64_t n_blocks) { + _create_internal(reserve_at_beginning, alignment); + + _n_blocks = n_blocks; + grow_blocks_array_by(_n_blocks); + memcpy(_blocks_array, pairs, _n_blocks * sizeof(struct blockpair)); + std::sort(_blocks_array, _blocks_array + _n_blocks); + for (uint64_t i = 0; i < _n_blocks; i++) { + // Allocator does not support size 0 blocks. See block_allocator_free_block. + invariant(_blocks_array[i].size > 0); + invariant(_blocks_array[i].offset >= _reserve_at_beginning); + invariant(_blocks_array[i].offset % _alignment == 0); + + _n_bytes_in_use += _blocks_array[i].size; + } + + VALIDATE(); + + _trace_create_from_blockpairs(); +} + +// Effect: align a value by rounding up. 
+static inline uint64_t align(uint64_t value, uint64_t ba_alignment) { + return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; +} + +struct block_allocator::blockpair * +block_allocator::choose_block_to_alloc_after(size_t size, uint64_t heat) { + switch (_strategy) { + case BA_STRATEGY_FIRST_FIT: + return block_allocator_strategy::first_fit(_blocks_array, _n_blocks, size, _alignment); + case BA_STRATEGY_BEST_FIT: + return block_allocator_strategy::best_fit(_blocks_array, _n_blocks, size, _alignment); + case BA_STRATEGY_HEAT_ZONE: + return block_allocator_strategy::heat_zone(_blocks_array, _n_blocks, size, _alignment, heat); + case BA_STRATEGY_PADDED_FIT: + return block_allocator_strategy::padded_fit(_blocks_array, _n_blocks, size, _alignment); + default: + abort(); + } +} + +// Effect: Allocate a block. The resulting block must be aligned on the ba->alignment (which to make direct_io happy must be a positive multiple of 512). +void block_allocator::alloc_block(uint64_t size, uint64_t heat, uint64_t *offset) { + struct blockpair *bp; + + // Allocator does not support size 0 blocks. See block_allocator_free_block. + invariant(size > 0); + + grow_blocks_array(); + _n_bytes_in_use += size; + + uint64_t end_of_reserve = align(_reserve_at_beginning, _alignment); + + if (_n_blocks == 0) { + // First and only block + assert(_n_bytes_in_use == _reserve_at_beginning + size); // we know exactly how many are in use + _blocks_array[0].offset = align(_reserve_at_beginning, _alignment); + _blocks_array[0].size = size; + *offset = _blocks_array[0].offset; + goto done; + } else if (end_of_reserve + size <= _blocks_array[0].offset ) { + // Check to see if the space immediately after the reserve is big enough to hold the new block. 
+ bp = &_blocks_array[0]; + memmove(bp + 1, bp, _n_blocks * sizeof(*bp)); + bp[0].offset = end_of_reserve; + bp[0].size = size; + *offset = end_of_reserve; + goto done; + } + + bp = choose_block_to_alloc_after(size, heat); + if (bp != nullptr) { + // our allocation strategy chose the space after `bp' to fit the new block + uint64_t answer_offset = align(bp->offset + bp->size, _alignment); + uint64_t blocknum = bp - _blocks_array; + invariant(&_blocks_array[blocknum] == bp); + invariant(blocknum < _n_blocks); + memmove(bp + 2, bp + 1, (_n_blocks - blocknum - 1) * sizeof(*bp)); + bp[1].offset = answer_offset; + bp[1].size = size; + *offset = answer_offset; + } else { + // It didn't fit anywhere, so fit it on the end. + assert(_n_blocks < _blocks_array_size); + bp = &_blocks_array[_n_blocks]; + uint64_t answer_offset = align(bp[-1].offset + bp[-1].size, _alignment); + bp->offset = answer_offset; + bp->size = size; + *offset = answer_offset; + } + +done: + _n_blocks++; + VALIDATE(); + + _trace_alloc(size, heat, *offset); +} + +// Find the index in the blocks array that has a particular offset. Requires that the block exist. +// Use binary search so it runs fast. +int64_t block_allocator::find_block(uint64_t offset) { + VALIDATE(); + if (_n_blocks == 1) { + assert(_blocks_array[0].offset == offset); + return 0; + } + + uint64_t lo = 0; + uint64_t hi = _n_blocks; + while (1) { + assert(lo < hi); // otherwise no such block exists. + uint64_t mid = (lo + hi) / 2; + uint64_t thisoff = _blocks_array[mid].offset; + if (thisoff < offset) { + lo = mid + 1; + } else if (thisoff > offset) { + hi = mid; + } else { + return mid; + } + } +} + +// To support 0-sized blocks, we need to include size as an input to this function. +// All 0-sized blocks at the same offset can be considered identical, but +// a 0-sized block can share offset with a non-zero sized block. 
+// The non-zero sized block is not exchangable with a zero sized block (or vice versa), +// so inserting 0-sized blocks can cause corruption here. +void block_allocator::free_block(uint64_t offset) { + VALIDATE(); + int64_t bn = find_block(offset); + assert(bn >= 0); // we require that there is a block with that offset. + _n_bytes_in_use -= _blocks_array[bn].size; + memmove(&_blocks_array[bn], &_blocks_array[bn + 1], + (_n_blocks - bn - 1) * sizeof(struct blockpair)); + _n_blocks--; + VALIDATE(); + + _trace_free(offset); +} + +uint64_t block_allocator::block_size(uint64_t offset) { + int64_t bn = find_block(offset); + assert(bn >=0); // we require that there is a block with that offset. + return _blocks_array[bn].size; +} + +uint64_t block_allocator::allocated_limit() const { + if (_n_blocks == 0) { + return _reserve_at_beginning; + } else { + struct blockpair *last = &_blocks_array[_n_blocks - 1]; + return last->offset + last->size; + } +} + +// Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth. +// Return the offset and size of the block with that number. +// Return 0 if there is a block that big, return nonzero if b is too big. 
+int block_allocator::get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size) { + if (b ==0 ) { + *offset = 0; + *size = _reserve_at_beginning; + return 0; + } else if (b > _n_blocks) { + return -1; + } else { + *offset =_blocks_array[b - 1].offset; + *size =_blocks_array[b - 1].size; + return 0; + } +} + +// Requires: report->file_size_bytes is filled in +// Requires: report->data_bytes is filled in +// Requires: report->checkpoint_bytes_additional is filled in +void block_allocator::get_unused_statistics(TOKU_DB_FRAGMENTATION report) { + assert(_n_bytes_in_use == report->data_bytes + report->checkpoint_bytes_additional); + + report->unused_bytes = 0; + report->unused_blocks = 0; + report->largest_unused_block = 0; + if (_n_blocks > 0) { + //Deal with space before block 0 and after reserve: + { + struct blockpair *bp = &_blocks_array[0]; + assert(bp->offset >= align(_reserve_at_beginning, _alignment)); + uint64_t free_space = bp->offset - align(_reserve_at_beginning, _alignment); + if (free_space > 0) { + report->unused_bytes += free_space; + report->unused_blocks++; + if (free_space > report->largest_unused_block) { + report->largest_unused_block = free_space; + } + } + } + + //Deal with space between blocks: + for (uint64_t blocknum = 0; blocknum +1 < _n_blocks; blocknum ++) { + // Consider the space after blocknum + struct blockpair *bp = &_blocks_array[blocknum]; + uint64_t this_offset = bp[0].offset; + uint64_t this_size = bp[0].size; + uint64_t end_of_this_block = align(this_offset+this_size, _alignment); + uint64_t next_offset = bp[1].offset; + uint64_t free_space = next_offset - end_of_this_block; + if (free_space > 0) { + report->unused_bytes += free_space; + report->unused_blocks++; + if (free_space > report->largest_unused_block) { + report->largest_unused_block = free_space; + } + } + } + + //Deal with space after last block + { + struct blockpair *bp = &_blocks_array[_n_blocks-1]; + uint64_t this_offset = bp[0].offset; + uint64_t 
this_size = bp[0].size; + uint64_t end_of_this_block = align(this_offset+this_size, _alignment); + if (end_of_this_block < report->file_size_bytes) { + uint64_t free_space = report->file_size_bytes - end_of_this_block; + assert(free_space > 0); + report->unused_bytes += free_space; + report->unused_blocks++; + if (free_space > report->largest_unused_block) { + report->largest_unused_block = free_space; + } + } + } + } else { + // No blocks. Just the reserve. + uint64_t end_of_this_block = align(_reserve_at_beginning, _alignment); + if (end_of_this_block < report->file_size_bytes) { + uint64_t free_space = report->file_size_bytes - end_of_this_block; + assert(free_space > 0); + report->unused_bytes += free_space; + report->unused_blocks++; + if (free_space > report->largest_unused_block) { + report->largest_unused_block = free_space; + } + } + } +} + +void block_allocator::get_statistics(TOKU_DB_FRAGMENTATION report) { + report->data_bytes = _n_bytes_in_use; + report->data_blocks = _n_blocks; + report->file_size_bytes = 0; + report->checkpoint_bytes_additional = 0; + get_unused_statistics(report); +} + +void block_allocator::validate() const { + uint64_t n_bytes_in_use = _reserve_at_beginning; + for (uint64_t i = 0; i < _n_blocks; i++) { + n_bytes_in_use += _blocks_array[i].size; + if (i > 0) { + assert(_blocks_array[i].offset > _blocks_array[i - 1].offset); + assert(_blocks_array[i].offset >= _blocks_array[i - 1].offset + _blocks_array[i - 1].size ); + } + } + assert(n_bytes_in_use == _n_bytes_in_use); +} + +// Tracing + +void block_allocator::_trace_create(void) { + if (ba_trace_file != nullptr) { + toku_mutex_lock(&_trace_lock); + fprintf(ba_trace_file, "ba_trace_create %p %" PRIu64 " %" PRIu64 "\n", + this, _reserve_at_beginning, _alignment); + toku_mutex_unlock(&_trace_lock); + + fflush(ba_trace_file); + } +} + +void block_allocator::_trace_create_from_blockpairs(void) { + if (ba_trace_file != nullptr) { + toku_mutex_lock(&_trace_lock); + fprintf(ba_trace_file, 
"ba_trace_create_from_blockpairs %p %" PRIu64 " %" PRIu64 " ", + this, _reserve_at_beginning, _alignment); + for (uint64_t i = 0; i < _n_blocks; i++) { + fprintf(ba_trace_file, "[%" PRIu64 " %" PRIu64 "] ", + _blocks_array[i].offset, _blocks_array[i].size); + } + fprintf(ba_trace_file, "\n"); + toku_mutex_unlock(&_trace_lock); + + fflush(ba_trace_file); + } +} + +void block_allocator::_trace_destroy(void) { + if (ba_trace_file != nullptr) { + toku_mutex_lock(&_trace_lock); + fprintf(ba_trace_file, "ba_trace_destroy %p\n", this); + toku_mutex_unlock(&_trace_lock); + + fflush(ba_trace_file); + } +} + +void block_allocator::_trace_alloc(uint64_t size, uint64_t heat, uint64_t offset) { + if (ba_trace_file != nullptr) { + toku_mutex_lock(&_trace_lock); + fprintf(ba_trace_file, "ba_trace_alloc %p %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", + this, size, heat, offset); + toku_mutex_unlock(&_trace_lock); + + fflush(ba_trace_file); + } +} + +void block_allocator::_trace_free(uint64_t offset) { + if (ba_trace_file != nullptr) { + toku_mutex_lock(&_trace_lock); + fprintf(ba_trace_file, "ba_trace_free %p %" PRIu64 "\n", this, offset); + toku_mutex_unlock(&_trace_lock); + + fflush(ba_trace_file); + } +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/block_allocator.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/block_allocator.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/block_allocator.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/block_allocator.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,267 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following 
conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. 
+ + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." + +#include + +#include "portability/toku_pthread.h" +#include "portability/toku_stdint.h" + +// Block allocator. +// +// A block allocator manages the allocation of variable-sized blocks. +// The translation of block numbers to addresses is handled elsewhere. +// The allocation of block numbers is handled elsewhere. +// +// When creating a block allocator we also specify a certain-sized +// block at the beginning that is preallocated (and cannot be allocated or freed) +// +// We can allocate blocks of a particular size at a particular location. +// We can allocate blocks of a particular size at a location chosen by the allocator. +// We can free blocks. +// We can determine the size of a block. + +class block_allocator { +public: + static const size_t BLOCK_ALLOCATOR_ALIGNMENT = 4096; + + // How much must be reserved at the beginning for the block? + // The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 pointer for each root. + // So 4096 should be enough. + static const size_t BLOCK_ALLOCATOR_HEADER_RESERVE = 4096; + + static_assert(BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT == 0, + "block allocator header must have proper alignment"); + + static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE = BLOCK_ALLOCATOR_HEADER_RESERVE * 2; + + enum allocation_strategy { + BA_STRATEGY_FIRST_FIT = 1, + BA_STRATEGY_BEST_FIT, + BA_STRATEGY_PADDED_FIT, + BA_STRATEGY_HEAT_ZONE + }; + + struct blockpair { + uint64_t offset; + uint64_t size; + blockpair(uint64_t o, uint64_t s) : + offset(o), size(s) { + } + int operator<(const struct blockpair &rhs) const { + return offset < rhs.offset; + } + int operator<(const uint64_t &o) const { + return offset < o; + } + }; + + // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block. 
+ // The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT) + // All blocks be start on a multiple of ALIGNMENT. + // Aborts if we run out of memory. + // Parameters + // reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned. + // alignment (IN) Block alignment. + void create(uint64_t reserve_at_beginning, uint64_t alignment); + + // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block. + // The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT) + // The allocator is initialized to contain `n_blocks' of blockpairs, taken from `pairs' + // All blocks be start on a multiple of ALIGNMENT. + // Aborts if we run out of memory. + // Parameters + // pairs, unowned array of pairs to copy + // n_blocks, Size of pairs array + // reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned. + // alignment (IN) Block alignment. + void create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment, + struct blockpair *pairs, uint64_t n_blocks); + + // Effect: Destroy this block allocator + void destroy(); + + // Effect: Set the allocation strategy that the allocator should use + // Requires: No other threads are operating on this block allocator + void set_strategy(enum allocation_strategy strategy); + + // Effect: Allocate a block of the specified size at an address chosen by the allocator. + // Aborts if anything goes wrong. + // The block address will be a multiple of the alignment. + // Parameters: + // size (IN): The size of the block. (The size does not have to be aligned.) + // offset (OUT): The location of the block. 
+ // heat (IN): A higher heat means we should be prepared to free this block soon (perhaps in the next checkpoint) + // Heat values are lexiographically ordered (like integers), but their specific values are arbitrary + void alloc_block(uint64_t size, uint64_t heat, uint64_t *offset); + + // Effect: Free the block at offset. + // Requires: There must be a block currently allocated at that offset. + // Parameters: + // offset (IN): The offset of the block. + void free_block(uint64_t offset); + + // Effect: Return the size of the block that starts at offset. + // Requires: There must be a block currently allocated at that offset. + // Parameters: + // offset (IN): The offset of the block. + uint64_t block_size(uint64_t offset); + + // Effect: Check to see if the block allocator is OK. This may take a long time. + // Usage Hints: Probably only use this for unit tests. + // TODO: Private? + void validate() const; + + // Effect: Return the unallocated block address of "infinite" size. + // That is, return the smallest address that is above all the allocated blocks. + uint64_t allocated_limit() const; + + // Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth. + // Return the offset and size of the block with that number. + // Return 0 if there is a block that big, return nonzero if b is too big. + // Rationale: This is probably useful only for tests. + int get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size); + + // Effect: Fill in report to indicate how the file is used. 
+ // Requires: + // report->file_size_bytes is filled in + // report->data_bytes is filled in + // report->checkpoint_bytes_additional is filled in + void get_unused_statistics(TOKU_DB_FRAGMENTATION report); + + // Effect: Fill in report->data_bytes with the number of bytes in use + // Fill in report->data_blocks with the number of blockpairs in use + // Fill in unused statistics using this->get_unused_statistics() + // Requires: + // report->file_size is ignored on return + // report->checkpoint_bytes_additional is ignored on return + void get_statistics(TOKU_DB_FRAGMENTATION report); + + // Block allocator tracing. + // - Enabled by setting TOKU_BA_TRACE_PATH to the file that the trace file + // should be written to. + // - Trace may be replayed by ba_trace_replay tool in tools/ directory + // eg: "cat mytracefile | ba_trace_replay" + static void maybe_initialize_trace(); + static void maybe_close_trace(); + +private: + void _create_internal(uint64_t reserve_at_beginning, uint64_t alignment); + void grow_blocks_array_by(uint64_t n_to_add); + void grow_blocks_array(); + int64_t find_block(uint64_t offset); + struct blockpair *choose_block_to_alloc_after(size_t size, uint64_t heat); + + // Tracing + toku_mutex_t _trace_lock; + void _trace_create(void); + void _trace_create_from_blockpairs(void); + void _trace_destroy(void); + void _trace_alloc(uint64_t size, uint64_t heat, uint64_t offset); + void _trace_free(uint64_t offset); + + // How much to reserve at the beginning + uint64_t _reserve_at_beginning; + // Block alignment + uint64_t _alignment; + // How many blocks + uint64_t _n_blocks; + // How big is the blocks_array. Must be >= n_blocks. + uint64_t _blocks_array_size; + // These blocks are sorted by address. 
+ struct blockpair *_blocks_array; + // Including the reserve_at_beginning + uint64_t _n_bytes_in_use; + // The allocation strategy are we using + enum allocation_strategy _strategy; +}; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/block_allocator_strategy.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/block_allocator_strategy.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/block_allocator_strategy.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/block_allocator_strategy.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,274 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2014 Tokutek, Inc. 
+ +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#include + +#include + +#include "portability/toku_assert.h" + +#include "ft/serialize/block_allocator_strategy.h" + +static uint64_t _align(uint64_t value, uint64_t ba_alignment) { + return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; +} + +static uint64_t _roundup_to_power_of_two(uint64_t value) { + uint64_t r = 4096; + while (r < value) { + r *= 2; + invariant(r > 0); + } + return r; +} + +// First fit block allocation +static struct block_allocator::blockpair * +_first_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment, + uint64_t max_padding) { + if (n_blocks == 1) { + // won't enter loop, can't underflow the direction < 0 case + return nullptr; + } + + struct block_allocator::blockpair *bp = &blocks_array[0]; + for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0; + n_spaces_to_check--, bp++) { + // Consider the space after bp + uint64_t padded_alignment = max_padding != 0 ? 
_align(max_padding, alignment) : alignment; + uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment); + if (possible_offset + size <= bp[1].offset) { // bp[1] is always valid since bp < &blocks_array[n_blocks-1] + invariant(bp - blocks_array < (int64_t) n_blocks); + return bp; + } + } + return nullptr; +} + +static struct block_allocator::blockpair * +_first_fit_bw(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment, + uint64_t max_padding, struct block_allocator::blockpair *blocks_array_limit) { + if (n_blocks == 1) { + // won't enter loop, can't underflow the direction < 0 case + return nullptr; + } + + struct block_allocator::blockpair *bp = &blocks_array[-1]; + for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0; + n_spaces_to_check--, bp--) { + // Consider the space after bp + uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment; + uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment); + if (&bp[1] < blocks_array_limit && possible_offset + size <= bp[1].offset) { + invariant(blocks_array - bp < (int64_t) n_blocks); + return bp; + } + } + return nullptr; +} + +struct block_allocator::blockpair * +block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment) { + return _first_fit(blocks_array, n_blocks, size, alignment, 0); +} + +// Best fit block allocation +struct block_allocator::blockpair * +block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment) { + struct block_allocator::blockpair *best_bp = nullptr; + uint64_t best_hole_size = 0; + for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) { + // Consider the space after blocknum + struct block_allocator::blockpair *bp = &blocks_array[blocknum]; + uint64_t possible_offset = _align(bp->offset + 
bp->size, alignment); + uint64_t possible_end_offset = possible_offset + size; + if (possible_end_offset <= bp[1].offset) { + // It fits here. Is it the best fit? + uint64_t hole_size = bp[1].offset - possible_end_offset; + if (best_bp == nullptr || hole_size < best_hole_size) { + best_hole_size = hole_size; + best_bp = bp; + } + } + } + return best_bp; +} + +static uint64_t padded_fit_alignment = 4096; + +// TODO: These compiler specific directives should be abstracted in a portability header +// portability/toku_compiler.h? +__attribute__((__constructor__)) +static void determine_padded_fit_alignment_from_env(void) { + // TODO: Should be in portability as 'toku_os_getenv()?' + const char *s = getenv("TOKU_BA_PADDED_FIT_ALIGNMENT"); + if (s != nullptr && strlen(s) > 0) { + const int64_t alignment = strtoll(s, nullptr, 10); + if (alignment <= 0) { + fprintf(stderr, "tokuft: error: block allocator padded fit alignment found in environment (%s), " + "but it's out of range (should be an integer > 0). defaulting to %" PRIu64 "\n", + s, padded_fit_alignment); + } else { + padded_fit_alignment = _roundup_to_power_of_two(alignment); + fprintf(stderr, "tokuft: setting block allocator padded fit alignment to %" PRIu64 "\n", + padded_fit_alignment); + } + } +} + +// First fit into a block that is oversized by up to max_padding. +// The hope is that if we purposefully waste a bit of space at allocation +// time we'll be more likely to reuse this block later. +struct block_allocator::blockpair * +block_allocator_strategy::padded_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment) { + return _first_fit(blocks_array, n_blocks, size, alignment, padded_fit_alignment); +} + +static double hot_zone_threshold = 0.85; + +// TODO: These compiler specific directives should be abstracted in a portability header +// portability/toku_compiler.h? 
+__attribute__((__constructor__)) +static void determine_hot_zone_threshold_from_env(void) { + // TODO: Should be in portability as 'toku_os_getenv()?' + const char *s = getenv("TOKU_BA_HOT_ZONE_THRESHOLD"); + if (s != nullptr && strlen(s) > 0) { + const double hot_zone = strtod(s, nullptr); + if (hot_zone < 1 || hot_zone > 99) { + fprintf(stderr, "tokuft: error: block allocator hot zone threshold found in environment (%s), " + "but it's out of range (should be an integer 1 through 99). defaulting to 85\n", s); + hot_zone_threshold = 85 / 100; + } else { + fprintf(stderr, "tokuft: setting block allocator hot zone threshold to %s\n", s); + hot_zone_threshold = hot_zone / 100; + } + } +} + +struct block_allocator::blockpair * +block_allocator_strategy::heat_zone(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment, + uint64_t heat) { + if (heat > 0) { + struct block_allocator::blockpair *bp, *boundary_bp; + + // Hot allocation. Find the beginning of the hot zone. + boundary_bp = &blocks_array[n_blocks - 1]; + uint64_t highest_offset = _align(boundary_bp->offset + boundary_bp->size, alignment); + uint64_t hot_zone_offset = static_cast(hot_zone_threshold * highest_offset); + + boundary_bp = std::lower_bound(blocks_array, blocks_array + n_blocks, hot_zone_offset); + uint64_t blocks_in_zone = (blocks_array + n_blocks) - boundary_bp; + uint64_t blocks_outside_zone = boundary_bp - blocks_array; + invariant(blocks_in_zone + blocks_outside_zone == n_blocks); + + if (blocks_in_zone > 0) { + // Find the first fit in the hot zone, going forward. + bp = _first_fit(boundary_bp, blocks_in_zone, size, alignment, 0); + if (bp != nullptr) { + return bp; + } + } + if (blocks_outside_zone > 0) { + // Find the first fit in the cold zone, going backwards. 
+ bp = _first_fit_bw(boundary_bp, blocks_outside_zone, size, alignment, 0, &blocks_array[n_blocks]); + if (bp != nullptr) { + return bp; + } + } + } else { + // Cold allocations are simply first-fit from the beginning. + return _first_fit(blocks_array, n_blocks, size, alignment, 0); + } + return nullptr; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/block_allocator_strategy.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/block_allocator_strategy.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/block_allocator_strategy.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/block_allocator_strategy.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,115 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. 
+ +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2014 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#include + +#include "ft/serialize/block_allocator.h" + +// Block allocation strategy implementations + +class block_allocator_strategy { +public: + static struct block_allocator::blockpair * + first_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment); + + static struct block_allocator::blockpair * + best_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment); + + static struct block_allocator::blockpair * + padded_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment); + + static struct block_allocator::blockpair * + heat_zone(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment, + uint64_t heat); +}; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/block_table.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/block_table.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/block_table.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/block_table.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,1048 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* 
+COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. 
+ +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
+#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include + +#include "portability/memory.h" +#include "portability/toku_assert.h" +#include "portability/toku_portability.h" +#include "portability/toku_pthread.h" + +// ugly but pragmatic, need access to dirty bits while holding translation lock +// TODO: Refactor this (possibly with FT-301) +#include "ft/ft-internal.h" + +// TODO: reorganize this dependency (FT-303) +#include "ft/ft-ops.h" // for toku_maybe_truncate_file +#include "ft/serialize/block_table.h" +#include "ft/serialize/rbuf.h" +#include "ft/serialize/wbuf.h" +#include "ft/serialize/block_allocator.h" + +#include "util/nb_mutex.h" +#include "util/scoped_malloc.h" + +// indicates the end of a freelist +static const BLOCKNUM freelist_null = { -1 }; + +// value of block_translation_pair.size if blocknum is unused +static const DISKOFF size_is_free = (DISKOFF) -1; + +// value of block_translation_pair.u.diskoff if blocknum is used but does not yet have a diskblock +static const DISKOFF diskoff_unused = (DISKOFF) -2; + +void block_table::_mutex_lock() { + toku_mutex_lock(&_mutex); +} + +void block_table::_mutex_unlock() { + toku_mutex_unlock(&_mutex); +} + +// TODO: Move lock to FT +void toku_ft_lock(FT ft) { + block_table *bt = &ft->blocktable; + bt->_mutex_lock(); +} + +// TODO: Move lock to FT +void toku_ft_unlock(FT ft) { + block_table *bt = &ft->blocktable; + toku_mutex_assert_locked(&bt->_mutex); + bt->_mutex_unlock(); +} + +// There are two headers: the reserve must fit them both and be suitably aligned. 
+static_assert(block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE % + block_allocator::BLOCK_ALLOCATOR_ALIGNMENT == 0, + "Block allocator's header reserve must be suitibly aligned"); +static_assert(block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE * 2 == + block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, + "Block allocator's total header reserve must exactly fit two headers"); + +// does NOT initialize the block allocator: the caller is responsible +void block_table::_create_internal() { + memset(&_current, 0, sizeof(struct translation)); + memset(&_inprogress, 0, sizeof(struct translation)); + memset(&_checkpointed, 0, sizeof(struct translation)); + memset(&_mutex, 0, sizeof(_mutex)); + toku_mutex_init(&_mutex, nullptr); + nb_mutex_init(&_safe_file_size_lock); +} + +// Fill in the checkpointed translation from buffer, and copy checkpointed to current. +// The one read from disk is the last known checkpointed one, so we are keeping it in +// place and then setting current (which is never stored on disk) for current use. +// The translation_buffer has translation only, we create the rest of the block_table. 
+int block_table::create_from_buffer(int fd, + DISKOFF location_on_disk, //Location of translation_buffer + DISKOFF size_on_disk, + unsigned char *translation_buffer) { + // Does not initialize the block allocator + _create_internal(); + + // Deserialize the translation and copy it to current + int r = _translation_deserialize_from_buffer(&_checkpointed, + location_on_disk, size_on_disk, + translation_buffer); + if (r != 0) { + return r; + } + _copy_translation(&_current, &_checkpointed, TRANSLATION_CURRENT); + + // Determine the file size + int64_t file_size; + r = toku_os_get_file_size(fd, &file_size); + lazy_assert_zero(r); + invariant(file_size >= 0); + _safe_file_size = file_size; + + // Gather the non-empty translations and use them to create the block allocator + toku::scoped_malloc pairs_buf(_checkpointed.smallest_never_used_blocknum.b * + sizeof(struct block_allocator::blockpair)); + struct block_allocator::blockpair *CAST_FROM_VOIDP(pairs, pairs_buf.get()); + uint64_t n_pairs = 0; + for (int64_t i = 0; i < _checkpointed.smallest_never_used_blocknum.b; i++) { + struct block_translation_pair pair = _checkpointed.block_translation[i]; + if (pair.size > 0) { + invariant(pair.u.diskoff != diskoff_unused); + pairs[n_pairs++] = block_allocator::blockpair(pair.u.diskoff, pair.size); + } + } + + _bt_block_allocator.create_from_blockpairs(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, + block_allocator::BLOCK_ALLOCATOR_ALIGNMENT, + pairs, n_pairs); + + return 0; +} + +void block_table::create() { + // Does not initialize the block allocator + _create_internal(); + + _checkpointed.type = TRANSLATION_CHECKPOINTED; + _checkpointed.smallest_never_used_blocknum = make_blocknum(RESERVED_BLOCKNUMS); + _checkpointed.length_of_array = _checkpointed.smallest_never_used_blocknum.b; + _checkpointed.blocknum_freelist_head = freelist_null; + XMALLOC_N(_checkpointed.length_of_array, _checkpointed.block_translation); + for (int64_t i = 0; i < _checkpointed.length_of_array; 
i++) { + _checkpointed.block_translation[i].size = 0; + _checkpointed.block_translation[i].u.diskoff = diskoff_unused; + } + + // we just created a default checkpointed, now copy it to current. + _copy_translation(&_current, &_checkpointed, TRANSLATION_CURRENT); + + // Create an empty block allocator. + _bt_block_allocator.create(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, + block_allocator::BLOCK_ALLOCATOR_ALIGNMENT); +} + +// TODO: Refactor with FT-303 +static void ft_set_dirty(FT ft, bool for_checkpoint) { + invariant(ft->h->type == FT_CURRENT); + if (for_checkpoint) { + invariant(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS); + ft->checkpoint_header->dirty = 1; + } else { + ft->h->dirty = 1; + } +} + +void block_table::_maybe_truncate_file(int fd, uint64_t size_needed_before) { + toku_mutex_assert_locked(&_mutex); + uint64_t new_size_needed = _bt_block_allocator.allocated_limit(); + //Save a call to toku_os_get_file_size (kernel call) if unlikely to be useful. + if (new_size_needed < size_needed_before && new_size_needed < _safe_file_size) { + nb_mutex_lock(&_safe_file_size_lock, &_mutex); + + // Must hold _safe_file_size_lock to change _safe_file_size. + if (new_size_needed < _safe_file_size) { + int64_t safe_file_size_before = _safe_file_size; + // Not safe to use the 'to-be-truncated' portion until truncate is done. 
+ _safe_file_size = new_size_needed; + _mutex_unlock(); + + uint64_t size_after; + toku_maybe_truncate_file(fd, new_size_needed, safe_file_size_before, &size_after); + _mutex_lock(); + + _safe_file_size = size_after; + } + nb_mutex_unlock(&_safe_file_size_lock); + } +} + +void block_table::maybe_truncate_file_on_open(int fd) { + _mutex_lock(); + _maybe_truncate_file(fd, _safe_file_size); + _mutex_unlock(); +} + +void block_table::_copy_translation(struct translation *dst, struct translation *src, enum translation_type newtype) { + // We intend to malloc a fresh block, so the incoming translation should be empty + invariant_null(dst->block_translation); + + invariant(src->length_of_array >= src->smallest_never_used_blocknum.b); + invariant(newtype == TRANSLATION_DEBUG || + (src->type == TRANSLATION_CURRENT && newtype == TRANSLATION_INPROGRESS) || + (src->type == TRANSLATION_CHECKPOINTED && newtype == TRANSLATION_CURRENT)); + dst->type = newtype; + dst->smallest_never_used_blocknum = src->smallest_never_used_blocknum; + dst->blocknum_freelist_head = src->blocknum_freelist_head; + + // destination btt is of fixed size. Allocate + memcpy the exact length necessary. + dst->length_of_array = dst->smallest_never_used_blocknum.b; + XMALLOC_N(dst->length_of_array, dst->block_translation); + memcpy(dst->block_translation, src->block_translation, dst->length_of_array * sizeof(*dst->block_translation)); + + // New version of btt is not yet stored on disk. + dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size = 0; + dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff = diskoff_unused; +} + +int64_t block_table::get_blocks_in_use_unlocked() { + BLOCKNUM b; + struct translation *t = &_current; + int64_t num_blocks = 0; + { + //Reserved blocknums do not get upgraded; They are part of the header. 
+ for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) { + if (t->block_translation[b.b].size != size_is_free) { + num_blocks++; + } + } + } + return num_blocks; +} + +void block_table::_maybe_optimize_translation(struct translation *t) { + //Reduce 'smallest_never_used_blocknum.b' (completely free blocknums instead of just + //on a free list. Doing so requires us to regenerate the free list. + //This is O(n) work, so do it only if you're already doing that. + + BLOCKNUM b; + paranoid_invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS); + //Calculate how large the free suffix is. + int64_t freed; + { + for (b.b = t->smallest_never_used_blocknum.b; b.b > RESERVED_BLOCKNUMS; b.b--) { + if (t->block_translation[b.b-1].size != size_is_free) { + break; + } + } + freed = t->smallest_never_used_blocknum.b - b.b; + } + if (freed>0) { + t->smallest_never_used_blocknum.b = b.b; + if (t->length_of_array/4 > t->smallest_never_used_blocknum.b) { + //We're using more memory than necessary to represent this now. Reduce. + uint64_t new_length = t->smallest_never_used_blocknum.b * 2; + XREALLOC_N(new_length, t->block_translation); + t->length_of_array = new_length; + //No need to zero anything out. + } + + //Regenerate free list. + t->blocknum_freelist_head.b = freelist_null.b; + for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) { + if (t->block_translation[b.b].size == size_is_free) { + t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head; + t->blocknum_freelist_head = b; + } + } + } +} + +// block table must be locked by caller of this function +void block_table::note_start_checkpoint_unlocked() { + toku_mutex_assert_locked(&_mutex); + + // We're going to do O(n) work to copy the translation, so we + // can afford to do O(n) work by optimizing the translation + _maybe_optimize_translation(&_current); + + // Copy current translation to inprogress translation. 
+ _copy_translation(&_inprogress, &_current, TRANSLATION_INPROGRESS); + + _checkpoint_skipped = false; +} + +void block_table::note_skipped_checkpoint() { + //Purpose, alert block translation that the checkpoint was skipped, e.x. for a non-dirty header + _mutex_lock(); + paranoid_invariant_notnull(_inprogress.block_translation); + _checkpoint_skipped = true; + _mutex_unlock(); +} + +// Purpose: free any disk space used by previous checkpoint that isn't in use by either +// - current state +// - in-progress checkpoint +// capture inprogress as new checkpointed. +// For each entry in checkpointBTT +// if offset does not match offset in inprogress +// assert offset does not match offset in current +// free (offset,len) from checkpoint +// move inprogress to checkpoint (resetting type) +// inprogress = NULL +void block_table::note_end_checkpoint(int fd) { + // Free unused blocks + _mutex_lock(); + uint64_t allocated_limit_at_start = _bt_block_allocator.allocated_limit(); + paranoid_invariant_notnull(_inprogress.block_translation); + if (_checkpoint_skipped) { + toku_free(_inprogress.block_translation); + memset(&_inprogress, 0, sizeof(_inprogress)); + goto end; + } + + //Make certain inprogress was allocated space on disk + assert(_inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].size > 0); + assert(_inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff > 0); + + { + struct translation *t = &_checkpointed; + for (int64_t i = 0; i < t->length_of_array; i++) { + struct block_translation_pair *pair = &t->block_translation[i]; + if (pair->size > 0 && !_translation_prevents_freeing(&_inprogress, make_blocknum(i), pair)) { + assert(!_translation_prevents_freeing(&_current, make_blocknum(i), pair)); + _bt_block_allocator.free_block(pair->u.diskoff); + } + } + toku_free(_checkpointed.block_translation); + _checkpointed = _inprogress; + _checkpointed.type = TRANSLATION_CHECKPOINTED; + memset(&_inprogress, 0, sizeof(_inprogress)); + 
_maybe_truncate_file(fd, allocated_limit_at_start); + } +end: + _mutex_unlock(); +} + +bool block_table::_is_valid_blocknum(struct translation *t, BLOCKNUM b) { + invariant(t->length_of_array >= t->smallest_never_used_blocknum.b); + return b.b >= 0 && b.b < t->smallest_never_used_blocknum.b; +} + +void block_table::_verify_valid_blocknum(struct translation *UU(t), BLOCKNUM UU(b)) { + invariant(_is_valid_blocknum(t, b)); +} + +bool block_table::_is_valid_freeable_blocknum(struct translation *t, BLOCKNUM b) { + invariant(t->length_of_array >= t->smallest_never_used_blocknum.b); + return b.b >= RESERVED_BLOCKNUMS && b.b < t->smallest_never_used_blocknum.b; +} + +// should be freeable +void block_table::_verify_valid_freeable_blocknum(struct translation *UU(t), BLOCKNUM UU(b)) { + invariant(_is_valid_freeable_blocknum(t, b)); +} + +// Also used only in ft-serialize-test. +void block_table::block_free(uint64_t offset) { + _mutex_lock(); + _bt_block_allocator.free_block(offset); + _mutex_unlock(); +} + +int64_t block_table::_calculate_size_on_disk(struct translation *t) { + return 8 + // smallest_never_used_blocknum + 8 + // blocknum_freelist_head + t->smallest_never_used_blocknum.b * 16 + // Array + 4; // 4 for checksum +} + +// We cannot free the disk space allocated to this blocknum if it is still in use by the given translation table. 
+bool block_table::_translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair) { + return t->block_translation && + b.b < t->smallest_never_used_blocknum.b && + old_pair->u.diskoff == t->block_translation[b.b].u.diskoff; +} + +void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, bool for_checkpoint, uint64_t heat) { + toku_mutex_assert_locked(&_mutex); + ft_set_dirty(ft, for_checkpoint); + + struct translation *t = &_current; + struct block_translation_pair old_pair = t->block_translation[b.b]; + //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint + bool cannot_free = (bool) + ((!for_checkpoint && _translation_prevents_freeing(&_inprogress, b, &old_pair)) || + _translation_prevents_freeing(&_checkpointed, b, &old_pair)); + if (!cannot_free && old_pair.u.diskoff!=diskoff_unused) { + _bt_block_allocator.free_block(old_pair.u.diskoff); + } + + uint64_t allocator_offset = diskoff_unused; + t->block_translation[b.b].size = size; + if (size > 0) { + // Allocate a new block if the size is greater than 0, + // if the size is just 0, offset will be set to diskoff_unused + _bt_block_allocator.alloc_block(size, heat, &allocator_offset); + } + t->block_translation[b.b].u.diskoff = allocator_offset; + *offset = allocator_offset; + + //Update inprogress btt if appropriate (if called because Pending bit is set). + if (for_checkpoint) { + paranoid_invariant(b.b < _inprogress.length_of_array); + _inprogress.block_translation[b.b] = t->block_translation[b.b]; + } +} + +void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOFF block_offset) { + // Requires: holding _mutex + uint64_t size_needed = block_size + block_offset; + if (size_needed > _safe_file_size) { + // Must hold _safe_file_size_lock to change _safe_file_size. 
+ nb_mutex_lock(&_safe_file_size_lock, &_mutex); + if (size_needed > _safe_file_size) { + _mutex_unlock(); + + int64_t size_after; + toku_maybe_preallocate_in_file(fd, size_needed, _safe_file_size, &size_after); + + _mutex_lock(); + _safe_file_size = size_after; + } + nb_mutex_unlock(&_safe_file_size_lock); + } +} + +void block_table::realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, int fd, bool for_checkpoint, uint64_t heat) { + _mutex_lock(); + struct translation *t = &_current; + _verify_valid_freeable_blocknum(t, b); + _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint, heat); + + _ensure_safe_write_unlocked(fd, size, *offset); + _mutex_unlock(); +} + +bool block_table::_pair_is_unallocated(struct block_translation_pair *pair) { + return pair->size == 0 && pair->u.diskoff == diskoff_unused; +} + +// Effect: figure out where to put the inprogress btt on disk, allocate space for it there. +// The space must be 512-byte aligned (both the starting address and the size). +// As a result, the allcoated space may be a little bit bigger (up to the next 512-byte boundary) than the actual btt. +void block_table::_alloc_inprogress_translation_on_disk_unlocked() { + toku_mutex_assert_locked(&_mutex); + + struct translation *t = &_inprogress; + paranoid_invariant_notnull(t->block_translation); + BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION); + //Each inprogress is allocated only once + paranoid_invariant(_pair_is_unallocated(&t->block_translation[b.b])); + + //Allocate a new block + int64_t size = _calculate_size_on_disk(t); + uint64_t offset; + _bt_block_allocator.alloc_block(size, 0, &offset); + t->block_translation[b.b].u.diskoff = offset; + t->block_translation[b.b].size = size; +} + +// Effect: Serializes the blocktable to a wbuf (which starts uninitialized) +// A clean shutdown runs checkpoint start so that current and inprogress are copies. 
+// The resulting wbuf buffer is guaranteed to be be 512-byte aligned and the total length is a multiple of 512 (so we pad with zeros at the end if needd)
+// The address is guaranteed to be 512-byte aligned, but the size is not guaranteed.
+// It *is* guaranteed that we can read up to the next 512-byte boundary, however
+void block_table::serialize_translation_to_wbuf(int fd, struct wbuf *w,
+                                                int64_t *address, int64_t *size) {
+    _mutex_lock();
+    struct translation *t = &_inprogress;
+
+    BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
+    _alloc_inprogress_translation_on_disk_unlocked(); // The allocated block must be 512-byte aligned to make O_DIRECT happy.
+    uint64_t size_translation = _calculate_size_on_disk(t);
+    uint64_t size_aligned     = roundup_to_multiple(512, size_translation);
+    assert((int64_t)size_translation==t->block_translation[b.b].size);
+    {
+        //Init wbuf
+        if (0)
+            printf("%s:%d writing translation table of size_translation %" PRIu64 " at %" PRId64 "\n", __FILE__, __LINE__, size_translation, t->block_translation[b.b].u.diskoff);
+        char *XMALLOC_N_ALIGNED(512, size_aligned, buf);
+        for (uint64_t i=size_translation; i<size_aligned; i++) buf[i]=0; // fill in the end of the buffer with zeros.
+        wbuf_init(w, buf, size_aligned);
+    }
+    wbuf_BLOCKNUM(w, t->smallest_never_used_blocknum);
+    wbuf_BLOCKNUM(w, t->blocknum_freelist_head);
+    int64_t i;
+    for (i=0; i<t->smallest_never_used_blocknum.b; i++) {
+        if (0)
+            printf("%s:%d %" PRId64 ",%" PRId64 "\n", __FILE__, __LINE__, t->block_translation[i].u.diskoff, t->block_translation[i].size);
+        wbuf_DISKOFF(w, t->block_translation[i].u.diskoff);
+        wbuf_DISKOFF(w, t->block_translation[i].size);
+    }
+    uint32_t checksum = toku_x1764_finish(&w->checksum);
+    wbuf_int(w, checksum);
+    *address = t->block_translation[b.b].u.diskoff;
+    *size = size_translation;
+    assert((*address)%512 == 0);
+
+    _ensure_safe_write_unlocked(fd, size_aligned, *address);
+    _mutex_unlock();
+}
+
+// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?)
+void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size) { + struct translation *t = &_current; + _verify_valid_blocknum(t, b); + if (offset) { + *offset = t->block_translation[b.b].u.diskoff; + } + if (size) { + *size = t->block_translation[b.b].size; + } +} + +// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?) +void block_table::translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size) { + _mutex_lock(); + _translate_blocknum_to_offset_size_unlocked(b, offset, size); + _mutex_unlock(); +} + +// Only called by toku_allocate_blocknum +// Effect: expand the array to maintain size invariant +// given that one more never-used blocknum will soon be used. +void block_table::_maybe_expand_translation(struct translation *t) { + if (t->length_of_array <= t->smallest_never_used_blocknum.b) { + //expansion is necessary + uint64_t new_length = t->smallest_never_used_blocknum.b * 2; + XREALLOC_N(new_length, t->block_translation); + uint64_t i; + for (i = t->length_of_array; i < new_length; i++) { + t->block_translation[i].u.next_free_blocknum = freelist_null; + t->block_translation[i].size = size_is_free; + } + t->length_of_array = new_length; + } +} + +void block_table::_allocate_blocknum_unlocked(BLOCKNUM *res, FT ft) { + toku_mutex_assert_locked(&_mutex); + BLOCKNUM result; + struct translation *t = &_current; + if (t->blocknum_freelist_head.b == freelist_null.b) { + // no previously used blocknums are available + // use a never used blocknum + _maybe_expand_translation(t); //Ensure a never used blocknums is available + result = t->smallest_never_used_blocknum; + t->smallest_never_used_blocknum.b++; + } else { // reuse a previously used blocknum + result = t->blocknum_freelist_head; + BLOCKNUM next = t->block_translation[result.b].u.next_free_blocknum; + t->blocknum_freelist_head = next; + } + //Verify the blocknum is free + 
paranoid_invariant(t->block_translation[result.b].size == size_is_free); + //blocknum is not free anymore + t->block_translation[result.b].u.diskoff = diskoff_unused; + t->block_translation[result.b].size = 0; + _verify_valid_freeable_blocknum(t, result); + *res = result; + ft_set_dirty(ft, false); +} + +void block_table::allocate_blocknum(BLOCKNUM *res, FT ft) { + _mutex_lock(); + _allocate_blocknum_unlocked(res, ft); + _mutex_unlock(); +} + +void block_table::_free_blocknum_in_translation(struct translation *t, BLOCKNUM b) { + _verify_valid_freeable_blocknum(t, b); + paranoid_invariant(t->block_translation[b.b].size != size_is_free); + + t->block_translation[b.b].size = size_is_free; + t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head; + t->blocknum_freelist_head = b; +} + +// Effect: Free a blocknum. +// If the blocknum holds the only reference to a block on disk, free that block +void block_table::_free_blocknum_unlocked(BLOCKNUM *bp, FT ft, bool for_checkpoint) { + toku_mutex_assert_locked(&_mutex); + BLOCKNUM b = *bp; + bp->b = 0; //Remove caller's reference. + + struct block_translation_pair old_pair = _current.block_translation[b.b]; + + _free_blocknum_in_translation(&_current, b); + if (for_checkpoint) { + paranoid_invariant(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS); + _free_blocknum_in_translation(&_inprogress, b); + } + + //If the size is 0, no disk block has ever been assigned to this blocknum. 
+ if (old_pair.size > 0) { + //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint + bool cannot_free = (bool) + (_translation_prevents_freeing(&_inprogress, b, &old_pair) || + _translation_prevents_freeing(&_checkpointed, b, &old_pair)); + if (!cannot_free) { + _bt_block_allocator.free_block(old_pair.u.diskoff); + } + } + else { + paranoid_invariant(old_pair.size==0); + paranoid_invariant(old_pair.u.diskoff == diskoff_unused); + } + ft_set_dirty(ft, for_checkpoint); +} + +void block_table::free_blocknum(BLOCKNUM *bp, FT ft, bool for_checkpoint) { + _mutex_lock(); + _free_blocknum_unlocked(bp, ft, for_checkpoint); + _mutex_unlock(); +} + +// Verify there are no free blocks. +void block_table::verify_no_free_blocknums() { + invariant(_current.blocknum_freelist_head.b == freelist_null.b); +} + +// Frees blocknums that have a size of 0 and unused diskoff +// Currently used for eliminating unused cached rollback log nodes +void block_table::free_unused_blocknums(BLOCKNUM root) { + _mutex_lock(); + int64_t smallest = _current.smallest_never_used_blocknum.b; + for (int64_t i=RESERVED_BLOCKNUMS; i < smallest; i++) { + if (i == root.b) { + continue; + } + BLOCKNUM b = make_blocknum(i); + if (_current.block_translation[b.b].size == 0) { + invariant(_current.block_translation[b.b].u.diskoff == diskoff_unused); + _free_blocknum_in_translation(&_current, b); + } + } + _mutex_unlock(); +} + +bool block_table::_no_data_blocks_except_root(BLOCKNUM root) { + bool ok = true; + _mutex_lock(); + int64_t smallest = _current.smallest_never_used_blocknum.b; + if (root.b < RESERVED_BLOCKNUMS) { + ok = false; + goto cleanup; + } + for (int64_t i = RESERVED_BLOCKNUMS; i < smallest; i++) { + if (i == root.b) { + continue; + } + BLOCKNUM b = make_blocknum(i); + if (_current.block_translation[b.b].size != size_is_free) { + ok = false; + goto cleanup; + } + } + cleanup: + _mutex_unlock(); + return ok; +} + +// Verify there are no data blocks 
except root.
+// TODO(leif): This actually takes a lock, but I don't want to fix all the callers right now.
+void block_table::verify_no_data_blocks_except_root(BLOCKNUM UU(root)) {
+    paranoid_invariant(_no_data_blocks_except_root(root));
+}
+
+bool block_table::_blocknum_allocated(BLOCKNUM b) {
+    _mutex_lock();
+    struct translation *t = &_current;
+    _verify_valid_blocknum(t, b);
+    bool ok = t->block_translation[b.b].size != size_is_free;
+    _mutex_unlock();
+    return ok;
+}
+
+// Verify a blocknum is currently allocated.
+void block_table::verify_blocknum_allocated(BLOCKNUM UU(b)) {
+    paranoid_invariant(_blocknum_allocated(b));
+}
+
+// Only used by toku_dump_translation table (debug info)
+void block_table::_dump_translation_internal(FILE *f, struct translation *t) {
+    if (t->block_translation) {
+        BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
+        fprintf(f, " length_of_array[%" PRId64 "]", t->length_of_array);
+        fprintf(f, " smallest_never_used_blocknum[%" PRId64 "]", t->smallest_never_used_blocknum.b);
+        fprintf(f, " blocknum_free_list_head[%" PRId64 "]", t->blocknum_freelist_head.b);
+        fprintf(f, " size_on_disk[%" PRId64 "]", t->block_translation[b.b].size);
+        fprintf(f, " location_on_disk[%" PRId64 "]\n", t->block_translation[b.b].u.diskoff);
+        int64_t i;
+        for (i=0; i<t->length_of_array; i++) {
+            fprintf(f, " %" PRId64 ": %" PRId64 " %" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size);
+        }
+        fprintf(f, "\n");
+    } else {
+        fprintf(f, " does not exist\n");
+    }
+}
+
+// Only used by toku_ft_dump which is only for debugging purposes
+// "pretty" just means we use tabs so we can parse output easier later
+void block_table::dump_translation_table_pretty(FILE *f) {
+    _mutex_lock();
+    struct translation *t = &_checkpointed;
+    assert(t->block_translation != nullptr);
+    for (int64_t i = 0; i < t->length_of_array; ++i) {
+        fprintf(f, "%" PRId64 "\t%" PRId64 "\t%" PRId64 "\n", i, t->block_translation[i].u.diskoff,
t->block_translation[i].size); + } + _mutex_unlock(); +} + +// Only used by toku_ft_dump which is only for debugging purposes +void block_table::dump_translation_table(FILE *f) { + _mutex_lock(); + fprintf(f, "Current block translation:"); + _dump_translation_internal(f, &_current); + fprintf(f, "Checkpoint in progress block translation:"); + _dump_translation_internal(f, &_inprogress); + fprintf(f, "Checkpointed block translation:"); + _dump_translation_internal(f, &_checkpointed); + _mutex_unlock(); +} + +// Only used by ftdump +void block_table::blocknum_dump_translation(BLOCKNUM b) { + _mutex_lock(); + + struct translation *t = &_current; + if (b.b < t->length_of_array) { + struct block_translation_pair *bx = &t->block_translation[b.b]; + printf("%" PRId64 ": %" PRId64 " %" PRId64 "\n", b.b, bx->u.diskoff, bx->size); + } + _mutex_unlock(); +} + +// Must not call this function when anything else is using the blocktable. +// No one may use the blocktable afterwards. +void block_table::destroy(void) { + // TODO: translation.destroy(); + toku_free(_current.block_translation); + toku_free(_inprogress.block_translation); + toku_free(_checkpointed.block_translation); + + _bt_block_allocator.destroy(); + toku_mutex_destroy(&_mutex); + nb_mutex_destroy(&_safe_file_size_lock); +} + +int block_table::_translation_deserialize_from_buffer(struct translation *t, + DISKOFF location_on_disk, + uint64_t size_on_disk, + // out: buffer with serialized translation + unsigned char *translation_buffer) { + int r = 0; + assert(location_on_disk != 0); + t->type = TRANSLATION_CHECKPOINTED; + + // check the checksum + uint32_t x1764 = toku_x1764_memory(translation_buffer, size_on_disk - 4); + uint64_t offset = size_on_disk - 4; + uint32_t stored_x1764 = toku_dtoh32(*(int*)(translation_buffer + offset)); + if (x1764 != stored_x1764) { + fprintf(stderr, "Translation table checksum failure: calc=0x%08x read=0x%08x\n", x1764, stored_x1764); + r = TOKUDB_BAD_CHECKSUM; + goto exit; + } + + 
struct rbuf rb;
+    rb.buf = translation_buffer;
+    rb.ndone = 0;
+    rb.size = size_on_disk-4;//4==checksum
+
+    t->smallest_never_used_blocknum = rbuf_blocknum(&rb);
+    t->length_of_array = t->smallest_never_used_blocknum.b;
+    invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS);
+    t->blocknum_freelist_head = rbuf_blocknum(&rb);
+    XMALLOC_N(t->length_of_array, t->block_translation);
+    for (int64_t i = 0; i < t->length_of_array; i++) {
+        t->block_translation[i].u.diskoff = rbuf_DISKOFF(&rb);
+        t->block_translation[i].size = rbuf_DISKOFF(&rb);
+    }
+    invariant(_calculate_size_on_disk(t) == (int64_t) size_on_disk);
+    invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size == (int64_t) size_on_disk);
+    invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff == location_on_disk);
+
+exit:
+    return r;
+}
+
+int block_table::iterate(enum translation_type type,
+                         BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only) {
+    struct translation *src;
+
+    int r = 0;
+    switch (type) {
+        case TRANSLATION_CURRENT:
+            src = &_current;
+            break;
+        case TRANSLATION_INPROGRESS:
+            src = &_inprogress;
+            break;
+        case TRANSLATION_CHECKPOINTED:
+            src = &_checkpointed;
+            break;
+        default:
+            r = EINVAL;
+    }
+
+    struct translation fakecurrent;
+    memset(&fakecurrent, 0, sizeof(struct translation));
+
+    struct translation *t = &fakecurrent;
+    if (r == 0) {
+        _mutex_lock();
+        _copy_translation(t, src, TRANSLATION_DEBUG);
+        t->block_translation[RESERVED_BLOCKNUM_TRANSLATION] =
+            src->block_translation[RESERVED_BLOCKNUM_TRANSLATION];
+        _mutex_unlock();
+        int64_t i;
+        for (i=0; i<t->smallest_never_used_blocknum.b; i++) {
+            struct block_translation_pair pair = t->block_translation[i];
+            if (data_only && i< RESERVED_BLOCKNUMS) continue;
+            if (used_only && pair.size <= 0) continue;
+            r = f(make_blocknum(i), pair.size, pair.u.diskoff, extra);
+            if (r!=0) break;
+        }
+        toku_free(t->block_translation);
+    }
+    return r;
+}
+
+typedef struct {
+    int64_t used_space;
+ 
int64_t total_space; +} frag_extra; + +static int frag_helper(BLOCKNUM UU(b), int64_t size, int64_t address, void *extra) { + frag_extra *info = (frag_extra *) extra; + + if (size + address > info->total_space) + info->total_space = size + address; + info->used_space += size; + return 0; +} + +void block_table::internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep) { + frag_extra info = { 0, 0 }; + int r = iterate(TRANSLATION_CHECKPOINTED, frag_helper, &info, false, true); + assert_zero(r); + + if (total_sizep) *total_sizep = info.total_space; + if (used_sizep) *used_sizep = info.used_space; +} + +void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, FT ft) { + toku_mutex_assert_locked(&_mutex); + BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR); + _realloc_on_disk_internal(b, size, offset, ft, false, 0); +} + +void block_table::realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, FT ft, int fd) { + _mutex_lock(); + _realloc_descriptor_on_disk_unlocked(size, offset, ft); + _ensure_safe_write_unlocked(fd, size, *offset); + _mutex_unlock(); +} + +void block_table::get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size) { + _mutex_lock(); + BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR); + _translate_blocknum_to_offset_size_unlocked(b, offset, size); + _mutex_unlock(); +} + +void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) { + // Requires: blocktable lock is held. + // Requires: report->file_size_bytes is already filled in. + + // Count the headers. 
+    report->data_bytes = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
+    report->data_blocks = 1;
+    report->checkpoint_bytes_additional = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
+    report->checkpoint_blocks_additional = 1;
+
+    struct translation *current = &_current;
+    for (int64_t i = 0; i < current->length_of_array; i++) {
+        struct block_translation_pair *pair = &current->block_translation[i];
+        if (pair->size > 0) {
+            report->data_bytes += pair->size;
+            report->data_blocks++;
+        }
+    }
+
+    struct translation *checkpointed = &_checkpointed;
+    for (int64_t i = 0; i < checkpointed->length_of_array; i++) {
+        struct block_translation_pair *pair = &checkpointed->block_translation[i];
+        if (pair->size > 0 && !(i < current->length_of_array &&
+                                current->block_translation[i].size > 0 &&
+                                current->block_translation[i].u.diskoff == pair->u.diskoff)) {
+            report->checkpoint_bytes_additional += pair->size;
+            report->checkpoint_blocks_additional++;
+        }
+    }
+
+    struct translation *inprogress = &_inprogress;
+    for (int64_t i = 0; i < inprogress->length_of_array; i++) {
+        struct block_translation_pair *pair = &inprogress->block_translation[i];
+        if (pair->size > 0 && !(i < current->length_of_array &&
+                                current->block_translation[i].size > 0 &&
+                                current->block_translation[i].u.diskoff == pair->u.diskoff) &&
+            !(i < checkpointed->length_of_array &&
+              checkpointed->block_translation[i].size > 0 &&
+              checkpointed->block_translation[i].u.diskoff == pair->u.diskoff)) {
+            report->checkpoint_bytes_additional += pair->size;
+            report->checkpoint_blocks_additional++;
+        }
+    }
+
+    _bt_block_allocator.get_unused_statistics(report);
+}
+
+void block_table::get_info64(struct ftinfo64 *s) {
+    _mutex_lock();
+
+    struct translation *current = &_current;
+    s->num_blocks_allocated = current->length_of_array;
+    s->num_blocks_in_use = 0;
+    s->size_allocated = 0;
+    s->size_in_use = 0;
+
+    for (int64_t i = 0; i < current->length_of_array; ++i) {
+        struct block_translation_pair *block = 
&current->block_translation[i];
+        if (block->size != size_is_free) {
+            ++s->num_blocks_in_use;
+            s->size_in_use += block->size;
+            if (block->u.diskoff != diskoff_unused) {
+                uint64_t limit = block->u.diskoff + block->size;
+                if (limit > s->size_allocated) {
+                    s->size_allocated = limit;
+                }
+            }
+        }
+    }
+
+    _mutex_unlock();
+}
+
+int block_table::iterate_translation_tables(uint64_t checkpoint_count,
+                                            int (*iter)(uint64_t checkpoint_count,
+                                                        int64_t total_num_rows,
+                                                        int64_t blocknum,
+                                                        int64_t diskoff,
+                                                        int64_t size,
+                                                        void *extra),
+                                            void *iter_extra) {
+    int error = 0;
+    _mutex_lock();
+
+    int64_t total_num_rows = _current.length_of_array + _checkpointed.length_of_array;
+    for (int64_t i = 0; error == 0 && i < _current.length_of_array; ++i) {
+        struct block_translation_pair *block = &_current.block_translation[i];
+        error = iter(checkpoint_count, total_num_rows, i, block->u.diskoff, block->size, iter_extra);
+    }
+    for (int64_t i = 0; error == 0 && i < _checkpointed.length_of_array; ++i) {
+        struct block_translation_pair *block = &_checkpointed.block_translation[i];
+        error = iter(checkpoint_count - 1, total_num_rows, i, block->u.diskoff, block->size, iter_extra);
+    }
+
+    _mutex_unlock();
+    return error;
+}
diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/block_table.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/block_table.h
--- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/block_table.h	1970-01-01 00:00:00.000000000 +0000
+++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/block_table.h	2014-10-08 13:19:51.000000000 +0000
@@ -0,0 +1,338 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+
+/*
+COPYING CONDITIONS NOTICE:
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of version 2 of the GNU General Public License as
+  published by the Free Software Foundation, and provided that the
+  following conditions are 
met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2014 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. 
+ + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2014 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it."
+
+#include <db.h>
+
+#include "portability/toku_stdint.h"
+#include "portability/toku_pthread.h"
+
+#include "ft/serialize/block_allocator.h"
+#include "util/nb_mutex.h"
+
+struct ft;
+
+typedef struct blocknum_s { int64_t b; } BLOCKNUM;
+
+// Offset in a disk. -1 is the 'null' pointer.
+typedef int64_t DISKOFF;
+
+// Unmovable reserved first, then reallocable.
+// We reserve one blocknum for the translation table itself.
+enum {
+    RESERVED_BLOCKNUM_NULL = 0,
+    RESERVED_BLOCKNUM_TRANSLATION = 1,
+    RESERVED_BLOCKNUM_DESCRIPTOR = 2,
+    RESERVED_BLOCKNUMS
+};
+
+typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b, int64_t size, int64_t address, void *extra);
+
+static inline BLOCKNUM make_blocknum(int64_t b) {
+    BLOCKNUM result = { .b = b };
+    return result;
+}
+static const BLOCKNUM ROLLBACK_NONE = { .b = 0 };
+
+/**
+ * There are three copies of the translation table (btt) in the block table:
+ *
+ *    checkpointed   Is initialized by deserializing from disk,
+ *                   and is the only version ever read from disk.
+ *                   When read from disk it is copied to current.
+ *                   It is immutable. It can be replaced by an inprogress btt.
+ *
+ *    inprogress     Is only filled by copying from current,
+ *                   and is the only version ever serialized to disk.
+ *                   (It is serialized to disk on checkpoint and clean shutdown.)
+ *                   At end of checkpoint it replaces 'checkpointed'.
+ *                   During a checkpoint, any 'pending' dirty writes will update
+ *                   inprogress.
+ *
+ *    current        Is initialized by copying from checkpointed,
+ *                   is the only version ever modified while the database is in use,
+ *                   and is the only version ever copied to inprogress.
+ *                   It is never stored on disk.
+ */ +class block_table { +public: + enum translation_type { + TRANSLATION_NONE = 0, + TRANSLATION_CURRENT, + TRANSLATION_INPROGRESS, + TRANSLATION_CHECKPOINTED, + TRANSLATION_DEBUG + }; + + void create(); + + int create_from_buffer(int fd, DISKOFF location_on_disk, DISKOFF size_on_disk, unsigned char *translation_buffer); + + void destroy(); + + // Checkpointing + void note_start_checkpoint_unlocked(); + void note_end_checkpoint(int fd); + void note_skipped_checkpoint(); + void maybe_truncate_file_on_open(int fd); + + // Blocknums + void allocate_blocknum(BLOCKNUM *res, struct ft *ft); + void realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, int fd, bool for_checkpoint, uint64_t heat); + void free_blocknum(BLOCKNUM *b, struct ft *ft, bool for_checkpoint); + void translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size); + void free_unused_blocknums(BLOCKNUM root); + void realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, struct ft *ft, int fd); + void get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size); + + // External verfication + void verify_blocknum_allocated(BLOCKNUM b); + void verify_no_data_blocks_except_root(BLOCKNUM root); + void verify_no_free_blocknums(); + + // Serialization + void serialize_translation_to_wbuf(int fd, struct wbuf *w, int64_t *address, int64_t *size); + + // DEBUG ONLY (ftdump included), tests included + void blocknum_dump_translation(BLOCKNUM b); + void dump_translation_table_pretty(FILE *f); + void dump_translation_table(FILE *f); + void block_free(uint64_t offset); + + int iterate(enum translation_type type, BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only); + void internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep); + + // Requires: blocktable lock is held. + // Requires: report->file_size_bytes is already filled in. 
+ void get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report); + + int64_t get_blocks_in_use_unlocked(); + + void get_info64(struct ftinfo64 *); + + int iterate_translation_tables(uint64_t, int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *), void *); + +private: + struct block_translation_pair { + // If in the freelist, use next_free_blocknum, otherwise diskoff. + union { + DISKOFF diskoff; + BLOCKNUM next_free_blocknum; + } u; + + // Set to 0xFFFFFFFFFFFFFFFF for free + DISKOFF size; + }; + + // This is the BTT (block translation table) + // When the translation (btt) is stored on disk: + // In Header: + // size_on_disk + // location_on_disk + // In block translation table (in order): + // smallest_never_used_blocknum + // blocknum_freelist_head + // array + // a checksum + struct translation { + enum translation_type type; + + // Number of elements in array (block_translation). always >= smallest_never_used_blocknum + int64_t length_of_array; + BLOCKNUM smallest_never_used_blocknum; + + // Next (previously used) unused blocknum (free list) + BLOCKNUM blocknum_freelist_head; + struct block_translation_pair *block_translation; + + // size_on_disk is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].size + // location_on is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff + }; + + void _create_internal(); + int _translation_deserialize_from_buffer(struct translation *t, // destination into which to deserialize + DISKOFF location_on_disk, // location of translation_buffer + uint64_t size_on_disk, + unsigned char * translation_buffer); // buffer with serialized translation + + void _copy_translation(struct translation *dst, struct translation *src, enum translation_type newtype); + void _maybe_optimize_translation(struct translation *t); + void _maybe_expand_translation(struct translation *t); + bool _translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair); + void 
_free_blocknum_in_translation(struct translation *t, BLOCKNUM b); + int64_t _calculate_size_on_disk(struct translation *t); + bool _pair_is_unallocated(struct block_translation_pair *pair); + void _alloc_inprogress_translation_on_disk_unlocked(); + void _dump_translation_internal(FILE *f, struct translation *t); + + // Blocknum management + void _allocate_blocknum_unlocked(BLOCKNUM *res, struct ft *ft); + void _free_blocknum_unlocked(BLOCKNUM *bp, struct ft *ft, bool for_checkpoint); + void _realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, struct ft *ft); + void _realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, bool for_checkpoint, uint64_t heat); + void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size); + + // File management + void _maybe_truncate_file(int fd, uint64_t size_needed_before); + void _ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOFF block_offset); + + // Verification + bool _is_valid_blocknum(struct translation *t, BLOCKNUM b); + void _verify_valid_blocknum(struct translation *t, BLOCKNUM b); + bool _is_valid_freeable_blocknum(struct translation *t, BLOCKNUM b); + void _verify_valid_freeable_blocknum(struct translation *t, BLOCKNUM b); + bool _no_data_blocks_except_root(BLOCKNUM root); + bool _blocknum_allocated(BLOCKNUM b); + + // Locking + // + // TODO: Move the lock to the FT + void _mutex_lock(); + void _mutex_unlock(); + + // The current translation is the one used by client threads. + // It is not represented on disk. + struct translation _current; + + // The translation used by the checkpoint currently in progress. + // If the checkpoint thread allocates a block, it must also update the current translation. + struct translation _inprogress; + + // The translation for the data that shall remain inviolate on disk until the next checkpoint finishes, + // after which any blocks used only in this translation can be freed. 
+ struct translation _checkpointed; + + // The in-memory data structure for block allocation. + // There is no on-disk data structure for block allocation. + // Note: This is *allocation* not *translation* - the block allocator is unaware of which + // blocks are used for which translation, but simply allocates and deallocates blocks. + block_allocator _bt_block_allocator; + toku_mutex_t _mutex; + struct nb_mutex _safe_file_size_lock; + bool _checkpoint_skipped; + uint64_t _safe_file_size; + + // Because the lock is in a weird place right now + friend void toku_ft_lock(struct ft *ft); + friend void toku_ft_unlock(struct ft *ft); +}; + +// For serialize / deserialize + +#include "ft/serialize/wbuf.h" + +static inline void wbuf_BLOCKNUM (struct wbuf *w, BLOCKNUM b) { + wbuf_ulonglong(w, b.b); +} + +static inline void wbuf_nocrc_BLOCKNUM (struct wbuf *w, BLOCKNUM b) { + wbuf_nocrc_ulonglong(w, b.b); +} + +static inline void wbuf_DISKOFF(struct wbuf *wb, DISKOFF off) { + wbuf_ulonglong(wb, (uint64_t) off); +} + +#include "ft/serialize/rbuf.h" + +static inline DISKOFF rbuf_DISKOFF(struct rbuf *rb) { + return rbuf_ulonglong(rb); +} + +static inline BLOCKNUM rbuf_blocknum(struct rbuf *rb) { + BLOCKNUM result = make_blocknum(rbuf_longlong(rb)); + return result; +} + +static inline void rbuf_ma_BLOCKNUM(struct rbuf *rb, memarena *UU(ma), BLOCKNUM *blocknum) { + *blocknum = rbuf_blocknum(rb); +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/compress.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/compress.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/compress.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/compress.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,297 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it 
and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. 
+ + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2011-2013 Tokutek Inc. All rights reserved." 
+#ident "$Id$" + +#include + +#include +#include + +#include +#include + +#include "compress.h" +#include "memory.h" +#include "quicklz.h" +#include "toku_assert.h" + +static inline enum toku_compression_method +normalize_compression_method(enum toku_compression_method method) +// Effect: resolve "friendly" names like "fast" and "small" into their real values. +{ + switch (method) { + case TOKU_DEFAULT_COMPRESSION_METHOD: + case TOKU_FAST_COMPRESSION_METHOD: + return TOKU_QUICKLZ_METHOD; + case TOKU_SMALL_COMPRESSION_METHOD: + return TOKU_LZMA_METHOD; + default: + return method; // everything else is fine + } +} + +size_t toku_compress_bound (enum toku_compression_method a, size_t size) +// See compress.h for the specification of this function. +{ + a = normalize_compression_method(a); + switch (a) { + case TOKU_NO_COMPRESSION: + return size + 1; + case TOKU_LZMA_METHOD: + return 1+lzma_stream_buffer_bound(size); // We need one extra for the rfc1950-style header byte (bits -03 are TOKU_LZMA_METHOD (1), bits 4-7 are the compression level) + case TOKU_QUICKLZ_METHOD: + return size+400 + 1; // quicklz manual says 400 bytes is enough. We need one more byte for the rfc1950-style header byte. bits 0-3 are 9, bits 4-7 are the QLZ_COMPRESSION_LEVEL. + case TOKU_ZLIB_METHOD: + return compressBound (size); + case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD: + return 2+deflateBound(nullptr, size); // We need one extra for the rfc1950-style header byte, and one extra to store windowBits (a bit over cautious about future upgrades maybe). + default: + break; + } + // fall through for bad enum (thus compiler can warn us if we didn't use all the enums + assert(0); return 0; +} + +void toku_compress (enum toku_compression_method a, + // the following types and naming conventions come from zlib.h + Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen) +// See compress.h for the specification of this function. 
+{ + static const int zlib_compression_level = 5; + static const int zlib_without_checksum_windowbits = -15; + + a = normalize_compression_method(a); + assert(sourceLen < (1LL << 32)); + switch (a) { + case TOKU_NO_COMPRESSION: + dest[0] = TOKU_NO_COMPRESSION; + memcpy(dest + 1, source, sourceLen); + *destLen = sourceLen + 1; + return; + case TOKU_ZLIB_METHOD: { + int r = compress2(dest, destLen, source, sourceLen, zlib_compression_level); + assert(r == Z_OK); + assert((dest[0]&0xF) == TOKU_ZLIB_METHOD); + return; + } + case TOKU_QUICKLZ_METHOD: { + if (sourceLen==0) { + // quicklz requires at least one byte, so we handle this ourselves + assert(1 <= *destLen); + *destLen = 1; + } else { + toku::scoped_calloc qsc_buf(sizeof(qlz_state_compress)); + qlz_state_compress *qsc = reinterpret_cast(qsc_buf.get()); + size_t actual_destlen = qlz_compress(source, (char*)(dest+1), sourceLen, qsc); + assert(actual_destlen + 1 <= *destLen); + // add one for the rfc1950-style header byte. + *destLen = actual_destlen + 1; + } + // Fill in that first byte + dest[0] = TOKU_QUICKLZ_METHOD + (QLZ_COMPRESSION_LEVEL << 4); + return; + } + case TOKU_LZMA_METHOD: { + const int lzma_compression_level = 2; + if (sourceLen==0) { + // lzma version 4.999 requires at least one byte, so we'll do it ourselves. 
+ assert(1<=*destLen); + *destLen = 1; + } else { + size_t out_pos = 1; + lzma_ret r = lzma_easy_buffer_encode(lzma_compression_level, LZMA_CHECK_NONE, NULL, + source, sourceLen, + dest, &out_pos, *destLen); + assert(out_pos < *destLen); + if (r != LZMA_OK) { + fprintf(stderr, "lzma_easy_buffer_encode() returned %d\n", (int) r); + } + assert(r==LZMA_OK); + *destLen = out_pos; + } + dest[0] = TOKU_LZMA_METHOD + (lzma_compression_level << 4); + + return; + } + case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD: { + z_stream strm; + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.next_in = const_cast(source); + strm.avail_in = sourceLen; + int r = deflateInit2(&strm, zlib_compression_level, Z_DEFLATED, + zlib_without_checksum_windowbits, 8, Z_DEFAULT_STRATEGY); + lazy_assert(r == Z_OK); + strm.next_out = dest + 2; + strm.avail_out = *destLen - 2; + r = deflate(&strm, Z_FINISH); + lazy_assert(r == Z_STREAM_END); + r = deflateEnd(&strm); + lazy_assert(r == Z_OK); + *destLen = strm.total_out + 2; + dest[0] = TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD + (zlib_compression_level << 4); + dest[1] = zlib_without_checksum_windowbits; + return; + } + default: + break; + } + // default fall through to error. + assert(0); +} + +void toku_decompress (Bytef *dest, uLongf destLen, + const Bytef *source, uLongf sourceLen) +// See compress.h for the specification of this function. +{ + assert(sourceLen>=1); // need at least one byte for the RFC header. 
+ switch (source[0] & 0xF) { + case TOKU_NO_COMPRESSION: + memcpy(dest, source + 1, sourceLen - 1); + return; + case TOKU_ZLIB_METHOD: { + uLongf actual_destlen = destLen; + int r = uncompress(dest, &actual_destlen, source, sourceLen); + assert(r == Z_OK); + assert(actual_destlen == destLen); + return; + } + case TOKU_QUICKLZ_METHOD: + if (sourceLen>1) { + toku::scoped_calloc state_buf(sizeof(qlz_state_decompress)); + qlz_state_decompress *qsd = reinterpret_cast(state_buf.get()); + uLongf actual_destlen = qlz_decompress((char*)source+1, dest, qsd); + assert(actual_destlen == destLen); + } else { + // length 1 means there is no data, so do nothing. + assert(destLen==0); + } + return; + case TOKU_LZMA_METHOD: { + if (sourceLen>1) { + uint64_t memlimit = UINT64_MAX; + size_t out_pos = 0; + size_t in_pos = 1; + lzma_ret r = lzma_stream_buffer_decode(&memlimit, // memlimit, use UINT64_MAX to disable this check + 0, // flags + NULL, // allocator + source, &in_pos, sourceLen, + dest, &out_pos, destLen); + assert(r==LZMA_OK); + assert(out_pos == destLen); + } else { + // length 1 means there is no data, so do nothing. + assert(destLen==0); + } + return; + } + case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD: { + z_stream strm; + strm.next_in = const_cast(source + 2); + strm.avail_in = sourceLen - 2; + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + char windowBits = source[1]; + int r = inflateInit2(&strm, windowBits); + lazy_assert(r == Z_OK); + strm.next_out = dest; + strm.avail_out = destLen; + r = inflate(&strm, Z_FINISH); + lazy_assert(r == Z_STREAM_END); + r = inflateEnd(&strm); + lazy_assert(r == Z_OK); + return; + } + } + // default fall through to error. 
+ assert(0); +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/compress.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/compress.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/compress.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/compress.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,131 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include +#include + +// The following provides an abstraction of quicklz and zlib. +// We offer three compression methods: ZLIB, QUICKLZ, and LZMA, as well as a "no compression" option. These options are declared in make_tdb.c. +// The resulting byte string includes enough information for us to decompress it. That is, we can tell whether it's z-compressed or qz-compressed or xz-compressed. + +size_t toku_compress_bound (enum toku_compression_method a, size_t size); +// Effect: Return the number of bytes needed to compress a buffer of size SIZE using compression method A. +// Typically, the result is a little bit larger than SIZE, since some data cannot be compressed. +// Usage note: It may help to know roughly how much space is involved. +// zlib's bound is something like (size + (size>>12) + (size>>14) + (size>>25) + 13. +// quicklz's bound is something like size+400. 
+ +void toku_compress (enum toku_compression_method a, + // the following types and naming conventions come from zlib.h + Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen); +// Effect: Using compression method A, compress SOURCE into DEST. The number of bytes to compress is passed in SOURCELEN. +// On input: *destLen is the size of the buffer. +// On output: *destLen is the size of the actual compressed data. +// Usage note: sourceLen may be be zero (unlike for quicklz, which requires sourceLen>0). +// Requires: The buffer must be big enough to hold the compressed data. (That is *destLen >= compressBound(a, sourceLen)) +// Requires: sourceLen < 2^32. +// Usage note: Although we *try* to assert if the DESTLEN isn't big enough, it's possible that it's too late by then (in the case of quicklz which offers +// no way to avoid a buffer overrun.) So we require that that DESTLEN is big enough. +// Rationale: zlib's argument order is DEST then SOURCE with the size of the buffer passed in *destLen, and the size of the result returned in *destLen. +// quicklz's argument order is SOURCE then DEST with the size returned (and it has no way to verify that an overright didn't happen). +// We use zlib's calling conventions partly because it is safer, and partly because it is more established. +// We also use zlib's ugly camel case convention for destLen and sourceLen. +// Unlike zlib, we return no error codes. Instead, we require that the data be OK and the size of the buffers is OK, and assert if there's a problem. + +void toku_decompress (Bytef *dest, uLongf destLen, + const Bytef *source, uLongf sourceLen); +// Effect: Decompress source (length sourceLen) into dest (length destLen) +// This function can decompress data compressed with either zlib or quicklz compression methods (calling toku_compress(), which puts an appropriate header on so we know which it is.) +// Requires: destLen is equal to the actual decompressed size of the data. 
+// Requires: The source must have been properly compressed. diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/ft_layout_version.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/ft_layout_version.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/ft_layout_version.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/ft_layout_version.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,132 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +//Must be defined before other recursive headers could include logger/recover.h +enum ft_layout_version_e { + FT_LAYOUT_VERSION_5 = 5, + FT_LAYOUT_VERSION_6 = 6, // Diff from 5 to 6: Add leafentry_estimate + FT_LAYOUT_VERSION_7 = 7, // Diff from 6 to 7: Add exact-bit to leafentry_estimate #818, add magic to header #22, add per-subdatase flags #333 + FT_LAYOUT_VERSION_8 = 8, // Diff from 7 to 8: Use murmur instead of crc32. We are going to make a simplification and stop supporting version 7 and before. Current As of Beta 1.0.6 + FT_LAYOUT_VERSION_9 = 9, // Diff from 8 to 9: Variable-sized blocks and compression. 
+ FT_LAYOUT_VERSION_10 = 10, // Diff from 9 to 10: Variable number of compressed sub-blocks per block, disk byte order == intel byte order, Subtree estimates instead of just leafentry estimates, translation table, dictionary descriptors, checksum in header, subdb support removed from ft layer + FT_LAYOUT_VERSION_11 = 11, // Diff from 10 to 11: Nested transaction leafentries (completely redesigned). FT_CMDs on disk now support XIDS (multiple txnids) instead of exactly one. + FT_LAYOUT_VERSION_12 = 12, // Diff from 11 to 12: Added FT_CMD 'FT_INSERT_NO_OVERWRITE', compressed block format, num old blocks + FT_LAYOUT_VERSION_13 = 13, // Diff from 12 to 13: Fixed loader pivot bug, added build_id to every node, timestamps to ft + FT_LAYOUT_VERSION_14 = 14, // Diff from 13 to 14: Added MVCC; deprecated TOKU_DB_VALCMP_BUILTIN(_13); Remove fingerprints; Support QUICKLZ; add end-to-end checksum on uncompressed data. + FT_LAYOUT_VERSION_15 = 15, // Diff from 14 to 15: basement nodes, last verification time + FT_LAYOUT_VERSION_16 = 16, // Dr. No: No subtree estimates, partition layout information represented more transparently. + // ALERT ALERT ALERT: version 16 never released to customers, internal and beta use only + FT_LAYOUT_VERSION_17 = 17, // Dr. No: Add STAT64INFO_S to ft header + FT_LAYOUT_VERSION_18 = 18, // Dr. 
No: Add HOT info to ft header + FT_LAYOUT_VERSION_19 = 19, // Doofenshmirtz: Add compression method, highest_unused_msn_for_upgrade + FT_LAYOUT_VERSION_20 = 20, // Deadshot: Add compression method to log_fcreate, + // mgr_last_xid after begin checkpoint, + // last_xid to shutdown + FT_LAYOUT_VERSION_21 = 21, // Ming: Add max_msn_in_ft to header, + // Removed log suppression logentry + FT_LAYOUT_VERSION_22 = 22, // Ming: Add oldest known referenced xid to each ftnode, for better garbage collection + FT_LAYOUT_VERSION_23 = 23, // Ming: Fix upgrade path #5902 + FT_LAYOUT_VERSION_24 = 24, // Riddler: change logentries that log transactions to store TXNID_PAIRs instead of TXNIDs + FT_LAYOUT_VERSION_25 = 25, // SecretSquirrel: ROLLBACK_LOG_NODES (on disk and in memory) now just use blocknum (instead of blocknum + hash) to point to other log nodes. same for xstillopen log entry + FT_LAYOUT_VERSION_26 = 26, // Hojo: basements store key/vals separately on disk for fixed klpair length BNs + FT_LAYOUT_VERSION_27 = 27, // serialize message trees with nonleaf buffers to avoid key, msn sort on deserialize + FT_NEXT_VERSION, // the version after the current version + FT_LAYOUT_VERSION = FT_NEXT_VERSION-1, // A hack so I don't have to change this line. + FT_LAYOUT_MIN_SUPPORTED_VERSION = FT_LAYOUT_VERSION_13, // Minimum version supported + + // Define these symbolically so the knowledge of exactly which layout version got rid of fingerprints isn't spread all over the code. 
+ FT_LAST_LAYOUT_VERSION_WITH_FINGERPRINT = FT_LAYOUT_VERSION_13, + FT_FIRST_LAYOUT_VERSION_WITH_END_TO_END_CHECKSUM = FT_LAYOUT_VERSION_14, + FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES = FT_LAYOUT_VERSION_15, +}; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/ft-node-deserialize.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/ft-node-deserialize.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/ft-node-deserialize.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/ft-node-deserialize.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,241 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. 
+ +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include + +#include "ft/node.h" +#include "ft/ft-internal.h" +#include "ft/serialize/ft_node-serialize.h" + +/* + * ft-node-deserialize.c - + * This file contains functions used by deserializtion + * code paths in and out of the engine. The functions can, + * essentially, be broken up into two types. Some of these + * functions return error codes based expected values inside + * the fractal tree node, others merely read the specific + * quantities of bytes out of the buffer. It is expeceted + * that these will be called in the correct order by users + * of these functions/this API. + * + */ + +// Sets initial values for the given fractal tree node to be +// deserialized +void +initialize_ftnode(FTNODE node, BLOCKNUM blocknum) +{ + node->fullhash = 0xDEADBEEF; // Is this 'spoof' ok? + node->blocknum = blocknum; + node->dirty = 0; + node->bp = NULL; + // Can we use this initialization as a correctness assert in + // a later function? 
+ node->layout_version_read_from_disk = 0; +} + +/************************ + * TODO: In other deserialization code, we check the rb size member. We + * verify that it is greater than or equal to 24. Ignoring this magic + * number for a moment, should we put this check in its own function? * +*************************/ + + +// Read and check the 'magic' bytes on disk. Returns an error if +// the magic does not match. +int +read_and_check_magic(struct rbuf *rb) +{ + int r = 0; + const void *magic; + rbuf_literal_bytes(rb, &magic, 8); + if (memcmp(magic, "tokuleaf", 8)!=0 && + memcmp(magic, "tokunode", 8)!=0) { + r = DB_BADFORMAT; // TODO: Return more meaningful error. + } + + return r; +} + +// Read the version number from the given buffer +// and returns an error if the version is too old. +int +read_and_check_version(FTNODE node, struct rbuf *rb) +{ + int r = 0; + int version = rbuf_int(rb); + node->layout_version_read_from_disk = version; + if (version < FT_LAYOUT_MIN_SUPPORTED_VERSION) { + r = 1; // TODO: Better error reporting. + } + + return r; +} + +// Reads the basic version, build, and child info from +// the given buffer. +void +read_node_info(FTNODE node, struct rbuf *rb, int version) +{ + node->layout_version = version; + node->layout_version_original = rbuf_int(rb); + node->build_id = rbuf_int(rb); + node->n_children = rbuf_int(rb); +} + +// Allocates the partitions based on the given node's nubmer +// of children. It then reads, out of the given buffer, +// the start and size of each child partition. +// TODO: Should these be two seperate functions? +void +allocate_and_read_partition_offsets(FTNODE node, struct rbuf *rb, FTNODE_DISK_DATA *ndd) +{ + XMALLOC_N(node->n_children, node->bp); + // TODO: Fix this to use xmalloc_n + XMALLOC_N(node->n_children, *ndd); + // Read the partition locations. 
+ for (int i = 0; i < node->n_children; i++) { + BP_START(*ndd, i) = rbuf_int(rb); + BP_SIZE (*ndd, i) = rbuf_int(rb); + } +} + +// Compares checksum of stored (in the given buffer) checksum +// and the checksum of the buffer itself. If these are NOT +// equal, this function returns an appropriate error code. +int +check_node_info_checksum(struct rbuf *rb) +{ + int r = 0; + // Verify checksum of header stored. + uint32_t checksum = toku_x1764_memory(rb->buf, rb->ndone); + uint32_t stored_checksum = rbuf_int(rb); + + if (stored_checksum != checksum) { + // TODO: dump_bad_block(rb->buf, rb->size); + r = TOKUDB_BAD_CHECKSUM; + } + + return r; +} + +// Reads node info from older (13 and 14) fractal tree nodes +// out of the given buffer. +void +read_legacy_node_info(FTNODE node, struct rbuf *rb, int version) +{ + (void)rbuf_int(rb); // 1. nodesize + node->flags = rbuf_int(rb); // 2. flags + node->height = rbuf_int(rb); // 3. height + + // If the version is less than 14, there are two extra ints here. + // we would need to ignore them if they are there. + if (version == FT_LAYOUT_VERSION_13) { + (void) rbuf_int(rb); // 4. rand4 + (void) rbuf_int(rb); // 5. local + } +} + +// Assuming the given buffer is in the correct position, +// this checks to see if the stored checksum matches the +// checksum of the entire buffer. 
+int +check_legacy_end_checksum(struct rbuf *rb) +{ + int r = 0; + uint32_t expected_xsum = rbuf_int(rb); + uint32_t actual_xsum = toku_x1764_memory(rb->buf, rb->size - 4); + if (expected_xsum != actual_xsum) { + r = TOKUDB_BAD_CHECKSUM; + } + + return r; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/ft_node-serialize.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/ft_node-serialize.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/ft_node-serialize.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/ft_node-serialize.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,2927 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. 
+ +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include + +#include "portability/toku_atomic.h" + +#include "ft/cachetable/cachetable.h" +#include "ft/ft.h" +#include "ft/ft-internal.h" +#include "ft/node.h" +#include "ft/logger/log-internal.h" +#include "ft/txn/rollback.h" +#include "ft/serialize/block_allocator.h" +#include "ft/serialize/block_table.h" +#include "ft/serialize/compress.h" +#include "ft/serialize/ft_node-serialize.h" +#include "ft/serialize/sub_block.h" +#include "util/sort.h" +#include "util/threadpool.h" +#include "util/status.h" +#include "util/scoped_malloc.h" + +static FT_UPGRADE_STATUS_S ft_upgrade_status; + +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ft_upgrade_status, k, c, t, "ft upgrade: " l, inc) + +static void +status_init(void) +{ + // Note, this function initializes the keyname, type, and legend fields. + // Value fields are initialized to zero by compiler. 
+ STATUS_INIT(FT_UPGRADE_FOOTPRINT, nullptr, UINT64, "footprint", TOKU_ENGINE_STATUS); + ft_upgrade_status.initialized = true; +} +#undef STATUS_INIT + +#define UPGRADE_STATUS_VALUE(x) ft_upgrade_status.status[x].value.num + +void +toku_ft_upgrade_get_status(FT_UPGRADE_STATUS s) { + if (!ft_upgrade_status.initialized) { + status_init(); + } + UPGRADE_STATUS_VALUE(FT_UPGRADE_FOOTPRINT) = toku_log_upgrade_get_footprint(); + *s = ft_upgrade_status; +} + +static int num_cores = 0; // cache the number of cores for the parallelization +static struct toku_thread_pool *ft_pool = NULL; +bool toku_serialize_in_parallel; + +int get_num_cores(void) { + return num_cores; +} + +struct toku_thread_pool *get_ft_pool(void) { + return ft_pool; +} + +void toku_serialize_set_parallel(bool in_parallel) { + toku_serialize_in_parallel = in_parallel; +} + +void toku_ft_serialize_layer_init(void) { + num_cores = toku_os_get_number_active_processors(); + int r = toku_thread_pool_create(&ft_pool, num_cores); + lazy_assert_zero(r); + block_allocator::maybe_initialize_trace(); + toku_serialize_in_parallel = false; +} + +void toku_ft_serialize_layer_destroy(void) { + toku_thread_pool_destroy(&ft_pool); + block_allocator::maybe_close_trace(); +} + +enum { FILE_CHANGE_INCREMENT = (16 << 20) }; + +static inline uint64_t +alignup64(uint64_t a, uint64_t b) { + return ((a+b-1)/b)*b; +} + +// safe_file_size_lock must be held. +void +toku_maybe_truncate_file (int fd, uint64_t size_used, uint64_t expected_size, uint64_t *new_sizep) +// Effect: If file size >= SIZE+32MiB, reduce file size. +// (32 instead of 16.. hysteresis). +// Return 0 on success, otherwise an error number. 
+{ + int64_t file_size; + { + int r = toku_os_get_file_size(fd, &file_size); + lazy_assert_zero(r); + invariant(file_size >= 0); + } + invariant(expected_size == (uint64_t)file_size); + // If file space is overallocated by at least 32M + if ((uint64_t)file_size >= size_used + (2*FILE_CHANGE_INCREMENT)) { + toku_off_t new_size = alignup64(size_used, (2*FILE_CHANGE_INCREMENT)); //Truncate to new size_used. + invariant(new_size < file_size); + invariant(new_size >= 0); + int r = ftruncate(fd, new_size); + lazy_assert_zero(r); + *new_sizep = new_size; + } + else { + *new_sizep = file_size; + } + return; +} + +static int64_t +min64(int64_t a, int64_t b) { + if (a= 0); + invariant(expected_size == file_size); + // We want to double the size of the file, or add 16MiB, whichever is less. + // We emulate calling this function repeatedly until it satisfies the request. + int64_t to_write = 0; + if (file_size == 0) { + // Prevent infinite loop by starting with stripe_width as a base case. + to_write = stripe_width; + } + while (file_size + to_write < size) { + to_write += alignup64(min64(file_size + to_write, FILE_CHANGE_INCREMENT), stripe_width); + } + if (to_write > 0) { + assert(to_write%512==0); + toku::scoped_malloc_aligned wbuf_aligned(to_write, 512); + char *wbuf = reinterpret_cast(wbuf_aligned.get()); + memset(wbuf, 0, to_write); + toku_off_t start_write = alignup64(file_size, stripe_width); + invariant(start_write >= file_size); + toku_os_full_pwrite(fd, wbuf, to_write, start_write); + *new_size = start_write + to_write; + } + else { + *new_size = file_size; + } +} + +// Don't include the sub_block header +// Overhead calculated in same order fields are written to wbuf +enum { + node_header_overhead = (8+ // magic "tokunode" or "tokuleaf" or "tokuroll" + 4+ // layout_version + 4+ // layout_version_original + 4), // build_id +}; + +// uncompressed header offsets +enum { + uncompressed_magic_offset = 0, + uncompressed_version_offset = 8, +}; + +static uint32_t 
+serialize_node_header_size(FTNODE node) { + uint32_t retval = 0; + retval += 8; // magic + retval += sizeof(node->layout_version); + retval += sizeof(node->layout_version_original); + retval += 4; // BUILD_ID + retval += 4; // n_children + retval += node->n_children*8; // encode start offset and length of each partition + retval += 4; // checksum + return retval; +} + +static void +serialize_node_header(FTNODE node, FTNODE_DISK_DATA ndd, struct wbuf *wbuf) { + if (node->height == 0) + wbuf_nocrc_literal_bytes(wbuf, "tokuleaf", 8); + else + wbuf_nocrc_literal_bytes(wbuf, "tokunode", 8); + paranoid_invariant(node->layout_version == FT_LAYOUT_VERSION); + wbuf_nocrc_int(wbuf, node->layout_version); + wbuf_nocrc_int(wbuf, node->layout_version_original); + wbuf_nocrc_uint(wbuf, BUILD_ID); + wbuf_nocrc_int (wbuf, node->n_children); + for (int i=0; in_children; i++) { + assert(BP_SIZE(ndd,i)>0); + wbuf_nocrc_int(wbuf, BP_START(ndd, i)); // save the beginning of the partition + wbuf_nocrc_int(wbuf, BP_SIZE (ndd, i)); // and the size + } + // checksum the header + uint32_t end_to_end_checksum = toku_x1764_memory(wbuf->buf, wbuf_get_woffset(wbuf)); + wbuf_nocrc_int(wbuf, end_to_end_checksum); + invariant(wbuf->ndone == wbuf->size); +} + +static uint32_t +serialize_ftnode_partition_size (FTNODE node, int i) +{ + uint32_t result = 0; + paranoid_invariant(node->bp[i].state == PT_AVAIL); + result++; // Byte that states what the partition is + if (node->height > 0) { + NONLEAF_CHILDINFO bnc = BNC(node, i); + // number of messages (4 bytes) plus size of the buffer + result += (4 + toku_bnc_nbytesinbuf(bnc)); + // number of offsets (4 bytes) plus an array of 4 byte offsets, for each message tree + result += (4 + (4 * bnc->fresh_message_tree.size())); + result += (4 + (4 * bnc->stale_message_tree.size())); + result += (4 + (4 * bnc->broadcast_list.size())); + } + else { + result += 4 + bn_data::HEADER_LENGTH; // n_entries in buffer table + basement header + result += 
BLB_NBYTESINDATA(node, i); + } + result += 4; // checksum + return result; +} + +#define FTNODE_PARTITION_DMT_LEAVES 0xaa +#define FTNODE_PARTITION_MSG_BUFFER 0xbb + +UU() static int +assert_fresh(const int32_t &offset, const uint32_t UU(idx), message_buffer *const msg_buffer) { + bool is_fresh = msg_buffer->get_freshness(offset); + assert(is_fresh); + return 0; +} + +UU() static int +assert_stale(const int32_t &offset, const uint32_t UU(idx), message_buffer *const msg_buffer) { + bool is_fresh = msg_buffer->get_freshness(offset); + assert(!is_fresh); + return 0; +} + +static void bnc_verify_message_trees(NONLEAF_CHILDINFO UU(bnc)) { +#ifdef TOKU_DEBUG_PARANOID + bnc->fresh_message_tree.iterate(&bnc->msg_buffer); + bnc->stale_message_tree.iterate(&bnc->msg_buffer); +#endif +} + +static int +wbuf_write_offset(const int32_t &offset, const uint32_t UU(idx), struct wbuf *const wb) { + wbuf_nocrc_int(wb, offset); + return 0; +} + +static void serialize_child_buffer(NONLEAF_CHILDINFO bnc, struct wbuf *wb) { + unsigned char ch = FTNODE_PARTITION_MSG_BUFFER; + wbuf_nocrc_char(wb, ch); + + // serialize the message buffer + bnc->msg_buffer.serialize_to_wbuf(wb); + + // serialize the message trees (num entries, offsets array): + // first, verify their contents are consistent with the message buffer + bnc_verify_message_trees(bnc); + + // fresh + wbuf_nocrc_int(wb, bnc->fresh_message_tree.size()); + bnc->fresh_message_tree.iterate(wb); + + // stale + wbuf_nocrc_int(wb, bnc->stale_message_tree.size()); + bnc->stale_message_tree.iterate(wb); + + // broadcast + wbuf_nocrc_int(wb, bnc->broadcast_list.size()); + bnc->broadcast_list.iterate(wb); +} + +// +// Serialize the i'th partition of node into sb +// For leaf nodes, this would be the i'th basement node +// For internal nodes, this would be the i'th internal node +// +static void +serialize_ftnode_partition(FTNODE node, int i, struct sub_block *sb) { + // Caller should have allocated memory. 
+ invariant_notnull(sb->uncompressed_ptr); + invariant(sb->uncompressed_size > 0); + paranoid_invariant(sb->uncompressed_size == serialize_ftnode_partition_size(node, i)); + + // + // Now put the data into sb->uncompressed_ptr + // + struct wbuf wb; + wbuf_init(&wb, sb->uncompressed_ptr, sb->uncompressed_size); + if (node->height > 0) { + // TODO: (Zardosht) possibly exit early if there are no messages + serialize_child_buffer(BNC(node, i), &wb); + } + else { + unsigned char ch = FTNODE_PARTITION_DMT_LEAVES; + bn_data* bd = BLB_DATA(node, i); + + wbuf_nocrc_char(&wb, ch); + wbuf_nocrc_uint(&wb, bd->num_klpairs()); + + bd->serialize_to_wbuf(&wb); + } + uint32_t end_to_end_checksum = toku_x1764_memory(sb->uncompressed_ptr, wbuf_get_woffset(&wb)); + wbuf_nocrc_int(&wb, end_to_end_checksum); + invariant(wb.ndone == wb.size); + invariant(sb->uncompressed_size==wb.ndone); +} + +// +// Takes the data in sb->uncompressed_ptr, and compresses it +// into a newly allocated buffer sb->compressed_ptr +// +static void +compress_ftnode_sub_block(struct sub_block *sb, enum toku_compression_method method) { + invariant(sb->compressed_ptr != nullptr); + invariant(sb->compressed_size_bound > 0); + paranoid_invariant(sb->compressed_size_bound == toku_compress_bound(method, sb->uncompressed_size)); + + // + // This probably seems a bit complicated. Here is what is going on. + // In TokuFT 5.0, sub_blocks were compressed and the compressed data + // was checksummed. The checksum did NOT include the size of the compressed data + // and the size of the uncompressed data. The fields of sub_block only reference the + // compressed data, and it is the responsibility of the user of the sub_block + // to write the length + // + // For Dr. No, we want the checksum to also include the size of the compressed data, and the + // size of the decompressed data, because this data + // may be read off of disk alone, so it must be verifiable alone. 
+ // + // So, we pass in a buffer to compress_nocrc_sub_block that starts 8 bytes after the beginning + // of sb->compressed_ptr, so we have space to put in the sizes, and then run the checksum. + // + sb->compressed_size = compress_nocrc_sub_block( + sb, + (char *)sb->compressed_ptr + 8, + sb->compressed_size_bound, + method + ); + + uint32_t* extra = (uint32_t *)(sb->compressed_ptr); + // store the compressed and uncompressed size at the beginning + extra[0] = toku_htod32(sb->compressed_size); + extra[1] = toku_htod32(sb->uncompressed_size); + // now checksum the entire thing + sb->compressed_size += 8; // now add the eight bytes that we saved for the sizes + sb->xsum = toku_x1764_memory(sb->compressed_ptr,sb->compressed_size); + + // + // This is the end result for Dr. No and forward. For ftnodes, sb->compressed_ptr contains + // two integers at the beginning, the size and uncompressed size, and then the compressed + // data. sb->xsum contains the checksum of this entire thing. + // + // In TokuFT 5.0, sb->compressed_ptr only contained the compressed data, sb->xsum + // checksummed only the compressed data, and the checksumming of the sizes were not + // done here. 
+ // +} + +// +// Returns the size needed to serialize the ftnode info +// Does not include header information that is common with rollback logs +// such as the magic, layout_version, and build_id +// Includes only node specific info such as pivot information, n_children, and so on +// +static uint32_t +serialize_ftnode_info_size(FTNODE node) +{ + uint32_t retval = 0; + retval += 8; // max_msn_applied_to_node_on_disk + retval += 4; // nodesize + retval += 4; // flags + retval += 4; // height; + retval += 8; // oldest_referenced_xid_known + retval += node->pivotkeys.serialized_size(); + retval += (node->n_children-1)*4; // encode length of each pivot + if (node->height > 0) { + retval += node->n_children*8; // child blocknum's + } + retval += 4; // checksum + return retval; +} + +static void serialize_ftnode_info(FTNODE node, SUB_BLOCK sb) { + // Memory must have been allocated by our caller. + invariant(sb->uncompressed_size > 0); + invariant_notnull(sb->uncompressed_ptr); + paranoid_invariant(sb->uncompressed_size == serialize_ftnode_info_size(node)); + + struct wbuf wb; + wbuf_init(&wb, sb->uncompressed_ptr, sb->uncompressed_size); + + wbuf_MSN(&wb, node->max_msn_applied_to_node_on_disk); + wbuf_nocrc_uint(&wb, 0); // write a dummy value for where node->nodesize used to be + wbuf_nocrc_uint(&wb, node->flags); + wbuf_nocrc_int (&wb, node->height); + wbuf_TXNID(&wb, node->oldest_referenced_xid_known); + node->pivotkeys.serialize_to_wbuf(&wb); + + // child blocks, only for internal nodes + if (node->height > 0) { + for (int i = 0; i < node->n_children; i++) { + wbuf_nocrc_BLOCKNUM(&wb, BP_BLOCKNUM(node,i)); + } + } + + uint32_t end_to_end_checksum = toku_x1764_memory(sb->uncompressed_ptr, wbuf_get_woffset(&wb)); + wbuf_nocrc_int(&wb, end_to_end_checksum); + invariant(wb.ndone == wb.size); + invariant(sb->uncompressed_size==wb.ndone); +} + +// This is the size of the uncompressed data, not including the compression headers +unsigned int +toku_serialize_ftnode_size 
(FTNODE node) { + unsigned int result = 0; + // + // As of now, this seems to be called if and only if the entire node is supposed + // to be in memory, so we will assert it. + // + toku_ftnode_assert_fully_in_memory(node); + result += serialize_node_header_size(node); + result += serialize_ftnode_info_size(node); + for (int i = 0; i < node->n_children; i++) { + result += serialize_ftnode_partition_size(node,i); + } + return result; +} + +struct serialize_times { + tokutime_t serialize_time; + tokutime_t compress_time; +}; + +static void +serialize_and_compress_partition(FTNODE node, + int childnum, + enum toku_compression_method compression_method, + SUB_BLOCK sb, + struct serialize_times *st) +{ + // serialize, compress, update status + tokutime_t t0 = toku_time_now(); + serialize_ftnode_partition(node, childnum, sb); + tokutime_t t1 = toku_time_now(); + compress_ftnode_sub_block(sb, compression_method); + tokutime_t t2 = toku_time_now(); + + st->serialize_time += t1 - t0; + st->compress_time += t2 - t1; +} + +void +toku_create_compressed_partition_from_available( + FTNODE node, + int childnum, + enum toku_compression_method compression_method, + SUB_BLOCK sb + ) +{ + tokutime_t t0 = toku_time_now(); + + // serialize + sb->uncompressed_size = serialize_ftnode_partition_size(node, childnum); + toku::scoped_malloc uncompressed_buf(sb->uncompressed_size); + sb->uncompressed_ptr = uncompressed_buf.get(); + serialize_ftnode_partition(node, childnum, sb); + + tokutime_t t1 = toku_time_now(); + + // compress. 
no need to pad with extra bytes for sizes/xsum - we're not storing them + set_compressed_size_bound(sb, compression_method); + sb->compressed_ptr = toku_xmalloc(sb->compressed_size_bound); + sb->compressed_size = compress_nocrc_sub_block( + sb, + sb->compressed_ptr, + sb->compressed_size_bound, + compression_method + ); + sb->uncompressed_ptr = NULL; + + tokutime_t t2 = toku_time_now(); + + toku_ft_status_update_serialize_times(node, t1 - t0, t2 - t1); +} + +static void +serialize_and_compress_serially(FTNODE node, + int npartitions, + enum toku_compression_method compression_method, + struct sub_block sb[], + struct serialize_times *st) { + for (int i = 0; i < npartitions; i++) { + serialize_and_compress_partition(node, i, compression_method, &sb[i], st); + } +} + +struct serialize_compress_work { + struct work base; + FTNODE node; + int i; + enum toku_compression_method compression_method; + struct sub_block *sb; + struct serialize_times st; +}; + +static void * +serialize_and_compress_worker(void *arg) { + struct workset *ws = (struct workset *) arg; + while (1) { + struct serialize_compress_work *w = (struct serialize_compress_work *) workset_get(ws); + if (w == NULL) + break; + int i = w->i; + serialize_and_compress_partition(w->node, i, w->compression_method, &w->sb[i], &w->st); + } + workset_release_ref(ws); + return arg; +} + +static void +serialize_and_compress_in_parallel(FTNODE node, + int npartitions, + enum toku_compression_method compression_method, + struct sub_block sb[], + struct serialize_times *st) { + if (npartitions == 1) { + serialize_and_compress_partition(node, 0, compression_method, &sb[0], st); + } else { + int T = num_cores; + if (T > npartitions) + T = npartitions; + if (T > 0) + T = T - 1; + struct workset ws; + ZERO_STRUCT(ws); + workset_init(&ws); + struct serialize_compress_work work[npartitions]; + workset_lock(&ws); + for (int i = 0; i < npartitions; i++) { + work[i] = (struct serialize_compress_work) { .base = {{NULL}}, + .node = 
node, + .i = i, + .compression_method = compression_method, + .sb = sb, + .st = { .serialize_time = 0, .compress_time = 0} }; + workset_put_locked(&ws, &work[i].base); + } + workset_unlock(&ws); + toku_thread_pool_run(ft_pool, 0, &T, serialize_and_compress_worker, &ws); + workset_add_ref(&ws, T); + serialize_and_compress_worker(&ws); + workset_join(&ws); + workset_destroy(&ws); + + // gather up the statistics from each thread's work item + for (int i = 0; i < npartitions; i++) { + st->serialize_time += work[i].st.serialize_time; + st->compress_time += work[i].st.compress_time; + } + } +} + +static void +serialize_and_compress_sb_node_info(FTNODE node, struct sub_block *sb, + enum toku_compression_method compression_method, struct serialize_times *st) { + // serialize, compress, update serialize times. + tokutime_t t0 = toku_time_now(); + serialize_ftnode_info(node, sb); + tokutime_t t1 = toku_time_now(); + compress_ftnode_sub_block(sb, compression_method); + tokutime_t t2 = toku_time_now(); + + st->serialize_time += t1 - t0; + st->compress_time += t2 - t1; +} + +int toku_serialize_ftnode_to_memory(FTNODE node, + FTNODE_DISK_DATA* ndd, + unsigned int basementnodesize, + enum toku_compression_method compression_method, + bool do_rebalancing, + bool in_parallel, // for loader is true, for toku_ftnode_flush_callback, is false + /*out*/ size_t *n_bytes_to_write, + /*out*/ size_t *n_uncompressed_bytes, + /*out*/ char **bytes_to_write) +// Effect: Writes out each child to a separate malloc'd buffer, then compresses +// all of them, and writes the uncompressed header, to bytes_to_write, +// which is malloc'd. +// +// The resulting buffer is guaranteed to be 512-byte aligned and the total length is a multiple of 512 (so we pad with zeros at the end if needed). +// 512-byte padding is for O_DIRECT to work. 
+{ + toku_ftnode_assert_fully_in_memory(node); + + if (do_rebalancing && node->height == 0) { + toku_ftnode_leaf_rebalance(node, basementnodesize); + } + const int npartitions = node->n_children; + + // Each partition represents a compressed sub block + // For internal nodes, a sub block is a message buffer + // For leaf nodes, a sub block is a basement node + toku::scoped_calloc sb_buf(sizeof(struct sub_block) * npartitions); + struct sub_block *sb = reinterpret_cast(sb_buf.get()); + XREALLOC_N(npartitions, *ndd); + + // + // First, let's serialize and compress the individual sub blocks + // + + // determine how large our serialization and compression buffers need to be. + size_t serialize_buf_size = 0, compression_buf_size = 0; + for (int i = 0; i < node->n_children; i++) { + sb[i].uncompressed_size = serialize_ftnode_partition_size(node, i); + sb[i].compressed_size_bound = toku_compress_bound(compression_method, sb[i].uncompressed_size); + serialize_buf_size += sb[i].uncompressed_size; + compression_buf_size += sb[i].compressed_size_bound + 8; // add 8 extra bytes, 4 for compressed size, 4 for decompressed size + } + + // give each sub block a base pointer to enough buffer space for serialization and compression + toku::scoped_malloc serialize_buf(serialize_buf_size); + toku::scoped_malloc compression_buf(compression_buf_size); + for (size_t i = 0, uncompressed_offset = 0, compressed_offset = 0; i < (size_t) node->n_children; i++) { + sb[i].uncompressed_ptr = reinterpret_cast(serialize_buf.get()) + uncompressed_offset; + sb[i].compressed_ptr = reinterpret_cast(compression_buf.get()) + compressed_offset; + uncompressed_offset += sb[i].uncompressed_size; + compressed_offset += sb[i].compressed_size_bound + 8; // add 8 extra bytes, 4 for compressed size, 4 for decompressed size + invariant(uncompressed_offset <= serialize_buf_size); + invariant(compressed_offset <= compression_buf_size); + } + + // do the actual serialization now that we have buffer space + struct 
serialize_times st = { 0, 0 }; + if (in_parallel) { + serialize_and_compress_in_parallel(node, npartitions, compression_method, sb, &st); + } else { + serialize_and_compress_serially(node, npartitions, compression_method, sb, &st); + } + + // + // Now lets create a sub-block that has the common node information, + // This does NOT include the header + // + + // determine how large our serialization and copmression buffers need to be + struct sub_block sb_node_info; + sub_block_init(&sb_node_info); + size_t sb_node_info_uncompressed_size = serialize_ftnode_info_size(node); + size_t sb_node_info_compressed_size_bound = toku_compress_bound(compression_method, sb_node_info_uncompressed_size); + toku::scoped_malloc sb_node_info_uncompressed_buf(sb_node_info_uncompressed_size); + toku::scoped_malloc sb_node_info_compressed_buf(sb_node_info_compressed_size_bound + 8); // add 8 extra bytes, 4 for compressed size, 4 for decompressed size + sb_node_info.uncompressed_size = sb_node_info_uncompressed_size; + sb_node_info.uncompressed_ptr = sb_node_info_uncompressed_buf.get(); + sb_node_info.compressed_size_bound = sb_node_info_compressed_size_bound; + sb_node_info.compressed_ptr = sb_node_info_compressed_buf.get(); + + // do the actual serialization now that we have buffer space + serialize_and_compress_sb_node_info(node, &sb_node_info, compression_method, &st); + + // + // At this point, we have compressed each of our pieces into individual sub_blocks, + // we can put the header and all the subblocks into a single buffer and return it. + // + + // update the serialize times, ignore the header for simplicity. we captured all + // of the partitions' serialize times so that's probably good enough. 
+ toku_ft_status_update_serialize_times(node, st.serialize_time, st.compress_time); + + // The total size of the node is: + // size of header + disk size of the n+1 sub_block's created above + uint32_t total_node_size = (serialize_node_header_size(node) // uncompressed header + + sb_node_info.compressed_size // compressed nodeinfo (without its checksum) + + 4); // nodeinfo's checksum + uint32_t total_uncompressed_size = (serialize_node_header_size(node) // uncompressed header + + sb_node_info.uncompressed_size // uncompressed nodeinfo (without its checksum) + + 4); // nodeinfo's checksum + // store the BP_SIZESs + for (int i = 0; i < node->n_children; i++) { + uint32_t len = sb[i].compressed_size + 4; // data and checksum + BP_SIZE (*ndd,i) = len; + BP_START(*ndd,i) = total_node_size; + total_node_size += sb[i].compressed_size + 4; + total_uncompressed_size += sb[i].uncompressed_size + 4; + } + + // now create the final serialized node + uint32_t total_buffer_size = roundup_to_multiple(512, total_node_size); // make the buffer be 512 bytes. 
+ char *XMALLOC_N_ALIGNED(512, total_buffer_size, data); + char *curr_ptr = data; + + // write the header + struct wbuf wb; + wbuf_init(&wb, curr_ptr, serialize_node_header_size(node)); + serialize_node_header(node, *ndd, &wb); + assert(wb.ndone == wb.size); + curr_ptr += serialize_node_header_size(node); + + // now write sb_node_info + memcpy(curr_ptr, sb_node_info.compressed_ptr, sb_node_info.compressed_size); + curr_ptr += sb_node_info.compressed_size; + // write the checksum + *(uint32_t *)curr_ptr = toku_htod32(sb_node_info.xsum); + curr_ptr += sizeof(sb_node_info.xsum); + + for (int i = 0; i < npartitions; i++) { + memcpy(curr_ptr, sb[i].compressed_ptr, sb[i].compressed_size); + curr_ptr += sb[i].compressed_size; + // write the checksum + *(uint32_t *)curr_ptr = toku_htod32(sb[i].xsum); + curr_ptr += sizeof(sb[i].xsum); + } + // Zero the rest of the buffer + memset(data + total_node_size, 0, total_buffer_size - total_node_size); + + assert(curr_ptr - data == total_node_size); + *bytes_to_write = data; + *n_bytes_to_write = total_buffer_size; + *n_uncompressed_bytes = total_uncompressed_size; + + invariant(*n_bytes_to_write % 512 == 0); + invariant(reinterpret_cast(*bytes_to_write) % 512 == 0); + return 0; +} + +int +toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DATA* ndd, bool do_rebalancing, FT ft, bool for_checkpoint) { + + size_t n_to_write; + size_t n_uncompressed_bytes; + char *compressed_buf = nullptr; + + // because toku_serialize_ftnode_to is only called for + // in toku_ftnode_flush_callback, we pass false + // for in_parallel. 
The reasoning is that when we write + // nodes to disk via toku_ftnode_flush_callback, we + // assume that it is being done on a non-critical + // background thread (probably for checkpointing), and therefore + // should not hog CPU, + // + // Should the above facts change, we may want to revisit + // passing false for in_parallel here + // + // alternatively, we could have made in_parallel a parameter + // for toku_serialize_ftnode_to, but instead we did this. + int r = toku_serialize_ftnode_to_memory( + node, + ndd, + ft->h->basementnodesize, + ft->h->compression_method, + do_rebalancing, + toku_serialize_in_parallel, // in_parallel + &n_to_write, + &n_uncompressed_bytes, + &compressed_buf + ); + if (r != 0) { + return r; + } + + // If the node has never been written, then write the whole buffer, including the zeros + invariant(blocknum.b>=0); + DISKOFF offset; + + // Dirties the ft + ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset, + ft, fd, for_checkpoint, + // Allocations for nodes high in the tree are considered 'hot', + // as they are likely to move again in the next checkpoint. + node->height); + + tokutime_t t0 = toku_time_now(); + toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); + tokutime_t t1 = toku_time_now(); + + tokutime_t io_time = t1 - t0; + toku_ft_status_update_flush_reason(node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint); + + toku_free(compressed_buf); + node->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction. + return 0; +} + +static void +sort_and_steal_offset_arrays(NONLEAF_CHILDINFO bnc, + const toku::comparator &cmp, + int32_t **fresh_offsets, int32_t nfresh, + int32_t **stale_offsets, int32_t nstale, + int32_t **broadcast_offsets, int32_t nbroadcast) { + // We always have fresh / broadcast offsets (even if they are empty) + // but we may not have stale offsets, in the case of v13 upgrade. 
+ invariant(fresh_offsets != nullptr); + invariant(broadcast_offsets != nullptr); + invariant(cmp.valid()); + + typedef toku::sort msn_sort; + + const int32_t n_in_this_buffer = nfresh + nstale + nbroadcast; + struct toku_msg_buffer_key_msn_cmp_extra extra(cmp, &bnc->msg_buffer); + msn_sort::mergesort_r(*fresh_offsets, nfresh, extra); + bnc->fresh_message_tree.destroy(); + bnc->fresh_message_tree.create_steal_sorted_array(fresh_offsets, nfresh, n_in_this_buffer); + if (stale_offsets) { + msn_sort::mergesort_r(*stale_offsets, nstale, extra); + bnc->stale_message_tree.destroy(); + bnc->stale_message_tree.create_steal_sorted_array(stale_offsets, nstale, n_in_this_buffer); + } + bnc->broadcast_list.destroy(); + bnc->broadcast_list.create_steal_sorted_array(broadcast_offsets, nbroadcast, n_in_this_buffer); +} + +static MSN +deserialize_child_buffer_v13(FT ft, NONLEAF_CHILDINFO bnc, struct rbuf *rb) { + // We skip 'stale' offsets for upgraded nodes. + int32_t nfresh = 0, nbroadcast = 0; + int32_t *fresh_offsets = nullptr, *broadcast_offsets = nullptr; + + // Only sort buffers if we have a valid comparison function. In certain scenarios, + // like deserialie_ft_versioned() or tokuftdump, we'll need to deserialize ftnodes + // for simple inspection and don't actually require that the message buffers are + // properly sorted. This is very ugly, but correct. + const bool sort = ft->cmp.valid(); + + MSN highest_msn_in_this_buffer = + bnc->msg_buffer.deserialize_from_rbuf_v13(rb, &ft->h->highest_unused_msn_for_upgrade, + sort ? &fresh_offsets : nullptr, &nfresh, + sort ? 
&broadcast_offsets : nullptr, &nbroadcast); + + if (sort) { + sort_and_steal_offset_arrays(bnc, ft->cmp, + &fresh_offsets, nfresh, + nullptr, 0, // no stale offsets + &broadcast_offsets, nbroadcast); + } + + return highest_msn_in_this_buffer; +} + +static void +deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rb, const toku::comparator &cmp) { + int32_t nfresh = 0, nstale = 0, nbroadcast = 0; + int32_t *fresh_offsets, *stale_offsets, *broadcast_offsets; + + // Only sort buffers if we have a valid comparison function. In certain scenarios, + // like deserialie_ft_versioned() or tokuftdump, we'll need to deserialize ftnodes + // for simple inspection and don't actually require that the message buffers are + // properly sorted. This is very ugly, but correct. + const bool sort = cmp.valid(); + + // read in the message buffer + bnc->msg_buffer.deserialize_from_rbuf(rb, + sort ? &fresh_offsets : nullptr, &nfresh, + sort ? &stale_offsets : nullptr, &nstale, + sort ? &broadcast_offsets : nullptr, &nbroadcast); + + if (sort) { + sort_and_steal_offset_arrays(bnc, cmp, + &fresh_offsets, nfresh, + &stale_offsets, nstale, + &broadcast_offsets, nbroadcast); + } +} + +static void +deserialize_child_buffer(NONLEAF_CHILDINFO bnc, struct rbuf *rb) { + // read in the message buffer + bnc->msg_buffer.deserialize_from_rbuf(rb, + nullptr, nullptr, // fresh_offsets, nfresh, + nullptr, nullptr, // stale_offsets, nstale, + nullptr, nullptr); // broadcast_offsets, nbroadcast + + // read in each message tree (fresh, stale, broadcast) + int32_t nfresh = rbuf_int(rb); + int32_t *XMALLOC_N(nfresh, fresh_offsets); + for (int i = 0; i < nfresh; i++) { + fresh_offsets[i] = rbuf_int(rb); + } + + int32_t nstale = rbuf_int(rb); + int32_t *XMALLOC_N(nstale, stale_offsets); + for (int i = 0; i < nstale; i++) { + stale_offsets[i] = rbuf_int(rb); + } + + int32_t nbroadcast = rbuf_int(rb); + int32_t *XMALLOC_N(nbroadcast, broadcast_offsets); + for (int i = 0; i < nbroadcast; i++) { + 
broadcast_offsets[i] = rbuf_int(rb); + } + + // build OMTs out of each offset array + bnc->fresh_message_tree.destroy(); + bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, nfresh); + bnc->stale_message_tree.destroy(); + bnc->stale_message_tree.create_steal_sorted_array(&stale_offsets, nstale, nstale); + bnc->broadcast_list.destroy(); + bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast, nbroadcast); +} + +// dump a buffer to stderr +// no locking around this for now +void +dump_bad_block(unsigned char *vp, uint64_t size) { + const uint64_t linesize = 64; + uint64_t n = size / linesize; + for (uint64_t i = 0; i < n; i++) { + fprintf(stderr, "%p: ", vp); + for (uint64_t j = 0; j < linesize; j++) { + unsigned char c = vp[j]; + fprintf(stderr, "%2.2X", c); + } + fprintf(stderr, "\n"); + vp += linesize; + } + size = size % linesize; + for (uint64_t i=0; idata_buffer.initialize_empty(); + return bn; +} + +BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn) { + BASEMENTNODE bn = toku_create_empty_bn_no_buffer(); + bn->max_msn_applied = orig_bn->max_msn_applied; + bn->seqinsert = orig_bn->seqinsert; + bn->stale_ancestor_messages_applied = orig_bn->stale_ancestor_messages_applied; + bn->stat64_delta = orig_bn->stat64_delta; + bn->data_buffer.clone(&orig_bn->data_buffer); + return bn; +} + +BASEMENTNODE toku_create_empty_bn_no_buffer(void) { + BASEMENTNODE XMALLOC(bn); + bn->max_msn_applied.msn = 0; + bn->seqinsert = 0; + bn->stale_ancestor_messages_applied = false; + bn->stat64_delta = ZEROSTATS; + bn->data_buffer.init_zero(); + return bn; +} + +NONLEAF_CHILDINFO toku_create_empty_nl(void) { + NONLEAF_CHILDINFO XMALLOC(cn); + cn->msg_buffer.create(); + cn->fresh_message_tree.create_no_array(); + cn->stale_message_tree.create_no_array(); + cn->broadcast_list.create_no_array(); + memset(cn->flow, 0, sizeof cn->flow); + return cn; +} + +// must clone the OMTs, since we serialize them along with the message buffer 
+NONLEAF_CHILDINFO toku_clone_nl(NONLEAF_CHILDINFO orig_childinfo) { + NONLEAF_CHILDINFO XMALLOC(cn); + cn->msg_buffer.clone(&orig_childinfo->msg_buffer); + cn->fresh_message_tree.create_no_array(); + cn->fresh_message_tree.clone(orig_childinfo->fresh_message_tree); + cn->stale_message_tree.create_no_array(); + cn->stale_message_tree.clone(orig_childinfo->stale_message_tree); + cn->broadcast_list.create_no_array(); + cn->broadcast_list.clone(orig_childinfo->broadcast_list); + memset(cn->flow, 0, sizeof cn->flow); + return cn; +} + +void destroy_basement_node (BASEMENTNODE bn) +{ + bn->data_buffer.destroy(); + toku_free(bn); +} + +void destroy_nonleaf_childinfo (NONLEAF_CHILDINFO nl) +{ + nl->msg_buffer.destroy(); + nl->fresh_message_tree.destroy(); + nl->stale_message_tree.destroy(); + nl->broadcast_list.destroy(); + toku_free(nl); +} + +void read_block_from_fd_into_rbuf( + int fd, + BLOCKNUM blocknum, + FT ft, + struct rbuf *rb + ) +{ + // get the file offset and block size for the block + DISKOFF offset, size; + ft->blocktable.translate_blocknum_to_offset_size(blocknum, &offset, &size); + DISKOFF size_aligned = roundup_to_multiple(512, size); + uint8_t *XMALLOC_N_ALIGNED(512, size_aligned, raw_block); + rbuf_init(rb, raw_block, size); + // read the block + ssize_t rlen = toku_os_pread(fd, raw_block, size_aligned, offset); + assert((DISKOFF)rlen >= size); + assert((DISKOFF)rlen <= size_aligned); +} + +static const int read_header_heuristic_max = 32*1024; + +#ifndef MIN +#define MIN(a,b) (((a)>(b)) ? (b) : (a)) +#endif + +// Effect: If the header part of the node is small enough, then read it into the rbuf. The rbuf will be allocated to be big enough in any case. 
+static void read_ftnode_header_from_fd_into_rbuf_if_small_enough(int fd, BLOCKNUM blocknum, + FT ft, struct rbuf *rb, + ftnode_fetch_extra *bfe) { + DISKOFF offset, size; + ft->blocktable.translate_blocknum_to_offset_size(blocknum, &offset, &size); + DISKOFF read_size = roundup_to_multiple(512, MIN(read_header_heuristic_max, size)); + uint8_t *XMALLOC_N_ALIGNED(512, roundup_to_multiple(512, size), raw_block); + rbuf_init(rb, raw_block, read_size); + + // read the block + tokutime_t t0 = toku_time_now(); + ssize_t rlen = toku_os_pread(fd, raw_block, read_size, offset); + tokutime_t t1 = toku_time_now(); + + assert(rlen >= 0); + rbuf_init(rb, raw_block, rlen); + + bfe->bytes_read = rlen; + bfe->io_time = t1 - t0; + toku_ft_status_update_pivot_fetch_reason(bfe); +} + +// +// read the compressed partition into the sub_block, +// validate the checksum of the compressed data +// +int +read_compressed_sub_block(struct rbuf *rb, struct sub_block *sb) +{ + int r = 0; + sb->compressed_size = rbuf_int(rb); + sb->uncompressed_size = rbuf_int(rb); + const void **cp = (const void **) &sb->compressed_ptr; + rbuf_literal_bytes(rb, cp, sb->compressed_size); + sb->xsum = rbuf_int(rb); + // let's check the checksum + uint32_t actual_xsum = toku_x1764_memory((char *)sb->compressed_ptr-8, 8+sb->compressed_size); + if (sb->xsum != actual_xsum) { + r = TOKUDB_BAD_CHECKSUM; + } + return r; +} + +static int +read_and_decompress_sub_block(struct rbuf *rb, struct sub_block *sb) +{ + int r = 0; + r = read_compressed_sub_block(rb, sb); + if (r != 0) { + goto exit; + } + + just_decompress_sub_block(sb); +exit: + return r; +} + +// Allocates space for the sub-block and de-compresses the data from +// the supplied compressed pointer.. +void +just_decompress_sub_block(struct sub_block *sb) +{ + // TODO: Add assert that the subblock was read in. 
+ sb->uncompressed_ptr = toku_xmalloc(sb->uncompressed_size); + + toku_decompress( + (Bytef *) sb->uncompressed_ptr, + sb->uncompressed_size, + (Bytef *) sb->compressed_ptr, + sb->compressed_size + ); +} + +// verify the checksum +int +verify_ftnode_sub_block (struct sub_block *sb) +{ + int r = 0; + // first verify the checksum + uint32_t data_size = sb->uncompressed_size - 4; // checksum is 4 bytes at end + uint32_t stored_xsum = toku_dtoh32(*((uint32_t *)((char *)sb->uncompressed_ptr + data_size))); + uint32_t actual_xsum = toku_x1764_memory(sb->uncompressed_ptr, data_size); + if (stored_xsum != actual_xsum) { + dump_bad_block((Bytef *) sb->uncompressed_ptr, sb->uncompressed_size); + r = TOKUDB_BAD_CHECKSUM; + } + return r; +} + +// This function deserializes the data stored by serialize_ftnode_info +static int +deserialize_ftnode_info( + struct sub_block *sb, + FTNODE node + ) +{ + // sb_node_info->uncompressed_ptr stores the serialized node information + // this function puts that information into node + + // first verify the checksum + int r = 0; + r = verify_ftnode_sub_block(sb); + if (r != 0) { + goto exit; + } + + uint32_t data_size; + data_size = sb->uncompressed_size - 4; // checksum is 4 bytes at end + + // now with the data verified, we can read the information into the node + struct rbuf rb; + rbuf_init(&rb, (unsigned char *) sb->uncompressed_ptr, data_size); + + node->max_msn_applied_to_node_on_disk = rbuf_MSN(&rb); + (void)rbuf_int(&rb); + node->flags = rbuf_int(&rb); + node->height = rbuf_int(&rb); + if (node->layout_version_read_from_disk < FT_LAYOUT_VERSION_19) { + (void) rbuf_int(&rb); // optimized_for_upgrade + } + if (node->layout_version_read_from_disk >= FT_LAYOUT_VERSION_22) { + rbuf_TXNID(&rb, &node->oldest_referenced_xid_known); + } + + // now create the basement nodes or childinfos, depending on whether this is a + // leaf node or internal node + // now the subtree_estimates + + // n_children is now in the header, nd the allocatio of the 
node->bp is in deserialize_ftnode_from_rbuf. + + // now the pivots + if (node->n_children > 1) { + node->pivotkeys.deserialize_from_rbuf(&rb, node->n_children - 1); + } else { + node->pivotkeys.create_empty(); + } + + // if this is an internal node, unpack the block nums, and fill in necessary fields + // of childinfo + if (node->height > 0) { + for (int i = 0; i < node->n_children; i++) { + BP_BLOCKNUM(node,i) = rbuf_blocknum(&rb); + BP_WORKDONE(node, i) = 0; + } + } + + // make sure that all the data was read + if (data_size != rb.ndone) { + dump_bad_block(rb.buf, rb.size); + abort(); + } +exit: + return r; +} + +static void +setup_available_ftnode_partition(FTNODE node, int i) { + if (node->height == 0) { + set_BLB(node, i, toku_create_empty_bn()); + BLB_MAX_MSN_APPLIED(node,i) = node->max_msn_applied_to_node_on_disk; + } + else { + set_BNC(node, i, toku_create_empty_nl()); + } +} + +// Assign the child_to_read member of the bfe from the given ftnode +// that has been brought into memory. 
+static void +update_bfe_using_ftnode(FTNODE node, ftnode_fetch_extra *bfe) +{ + if (bfe->type == ftnode_fetch_subset && bfe->search != NULL) { + // we do not take into account prefetching yet + // as of now, if we need a subset, the only thing + // we can possibly require is a single basement node + // we find out what basement node the query cares about + // and check if it is available + bfe->child_to_read = toku_ft_search_which_child( + bfe->ft->cmp, + node, + bfe->search + ); + } else if (bfe->type == ftnode_fetch_keymatch) { + // we do not take into account prefetching yet + // as of now, if we need a subset, the only thing + // we can possibly require is a single basement node + // we find out what basement node the query cares about + // and check if it is available + if (node->height == 0) { + int left_child = bfe->leftmost_child_wanted(node); + int right_child = bfe->rightmost_child_wanted(node); + if (left_child == right_child) { + bfe->child_to_read = left_child; + } + } + } +} + +// Using the search parameters in the bfe, this function will +// initialize all of the given ftnode's partitions. +static void +setup_partitions_using_bfe(FTNODE node, + ftnode_fetch_extra *bfe, + bool data_in_memory) +{ + // Leftmost and Rightmost Child bounds. + int lc, rc; + if (bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch) { + lc = bfe->leftmost_child_wanted(node); + rc = bfe->rightmost_child_wanted(node); + } else { + lc = -1; + rc = -1; + } + + // + // setup memory needed for the node + // + //printf("node height %d, blocknum %" PRId64 ", type %d lc %d rc %d\n", node->height, node->blocknum.b, bfe->type, lc, rc); + for (int i = 0; i < node->n_children; i++) { + BP_INIT_UNTOUCHED_CLOCK(node,i); + if (data_in_memory) { + BP_STATE(node, i) = ((bfe->wants_child_available(i) || (lc <= i && i <= rc)) + ? 
PT_AVAIL : PT_COMPRESSED); + } else { + BP_STATE(node, i) = PT_ON_DISK; + } + BP_WORKDONE(node,i) = 0; + + switch (BP_STATE(node,i)) { + case PT_AVAIL: + setup_available_ftnode_partition(node, i); + BP_TOUCH_CLOCK(node,i); + break; + case PT_COMPRESSED: + set_BSB(node, i, sub_block_creat()); + break; + case PT_ON_DISK: + set_BNULL(node, i); + break; + case PT_INVALID: + abort(); + } + } +} + +static void setup_ftnode_partitions(FTNODE node, ftnode_fetch_extra *bfe, bool data_in_memory) +// Effect: Used when reading a ftnode into main memory, this sets up the partitions. +// We set bfe->child_to_read as well as the BP_STATE and the data pointers (e.g., with set_BSB or set_BNULL or other set_ operations). +// Arguments: Node: the node to set up. +// bfe: Describes the key range needed. +// data_in_memory: true if we have all the data (in which case we set the BP_STATE to be either PT_AVAIL or PT_COMPRESSED depending on the bfe. +// false if we don't have the partitions in main memory (in which case we set the state to PT_ON_DISK. +{ + // Set bfe->child_to_read. + update_bfe_using_ftnode(node, bfe); + + // Setup the partitions. 
+ setup_partitions_using_bfe(node, bfe, data_in_memory); +} + +/* deserialize the partition from the sub-block's uncompressed buffer + * and destroy the uncompressed buffer + */ +static int +deserialize_ftnode_partition( + struct sub_block *sb, + FTNODE node, + int childnum, // which partition to deserialize + const toku::comparator &cmp + ) +{ + int r = 0; + r = verify_ftnode_sub_block(sb); + if (r != 0) { + goto exit; + } + uint32_t data_size; + data_size = sb->uncompressed_size - 4; // checksum is 4 bytes at end + + // now with the data verified, we can read the information into the node + struct rbuf rb; + rbuf_init(&rb, (unsigned char *) sb->uncompressed_ptr, data_size); + unsigned char ch; + ch = rbuf_char(&rb); + + if (node->height > 0) { + assert(ch == FTNODE_PARTITION_MSG_BUFFER); + NONLEAF_CHILDINFO bnc = BNC(node, childnum); + if (node->layout_version_read_from_disk <= FT_LAYOUT_VERSION_26) { + // Layout version <= 26 did not serialize sorted message trees to disk. + deserialize_child_buffer_v26(bnc, &rb, cmp); + } else { + deserialize_child_buffer(bnc, &rb); + } + BP_WORKDONE(node, childnum) = 0; + } + else { + assert(ch == FTNODE_PARTITION_DMT_LEAVES); + BLB_SEQINSERT(node, childnum) = 0; + uint32_t num_entries = rbuf_int(&rb); + // we are now at the first byte of first leafentry + data_size -= rb.ndone; // remaining bytes of leafentry data + + BASEMENTNODE bn = BLB(node, childnum); + bn->data_buffer.deserialize_from_rbuf(num_entries, &rb, data_size, node->layout_version_read_from_disk); + } + assert(rb.ndone == rb.size); +exit: + return r; +} + +static int +decompress_and_deserialize_worker(struct rbuf curr_rbuf, struct sub_block curr_sb, FTNODE node, int child, + const toku::comparator &cmp, tokutime_t *decompress_time) +{ + int r = 0; + tokutime_t t0 = toku_time_now(); + r = read_and_decompress_sub_block(&curr_rbuf, &curr_sb); + tokutime_t t1 = toku_time_now(); + if (r == 0) { + // at this point, sb->uncompressed_ptr stores the serialized node 
partition + r = deserialize_ftnode_partition(&curr_sb, node, child, cmp); + } + *decompress_time = t1 - t0; + + toku_free(curr_sb.uncompressed_ptr); + return r; +} + +static int +check_and_copy_compressed_sub_block_worker(struct rbuf curr_rbuf, struct sub_block curr_sb, FTNODE node, int child) +{ + int r = 0; + r = read_compressed_sub_block(&curr_rbuf, &curr_sb); + if (r != 0) { + goto exit; + } + + SUB_BLOCK bp_sb; + bp_sb = BSB(node, child); + bp_sb->compressed_size = curr_sb.compressed_size; + bp_sb->uncompressed_size = curr_sb.uncompressed_size; + bp_sb->compressed_ptr = toku_xmalloc(bp_sb->compressed_size); + memcpy(bp_sb->compressed_ptr, curr_sb.compressed_ptr, bp_sb->compressed_size); +exit: + return r; +} + +static FTNODE alloc_ftnode_for_deserialize(uint32_t fullhash, BLOCKNUM blocknum) { +// Effect: Allocate an FTNODE and fill in the values that are not read from + FTNODE XMALLOC(node); + node->fullhash = fullhash; + node->blocknum = blocknum; + node->dirty = 0; + node->bp = nullptr; + node->oldest_referenced_xid_known = TXNID_NONE; + return node; +} + +static int +deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode, + FTNODE_DISK_DATA* ndd, + BLOCKNUM blocknum, + uint32_t fullhash, + ftnode_fetch_extra *bfe, + struct rbuf *rb, + int fd) +// If we have enough information in the rbuf to construct a header, then do so. +// Also fetch in the basement node if needed. +// Return 0 if it worked. If something goes wrong (including that we are looking at some old data format that doesn't have partitions) then return nonzero. +{ + int r = 0; + + tokutime_t t0, t1; + tokutime_t decompress_time = 0; + tokutime_t deserialize_time = 0; + + t0 = toku_time_now(); + + FTNODE node = alloc_ftnode_for_deserialize(fullhash, blocknum); + + if (rb->size < 24) { + // TODO: What error do we return here? + // Does it even matter? 
+ r = toku_db_badformat(); + goto cleanup; + } + + const void *magic; + rbuf_literal_bytes(rb, &magic, 8); + if (memcmp(magic, "tokuleaf", 8)!=0 && + memcmp(magic, "tokunode", 8)!=0) { + r = toku_db_badformat(); + goto cleanup; + } + + node->layout_version_read_from_disk = rbuf_int(rb); + if (node->layout_version_read_from_disk < FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) { + // This code path doesn't have to worry about upgrade. + r = toku_db_badformat(); + goto cleanup; + } + + // If we get here, we know the node is at least + // FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES. We haven't changed + // the serialization format since then (this comment is correct as of + // version 20, which is Deadshot) so we can go ahead and say the + // layout version is current (it will be as soon as we finish + // deserializing). + // TODO(leif): remove node->layout_version (#5174) + node->layout_version = FT_LAYOUT_VERSION; + + node->layout_version_original = rbuf_int(rb); + node->build_id = rbuf_int(rb); + node->n_children = rbuf_int(rb); + // Guaranteed to be have been able to read up to here. If n_children + // is too big, we may have a problem, so check that we won't overflow + // while reading the partition locations. + unsigned int nhsize; + nhsize = serialize_node_header_size(node); // we can do this because n_children is filled in. + unsigned int needed_size; + needed_size = nhsize + 12; // we need 12 more so that we can read the compressed block size information that follows for the nodeinfo. 
+ if (needed_size > rb->size) { + r = toku_db_badformat(); + goto cleanup; + } + + XMALLOC_N(node->n_children, node->bp); + XMALLOC_N(node->n_children, *ndd); + // read the partition locations + for (int i=0; in_children; i++) { + BP_START(*ndd,i) = rbuf_int(rb); + BP_SIZE (*ndd,i) = rbuf_int(rb); + } + + uint32_t checksum; + checksum = toku_x1764_memory(rb->buf, rb->ndone); + uint32_t stored_checksum; + stored_checksum = rbuf_int(rb); + if (stored_checksum != checksum) { + dump_bad_block(rb->buf, rb->size); + r = TOKUDB_BAD_CHECKSUM; + goto cleanup; + } + + // Now we want to read the pivot information. + struct sub_block sb_node_info; + sub_block_init(&sb_node_info); + sb_node_info.compressed_size = rbuf_int(rb); // we'll be able to read these because we checked the size earlier. + sb_node_info.uncompressed_size = rbuf_int(rb); + if (rb->size-rb->ndone < sb_node_info.compressed_size + 8) { + r = toku_db_badformat(); + goto cleanup; + } + + // Finish reading compressed the sub_block + const void **cp; + cp = (const void **) &sb_node_info.compressed_ptr; + rbuf_literal_bytes(rb, cp, sb_node_info.compressed_size); + sb_node_info.xsum = rbuf_int(rb); + // let's check the checksum + uint32_t actual_xsum; + actual_xsum = toku_x1764_memory((char *)sb_node_info.compressed_ptr-8, 8+sb_node_info.compressed_size); + if (sb_node_info.xsum != actual_xsum) { + r = TOKUDB_BAD_CHECKSUM; + goto cleanup; + } + + // Now decompress the subblock + { + toku::scoped_malloc sb_node_info_buf(sb_node_info.uncompressed_size); + sb_node_info.uncompressed_ptr = sb_node_info_buf.get(); + tokutime_t decompress_t0 = toku_time_now(); + toku_decompress( + (Bytef *) sb_node_info.uncompressed_ptr, + sb_node_info.uncompressed_size, + (Bytef *) sb_node_info.compressed_ptr, + sb_node_info.compressed_size + ); + tokutime_t decompress_t1 = toku_time_now(); + decompress_time = decompress_t1 - decompress_t0; + + // at this point sb->uncompressed_ptr stores the serialized node info. 
+ r = deserialize_ftnode_info(&sb_node_info, node); + if (r != 0) { + goto cleanup; + } + } + + // Now we have the ftnode_info. We have a bunch more stuff in the + // rbuf, so we might be able to store the compressed data for some + // objects. + // We can proceed to deserialize the individual subblocks. + + // setup the memory of the partitions + // for partitions being decompressed, create either message buffer or basement node + // for partitions staying compressed, create sub_block + setup_ftnode_partitions(node, bfe, false); + + // We must capture deserialize and decompression time before + // the pf_callback, otherwise we would double-count. + t1 = toku_time_now(); + deserialize_time = (t1 - t0) - decompress_time; + + // do partial fetch if necessary + if (bfe->type != ftnode_fetch_none) { + PAIR_ATTR attr; + r = toku_ftnode_pf_callback(node, *ndd, bfe, fd, &attr); + if (r != 0) { + goto cleanup; + } + } + + // handle clock + for (int i = 0; i < node->n_children; i++) { + if (bfe->wants_child_available(i)) { + paranoid_invariant(BP_STATE(node,i) == PT_AVAIL); + BP_TOUCH_CLOCK(node,i); + } + } + *ftnode = node; + r = 0; + +cleanup: + if (r == 0) { + bfe->deserialize_time += deserialize_time; + bfe->decompress_time += decompress_time; + toku_ft_status_update_deserialize_times(node, deserialize_time, decompress_time); + } + if (r != 0) { + if (node) { + toku_free(*ndd); + toku_free(node->bp); + toku_free(node); + } + } + return r; +} + +// This function takes a deserialized version 13 or 14 buffer and +// constructs the associated internal, non-leaf ftnode object. It +// also creates MSN's for older messages created in older versions +// that did not generate MSN's for messages. These new MSN's are +// generated from the root downwards, counting backwards from MIN_MSN +// and persisted in the ft header. 
+static int +deserialize_and_upgrade_internal_node(FTNODE node, + struct rbuf *rb, + ftnode_fetch_extra *bfe, + STAT64INFO info) +{ + int version = node->layout_version_read_from_disk; + + if (version == FT_LAST_LAYOUT_VERSION_WITH_FINGERPRINT) { + (void) rbuf_int(rb); // 10. fingerprint + } + + node->n_children = rbuf_int(rb); // 11. n_children + + // Sub-tree esitmates... + for (int i = 0; i < node->n_children; ++i) { + if (version == FT_LAST_LAYOUT_VERSION_WITH_FINGERPRINT) { + (void) rbuf_int(rb); // 12. fingerprint + } + uint64_t nkeys = rbuf_ulonglong(rb); // 13. nkeys + uint64_t ndata = rbuf_ulonglong(rb); // 14. ndata + uint64_t dsize = rbuf_ulonglong(rb); // 15. dsize + (void) rbuf_char(rb); // 16. exact (char) + invariant(nkeys == ndata); + if (info) { + // info is non-null if we're trying to upgrade old subtree + // estimates to stat64info + info->numrows += nkeys; + info->numbytes += dsize; + } + } + + // Pivot keys + node->pivotkeys.deserialize_from_rbuf(rb, node->n_children - 1); + + // Create space for the child node buffers (a.k.a. partitions). + XMALLOC_N(node->n_children, node->bp); + + // Set the child blocknums. + for (int i = 0; i < node->n_children; ++i) { + BP_BLOCKNUM(node, i) = rbuf_blocknum(rb); // 18. blocknums + BP_WORKDONE(node, i) = 0; + } + + // Read in the child buffer maps. + for (int i = 0; i < node->n_children; ++i) { + // The following fields were previously used by the `sub_block_map' + // They include: + // - 4 byte index + (void) rbuf_int(rb); + // - 4 byte offset + (void) rbuf_int(rb); + // - 4 byte size + (void) rbuf_int(rb); + } + + // We need to setup this node's partitions, but we can't call the + // existing call (setup_ftnode_paritions.) because there are + // existing optimizations that would prevent us from bringing all + // of this node's partitions into memory. Instead, We use the + // existing bfe and node to set the bfe's child_to_search member. 
+ // Then we create a temporary bfe that needs all the nodes to make + // sure we properly intitialize our partitions before filling them + // in from our soon-to-be-upgraded node. + update_bfe_using_ftnode(node, bfe); + ftnode_fetch_extra temp_bfe; + temp_bfe.create_for_full_read(nullptr); + setup_partitions_using_bfe(node, &temp_bfe, true); + + // Cache the highest MSN generated for the message buffers. This + // will be set in the ftnode. + // + // The way we choose MSNs for upgraded messages is delicate. The + // field `highest_unused_msn_for_upgrade' in the header is always an + // MSN that no message has yet. So when we have N messages that need + // MSNs, we decrement it by N, and then use it and the N-1 MSNs less + // than it, but we do not use the value we decremented it to. + // + // In the code below, we initialize `lowest' with the value of + // `highest_unused_msn_for_upgrade' after it is decremented, so we + // need to be sure to increment it once before we enqueue our first + // message. + MSN highest_msn; + highest_msn.msn = 0; + + // Deserialize de-compressed buffers. + for (int i = 0; i < node->n_children; ++i) { + NONLEAF_CHILDINFO bnc = BNC(node, i); + MSN highest_msn_in_this_buffer = deserialize_child_buffer_v13(bfe->ft, bnc, rb); + if (highest_msn.msn == 0) { + highest_msn.msn = highest_msn_in_this_buffer.msn; + } + } + + // Assign the highest msn from our upgrade message buffers + node->max_msn_applied_to_node_on_disk = highest_msn; + // Since we assigned MSNs to this node's messages, we need to dirty it. + node->dirty = 1; + + // Must compute the checksum now (rather than at the end, while we + // still have the pointer to the buffer). + if (version >= FT_FIRST_LAYOUT_VERSION_WITH_END_TO_END_CHECKSUM) { + uint32_t expected_xsum = toku_dtoh32(*(uint32_t*)(rb->buf+rb->size-4)); // 27. 
checksum + uint32_t actual_xsum = toku_x1764_memory(rb->buf, rb->size-4); + if (expected_xsum != actual_xsum) { + fprintf(stderr, "%s:%d: Bad checksum: expected = %" PRIx32 ", actual= %" PRIx32 "\n", + __FUNCTION__, + __LINE__, + expected_xsum, + actual_xsum); + fprintf(stderr, + "Checksum failure while reading node in file %s.\n", + toku_cachefile_fname_in_env(bfe->ft->cf)); + fflush(stderr); + return toku_db_badformat(); + } + } + + return 0; +} + +// This function takes a deserialized version 13 or 14 buffer and +// constructs the associated leaf ftnode object. +static int +deserialize_and_upgrade_leaf_node(FTNODE node, + struct rbuf *rb, + ftnode_fetch_extra *bfe, + STAT64INFO info) +{ + int r = 0; + int version = node->layout_version_read_from_disk; + + // This is a leaf node, so the offsets in the buffer will be + // different from the internal node offsets above. + uint64_t nkeys = rbuf_ulonglong(rb); // 10. nkeys + uint64_t ndata = rbuf_ulonglong(rb); // 11. ndata + uint64_t dsize = rbuf_ulonglong(rb); // 12. dsize + invariant(nkeys == ndata); + if (info) { + // info is non-null if we're trying to upgrade old subtree + // estimates to stat64info + info->numrows += nkeys; + info->numbytes += dsize; + } + + // This is the optimized for upgrade field. + if (version == FT_LAYOUT_VERSION_14) { + (void) rbuf_int(rb); // 13. optimized + } + + // npartitions - This is really the number of leaf entries in + // our single basement node. There should only be 1 (ONE) + // partition, so there shouldn't be any pivot key stored. This + // means the loop will not iterate. We could remove the loop and + // assert that the value is indeed 1. + int npartitions = rbuf_int(rb); // 14. npartitions + assert(npartitions == 1); + + // Set number of children to 1, since we will only have one + // basement node. 
+ node->n_children = 1; + XMALLOC_N(node->n_children, node->bp); + node->pivotkeys.create_empty(); + + // Create one basement node to contain all the leaf entries by + // setting up the single partition and updating the bfe. + update_bfe_using_ftnode(node, bfe); + ftnode_fetch_extra temp_bfe; + temp_bfe.create_for_full_read(bfe->ft); + setup_partitions_using_bfe(node, &temp_bfe, true); + + // 11. Deserialize the partition maps, though they are not used in the + // newer versions of ftnodes. + for (int i = 0; i < node->n_children; ++i) { + // The following fields were previously used by the `sub_block_map' + // They include: + // - 4 byte index + (void) rbuf_int(rb); + // - 4 byte offset + (void) rbuf_int(rb); + // - 4 byte size + (void) rbuf_int(rb); + } + + // Copy all of the leaf entries into the single basement node. + + // The number of leaf entries in buffer. + int n_in_buf = rbuf_int(rb); // 15. # of leaves + BLB_SEQINSERT(node,0) = 0; + BASEMENTNODE bn = BLB(node, 0); + + // Read the leaf entries from the buffer, advancing the buffer + // as we go. + bool has_end_to_end_checksum = (version >= FT_FIRST_LAYOUT_VERSION_WITH_END_TO_END_CHECKSUM); + if (version <= FT_LAYOUT_VERSION_13) { + // Create our mempool. + // Loop through + for (int i = 0; i < n_in_buf; ++i) { + LEAFENTRY_13 le = reinterpret_cast(&rb->buf[rb->ndone]); + uint32_t disksize = leafentry_disksize_13(le); + rb->ndone += disksize; // 16. 
leaf entry (13) + invariant(rb->ndone<=rb->size); + LEAFENTRY new_le; + size_t new_le_size; + void* key = NULL; + uint32_t keylen = 0; + r = toku_le_upgrade_13_14(le, + &key, + &keylen, + &new_le_size, + &new_le); + assert_zero(r); + // Copy the pointer value straight into the OMT + LEAFENTRY new_le_in_bn = nullptr; + void *maybe_free; + bn->data_buffer.get_space_for_insert( + i, + key, + keylen, + new_le_size, + &new_le_in_bn, + &maybe_free + ); + if (maybe_free) { + toku_free(maybe_free); + } + memcpy(new_le_in_bn, new_le, new_le_size); + toku_free(new_le); + } + } else { + uint32_t data_size = rb->size - rb->ndone; + if (has_end_to_end_checksum) { + data_size -= sizeof(uint32_t); + } + bn->data_buffer.deserialize_from_rbuf(n_in_buf, rb, data_size, node->layout_version_read_from_disk); + } + + // Whatever this is must be less than the MSNs of every message above + // it, so it's ok to take it here. + bn->max_msn_applied = bfe->ft->h->highest_unused_msn_for_upgrade; + bn->stale_ancestor_messages_applied = false; + node->max_msn_applied_to_node_on_disk = bn->max_msn_applied; + + // Checksum (end to end) is only on version 14 + if (has_end_to_end_checksum) { + uint32_t expected_xsum = rbuf_int(rb); // 17. checksum + uint32_t actual_xsum = toku_x1764_memory(rb->buf, rb->size - 4); + if (expected_xsum != actual_xsum) { + fprintf(stderr, "%s:%d: Bad checksum: expected = %" PRIx32 ", actual= %" PRIx32 "\n", + __FUNCTION__, + __LINE__, + expected_xsum, + actual_xsum); + fprintf(stderr, + "Checksum failure while reading node in file %s.\n", + toku_cachefile_fname_in_env(bfe->ft->cf)); + fflush(stderr); + return toku_db_badformat(); + } + } + + // We should have read the whole block by this point. + if (rb->ndone != rb->size) { + // TODO: Error handling. 
+ return 1; + } + + return r; +} + +static int +read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum, + DISKOFF offset, DISKOFF size, + FT ft, + struct rbuf *rb, + /* out */ int *layout_version_p); + +// This function upgrades a version 14 or 13 ftnode to the current +// verison. NOTE: This code assumes the first field of the rbuf has +// already been read from the buffer (namely the layout_version of the +// ftnode.) +static int +deserialize_and_upgrade_ftnode(FTNODE node, + FTNODE_DISK_DATA* ndd, + BLOCKNUM blocknum, + ftnode_fetch_extra *bfe, + STAT64INFO info, + int fd) +{ + int r = 0; + int version; + + // I. First we need to de-compress the entire node, only then can + // we read the different sub-sections. + // get the file offset and block size for the block + DISKOFF offset, size; + bfe->ft->blocktable.translate_blocknum_to_offset_size(blocknum, &offset, &size); + + struct rbuf rb; + r = read_and_decompress_block_from_fd_into_rbuf(fd, + blocknum, + offset, + size, + bfe->ft, + &rb, + &version); + if (r != 0) { + goto exit; + } + + // Re-read the magic field from the previous call, since we are + // restarting with a fresh rbuf. + { + const void *magic; + rbuf_literal_bytes(&rb, &magic, 8); // 1. magic + } + + // II. Start reading ftnode fields out of the decompressed buffer. + + // Copy over old version info. + node->layout_version_read_from_disk = rbuf_int(&rb); // 2. layout version + version = node->layout_version_read_from_disk; + assert(version <= FT_LAYOUT_VERSION_14); + // Upgrade the current version number to the current version. + node->layout_version = FT_LAYOUT_VERSION; + + node->layout_version_original = rbuf_int(&rb); // 3. original layout + node->build_id = rbuf_int(&rb); // 4. build id + + // The remaining offsets into the rbuf do not map to the current + // version, so we need to fill in the blanks and ignore older + // fields. + (void)rbuf_int(&rb); // 5. nodesize + node->flags = rbuf_int(&rb); // 6. 
flags + node->height = rbuf_int(&rb); // 7. height + + // If the version is less than 14, there are two extra ints here. + // we would need to ignore them if they are there. + // These are the 'fingerprints'. + if (version == FT_LAYOUT_VERSION_13) { + (void) rbuf_int(&rb); // 8. rand4 + (void) rbuf_int(&rb); // 9. local + } + + // The next offsets are dependent on whether this is a leaf node + // or not. + + // III. Read in Leaf and Internal Node specific data. + + // Check height to determine whether this is a leaf node or not. + if (node->height > 0) { + r = deserialize_and_upgrade_internal_node(node, &rb, bfe, info); + } else { + r = deserialize_and_upgrade_leaf_node(node, &rb, bfe, info); + } + + XMALLOC_N(node->n_children, *ndd); + // Initialize the partition locations to zero, because version 14 + // and below have no notion of partitions on disk. + for (int i=0; in_children; i++) { + BP_START(*ndd,i) = 0; + BP_SIZE (*ndd,i) = 0; + } + + toku_free(rb.buf); +exit: + return r; +} + +static int +deserialize_ftnode_from_rbuf( + FTNODE *ftnode, + FTNODE_DISK_DATA* ndd, + BLOCKNUM blocknum, + uint32_t fullhash, + ftnode_fetch_extra *bfe, + STAT64INFO info, + struct rbuf *rb, + int fd + ) +// Effect: deserializes a ftnode that is in rb (with pointer of rb just past the magic) into a FTNODE. 
+{ + int r = 0; + struct sub_block sb_node_info; + + tokutime_t t0, t1; + tokutime_t decompress_time = 0; + tokutime_t deserialize_time = 0; + + t0 = toku_time_now(); + + FTNODE node = alloc_ftnode_for_deserialize(fullhash, blocknum); + + // now start reading from rbuf + // first thing we do is read the header information + const void *magic; + rbuf_literal_bytes(rb, &magic, 8); + if (memcmp(magic, "tokuleaf", 8)!=0 && + memcmp(magic, "tokunode", 8)!=0) { + r = toku_db_badformat(); + goto cleanup; + } + + node->layout_version_read_from_disk = rbuf_int(rb); + lazy_assert(node->layout_version_read_from_disk >= FT_LAYOUT_MIN_SUPPORTED_VERSION); + + // Check if we are reading in an older node version. + if (node->layout_version_read_from_disk <= FT_LAYOUT_VERSION_14) { + int version = node->layout_version_read_from_disk; + // Perform the upgrade. + r = deserialize_and_upgrade_ftnode(node, ndd, blocknum, bfe, info, fd); + if (r != 0) { + goto cleanup; + } + + if (version <= FT_LAYOUT_VERSION_13) { + // deprecate 'TOKU_DB_VALCMP_BUILTIN'. just remove the flag + node->flags &= ~TOKU_DB_VALCMP_BUILTIN_13; + } + + // If everything is ok, just re-assign the ftnode and retrn. + *ftnode = node; + r = 0; + goto cleanup; + } + + // Upgrade versions after 14 to current. This upgrade is trivial, it + // removes the optimized for upgrade field, which has already been + // removed in the deserialization code (see + // deserialize_ftnode_info()). 
+ node->layout_version = FT_LAYOUT_VERSION; + node->layout_version_original = rbuf_int(rb); + node->build_id = rbuf_int(rb); + node->n_children = rbuf_int(rb); + XMALLOC_N(node->n_children, node->bp); + XMALLOC_N(node->n_children, *ndd); + // read the partition locations + for (int i=0; in_children; i++) { + BP_START(*ndd,i) = rbuf_int(rb); + BP_SIZE (*ndd,i) = rbuf_int(rb); + } + // verify checksum of header stored + uint32_t checksum; + checksum = toku_x1764_memory(rb->buf, rb->ndone); + uint32_t stored_checksum; + stored_checksum = rbuf_int(rb); + if (stored_checksum != checksum) { + dump_bad_block(rb->buf, rb->size); + invariant(stored_checksum == checksum); + } + + // now we read and decompress the pivot and child information + sub_block_init(&sb_node_info); + { + tokutime_t sb_decompress_t0 = toku_time_now(); + r = read_and_decompress_sub_block(rb, &sb_node_info); + tokutime_t sb_decompress_t1 = toku_time_now(); + decompress_time += sb_decompress_t1 - sb_decompress_t0; + } + if (r != 0) { + goto cleanup; + } + + // at this point, sb->uncompressed_ptr stores the serialized node info + r = deserialize_ftnode_info(&sb_node_info, node); + if (r != 0) { + goto cleanup; + } + toku_free(sb_node_info.uncompressed_ptr); + + // now that the node info has been deserialized, we can proceed to deserialize + // the individual sub blocks + + // setup the memory of the partitions + // for partitions being decompressed, create either message buffer or basement node + // for partitions staying compressed, create sub_block + setup_ftnode_partitions(node, bfe, true); + + // This loop is parallelizeable, since we don't have a dependency on the work done so far. 
+ for (int i = 0; i < node->n_children; i++) { + uint32_t curr_offset = BP_START(*ndd,i); + uint32_t curr_size = BP_SIZE(*ndd,i); + // the compressed, serialized partitions start at where rb is currently pointing, + // which would be rb->buf + rb->ndone + // we need to intialize curr_rbuf to point to this place + struct rbuf curr_rbuf = {.buf = NULL, .size = 0, .ndone = 0}; + rbuf_init(&curr_rbuf, rb->buf + curr_offset, curr_size); + + // + // now we are at the point where we have: + // - read the entire compressed node off of disk, + // - decompressed the pivot and offset information, + // - have arrived at the individual partitions. + // + // Based on the information in bfe, we want to decompress a subset of + // of the compressed partitions (also possibly none or possibly all) + // The partitions that we want to decompress and make available + // to the node, we do, the rest we simply copy in compressed + // form into the node, and set the state of the partition to PT_COMPRESSED + // + + struct sub_block curr_sb; + sub_block_init(&curr_sb); + + // curr_rbuf is passed by value to decompress_and_deserialize_worker, so there's no ugly race condition. + // This would be more obvious if curr_rbuf were an array. 
+ + // deserialize_ftnode_info figures out what the state + // should be and sets up the memory so that we are ready to use it + + switch (BP_STATE(node,i)) { + case PT_AVAIL: { + // case where we read and decompress the partition + tokutime_t partition_decompress_time; + r = decompress_and_deserialize_worker(curr_rbuf, curr_sb, node, i, + bfe->ft->cmp, &partition_decompress_time); + decompress_time += partition_decompress_time; + if (r != 0) { + goto cleanup; + } + break; + } + case PT_COMPRESSED: + // case where we leave the partition in the compressed state + r = check_and_copy_compressed_sub_block_worker(curr_rbuf, curr_sb, node, i); + if (r != 0) { + goto cleanup; + } + break; + case PT_INVALID: // this is really bad + case PT_ON_DISK: // it's supposed to be in memory. + abort(); + } + } + *ftnode = node; + r = 0; + +cleanup: + if (r == 0) { + t1 = toku_time_now(); + deserialize_time = (t1 - t0) - decompress_time; + bfe->deserialize_time += deserialize_time; + bfe->decompress_time += decompress_time; + toku_ft_status_update_deserialize_times(node, deserialize_time, decompress_time); + } + if (r != 0) { + // NOTE: Right now, callers higher in the stack will assert on + // failure, so this is OK for production. However, if we + // create tools that use this function to search for errors in + // the FT, then we will leak memory. 
+ if (node) { + toku_free(node); + } + } + return r; +} + +int +toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, ftnode_fetch_extra *bfe) { + int r = 0; + assert(BP_STATE(node,childnum) == PT_ON_DISK); + assert(node->bp[childnum].ptr.tag == BCT_NULL); + + // + // setup the partition + // + setup_available_ftnode_partition(node, childnum); + BP_STATE(node,childnum) = PT_AVAIL; + + // + // read off disk and make available in memory + // + // get the file offset and block size for the block + DISKOFF node_offset, total_node_disk_size; + bfe->ft->blocktable.translate_blocknum_to_offset_size(node->blocknum, &node_offset, &total_node_disk_size); + + uint32_t curr_offset = BP_START(ndd, childnum); + uint32_t curr_size = BP_SIZE (ndd, childnum); + + struct rbuf rb; + rbuf_init(&rb, nullptr, 0); + + uint32_t pad_at_beginning = (node_offset+curr_offset)%512; + uint32_t padded_size = roundup_to_multiple(512, pad_at_beginning + curr_size); + + toku::scoped_malloc_aligned raw_block_buf(padded_size, 512); + uint8_t *raw_block = reinterpret_cast(raw_block_buf.get()); + rbuf_init(&rb, pad_at_beginning+raw_block, curr_size); + tokutime_t t0 = toku_time_now(); + + // read the block + assert(0==((unsigned long long)raw_block)%512); // for O_DIRECT + assert(0==(padded_size)%512); + assert(0==(node_offset+curr_offset-pad_at_beginning)%512); + ssize_t rlen = toku_os_pread(fd, raw_block, padded_size, node_offset+curr_offset-pad_at_beginning); + assert((DISKOFF)rlen >= pad_at_beginning + curr_size); // we read in at least enough to get what we wanted + assert((DISKOFF)rlen <= padded_size); // we didn't read in too much. 
+ + tokutime_t t1 = toku_time_now(); + + // read sub block + struct sub_block curr_sb; + sub_block_init(&curr_sb); + r = read_compressed_sub_block(&rb, &curr_sb); + if (r != 0) { + return r; + } + invariant(curr_sb.compressed_ptr != NULL); + + // decompress + toku::scoped_malloc uncompressed_buf(curr_sb.uncompressed_size); + curr_sb.uncompressed_ptr = uncompressed_buf.get(); + toku_decompress((Bytef *) curr_sb.uncompressed_ptr, curr_sb.uncompressed_size, + (Bytef *) curr_sb.compressed_ptr, curr_sb.compressed_size); + + // deserialize + tokutime_t t2 = toku_time_now(); + + r = deserialize_ftnode_partition(&curr_sb, node, childnum, bfe->ft->cmp); + + tokutime_t t3 = toku_time_now(); + + // capture stats + tokutime_t io_time = t1 - t0; + tokutime_t decompress_time = t2 - t1; + tokutime_t deserialize_time = t3 - t2; + bfe->deserialize_time += deserialize_time; + bfe->decompress_time += decompress_time; + toku_ft_status_update_deserialize_times(node, deserialize_time, decompress_time); + + bfe->bytes_read = rlen; + bfe->io_time = io_time; + + return r; +} + +// Take a ftnode partition that is in the compressed state, and make it avail +int +toku_deserialize_bp_from_compressed(FTNODE node, int childnum, ftnode_fetch_extra *bfe) { + int r = 0; + assert(BP_STATE(node, childnum) == PT_COMPRESSED); + SUB_BLOCK curr_sb = BSB(node, childnum); + + toku::scoped_malloc uncompressed_buf(curr_sb->uncompressed_size); + assert(curr_sb->uncompressed_ptr == NULL); + curr_sb->uncompressed_ptr = uncompressed_buf.get(); + + setup_available_ftnode_partition(node, childnum); + BP_STATE(node,childnum) = PT_AVAIL; + + // decompress the sub_block + tokutime_t t0 = toku_time_now(); + + toku_decompress( + (Bytef *) curr_sb->uncompressed_ptr, + curr_sb->uncompressed_size, + (Bytef *) curr_sb->compressed_ptr, + curr_sb->compressed_size + ); + + tokutime_t t1 = toku_time_now(); + + r = deserialize_ftnode_partition(curr_sb, node, childnum, bfe->ft->cmp); + + tokutime_t t2 = toku_time_now(); + + 
tokutime_t decompress_time = t1 - t0; + tokutime_t deserialize_time = t2 - t1; + bfe->deserialize_time += deserialize_time; + bfe->decompress_time += decompress_time; + toku_ft_status_update_deserialize_times(node, deserialize_time, decompress_time); + + toku_free(curr_sb->compressed_ptr); + toku_free(curr_sb); + return r; +} + +static int +deserialize_ftnode_from_fd(int fd, + BLOCKNUM blocknum, + uint32_t fullhash, + FTNODE *ftnode, + FTNODE_DISK_DATA *ndd, + ftnode_fetch_extra *bfe, + STAT64INFO info) +{ + struct rbuf rb = RBUF_INITIALIZER; + + tokutime_t t0 = toku_time_now(); + read_block_from_fd_into_rbuf(fd, blocknum, bfe->ft, &rb); + tokutime_t t1 = toku_time_now(); + + // Decompress and deserialize the ftnode. Time statistics + // are taken inside this function. + int r = deserialize_ftnode_from_rbuf(ftnode, ndd, blocknum, fullhash, bfe, info, &rb, fd); + if (r != 0) { + dump_bad_block(rb.buf,rb.size); + } + + bfe->bytes_read = rb.size; + bfe->io_time = t1 - t0; + toku_free(rb.buf); + return r; +} + +// Read ftnode from file into struct. Perform version upgrade if necessary. +int +toku_deserialize_ftnode_from (int fd, + BLOCKNUM blocknum, + uint32_t fullhash, + FTNODE *ftnode, + FTNODE_DISK_DATA* ndd, + ftnode_fetch_extra *bfe + ) +// Effect: Read a node in. If possible, read just the header. +{ + int r = 0; + struct rbuf rb = RBUF_INITIALIZER; + + // each function below takes the appropriate io/decompression/deserialize statistics + + if (!bfe->read_all_partitions) { + read_ftnode_header_from_fd_into_rbuf_if_small_enough(fd, blocknum, bfe->ft, &rb, bfe); + r = deserialize_ftnode_header_from_rbuf_if_small_enough(ftnode, ndd, blocknum, fullhash, bfe, &rb, fd); + } else { + // force us to do it the old way + r = -1; + } + if (r != 0) { + // Something went wrong, go back to doing it the old way. 
+ r = deserialize_ftnode_from_fd(fd, blocknum, fullhash, ftnode, ndd, bfe, NULL); + } + + toku_free(rb.buf); + return r; +} + +void +toku_verify_or_set_counts(FTNODE UU(node)) { +} + +int +toku_db_badformat(void) { + return DB_BADFORMAT; +} + +static size_t +serialize_rollback_log_size(ROLLBACK_LOG_NODE log) { + size_t size = node_header_overhead //8 "tokuroll", 4 version, 4 version_original, 4 build_id + +16 //TXNID_PAIR + +8 //sequence + +8 //blocknum + +8 //previous (blocknum) + +8 //resident_bytecount + +8 //memarena size + +log->rollentry_resident_bytecount; + return size; +} + +static void +serialize_rollback_log_node_to_buf(ROLLBACK_LOG_NODE log, char *buf, size_t calculated_size, int UU(n_sub_blocks), struct sub_block UU(sub_block[])) { + struct wbuf wb; + wbuf_init(&wb, buf, calculated_size); + { //Serialize rollback log to local wbuf + wbuf_nocrc_literal_bytes(&wb, "tokuroll", 8); + lazy_assert(log->layout_version == FT_LAYOUT_VERSION); + wbuf_nocrc_int(&wb, log->layout_version); + wbuf_nocrc_int(&wb, log->layout_version_original); + wbuf_nocrc_uint(&wb, BUILD_ID); + wbuf_nocrc_TXNID_PAIR(&wb, log->txnid); + wbuf_nocrc_ulonglong(&wb, log->sequence); + wbuf_nocrc_BLOCKNUM(&wb, log->blocknum); + wbuf_nocrc_BLOCKNUM(&wb, log->previous); + wbuf_nocrc_ulonglong(&wb, log->rollentry_resident_bytecount); + //Write down memarena size needed to restore + wbuf_nocrc_ulonglong(&wb, log->rollentry_arena.total_size_in_use()); + + { + //Store rollback logs + struct roll_entry *item; + size_t done_before = wb.ndone; + for (item = log->newest_logentry; item; item = item->prev) { + toku_logger_rollback_wbuf_nocrc_write(&wb, item); + } + lazy_assert(done_before + log->rollentry_resident_bytecount == wb.ndone); + } + } + lazy_assert(wb.ndone == wb.size); + lazy_assert(calculated_size==wb.ndone); +} + +static void +serialize_uncompressed_block_to_memory(char * uncompressed_buf, + int n_sub_blocks, + struct sub_block sub_block[/*n_sub_blocks*/], + enum toku_compression_method 
method, + /*out*/ size_t *n_bytes_to_write, + /*out*/ char **bytes_to_write) +// Guarantees that the malloc'd BYTES_TO_WRITE is 512-byte aligned (so that O_DIRECT will work) +{ + // allocate space for the compressed uncompressed_buf + size_t compressed_len = get_sum_compressed_size_bound(n_sub_blocks, sub_block, method); + size_t sub_block_header_len = sub_block_header_size(n_sub_blocks); + size_t header_len = node_header_overhead + sub_block_header_len + sizeof (uint32_t); // node + sub_block + checksum + char *XMALLOC_N_ALIGNED(512, roundup_to_multiple(512, header_len + compressed_len), compressed_buf); + + // copy the header + memcpy(compressed_buf, uncompressed_buf, node_header_overhead); + if (0) printf("First 4 bytes before compressing data are %02x%02x%02x%02x\n", + uncompressed_buf[node_header_overhead], uncompressed_buf[node_header_overhead+1], + uncompressed_buf[node_header_overhead+2], uncompressed_buf[node_header_overhead+3]); + + // compress all of the sub blocks + char *uncompressed_ptr = uncompressed_buf + node_header_overhead; + char *compressed_ptr = compressed_buf + header_len; + compressed_len = compress_all_sub_blocks(n_sub_blocks, sub_block, uncompressed_ptr, compressed_ptr, num_cores, ft_pool, method); + + //if (0) printf("Block %" PRId64 " Size before compressing %u, after compression %" PRIu64 "\n", blocknum.b, calculated_size-node_header_overhead, (uint64_t) compressed_len); + + // serialize the sub block header + uint32_t *ptr = (uint32_t *)(compressed_buf + node_header_overhead); + *ptr++ = toku_htod32(n_sub_blocks); + for (int i=0; ilen = calculated_size; + serialized->n_sub_blocks = 0; + // choose sub block parameters + int sub_block_size = 0; + size_t data_size = calculated_size - node_header_overhead; + choose_sub_block_size(data_size, max_sub_blocks, &sub_block_size, &serialized->n_sub_blocks); + lazy_assert(0 < serialized->n_sub_blocks && serialized->n_sub_blocks <= max_sub_blocks); + lazy_assert(sub_block_size > 0); + + // set the 
initial sub block size for all of the sub blocks + for (int i = 0; i < serialized->n_sub_blocks; i++) + sub_block_init(&serialized->sub_block[i]); + set_all_sub_block_sizes(data_size, sub_block_size, serialized->n_sub_blocks, serialized->sub_block); + + // allocate space for the serialized node + XMALLOC_N(calculated_size, serialized->data); + // serialize the node into buf + serialize_rollback_log_node_to_buf(log, serialized->data, calculated_size, serialized->n_sub_blocks, serialized->sub_block); + serialized->blocknum = log->blocknum; +} + +int +toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized, + FT ft, bool for_checkpoint) { + size_t n_to_write; + char *compressed_buf; + struct serialized_rollback_log_node serialized_local; + + if (is_serialized) { + invariant_null(log); + } else { + invariant_null(serialized_log); + serialized_log = &serialized_local; + toku_serialize_rollback_log_to_memory_uncompressed(log, serialized_log); + } + + BLOCKNUM blocknum = serialized_log->blocknum; + invariant(blocknum.b >= 0); + + // Compress and malloc buffer to write + serialize_uncompressed_block_to_memory(serialized_log->data, + serialized_log->n_sub_blocks, + serialized_log->sub_block, + ft->h->compression_method, + &n_to_write, &compressed_buf); + + // Dirties the ft + DISKOFF offset; + ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset, + ft, fd, for_checkpoint, + // We consider rollback log flushing the hottest possible allocation, + // since rollback logs are short-lived compared to FT nodes. + INT_MAX); + + toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); + toku_free(compressed_buf); + if (!is_serialized) { + toku_static_serialized_rollback_log_destroy(&serialized_local); + log->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction. 
+ } + return 0; +} + +static int +deserialize_rollback_log_from_rbuf (BLOCKNUM blocknum, ROLLBACK_LOG_NODE *log_p, struct rbuf *rb) { + ROLLBACK_LOG_NODE MALLOC(result); + int r; + if (result==NULL) { + r=get_error_errno(); + if (0) { died0: toku_free(result); } + return r; + } + + const void *magic; + rbuf_literal_bytes(rb, &magic, 8); + lazy_assert(!memcmp(magic, "tokuroll", 8)); + + result->layout_version = rbuf_int(rb); + lazy_assert((FT_LAYOUT_VERSION_25 <= result->layout_version && result->layout_version <= FT_LAYOUT_VERSION_27) || + (result->layout_version == FT_LAYOUT_VERSION)); + result->layout_version_original = rbuf_int(rb); + result->layout_version_read_from_disk = result->layout_version; + result->build_id = rbuf_int(rb); + result->dirty = false; + //TODO: Maybe add descriptor (or just descriptor version) here eventually? + //TODO: This is hard.. everything is shared in a single dictionary. + rbuf_TXNID_PAIR(rb, &result->txnid); + result->sequence = rbuf_ulonglong(rb); + result->blocknum = rbuf_blocknum(rb); + if (result->blocknum.b != blocknum.b) { + r = toku_db_badformat(); + goto died0; + } + result->previous = rbuf_blocknum(rb); + result->rollentry_resident_bytecount = rbuf_ulonglong(rb); + + size_t arena_initial_size = rbuf_ulonglong(rb); + result->rollentry_arena.create(arena_initial_size); + if (0) { died1: result->rollentry_arena.destroy(); goto died0; } + + //Load rollback entries + lazy_assert(rb->size > 4); + //Start with empty list + result->oldest_logentry = result->newest_logentry = NULL; + while (rb->ndone < rb->size) { + struct roll_entry *item; + uint32_t rollback_fsize = rbuf_int(rb); //Already read 4. 
Rest is 4 smaller + const void *item_vec; + rbuf_literal_bytes(rb, &item_vec, rollback_fsize-4); + unsigned char* item_buf = (unsigned char*)item_vec; + r = toku_parse_rollback(item_buf, rollback_fsize-4, &item, &result->rollentry_arena); + if (r!=0) { + r = toku_db_badformat(); + goto died1; + } + //Add to head of list + if (result->oldest_logentry) { + result->oldest_logentry->prev = item; + result->oldest_logentry = item; + item->prev = NULL; + } + else { + result->oldest_logentry = result->newest_logentry = item; + item->prev = NULL; + } + } + + toku_free(rb->buf); + rb->buf = NULL; + *log_p = result; + return 0; +} + +static int +deserialize_rollback_log_from_rbuf_versioned (uint32_t version, BLOCKNUM blocknum, + ROLLBACK_LOG_NODE *log, + struct rbuf *rb) { + int r = 0; + ROLLBACK_LOG_NODE rollback_log_node = NULL; + invariant((FT_LAYOUT_VERSION_25 <= version && version <= FT_LAYOUT_VERSION_27) || version == FT_LAYOUT_VERSION); + r = deserialize_rollback_log_from_rbuf(blocknum, &rollback_log_node, rb); + if (r==0) { + *log = rollback_log_node; + } + return r; +} + +int +decompress_from_raw_block_into_rbuf(uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) { + int r = 0; + // get the number of compressed sub blocks + int n_sub_blocks; + n_sub_blocks = toku_dtoh32(*(uint32_t*)(&raw_block[node_header_overhead])); + + // verify the number of sub blocks + invariant(0 <= n_sub_blocks); + invariant(n_sub_blocks <= max_sub_blocks); + + { // verify the header checksum + uint32_t header_length = node_header_overhead + sub_block_header_size(n_sub_blocks); + invariant(header_length <= raw_block_size); + uint32_t xsum = toku_x1764_memory(raw_block, header_length); + uint32_t stored_xsum = toku_dtoh32(*(uint32_t *)(raw_block + header_length)); + if (xsum != stored_xsum) { + r = TOKUDB_BAD_CHECKSUM; + } + } + + // deserialize the sub block header + struct sub_block sub_block[n_sub_blocks]; + uint32_t *sub_block_header = (uint32_t *) 
&raw_block[node_header_overhead+4]; + for (int i = 0; i < n_sub_blocks; i++) { + sub_block_init(&sub_block[i]); + sub_block[i].compressed_size = toku_dtoh32(sub_block_header[0]); + sub_block[i].uncompressed_size = toku_dtoh32(sub_block_header[1]); + sub_block[i].xsum = toku_dtoh32(sub_block_header[2]); + sub_block_header += 3; + } + + // This predicate needs to be here and instead of where it is set + // for the compiler. + if (r == TOKUDB_BAD_CHECKSUM) { + goto exit; + } + + // verify sub block sizes + for (int i = 0; i < n_sub_blocks; i++) { + uint32_t compressed_size = sub_block[i].compressed_size; + if (compressed_size<=0 || compressed_size>(1<<30)) { + r = toku_db_badformat(); + goto exit; + } + + uint32_t uncompressed_size = sub_block[i].uncompressed_size; + if (0) printf("Block %" PRId64 " Compressed size = %u, uncompressed size=%u\n", blocknum.b, compressed_size, uncompressed_size); + if (uncompressed_size<=0 || uncompressed_size>(1<<30)) { + r = toku_db_badformat(); + goto exit; + } + } + + // sum up the uncompressed size of the sub blocks + size_t uncompressed_size; + uncompressed_size = get_sum_uncompressed_size(n_sub_blocks, sub_block); + + // allocate the uncompressed buffer + size_t size; + size = node_header_overhead + uncompressed_size; + unsigned char *buf; + XMALLOC_N(size, buf); + rbuf_init(rb, buf, size); + + // copy the uncompressed node header to the uncompressed buffer + memcpy(rb->buf, raw_block, node_header_overhead); + + // point at the start of the compressed data (past the node header, the sub block header, and the header checksum) + unsigned char *compressed_data; + compressed_data = raw_block + node_header_overhead + sub_block_header_size(n_sub_blocks) + sizeof (uint32_t); + + // point at the start of the uncompressed data + unsigned char *uncompressed_data; + uncompressed_data = rb->buf + node_header_overhead; + + // decompress all the compressed sub blocks into the uncompressed buffer + r = decompress_all_sub_blocks(n_sub_blocks, 
sub_block, compressed_data, uncompressed_data, num_cores, ft_pool); + if (r != 0) { + fprintf(stderr, "%s:%d block %" PRId64 " failed %d at %p size %lu\n", __FUNCTION__, __LINE__, blocknum.b, r, raw_block, raw_block_size); + dump_bad_block(raw_block, raw_block_size); + goto exit; + } + + rb->ndone=0; +exit: + return r; +} + +static int decompress_from_raw_block_into_rbuf_versioned(uint32_t version, uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) { + // This function exists solely to accomodate future changes in compression. + int r = 0; + if ((version == FT_LAYOUT_VERSION_13 || version == FT_LAYOUT_VERSION_14) || + (FT_LAYOUT_VERSION_25 <= version && version <= FT_LAYOUT_VERSION_27) || + version == FT_LAYOUT_VERSION) { + r = decompress_from_raw_block_into_rbuf(raw_block, raw_block_size, rb, blocknum); + } else { + abort(); + } + return r; +} + +static int +read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum, + DISKOFF offset, DISKOFF size, + FT ft, + struct rbuf *rb, + /* out */ int *layout_version_p) { + int r = 0; + if (0) printf("Deserializing Block %" PRId64 "\n", blocknum.b); + + DISKOFF size_aligned = roundup_to_multiple(512, size); + uint8_t *XMALLOC_N_ALIGNED(512, size_aligned, raw_block); + { + // read the (partially compressed) block + ssize_t rlen = toku_os_pread(fd, raw_block, size_aligned, offset); + lazy_assert((DISKOFF)rlen >= size); + lazy_assert((DISKOFF)rlen <= size_aligned); + } + // get the layout_version + int layout_version; + { + uint8_t *magic = raw_block + uncompressed_magic_offset; + if (memcmp(magic, "tokuleaf", 8)!=0 && + memcmp(magic, "tokunode", 8)!=0 && + memcmp(magic, "tokuroll", 8)!=0) { + r = toku_db_badformat(); + goto cleanup; + } + uint8_t *version = raw_block + uncompressed_version_offset; + layout_version = toku_dtoh32(*(uint32_t*)version); + if (layout_version < FT_LAYOUT_MIN_SUPPORTED_VERSION || layout_version > FT_LAYOUT_VERSION) { + r = toku_db_badformat(); + goto cleanup; + 
} + } + + r = decompress_from_raw_block_into_rbuf_versioned(layout_version, raw_block, size, rb, blocknum); + if (r != 0) { + // We either failed the checksome, or there is a bad format in + // the buffer. + if (r == TOKUDB_BAD_CHECKSUM) { + fprintf(stderr, + "Checksum failure while reading raw block in file %s.\n", + toku_cachefile_fname_in_env(ft->cf)); + abort(); + } else { + r = toku_db_badformat(); + goto cleanup; + } + } + + *layout_version_p = layout_version; +cleanup: + if (r!=0) { + if (rb->buf) toku_free(rb->buf); + rb->buf = NULL; + } + if (raw_block) { + toku_free(raw_block); + } + return r; +} + +// Read rollback log node from file into struct. +// Perform version upgrade if necessary. +int toku_deserialize_rollback_log_from(int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT ft) { + int layout_version = 0; + int r; + + struct rbuf rb; + rbuf_init(&rb, nullptr, 0); + + // get the file offset and block size for the block + DISKOFF offset, size; + ft->blocktable.translate_blocknum_to_offset_size(blocknum, &offset, &size); + + // if the size is 0, then the blocknum is unused + if (size == 0) { + // blocknum is unused, just create an empty one and get out + ROLLBACK_LOG_NODE XMALLOC(log); + rollback_empty_log_init(log); + log->blocknum.b = blocknum.b; + r = 0; + *logp = log; + goto cleanup; + } + + r = read_and_decompress_block_from_fd_into_rbuf(fd, blocknum, offset, size, ft, &rb, &layout_version); + if (r!=0) goto cleanup; + + { + uint8_t *magic = rb.buf + uncompressed_magic_offset; + if (memcmp(magic, "tokuroll", 8)!=0) { + r = toku_db_badformat(); + goto cleanup; + } + } + + r = deserialize_rollback_log_from_rbuf_versioned(layout_version, blocknum, logp, &rb); + +cleanup: + if (rb.buf) { + toku_free(rb.buf); + } + return r; +} + +int +toku_upgrade_subtree_estimates_to_stat64info(int fd, FT ft) +{ + int r = 0; + // 15 was the last version with subtree estimates + invariant(ft->layout_version_read_from_disk <= FT_LAYOUT_VERSION_15); + + FTNODE 
unused_node = NULL; + FTNODE_DISK_DATA unused_ndd = NULL; + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft); + r = deserialize_ftnode_from_fd(fd, ft->h->root_blocknum, 0, &unused_node, &unused_ndd, + &bfe, &ft->h->on_disk_stats); + ft->in_memory_stats = ft->h->on_disk_stats; + + if (unused_node) { + toku_ftnode_free(&unused_node); + } + if (unused_ndd) { + toku_free(unused_ndd); + } + return r; +} + +int +toku_upgrade_msn_from_root_to_header(int fd, FT ft) +{ + int r; + // 21 was the first version with max_msn_in_ft in the header + invariant(ft->layout_version_read_from_disk <= FT_LAYOUT_VERSION_20); + + FTNODE node; + FTNODE_DISK_DATA ndd; + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft); + r = deserialize_ftnode_from_fd(fd, ft->h->root_blocknum, 0, &node, &ndd, &bfe, nullptr); + if (r != 0) { + goto exit; + } + + ft->h->max_msn_in_ft = node->max_msn_applied_to_node_on_disk; + toku_ftnode_free(&node); + toku_free(ndd); + exit: + return r; +} + +#undef UPGRADE_STATUS_VALUE diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/ft_node-serialize.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/ft_node-serialize.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/ft_node-serialize.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/ft_node-serialize.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,142 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT 
MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. 
+ + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +#pragma once + +#include "ft/ft.h" +#include "ft/node.h" +#include "ft/serialize/sub_block.h" +#include "ft/serialize/rbuf.h" +#include "ft/serialize/wbuf.h" +#include "ft/serialize/block_table.h" + +unsigned int toku_serialize_ftnode_size(FTNODE node); +int toku_serialize_ftnode_to_memory(FTNODE node, FTNODE_DISK_DATA *ndd, + unsigned int basementnodesize, + enum toku_compression_method compression_method, + bool do_rebalancing, bool in_parallel, + size_t *n_bytes_to_write, size_t *n_uncompressed_bytes, + char **bytes_to_write); +int toku_serialize_ftnode_to(int fd, BLOCKNUM, FTNODE node, FTNODE_DISK_DATA *ndd, bool do_rebalancing, FT ft, bool for_checkpoint); +int toku_serialize_rollback_log_to(int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized, + FT ft, bool for_checkpoint); +void toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized); + +int toku_deserialize_rollback_log_from(int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT ft); +int toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, ftnode_fetch_extra *bfe); +int toku_deserialize_bp_from_compressed(FTNODE node, int childnum, ftnode_fetch_extra *bfe); +int toku_deserialize_ftnode_from(int fd, BLOCKNUM off, uint32_t fullhash, FTNODE *node, FTNODE_DISK_DATA *ndd, ftnode_fetch_extra *bfe); + +void toku_serialize_set_parallel(bool); + +// used by nonleaf node partial eviction +void toku_create_compressed_partition_from_available(FTNODE node, int childnum, + enum toku_compression_method compression_method, SUB_BLOCK sb); + +// For verifying old, non-upgraded nodes (versions 13 and 14). 
+int decompress_from_raw_block_into_rbuf(uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum); + +// used by verify +int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version); +void read_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum, FT ft, struct rbuf *rb); +int read_compressed_sub_block(struct rbuf *rb, struct sub_block *sb); +int verify_ftnode_sub_block(struct sub_block *sb); +void just_decompress_sub_block(struct sub_block *sb); + +// used by ft-node-deserialize.cc +void initialize_ftnode(FTNODE node, BLOCKNUM blocknum); +int read_and_check_magic(struct rbuf *rb); +int read_and_check_version(FTNODE node, struct rbuf *rb); +void read_node_info(FTNODE node, struct rbuf *rb, int version); +void allocate_and_read_partition_offsets(FTNODE node, struct rbuf *rb, FTNODE_DISK_DATA *ndd); +int check_node_info_checksum(struct rbuf *rb); +void read_legacy_node_info(FTNODE node, struct rbuf *rb, int version); +int check_legacy_end_checksum(struct rbuf *rb); + +// exported so the loader can dump bad blocks +void dump_bad_block(unsigned char *vp, uint64_t size); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/ft-serialize.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/ft-serialize.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/ft-serialize.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/ft-serialize.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,858 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS 
NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. 
+ + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include + +#include "ft/ft.h" +#include "ft/ft-internal.h" +#include "ft/msg.h" +#include "ft/serialize/block_allocator.h" +#include "ft/serialize/block_table.h" +#include "ft/serialize/compress.h" +#include "ft/serialize/ft-serialize.h" + +// not version-sensitive because we only serialize a descriptor using the current layout_version +uint32_t +toku_serialize_descriptor_size(DESCRIPTOR desc) { + //Checksum NOT included in this. Checksum only exists in header's version. + uint32_t size = 4; // four bytes for size of descriptor + size += desc->dbt.size; + return size; +} + +static uint32_t +deserialize_descriptor_size(DESCRIPTOR desc, int layout_version) { + //Checksum NOT included in this. Checksum only exists in header's version. + uint32_t size = 4; // four bytes for size of descriptor + if (layout_version == FT_LAYOUT_VERSION_13) + size += 4; // for version 13, include four bytes of "version" + size += desc->dbt.size; + return size; +} + +void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, DESCRIPTOR desc) { + wbuf_bytes(wb, desc->dbt.data, desc->dbt.size); +} + +//Descriptor is written to disk during toku_ft_handle_open iff we have a new (or changed) +//descriptor. +//Descriptors are NOT written during the header checkpoint process. 
+void +toku_serialize_descriptor_contents_to_fd(int fd, DESCRIPTOR desc, DISKOFF offset) { + // make the checksum + int64_t size = toku_serialize_descriptor_size(desc)+4; //4 for checksum + int64_t size_aligned = roundup_to_multiple(512, size); + struct wbuf w; + char *XMALLOC_N_ALIGNED(512, size_aligned, aligned_buf); + for (int64_t i=size; idbt, data, size); +} + +static int +deserialize_descriptor_from(int fd, block_table *bt, DESCRIPTOR desc, int layout_version) { + int r = 0; + DISKOFF offset; + DISKOFF size; + unsigned char *dbuf = nullptr; + bt->get_descriptor_offset_size(&offset, &size); + memset(desc, 0, sizeof(*desc)); + if (size > 0) { + lazy_assert(size>=4); //4 for checksum + { + ssize_t size_to_malloc = roundup_to_multiple(512, size); + XMALLOC_N_ALIGNED(512, size_to_malloc, dbuf); + { + + ssize_t sz_read = toku_os_pread(fd, dbuf, size_to_malloc, offset); + lazy_assert(sz_read==size_to_malloc); + } + { + // check the checksum + uint32_t x1764 = toku_x1764_memory(dbuf, size-4); + //printf("%s:%d read from %ld (x1764 offset=%ld) size=%ld\n", __FILE__, __LINE__, block_translation_address_on_disk, offset, block_translation_size_on_disk); + uint32_t stored_x1764 = toku_dtoh32(*(int*)(dbuf + size-4)); + if (x1764 != stored_x1764) { + fprintf(stderr, "Descriptor checksum failure: calc=0x%08x read=0x%08x\n", x1764, stored_x1764); + r = TOKUDB_BAD_CHECKSUM; + toku_free(dbuf); + goto exit; + } + } + + struct rbuf rb = { .buf = dbuf, .size = (unsigned int) size, .ndone = 0 }; + deserialize_descriptor_from_rbuf(&rb, desc, layout_version); + lazy_assert(deserialize_descriptor_size(desc, layout_version) + 4 == size); + toku_free(dbuf); + } + } +exit: + return r; +} + +int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) +// Effect: Deserialize the ft header. +// We deserialize ft_header only once and then share everything with all the FTs. 
+{ + int r; + FT ft = NULL; + paranoid_invariant(version >= FT_LAYOUT_MIN_SUPPORTED_VERSION); + paranoid_invariant(version <= FT_LAYOUT_VERSION); + // We already know: + // we have an rbuf representing the header. + // The checksum has been validated + + //Verification of initial elements. + //Check magic number + const void *magic; + rbuf_literal_bytes(rb, &magic, 8); + lazy_assert(memcmp(magic,"tokudata",8)==0); + + XCALLOC(ft); + ft->checkpoint_header = NULL; + toku_list_init(&ft->live_ft_handles); + + //version MUST be in network order on disk regardless of disk order + ft->layout_version_read_from_disk = rbuf_network_int(rb); + invariant(ft->layout_version_read_from_disk >= FT_LAYOUT_MIN_SUPPORTED_VERSION); + invariant(ft->layout_version_read_from_disk <= FT_LAYOUT_VERSION); + + //build_id MUST be in network order on disk regardless of disk order + uint32_t build_id; + build_id = rbuf_network_int(rb); + + //Size MUST be in network order regardless of disk order. + uint32_t size; + size = rbuf_network_int(rb); + lazy_assert(size == rb->size); + + const void *tmp_byte_order_check; + lazy_assert((sizeof tmp_byte_order_check) >= 8); + rbuf_literal_bytes(rb, &tmp_byte_order_check, 8); //Must not translate byte order + int64_t byte_order_stored; + byte_order_stored = *(int64_t*)tmp_byte_order_check; + lazy_assert(byte_order_stored == toku_byte_order_host); + + uint64_t checkpoint_count; + checkpoint_count = rbuf_ulonglong(rb); + LSN checkpoint_lsn; + checkpoint_lsn = rbuf_LSN(rb); + unsigned nodesize; + nodesize = rbuf_int(rb); + DISKOFF translation_address_on_disk; + translation_address_on_disk = rbuf_DISKOFF(rb); + DISKOFF translation_size_on_disk; + translation_size_on_disk = rbuf_DISKOFF(rb); + lazy_assert(translation_address_on_disk > 0); + lazy_assert(translation_size_on_disk > 0); + + // initialize the tree lock + toku_ft_init_reflock(ft); + + //Load translation table + { + size_t size_to_read = roundup_to_multiple(512, translation_size_on_disk); + unsigned 
char *XMALLOC_N_ALIGNED(512, size_to_read, tbuf); + { + // This cast is messed up in 32-bits if the block translation + // table is ever more than 4GB. But in that case, the + // translation table itself won't fit in main memory. + ssize_t readsz = toku_os_pread(fd, tbuf, size_to_read, + translation_address_on_disk); + assert(readsz >= translation_size_on_disk); + assert(readsz <= (ssize_t)size_to_read); + } + // Create table and read in data. + r = ft->blocktable.create_from_buffer(fd, + translation_address_on_disk, + translation_size_on_disk, + tbuf); + toku_free(tbuf); + if (r != 0) { + goto exit; + } + } + + BLOCKNUM root_blocknum; + root_blocknum = rbuf_blocknum(rb); + unsigned flags; + flags = rbuf_int(rb); + if (ft->layout_version_read_from_disk <= FT_LAYOUT_VERSION_13) { + // deprecate 'TOKU_DB_VALCMP_BUILTIN'. just remove the flag + flags &= ~TOKU_DB_VALCMP_BUILTIN_13; + } + int layout_version_original; + layout_version_original = rbuf_int(rb); + uint32_t build_id_original; + build_id_original = rbuf_int(rb); + uint64_t time_of_creation; + time_of_creation = rbuf_ulonglong(rb); + uint64_t time_of_last_modification; + time_of_last_modification = rbuf_ulonglong(rb); + + if (ft->layout_version_read_from_disk <= FT_LAYOUT_VERSION_18) { + // 17 was the last version with these fields, we no longer store + // them, so read and discard them + (void) rbuf_ulonglong(rb); // num_blocks_to_upgrade_13 + if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_15) { + (void) rbuf_ulonglong(rb); // num_blocks_to_upgrade_14 + } + } + + // fake creation during the last checkpoint + TXNID root_xid_that_created; + root_xid_that_created = checkpoint_lsn.lsn; + if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_14) { + rbuf_TXNID(rb, &root_xid_that_created); + } + + // TODO(leif): get this to default to what's specified, not the + // hard-coded default + unsigned basementnodesize; + basementnodesize = FT_DEFAULT_BASEMENT_NODE_SIZE; + uint64_t 
time_of_last_verification; + time_of_last_verification = 0; + if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_15) { + basementnodesize = rbuf_int(rb); + time_of_last_verification = rbuf_ulonglong(rb); + } + + STAT64INFO_S on_disk_stats; + on_disk_stats = ZEROSTATS; + uint64_t time_of_last_optimize_begin; + time_of_last_optimize_begin = 0; + uint64_t time_of_last_optimize_end; + time_of_last_optimize_end = 0; + uint32_t count_of_optimize_in_progress; + count_of_optimize_in_progress = 0; + MSN msn_at_start_of_last_completed_optimize; + msn_at_start_of_last_completed_optimize = ZERO_MSN; + if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_18) { + on_disk_stats.numrows = rbuf_ulonglong(rb); + on_disk_stats.numbytes = rbuf_ulonglong(rb); + ft->in_memory_stats = on_disk_stats; + time_of_last_optimize_begin = rbuf_ulonglong(rb); + time_of_last_optimize_end = rbuf_ulonglong(rb); + count_of_optimize_in_progress = rbuf_int(rb); + msn_at_start_of_last_completed_optimize = rbuf_MSN(rb); + } + + enum toku_compression_method compression_method; + MSN highest_unused_msn_for_upgrade; + highest_unused_msn_for_upgrade.msn = (MIN_MSN.msn - 1); + if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_19) { + unsigned char method = rbuf_char(rb); + compression_method = (enum toku_compression_method) method; + highest_unused_msn_for_upgrade = rbuf_MSN(rb); + } else { + // we hard coded zlib until 5.2, then quicklz in 5.2 + if (ft->layout_version_read_from_disk < FT_LAYOUT_VERSION_18) { + compression_method = TOKU_ZLIB_METHOD; + } else { + compression_method = TOKU_QUICKLZ_METHOD; + } + } + + MSN max_msn_in_ft; + max_msn_in_ft = ZERO_MSN; // We'll upgrade it from the root node later if necessary + if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_21) { + max_msn_in_ft = rbuf_MSN(rb); + } + + (void) rbuf_int(rb); //Read in checksum and ignore (already verified). 
+ if (rb->ndone != rb->size) { + fprintf(stderr, "Header size did not match contents.\n"); + r = EINVAL; + goto exit; + } + + { + struct ft_header h = { + .type = FT_CURRENT, + .dirty = 0, + .checkpoint_count = checkpoint_count, + .checkpoint_lsn = checkpoint_lsn, + .layout_version = FT_LAYOUT_VERSION, + .layout_version_original = layout_version_original, + .build_id = build_id, + .build_id_original = build_id_original, + .time_of_creation = time_of_creation, + .root_xid_that_created = root_xid_that_created, + .time_of_last_modification = time_of_last_modification, + .time_of_last_verification = time_of_last_verification, + .root_blocknum = root_blocknum, + .flags = flags, + .nodesize = nodesize, + .basementnodesize = basementnodesize, + .compression_method = compression_method, + .fanout = FT_DEFAULT_FANOUT, // fanout is not serialized, must be set at startup + .highest_unused_msn_for_upgrade = highest_unused_msn_for_upgrade, + .max_msn_in_ft = max_msn_in_ft, + .time_of_last_optimize_begin = time_of_last_optimize_begin, + .time_of_last_optimize_end = time_of_last_optimize_end, + .count_of_optimize_in_progress = count_of_optimize_in_progress, + .count_of_optimize_in_progress_read_from_disk = count_of_optimize_in_progress, + .msn_at_start_of_last_completed_optimize = msn_at_start_of_last_completed_optimize, + .on_disk_stats = on_disk_stats + }; + XMEMDUP(ft->h, &h); + } + + if (ft->layout_version_read_from_disk < FT_LAYOUT_VERSION_18) { + // This needs ft->h to be non-null, so we have to do it after we + // read everything else. 
+ r = toku_upgrade_subtree_estimates_to_stat64info(fd, ft); + if (r != 0) { + goto exit; + } + } + if (ft->layout_version_read_from_disk < FT_LAYOUT_VERSION_21) { + r = toku_upgrade_msn_from_root_to_header(fd, ft); + if (r != 0) { + goto exit; + } + } + + invariant((uint32_t) ft->layout_version_read_from_disk == version); + r = deserialize_descriptor_from(fd, &ft->blocktable, &ft->descriptor, version); + if (r != 0) { + goto exit; + } + + // initialize for svn #4541 + toku_clone_dbt(&ft->cmp_descriptor.dbt, ft->descriptor.dbt); + + // Version 13 descriptors had an extra 4 bytes that we don't read + // anymore. Since the header is going to think it's the current + // version if it gets written out, we need to write the descriptor in + // the new format (without those bytes) before that happens. + if (version <= FT_LAYOUT_VERSION_13) { + toku_ft_update_descriptor_with_fd(ft, &ft->cmp_descriptor, fd); + } + r = 0; +exit: + if (r != 0 && ft != NULL) { + toku_free(ft); + ft = NULL; + } + *ftp = ft; + return r; +} + +static size_t +serialize_ft_min_size (uint32_t version) { + size_t size = 0; + + switch(version) { + case FT_LAYOUT_VERSION_27: + case FT_LAYOUT_VERSION_26: + case FT_LAYOUT_VERSION_25: + case FT_LAYOUT_VERSION_24: + case FT_LAYOUT_VERSION_23: + case FT_LAYOUT_VERSION_22: + case FT_LAYOUT_VERSION_21: + size += sizeof(MSN); // max_msn_in_ft + case FT_LAYOUT_VERSION_20: + case FT_LAYOUT_VERSION_19: + size += 1; // compression method + size += sizeof(MSN); // highest_unused_msn_for_upgrade + case FT_LAYOUT_VERSION_18: + size += sizeof(uint64_t); // time_of_last_optimize_begin + size += sizeof(uint64_t); // time_of_last_optimize_end + size += sizeof(uint32_t); // count_of_optimize_in_progress + size += sizeof(MSN); // msn_at_start_of_last_completed_optimize + size -= 8; // removed num_blocks_to_upgrade_14 + size -= 8; // removed num_blocks_to_upgrade_13 + case FT_LAYOUT_VERSION_17: + size += 16; + invariant(sizeof(STAT64INFO_S) == 16); + case 
FT_LAYOUT_VERSION_16: + case FT_LAYOUT_VERSION_15: + size += 4; // basement node size + size += 8; // num_blocks_to_upgrade_14 (previously num_blocks_to_upgrade, now one int each for upgrade from 13, 14 + size += 8; // time of last verification + case FT_LAYOUT_VERSION_14: + size += 8; //TXNID that created + case FT_LAYOUT_VERSION_13: + size += ( 4 // build_id + +4 // build_id_original + +8 // time_of_creation + +8 // time_of_last_modification + ); + // fall through + case FT_LAYOUT_VERSION_12: + size += (+8 // "tokudata" + +4 // version + +4 // original_version + +4 // size + +8 // byte order verification + +8 // checkpoint_count + +8 // checkpoint_lsn + +4 // tree's nodesize + +8 // translation_size_on_disk + +8 // translation_address_on_disk + +4 // checksum + +8 // Number of blocks in old version. + +8 // diskoff + +4 // flags + ); + break; + default: + abort(); + } + + lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE); + return size; +} + +int deserialize_ft_from_fd_into_rbuf(int fd, + toku_off_t offset_of_header, + struct rbuf *rb, + uint64_t *checkpoint_count, + LSN *checkpoint_lsn, + uint32_t * version_p) +// Effect: Read and parse the header of a fractal tree +// +// Simply reading the raw bytes of the header into an rbuf is insensitive +// to disk format version. If that ever changes, then modify this. 
+// +// TOKUDB_DICTIONARY_NO_HEADER means we can overwrite everything in the +// file AND the header is useless +{ + int r = 0; + const int64_t prefix_size = 8 + // magic ("tokudata") + 4 + // version + 4 + // build_id + 4; // size + const int64_t read_size = roundup_to_multiple(512, prefix_size); + unsigned char *XMALLOC_N_ALIGNED(512, read_size, prefix); + rb->buf = NULL; + int64_t n = toku_os_pread(fd, prefix, read_size, offset_of_header); + if (n != read_size) { + if (n==0) { + r = TOKUDB_DICTIONARY_NO_HEADER; + } else if (n<0) { + r = get_error_errno(); + } else { + r = EINVAL; + } + toku_free(prefix); + goto exit; + } + + rbuf_init(rb, prefix, prefix_size); + + //Check magic number + const void *magic; + rbuf_literal_bytes(rb, &magic, 8); + if (memcmp(magic,"tokudata",8)!=0) { + if ((*(uint64_t*)magic) == 0) { + r = TOKUDB_DICTIONARY_NO_HEADER; + } else { + r = EINVAL; //Not a tokudb file! Do not use. + } + goto exit; + } + + //Version MUST be in network order regardless of disk order. + uint32_t version; + version = rbuf_network_int(rb); + *version_p = version; + if (version < FT_LAYOUT_MIN_SUPPORTED_VERSION) { + r = TOKUDB_DICTIONARY_TOO_OLD; //Cannot use + goto exit; + } else if (version > FT_LAYOUT_VERSION) { + r = TOKUDB_DICTIONARY_TOO_NEW; //Cannot use + goto exit; + } + + //build_id MUST be in network order regardless of disk order. + uint32_t build_id __attribute__((__unused__)); + build_id = rbuf_network_int(rb); + int64_t min_header_size; + min_header_size = serialize_ft_min_size(version); + + //Size MUST be in network order regardless of disk order. + uint32_t size; + size = rbuf_network_int(rb); + //If too big, it is corrupt. We would probably notice during checksum + //but may have to do a multi-gigabyte malloc+read to find out. + //If its too small reading rbuf would crash, so verify. 
+ if (size > block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE || size < min_header_size) { + r = TOKUDB_DICTIONARY_NO_HEADER; + goto exit; + } + + lazy_assert(rb->ndone==prefix_size); + rb->size = size; + { + toku_free(rb->buf); + uint32_t size_to_read = roundup_to_multiple(512, size); + XMALLOC_N_ALIGNED(512, size_to_read, rb->buf); + + assert(offset_of_header%512==0); + n = toku_os_pread(fd, rb->buf, size_to_read, offset_of_header); + if (n != size_to_read) { + if (n < 0) { + r = get_error_errno(); + } else { + r = EINVAL; //Header might be useless (wrong size) or could be a disk read error. + } + goto exit; + } + } + //It's version 14 or later. Magic looks OK. + //We have an rbuf that represents the header. + //Size is within acceptable bounds. + + //Verify checksum (FT_LAYOUT_VERSION_13 or later, when checksum function changed) + uint32_t calculated_x1764; + calculated_x1764 = toku_x1764_memory(rb->buf, rb->size-4); + uint32_t stored_x1764; + stored_x1764 = toku_dtoh32(*(int*)(rb->buf+rb->size-4)); + if (calculated_x1764 != stored_x1764) { + r = TOKUDB_BAD_CHECKSUM; //Header useless + fprintf(stderr, "Header checksum failure: calc=0x%08x read=0x%08x\n", calculated_x1764, stored_x1764); + goto exit; + } + + //Verify byte order + const void *tmp_byte_order_check; + lazy_assert((sizeof toku_byte_order_host) == 8); + rbuf_literal_bytes(rb, &tmp_byte_order_check, 8); //Must not translate byte order + int64_t byte_order_stored; + byte_order_stored = *(int64_t*)tmp_byte_order_check; + if (byte_order_stored != toku_byte_order_host) { + r = TOKUDB_DICTIONARY_NO_HEADER; //Cannot use dictionary + goto exit; + } + + //Load checkpoint count + *checkpoint_count = rbuf_ulonglong(rb); + *checkpoint_lsn = rbuf_LSN(rb); + //Restart at beginning during regular deserialization + rb->ndone = 0; + +exit: + if (r != 0 && rb->buf != NULL) { + toku_free(rb->buf); + rb->buf = NULL; + } + return r; +} + +// Read ft from file into struct. Read both headers and use one. 
+// We want the latest acceptable header whose checkpoint_lsn is no later +// than max_acceptable_lsn. +int +toku_deserialize_ft_from(int fd, + LSN max_acceptable_lsn, + FT *ft) +{ + struct rbuf rb_0; + struct rbuf rb_1; + uint64_t checkpoint_count_0 = 0; + uint64_t checkpoint_count_1 = 0; + LSN checkpoint_lsn_0; + LSN checkpoint_lsn_1; + uint32_t version_0 = 0, version_1 = 0, version = 0; + bool h0_acceptable = false; + bool h1_acceptable = false; + struct rbuf *rb = NULL; + int r0, r1, r; + + toku_off_t header_0_off = 0; + r0 = deserialize_ft_from_fd_into_rbuf(fd, header_0_off, &rb_0, &checkpoint_count_0, &checkpoint_lsn_0, &version_0); + if (r0 == 0 && checkpoint_lsn_0.lsn <= max_acceptable_lsn.lsn) { + h0_acceptable = true; + } + + toku_off_t header_1_off = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; + r1 = deserialize_ft_from_fd_into_rbuf(fd, header_1_off, &rb_1, &checkpoint_count_1, &checkpoint_lsn_1, &version_1); + if (r1 == 0 && checkpoint_lsn_1.lsn <= max_acceptable_lsn.lsn) { + h1_acceptable = true; + } + + // if either header is too new, the dictionary is unreadable + if (r0 == TOKUDB_DICTIONARY_TOO_NEW || r1 == TOKUDB_DICTIONARY_TOO_NEW || + !(h0_acceptable || h1_acceptable)) { + // We were unable to read either header or at least one is too + // new. Certain errors are higher priority than others. Order of + // these if/else if is important. + if (r0 == TOKUDB_DICTIONARY_TOO_NEW || r1 == TOKUDB_DICTIONARY_TOO_NEW) { + r = TOKUDB_DICTIONARY_TOO_NEW; + } else if (r0 == TOKUDB_DICTIONARY_TOO_OLD || r1 == TOKUDB_DICTIONARY_TOO_OLD) { + r = TOKUDB_DICTIONARY_TOO_OLD; + } else if (r0 == TOKUDB_BAD_CHECKSUM && r1 == TOKUDB_BAD_CHECKSUM) { + fprintf(stderr, "Both header checksums failed.\n"); + r = TOKUDB_BAD_CHECKSUM; + } else if (r0 == TOKUDB_DICTIONARY_NO_HEADER || r1 == TOKUDB_DICTIONARY_NO_HEADER) { + r = TOKUDB_DICTIONARY_NO_HEADER; + } else { + r = r0 ? 
r0 : r1; //Arbitrarily report the error from the + //first header, unless it's readable + } + + // it should not be possible for both headers to be later than the max_acceptable_lsn + invariant(!((r0==0 && checkpoint_lsn_0.lsn > max_acceptable_lsn.lsn) && + (r1==0 && checkpoint_lsn_1.lsn > max_acceptable_lsn.lsn))); + invariant(r!=0); + goto exit; + } + + if (h0_acceptable && h1_acceptable) { + if (checkpoint_count_0 > checkpoint_count_1) { + invariant(checkpoint_count_0 == checkpoint_count_1 + 1); + invariant(version_0 >= version_1); + rb = &rb_0; + version = version_0; + } + else { + invariant(checkpoint_count_1 == checkpoint_count_0 + 1); + invariant(version_1 >= version_0); + rb = &rb_1; + version = version_1; + } + } else if (h0_acceptable) { + if (r1 == TOKUDB_BAD_CHECKSUM) { + // print something reassuring + fprintf(stderr, "Header 2 checksum failed, but header 1 ok. Proceeding.\n"); + } + rb = &rb_0; + version = version_0; + } else if (h1_acceptable) { + if (r0 == TOKUDB_BAD_CHECKSUM) { + // print something reassuring + fprintf(stderr, "Header 1 checksum failed, but header 2 ok. Proceeding.\n"); + } + rb = &rb_1; + version = version_1; + } + + paranoid_invariant(rb); + r = deserialize_ft_versioned(fd, rb, ft, version); + +exit: + if (rb_0.buf) { + toku_free(rb_0.buf); + } + if (rb_1.buf) { + toku_free(rb_1.buf); + } + return r; +} + + +size_t toku_serialize_ft_size (FT_HEADER h) { + size_t size = serialize_ft_min_size(h->layout_version); + //There is no dynamic data. 
+ lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE); + return size; +} + + +void toku_serialize_ft_to_wbuf ( + struct wbuf *wbuf, + FT_HEADER h, + DISKOFF translation_location_on_disk, + DISKOFF translation_size_on_disk + ) +{ + wbuf_literal_bytes(wbuf, "tokudata", 8); + wbuf_network_int (wbuf, h->layout_version); //MUST be in network order regardless of disk order + wbuf_network_int (wbuf, BUILD_ID); //MUST be in network order regardless of disk order + wbuf_network_int (wbuf, wbuf->size); //MUST be in network order regardless of disk order + wbuf_literal_bytes(wbuf, &toku_byte_order_host, 8); //Must not translate byte order + wbuf_ulonglong(wbuf, h->checkpoint_count); + wbuf_LSN (wbuf, h->checkpoint_lsn); + wbuf_int (wbuf, h->nodesize); + + wbuf_DISKOFF(wbuf, translation_location_on_disk); + wbuf_DISKOFF(wbuf, translation_size_on_disk); + wbuf_BLOCKNUM(wbuf, h->root_blocknum); + wbuf_int(wbuf, h->flags); + wbuf_int(wbuf, h->layout_version_original); + wbuf_int(wbuf, h->build_id_original); + wbuf_ulonglong(wbuf, h->time_of_creation); + wbuf_ulonglong(wbuf, h->time_of_last_modification); + wbuf_TXNID(wbuf, h->root_xid_that_created); + wbuf_int(wbuf, h->basementnodesize); + wbuf_ulonglong(wbuf, h->time_of_last_verification); + wbuf_ulonglong(wbuf, h->on_disk_stats.numrows); + wbuf_ulonglong(wbuf, h->on_disk_stats.numbytes); + wbuf_ulonglong(wbuf, h->time_of_last_optimize_begin); + wbuf_ulonglong(wbuf, h->time_of_last_optimize_end); + wbuf_int(wbuf, h->count_of_optimize_in_progress); + wbuf_MSN(wbuf, h->msn_at_start_of_last_completed_optimize); + wbuf_char(wbuf, (unsigned char) h->compression_method); + wbuf_MSN(wbuf, h->highest_unused_msn_for_upgrade); + wbuf_MSN(wbuf, h->max_msn_in_ft); + uint32_t checksum = toku_x1764_finish(&wbuf->checksum); + wbuf_int(wbuf, checksum); + lazy_assert(wbuf->ndone == wbuf->size); +} + +void toku_serialize_ft_to(int fd, FT_HEADER h, block_table *bt, CACHEFILE cf) { + lazy_assert(h->type==FT_CHECKPOINT_INPROGRESS); 
+ struct wbuf w_translation; + int64_t size_translation; + int64_t address_translation; + + // Must serialize translation first, to get address,size for header. + bt->serialize_translation_to_wbuf(fd, &w_translation, + &address_translation, + &size_translation); + assert(size_translation == w_translation.ndone); + + // the number of bytes available in the buffer is 0 mod 512, and those last bytes are all initialized. + assert(w_translation.size % 512 == 0); + + struct wbuf w_main; + size_t size_main = toku_serialize_ft_size(h); + size_t size_main_aligned = roundup_to_multiple(512, size_main); + assert(size_main_alignedcheckpoint_count & 0x1) ? 0 : block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; + toku_os_full_pwrite(fd, w_main.buf, size_main_aligned, main_offset); + toku_free(w_main.buf); + toku_free(w_translation.buf); +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/ft-serialize.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/ft-serialize.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/ft-serialize.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/ft-serialize.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,112 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +#pragma once + +#include "ft/ft.h" +#include "ft/serialize/block_table.h" + +size_t toku_serialize_ft_size(struct ft_header *h); +void toku_serialize_ft_to(int fd, struct ft_header *h, block_table *bt, CACHEFILE cf); +void toku_serialize_ft_to_wbuf(struct wbuf *wbuf, struct ft_header *h, DISKOFF translation_location_on_disk, DISKOFF translation_size_on_disk); +void toku_serialize_descriptor_contents_to_fd(int fd, DESCRIPTOR desc, DISKOFF offset); +void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, DESCRIPTOR desc); + +int toku_deserialize_ft_from(int fd, LSN max_acceptable_lsn, FT *ft); + +// TODO rename +int deserialize_ft_from_fd_into_rbuf(int fd, + toku_off_t offset_of_header, + struct rbuf *rb, + uint64_t *checkpoint_count, + LSN *checkpoint_lsn, + uint32_t *version_p); + +// used by verify +// TODO rename +int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/quicklz.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/quicklz.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/quicklz.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/quicklz.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,939 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +// Fast data compression library +// Copyright (C) 2006-2011 Lasse Mikkel Reinhold +// lar@quicklz.com +// +// QuickLZ can be used for free under the GPL 1, 2 or 3 license (where anything +// released into public must be open source) or under a commercial license if such +// has been acquired (see http://www.quicklz.com/order.html). The commercial license +// does not cover derived or ported versions created by third parties under GPL. 
+ +// 1.5.0 final + +#include "quicklz.h" + +#if QLZ_VERSION_MAJOR != 1 || QLZ_VERSION_MINOR != 5 || QLZ_VERSION_REVISION != 0 + #error quicklz.c and quicklz.h have different versions +#endif + +#if (defined(__X86__) || defined(__i386__) || defined(i386) || defined(_M_IX86) || defined(__386__) || defined(__x86_64__) || defined(_M_X64)) + #define X86X64 +#endif + +#define MINOFFSET 2 +#define UNCONDITIONAL_MATCHLEN 6 +#define UNCOMPRESSED_END 4 +#define CWORD_LEN 4 + +#if QLZ_COMPRESSION_LEVEL == 1 && defined QLZ_PTR_64 && QLZ_STREAMING_BUFFER == 0 + #define OFFSET_BASE source + #define CAST (ui32)(size_t) +#else + #define OFFSET_BASE 0 + #define CAST +#endif + +int qlz_get_setting(int setting) +{ + switch (setting) + { + case 0: return QLZ_COMPRESSION_LEVEL; + case 1: return sizeof(qlz_state_compress); + case 2: return sizeof(qlz_state_decompress); + case 3: return QLZ_STREAMING_BUFFER; +#ifdef QLZ_MEMORY_SAFE + case 6: return 1; +#else + case 6: return 0; +#endif + case 7: return QLZ_VERSION_MAJOR; + case 8: return QLZ_VERSION_MINOR; + case 9: return QLZ_VERSION_REVISION; + } + return -1; +} + +#if QLZ_COMPRESSION_LEVEL == 1 +static int same(const unsigned char *src, size_t n) +{ + while(n > 0 && *(src + n) == *src) + n--; + return n == 0 ? 
1 : 0; +} +#endif + +static void reset_table_compress(qlz_state_compress *state) +{ + int i; + for(i = 0; i < QLZ_HASH_VALUES; i++) + { +#if QLZ_COMPRESSION_LEVEL == 1 + state->hash[i].offset = 0; +#else + state->hash_counter[i] = 0; + state->hash[i].offset[0] = 0; +#endif + } +} + +static void reset_table_decompress(qlz_state_decompress *state) +{ + (void)state; +#if QLZ_COMPRESSION_LEVEL == 2 + for(int i = 0; i < QLZ_HASH_VALUES; i++) + { + state->hash_counter[i] = 0; + } +#endif +} + +static __inline ui32 hash_func(ui32 i) +{ +#if QLZ_COMPRESSION_LEVEL == 2 + return ((i >> 9) ^ (i >> 13) ^ i) & (QLZ_HASH_VALUES - 1); +#else + return ((i >> 12) ^ i) & (QLZ_HASH_VALUES - 1); +#endif +} + +static __inline ui32 fast_read(void const *src, ui32 bytes) +{ +#ifndef X86X64 + unsigned char *p = (unsigned char*)src; + switch (bytes) + { + case 4: + return(*p | *(p + 1) << 8 | *(p + 2) << 16 | *(p + 3) << 24); + case 3: + return(*p | *(p + 1) << 8 | *(p + 2) << 16); + case 2: + return(*p | *(p + 1) << 8); + case 1: + return(*p); + } + return 0; +#else + if (bytes >= 1 && bytes <= 4) + return *((ui32*)src); + else + return 0; +#endif +} + +static __inline ui32 hashat(const unsigned char *src) +{ + ui32 fetch, hash; + fetch = fast_read(src, 3); + hash = hash_func(fetch); + return hash; +} + +static __inline void fast_write(ui32 f, void *dst, size_t bytes) +{ +#ifndef X86X64 + unsigned char *p = (unsigned char*)dst; + + switch (bytes) + { + case 4: + *p = (unsigned char)f; + *(p + 1) = (unsigned char)(f >> 8); + *(p + 2) = (unsigned char)(f >> 16); + *(p + 3) = (unsigned char)(f >> 24); + return; + case 3: + *p = (unsigned char)f; + *(p + 1) = (unsigned char)(f >> 8); + *(p + 2) = (unsigned char)(f >> 16); + return; + case 2: + *p = (unsigned char)f; + *(p + 1) = (unsigned char)(f >> 8); + return; + case 1: + *p = (unsigned char)f; + return; + } +#else + switch (bytes) + { + case 4: + *((ui32*)dst) = f; + return; + case 3: + *((ui32*)dst) = f; + return; + case 2: + *((ui16 
*)dst) = (ui16)f; + return; + case 1: + *((unsigned char*)dst) = (unsigned char)f; + return; + } +#endif +} + + +size_t qlz_size_decompressed(const char *source) +{ + ui32 n, r; + n = (((*source) & 2) == 2) ? 4 : 1; + r = fast_read(source + 1 + n, n); + r = r & (0xffffffff >> ((4 - n)*8)); + return r; +} + +size_t qlz_size_compressed(const char *source) +{ + ui32 n, r; + n = (((*source) & 2) == 2) ? 4 : 1; + r = fast_read(source + 1, n); + r = r & (0xffffffff >> ((4 - n)*8)); + return r; +} + +static +size_t qlz_size_header(const char *source) +{ + size_t n = 2*((((*source) & 2) == 2) ? 4 : 1) + 1; + return n; +} + + +static __inline void memcpy_up(unsigned char *dst, const unsigned char *src, ui32 n) +{ + // Caution if modifying memcpy_up! Overlap of dst and src must be special handled. +#ifndef X86X64 + unsigned char *end = dst + n; + while(dst < end) + { + *dst = *src; + dst++; + src++; + } +#else + ui32 f = 0; + do + { + *(ui32 *)(dst + f) = *(ui32 *)(src + f); + f += MINOFFSET + 1; + } + while (f < n); +#endif +} + +__attribute__((unused)) +static __inline void update_hash(qlz_state_decompress *state, const unsigned char *s) +{ +#if QLZ_COMPRESSION_LEVEL == 1 + ui32 hash; + hash = hashat(s); + state->hash[hash].offset = s; + state->hash_counter[hash] = 1; +#elif QLZ_COMPRESSION_LEVEL == 2 + ui32 hash; + unsigned char c; + hash = hashat(s); + c = state->hash_counter[hash]; + state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = s; + c++; + state->hash_counter[hash] = c; +#endif + (void)state; + (void)s; +} + +#if QLZ_COMPRESSION_LEVEL <= 2 +static void update_hash_upto(qlz_state_decompress *state, unsigned char **lh, const unsigned char *max) +{ + while(*lh < max) + { + (*lh)++; + update_hash(state, *lh); + } +} +#endif + +static size_t qlz_compress_core(const unsigned char *source, unsigned char *destination, size_t size, qlz_state_compress *state) +{ + const unsigned char *last_byte = source + size - 1; + const unsigned char *src = source; + unsigned char 
*cword_ptr = destination; + unsigned char *dst = destination + CWORD_LEN; + ui32 cword_val = 1U << 31; + const unsigned char *last_matchstart = last_byte - UNCONDITIONAL_MATCHLEN - UNCOMPRESSED_END; + ui32 fetch = 0; + unsigned int lits = 0; + + (void) lits; + + if(src <= last_matchstart) + fetch = fast_read(src, 3); + + while(src <= last_matchstart) + { + if ((cword_val & 1) == 1) + { + // store uncompressed if compression ratio is too low + if (src > source + (size >> 1) && dst - destination > src - source - ((src - source) >> 5)) + return 0; + + fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN); + + cword_ptr = dst; + dst += CWORD_LEN; + cword_val = 1U << 31; + fetch = fast_read(src, 3); + } +#if QLZ_COMPRESSION_LEVEL == 1 + { + const unsigned char *o; + ui32 hash, cached; + + hash = hash_func(fetch); + cached = fetch ^ state->hash[hash].cache; + state->hash[hash].cache = fetch; + + o = state->hash[hash].offset + OFFSET_BASE; + state->hash[hash].offset = CAST(src - OFFSET_BASE); + +#ifdef X86X64 + if ((cached & 0xffffff) == 0 && o != OFFSET_BASE && (src - o > MINOFFSET || (src == o + 1 && lits >= 3 && src > source + 3 && same(src - 3, 6)))) + { + if(cached != 0) + { +#else + if (cached == 0 && o != OFFSET_BASE && (src - o > MINOFFSET || (src == o + 1 && lits >= 3 && src > source + 3 && same(src - 3, 6)))) + { + if (*(o + 3) != *(src + 3)) + { +#endif + hash <<= 4; + cword_val = (cword_val >> 1) | (1U << 31); + fast_write((3 - 2) | hash, dst, 2); + src += 3; + dst += 2; + } + else + { + const unsigned char *old_src = src; + size_t matchlen; + hash <<= 4; + + cword_val = (cword_val >> 1) | (1U << 31); + src += 4; + + if(*(o + (src - old_src)) == *src) + { + src++; + if(*(o + (src - old_src)) == *src) + { + size_t q = last_byte - UNCOMPRESSED_END - (src - 5) + 1; + size_t remaining = q > 255 ? 
255 : q; + src++; + while(*(o + (src - old_src)) == *src && (size_t)(src - old_src) < remaining) + src++; + } + } + + matchlen = src - old_src; + if (matchlen < 18) + { + fast_write((ui32)(matchlen - 2) | hash, dst, 2); + dst += 2; + } + else + { + fast_write((ui32)(matchlen << 16) | hash, dst, 3); + dst += 3; + } + } + fetch = fast_read(src, 3); + lits = 0; + } + else + { + lits++; + *dst = *src; + src++; + dst++; + cword_val = (cword_val >> 1); +#ifdef X86X64 + fetch = fast_read(src, 3); +#else + fetch = (fetch >> 8 & 0xffff) | (*(src + 2) << 16); +#endif + } + } +#elif QLZ_COMPRESSION_LEVEL >= 2 + { + const unsigned char *o, *offset2; + ui32 hash, matchlen, k, m, best_k = 0; + unsigned char c; + size_t remaining = (last_byte - UNCOMPRESSED_END - src + 1) > 255 ? 255 : (last_byte - UNCOMPRESSED_END - src + 1); + (void)best_k; + + + //hash = hashat(src); + fetch = fast_read(src, 3); + hash = hash_func(fetch); + + c = state->hash_counter[hash]; + + offset2 = state->hash[hash].offset[0]; + if(offset2 < src - MINOFFSET && c > 0 && ((fast_read(offset2, 3) ^ fetch) & 0xffffff) == 0) + { + matchlen = 3; + if(*(offset2 + matchlen) == *(src + matchlen)) + { + matchlen = 4; + while(*(offset2 + matchlen) == *(src + matchlen) && matchlen < remaining) + matchlen++; + } + } + else + matchlen = 0; + for(k = 1; k < QLZ_POINTERS && c > k; k++) + { + o = state->hash[hash].offset[k]; +#if QLZ_COMPRESSION_LEVEL == 3 + if(((fast_read(o, 3) ^ fetch) & 0xffffff) == 0 && o < src - MINOFFSET) +#elif QLZ_COMPRESSION_LEVEL == 2 + if(*(src + matchlen) == *(o + matchlen) && ((fast_read(o, 3) ^ fetch) & 0xffffff) == 0 && o < src - MINOFFSET) +#endif + { + m = 3; + while(*(o + m) == *(src + m) && m < remaining) + m++; +#if QLZ_COMPRESSION_LEVEL == 3 + if ((m > matchlen) || (m == matchlen && o > offset2)) +#elif QLZ_COMPRESSION_LEVEL == 2 + if (m > matchlen) +#endif + { + offset2 = o; + matchlen = m; + best_k = k; + } + } + } + o = offset2; + state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = 
src; + c++; + state->hash_counter[hash] = c; + +#if QLZ_COMPRESSION_LEVEL == 3 + if(matchlen > 2 && src - o < 131071) + { + ui32 u; + size_t offset = src - o; + + for(u = 1; u < matchlen; u++) + { + hash = hashat(src + u); + c = state->hash_counter[hash]++; + state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src + u; + } + + cword_val = (cword_val >> 1) | (1U << 31); + src += matchlen; + + if(matchlen == 3 && offset <= 63) + { + *dst = (unsigned char)(offset << 2); + dst++; + } + else if (matchlen == 3 && offset <= 16383) + { + ui32 f = (ui32)((offset << 2) | 1); + fast_write(f, dst, 2); + dst += 2; + } + else if (matchlen <= 18 && offset <= 1023) + { + ui32 f = ((matchlen - 3) << 2) | ((ui32)offset << 6) | 2; + fast_write(f, dst, 2); + dst += 2; + } + + else if(matchlen <= 33) + { + ui32 f = ((matchlen - 2) << 2) | ((ui32)offset << 7) | 3; + fast_write(f, dst, 3); + dst += 3; + } + else + { + ui32 f = ((matchlen - 3) << 7) | ((ui32)offset << 15) | 3; + fast_write(f, dst, 4); + dst += 4; + } + } + else + { + *dst = *src; + src++; + dst++; + cword_val = (cword_val >> 1); + } +#elif QLZ_COMPRESSION_LEVEL == 2 + + if(matchlen > 2) + { + cword_val = (cword_val >> 1) | (1U << 31); + src += matchlen; + + if (matchlen < 10) + { + ui32 f = best_k | ((matchlen - 2) << 2) | (hash << 5); + fast_write(f, dst, 2); + dst += 2; + } + else + { + ui32 f = best_k | (matchlen << 16) | (hash << 5); + fast_write(f, dst, 3); + dst += 3; + } + } + else + { + *dst = *src; + src++; + dst++; + cword_val = (cword_val >> 1); + } +#endif + } +#endif + } + while (src <= last_byte) + { + if ((cword_val & 1) == 1) + { + fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN); + cword_ptr = dst; + dst += CWORD_LEN; + cword_val = 1U << 31; + } +#if QLZ_COMPRESSION_LEVEL < 3 + if (src <= last_byte - 3) + { +#if QLZ_COMPRESSION_LEVEL == 1 + ui32 hash, fetchv; + fetchv = fast_read(src, 3); + hash = hash_func(fetch); + state->hash[hash].offset = CAST(src - OFFSET_BASE); + 
state->hash[hash].cache = fetchv; +#elif QLZ_COMPRESSION_LEVEL == 2 + ui32 hash; + unsigned char c; + hash = hashat(src); + c = state->hash_counter[hash]; + state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src; + c++; + state->hash_counter[hash] = c; +#endif + } +#endif + *dst = *src; + src++; + dst++; + cword_val = (cword_val >> 1); + } + + while((cword_val & 1) != 1) + cword_val = (cword_val >> 1); + + fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN); + + // min. size must be 9 bytes so that the qlz_size functions can take 9 bytes as argument + return dst - destination < 9 ? 9 : dst - destination; +} + +static size_t qlz_decompress_core(const unsigned char *source, unsigned char *destination, size_t size, qlz_state_decompress *state, const unsigned char *history) +{ + const unsigned char *src = source + qlz_size_header((const char *)source); + unsigned char *dst = destination; + const unsigned char *last_destination_byte = destination + size - 1; + ui32 cword_val = 1; + const unsigned char *last_matchstart = last_destination_byte - UNCONDITIONAL_MATCHLEN - UNCOMPRESSED_END; + unsigned char *last_hashed = destination - 1; + const unsigned char *last_source_byte = source + qlz_size_compressed((const char *)source) - 1; + static const ui32 bitlut[16] = {4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0}; + + (void) last_source_byte; + (void) last_hashed; + (void) state; + (void) history; + + for(;;) + { + ui32 fetch; + + if (cword_val == 1) + { +#ifdef QLZ_MEMORY_SAFE + if(src + CWORD_LEN - 1 > last_source_byte) + return 0; +#endif + cword_val = fast_read(src, CWORD_LEN); + src += CWORD_LEN; + } + +#ifdef QLZ_MEMORY_SAFE + if(src + 4 - 1 > last_source_byte) + return 0; +#endif + + fetch = fast_read(src, 4); + + if ((cword_val & 1) == 1) + { + ui32 matchlen; + const unsigned char *offset2; + +#if QLZ_COMPRESSION_LEVEL == 1 + ui32 hash; + cword_val = cword_val >> 1; + hash = (fetch >> 4) & 0xfff; + offset2 = (const unsigned char 
*)(size_t)state->hash[hash].offset; + + if((fetch & 0xf) != 0) + { + matchlen = (fetch & 0xf) + 2; + src += 2; + } + else + { + matchlen = *(src + 2); + src += 3; + } + +#elif QLZ_COMPRESSION_LEVEL == 2 + ui32 hash; + unsigned char c; + cword_val = cword_val >> 1; + hash = (fetch >> 5) & 0x7ff; + c = (unsigned char)(fetch & 0x3); + offset2 = state->hash[hash].offset[c]; + + if((fetch & (28)) != 0) + { + matchlen = ((fetch >> 2) & 0x7) + 2; + src += 2; + } + else + { + matchlen = *(src + 2); + src += 3; + } + +#elif QLZ_COMPRESSION_LEVEL == 3 + ui32 offset; + cword_val = cword_val >> 1; + if ((fetch & 3) == 0) + { + offset = (fetch & 0xff) >> 2; + matchlen = 3; + src++; + } + else if ((fetch & 2) == 0) + { + offset = (fetch & 0xffff) >> 2; + matchlen = 3; + src += 2; + } + else if ((fetch & 1) == 0) + { + offset = (fetch & 0xffff) >> 6; + matchlen = ((fetch >> 2) & 15) + 3; + src += 2; + } + else if ((fetch & 127) != 3) + { + offset = (fetch >> 7) & 0x1ffff; + matchlen = ((fetch >> 2) & 0x1f) + 2; + src += 3; + } + else + { + offset = (fetch >> 15); + matchlen = ((fetch >> 7) & 255) + 3; + src += 4; + } + + offset2 = dst - offset; +#endif + +#ifdef QLZ_MEMORY_SAFE + if(offset2 < history || offset2 > dst - MINOFFSET - 1) + return 0; + + if(matchlen > (ui32)(last_destination_byte - dst - UNCOMPRESSED_END + 1)) + return 0; +#endif + + memcpy_up(dst, offset2, matchlen); + dst += matchlen; + +#if QLZ_COMPRESSION_LEVEL <= 2 + update_hash_upto(state, &last_hashed, dst - matchlen); + last_hashed = dst - 1; +#endif + } + else + { + if (dst < last_matchstart) + { + unsigned int n = bitlut[cword_val & 0xf]; +#ifdef X86X64 + *(ui32 *)dst = *(ui32 *)src; +#else + memcpy_up(dst, src, 4); +#endif + cword_val = cword_val >> n; + dst += n; + src += n; +#if QLZ_COMPRESSION_LEVEL <= 2 + update_hash_upto(state, &last_hashed, dst - 3); +#endif + } + else + { + while(dst <= last_destination_byte) + { + if (cword_val == 1) + { + src += CWORD_LEN; + cword_val = 1U << 31; + } +#ifdef 
QLZ_MEMORY_SAFE + if(src >= last_source_byte + 1) + return 0; +#endif + *dst = *src; + dst++; + src++; + cword_val = cword_val >> 1; + } + +#if QLZ_COMPRESSION_LEVEL <= 2 + update_hash_upto(state, &last_hashed, last_destination_byte - 3); // todo, use constant +#endif + return size; + } + + } + } +} + +size_t qlz_compress(const void *source, char *destination, size_t size, qlz_state_compress *state) +{ + size_t r; + ui32 compressed; + size_t base; + + if(size == 0 || size > 0xffffffff - 400) + return 0; + + if(size < 216) + base = 3; + else + base = 9; + +#if QLZ_STREAMING_BUFFER > 0 + if (state->stream_counter + size - 1 >= QLZ_STREAMING_BUFFER) +#endif + { + reset_table_compress(state); + r = base + qlz_compress_core((const unsigned char *)source, (unsigned char*)destination + base, size, state); +#if QLZ_STREAMING_BUFFER > 0 + reset_table_compress(state); +#endif + if(r == base) + { + memcpy(destination + base, source, size); + r = size + base; + compressed = 0; + } + else + { + compressed = 1; + } + state->stream_counter = 0; + } +#if QLZ_STREAMING_BUFFER > 0 + else + { + unsigned char *src = state->stream_buffer + state->stream_counter; + + memcpy(src, source, size); + r = base + qlz_compress_core(src, (unsigned char*)destination + base, size, state); + + if(r == base) + { + memcpy(destination + base, src, size); + r = size + base; + compressed = 0; + reset_table_compress(state); + } + else + { + compressed = 1; + } + state->stream_counter += size; + } +#endif + if(base == 3) + { + *destination = (unsigned char)(0 | compressed); + *(destination + 1) = (unsigned char)r; + *(destination + 2) = (unsigned char)size; + } + else + { + *destination = (unsigned char)(2 | compressed); + fast_write((ui32)r, destination + 1, 4); + fast_write((ui32)size, destination + 5, 4); + } + + *destination |= (QLZ_COMPRESSION_LEVEL << 2); + *destination |= (1 << 6); + *destination |= ((QLZ_STREAMING_BUFFER == 0 ? 0 : (QLZ_STREAMING_BUFFER == 100000 ? 
1 : (QLZ_STREAMING_BUFFER == 1000000 ? 2 : 3))) << 4); + +// 76543210 +// 01SSLLHC + + return r; +} + +size_t qlz_decompress(const char *source, void *destination, qlz_state_decompress *state) +{ + size_t dsiz = qlz_size_decompressed(source); + +#if QLZ_STREAMING_BUFFER > 0 + if (state->stream_counter + qlz_size_decompressed(source) - 1 >= QLZ_STREAMING_BUFFER) +#endif + { + if((*source & 1) == 1) + { + reset_table_decompress(state); + dsiz = qlz_decompress_core((const unsigned char *)source, (unsigned char *)destination, dsiz, state, (const unsigned char *)destination); + } + else + { + memcpy(destination, source + qlz_size_header(source), dsiz); + } + state->stream_counter = 0; + reset_table_decompress(state); + } +#if QLZ_STREAMING_BUFFER > 0 + else + { + unsigned char *dst = state->stream_buffer + state->stream_counter; + if((*source & 1) == 1) + { + dsiz = qlz_decompress_core((const unsigned char *)source, dst, dsiz, state, (const unsigned char *)state->stream_buffer); + } + else + { + memcpy(dst, source + qlz_size_header(source), dsiz); + reset_table_decompress(state); + } + memcpy(destination, dst, dsiz); + state->stream_counter += dsiz; + } +#endif + return dsiz; +} + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/quicklz.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/quicklz.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/quicklz.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/quicklz.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,230 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of 
source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. 
+ + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." + +// Fast data compression library +// Copyright (C) 2006-2011 Lasse Mikkel Reinhold +// lar@quicklz.com +// +// QuickLZ can be used for free under the GPL 1, 2 or 3 license (where anything +// released into public must be open source) or under a commercial license if such +// has been acquired (see http://www.quicklz.com/order.html). The commercial license +// does not cover derived or ported versions created by third parties under GPL. + +// You can edit following user settings. Data must be decompressed with the same +// setting of QLZ_COMPRESSION_LEVEL and QLZ_STREAMING_BUFFER as it was compressed +// (see manual). If QLZ_STREAMING_BUFFER > 0, scratch buffers must be initially +// zeroed out (see manual). First #ifndef makes it possible to define settings from +// the outside like the compiler command line. + +// 1.5.0 final + +#ifndef QLZ_COMPRESSION_LEVEL + //#define QLZ_COMPRESSION_LEVEL 1 + //#define QLZ_COMPRESSION_LEVEL 2 + #define QLZ_COMPRESSION_LEVEL 3 + + #define QLZ_STREAMING_BUFFER 0 + //#define QLZ_STREAMING_BUFFER 100000 + //#define QLZ_STREAMING_BUFFER 1000000 + + //#define QLZ_MEMORY_SAFE +#endif + +#define QLZ_VERSION_MAJOR 1 +#define QLZ_VERSION_MINOR 5 +#define QLZ_VERSION_REVISION 0 + +// Using size_t, memset() and memcpy() +#include + +// Verify compression level +#if QLZ_COMPRESSION_LEVEL != 1 && QLZ_COMPRESSION_LEVEL != 2 && QLZ_COMPRESSION_LEVEL != 3 +#error QLZ_COMPRESSION_LEVEL must be 1, 2 or 3 +#endif + +typedef unsigned int ui32; +typedef unsigned short int ui16; + +// Decrease QLZ_POINTERS for level 3 to increase compression speed. Do not touch any other values! 
+#if QLZ_COMPRESSION_LEVEL == 1 +#define QLZ_POINTERS 1 +#define QLZ_HASH_VALUES 4096 +#elif QLZ_COMPRESSION_LEVEL == 2 +#define QLZ_POINTERS 4 +#define QLZ_HASH_VALUES 2048 +#elif QLZ_COMPRESSION_LEVEL == 3 +#define QLZ_POINTERS 16 +#define QLZ_HASH_VALUES 4096 +#endif + +// Detect if pointer size is 64-bit. It's not fatal if some 64-bit target is not detected because this is only for adding an optional 64-bit optimization. +#if defined _LP64 || defined __LP64__ || defined __64BIT__ || _ADDR64 || defined _WIN64 || defined __arch64__ || __WORDSIZE == 64 || (defined __sparc && defined __sparcv9) || defined __x86_64 || defined __amd64 || defined __x86_64__ || defined _M_X64 || defined _M_IA64 || defined __ia64 || defined __IA64__ + #define QLZ_PTR_64 +#endif + +// hash entry +typedef struct +{ +#if QLZ_COMPRESSION_LEVEL == 1 + ui32 cache; +#if defined QLZ_PTR_64 && QLZ_STREAMING_BUFFER == 0 + unsigned int offset; +#else + const unsigned char *offset; +#endif +#else + const unsigned char *offset[QLZ_POINTERS]; +#endif + +} qlz_hash_compress; + +typedef struct +{ +#if QLZ_COMPRESSION_LEVEL == 1 + const unsigned char *offset; +#else + const unsigned char *offset[QLZ_POINTERS]; +#endif +} qlz_hash_decompress; + + +// states +typedef struct +{ + #if QLZ_STREAMING_BUFFER > 0 + unsigned char stream_buffer[QLZ_STREAMING_BUFFER]; + #endif + size_t stream_counter; + qlz_hash_compress hash[QLZ_HASH_VALUES]; + unsigned char hash_counter[QLZ_HASH_VALUES]; +} qlz_state_compress; + + +#if QLZ_COMPRESSION_LEVEL == 1 || QLZ_COMPRESSION_LEVEL == 2 + typedef struct + { +#if QLZ_STREAMING_BUFFER > 0 + unsigned char stream_buffer[QLZ_STREAMING_BUFFER]; +#endif + qlz_hash_decompress hash[QLZ_HASH_VALUES]; + unsigned char hash_counter[QLZ_HASH_VALUES]; + size_t stream_counter; + } qlz_state_decompress; +#elif QLZ_COMPRESSION_LEVEL == 3 + typedef struct + { +#if QLZ_STREAMING_BUFFER > 0 + unsigned char stream_buffer[QLZ_STREAMING_BUFFER]; +#endif +#if QLZ_COMPRESSION_LEVEL <= 2 + 
qlz_hash_decompress hash[QLZ_HASH_VALUES]; +#endif + size_t stream_counter; + } qlz_state_decompress; +#endif + + +#if defined (__cplusplus) +extern "C" { +#endif + +// Public functions of QuickLZ +size_t qlz_size_decompressed(const char *source); +size_t qlz_size_compressed(const char *source); +size_t qlz_compress(const void *source, char *destination, size_t size, qlz_state_compress *state); +size_t qlz_decompress(const char *source, void *destination, qlz_state_decompress *state); +int qlz_get_setting(int setting); + +#if defined (__cplusplus) +} +#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/rbuf.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/rbuf.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/rbuf.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/rbuf.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,210 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include + +#include "portability/memory.h" +#include "portability/toku_assert.h" +#include "portability/toku_htonl.h" +#include "portability/toku_portability.h" +#include "util/memarena.h" + +struct rbuf { + unsigned char *buf; + unsigned int size; + unsigned int ndone; +}; +#define RBUF_INITIALIZER ((struct rbuf){.buf = NULL, .size=0, .ndone=0}) + +static inline void rbuf_init(struct rbuf *r, unsigned char *buf, unsigned int size) { + r->buf = buf; + r->size = size; + r->ndone = 0; +} + +static inline unsigned int rbuf_get_roffset(struct rbuf *r) { + return r->ndone; +} + +static inline unsigned char rbuf_char (struct rbuf *r) { + assert(r->ndonesize); + return r->buf[r->ndone++]; +} + +static inline void rbuf_ma_uint8_t (struct rbuf *r, memarena *ma __attribute__((__unused__)), uint8_t *num) { + *num = rbuf_char(r); +} + +static inline void rbuf_ma_bool (struct rbuf *r, memarena *ma __attribute__((__unused__)), bool *b) { + uint8_t n = rbuf_char(r); + *b = (n!=0); +} + +//Read an int that MUST be in network order regardless of disk order +static unsigned int rbuf_network_int (struct rbuf *r) __attribute__((__unused__)); +static unsigned int rbuf_network_int (struct rbuf *r) { + assert(r->ndone+4 <= r->size); + uint32_t result = toku_ntohl(*(uint32_t*)(r->buf+r->ndone)); // This only works on machines where unaligned loads are OK. + r->ndone+=4; + return result; +} + +static unsigned int rbuf_int (struct rbuf *r) { +#if 1 + assert(r->ndone+4 <= r->size); + uint32_t result = toku_dtoh32(*(uint32_t*)(r->buf+r->ndone)); // This only works on machines where unaligned loads are OK. 
+ r->ndone+=4; + return result; +#else + unsigned char c0 = rbuf_char(r); + unsigned char c1 = rbuf_char(r); + unsigned char c2 = rbuf_char(r); + unsigned char c3 = rbuf_char(r); + return ((c0<<24)| + (c1<<16)| + (c2<<8)| + (c3<<0)); +#endif +} + +static inline void rbuf_literal_bytes (struct rbuf *r, const void **bytes, unsigned int n_bytes) { + *bytes = &r->buf[r->ndone]; + r->ndone+=n_bytes; + assert(r->ndone<=r->size); +} + +/* Return a pointer into the middle of the buffer. */ +static inline void rbuf_bytes (struct rbuf *r, const void **bytes, unsigned int *n_bytes) +{ + *n_bytes = rbuf_int(r); + rbuf_literal_bytes(r, bytes, *n_bytes); +} + +static inline unsigned long long rbuf_ulonglong (struct rbuf *r) { + unsigned i0 = rbuf_int(r); + unsigned i1 = rbuf_int(r); + return ((unsigned long long)(i0)<<32) | ((unsigned long long)(i1)); +} + +static inline signed long long rbuf_longlong (struct rbuf *r) { + return (signed long long)rbuf_ulonglong(r); +} + +static inline void rbuf_ma_uint32_t (struct rbuf *r, memarena *ma __attribute__((__unused__)), uint32_t *num) { + *num = rbuf_int(r); +} + +static inline void rbuf_ma_uint64_t (struct rbuf *r, memarena *ma __attribute__((__unused__)), uint64_t *num) { + *num = rbuf_ulonglong(r); +} + +// Don't try to use the same space, malloc it +static inline void rbuf_BYTESTRING (struct rbuf *r, BYTESTRING *bs) { + bs->len = rbuf_int(r); + uint32_t newndone = r->ndone + bs->len; + assert(newndone <= r->size); + bs->data = (char *) toku_memdup(&r->buf[r->ndone], (size_t)bs->len); + assert(bs->data); + r->ndone = newndone; +} + +static inline void rbuf_ma_BYTESTRING (struct rbuf *r, memarena *ma, BYTESTRING *bs) { + bs->len = rbuf_int(r); + uint32_t newndone = r->ndone + bs->len; + assert(newndone <= r->size); + bs->data = (char *) ma->malloc_from_arena(bs->len); + assert(bs->data); + memcpy(bs->data, &r->buf[r->ndone], bs->len); + r->ndone = newndone; +} diff -Nru 
mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/sub_block.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/sub_block.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/sub_block.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/sub_block.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,444 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include + +#include +#include +#include +#include + +#include "portability/memory.h" +#include "portability/toku_assert.h" +#include "portability/toku_portability.h" + +#include "ft/serialize/compress.h" +#include "ft/serialize/sub_block.h" +#include "ft/serialize/quicklz.h" +#include "util/threadpool.h" +#include "util/x1764.h" + +SUB_BLOCK sub_block_creat(void) { + SUB_BLOCK XMALLOC(sb); + sub_block_init(sb); + return sb; +} +void sub_block_init(SUB_BLOCK sub_block) { + sub_block->uncompressed_ptr = 0; + sub_block->uncompressed_size = 0; + + sub_block->compressed_ptr = 0; + sub_block->compressed_size_bound = 0; + sub_block->compressed_size = 0; + + sub_block->xsum = 0; +} + +// get the size of the compression header +size_t +sub_block_header_size(int n_sub_blocks) { + return sizeof (uint32_t) + n_sub_blocks * sizeof (struct stored_sub_block); +} + +void +set_compressed_size_bound(struct sub_block *se, enum toku_compression_method method) { + se->compressed_size_bound = toku_compress_bound(method, se->uncompressed_size); +} + +// get the sum of the sub block compressed sizes +size_t +get_sum_compressed_size_bound(int n_sub_blocks, struct sub_block sub_block[], enum toku_compression_method method) { + size_t compressed_size_bound = 0; + for (int i = 0; i < n_sub_blocks; i++) { + sub_block[i].compressed_size_bound = toku_compress_bound(method, sub_block[i].uncompressed_size); + compressed_size_bound += sub_block[i].compressed_size_bound; + } + return compressed_size_bound; +} + +// get the sum of the sub block uncompressed sizes +size_t +get_sum_uncompressed_size(int n_sub_blocks, struct sub_block sub_block[]) { + size_t uncompressed_size = 0; + for (int i = 0; i < n_sub_blocks; i++) + uncompressed_size += sub_block[i].uncompressed_size; + return uncompressed_size; +} + +// round up n +static inline int +alignup32(int a, int b) { + return ((a+b-1) / b) * b; +} + +// Choose n_sub_blocks and sub_block_size such that the product is >= total_size and the 
sub_block_size is at +// least >= the target_sub_block_size. +int +choose_sub_block_size(int total_size, int n_sub_blocks_limit, int *sub_block_size_ret, int *n_sub_blocks_ret) { + if (total_size < 0 || n_sub_blocks_limit < 1) + return EINVAL; + + const int alignment = 32; + + int n_sub_blocks, sub_block_size; + n_sub_blocks = total_size / target_sub_block_size; + if (n_sub_blocks <= 1) { + if (total_size > 0 && n_sub_blocks_limit > 0) + n_sub_blocks = 1; + sub_block_size = total_size; + } else { + if (n_sub_blocks > n_sub_blocks_limit) // limit the number of sub-blocks + n_sub_blocks = n_sub_blocks_limit; + sub_block_size = alignup32(total_size / n_sub_blocks, alignment); + while (sub_block_size * n_sub_blocks < total_size) // round up the sub-block size until big enough + sub_block_size += alignment; + } + + *sub_block_size_ret = sub_block_size; + *n_sub_blocks_ret = n_sub_blocks; + + return 0; +} + +// Choose the right size of basement nodes. For now, just align up to +// 256k blocks and hope it compresses well enough. 
+int +choose_basement_node_size(int total_size, int *sub_block_size_ret, int *n_sub_blocks_ret) { + if (total_size < 0) + return EINVAL; + + *n_sub_blocks_ret = (total_size + max_basement_node_uncompressed_size - 1) / max_basement_node_uncompressed_size; + *sub_block_size_ret = max_basement_node_uncompressed_size; + + return 0; +} + +void +set_all_sub_block_sizes(int total_size, int sub_block_size, int n_sub_blocks, struct sub_block sub_block[]) { + int size_left = total_size; + int i; + for (i = 0; i < n_sub_blocks-1; i++) { + sub_block[i].uncompressed_size = sub_block_size; + size_left -= sub_block_size; + } + if (i == 0 || size_left > 0) + sub_block[i].uncompressed_size = size_left; +} + +// find the index of the first sub block that contains offset +// Returns the sub block index, else returns -1 +int +get_sub_block_index(int n_sub_blocks, struct sub_block sub_block[], size_t offset) { + size_t start_offset = 0; + for (int i = 0; i < n_sub_blocks; i++) { + size_t size = sub_block[i].uncompressed_size; + if (offset < start_offset + size) + return i; + start_offset += size; + } + return -1; +} + +#include "workset.h" + +void +compress_work_init(struct compress_work *w, enum toku_compression_method method, struct sub_block *sub_block) { + w->method = method; + w->sub_block = sub_block; +} + +// +// takes the uncompressed contents of sub_block +// and compresses them into sb_compressed_ptr +// cs_bound is the compressed size bound +// Returns the size of the compressed data +// +uint32_t +compress_nocrc_sub_block( + struct sub_block *sub_block, + void* sb_compressed_ptr, + uint32_t cs_bound, + enum toku_compression_method method + ) +{ + // compress it + Bytef *uncompressed_ptr = (Bytef *) sub_block->uncompressed_ptr; + Bytef *compressed_ptr = (Bytef *) sb_compressed_ptr; + uLongf uncompressed_len = sub_block->uncompressed_size; + uLongf real_compressed_len = cs_bound; + toku_compress(method, + compressed_ptr, &real_compressed_len, + uncompressed_ptr, 
uncompressed_len); + return real_compressed_len; +} + +void +compress_sub_block(struct sub_block *sub_block, enum toku_compression_method method) { + sub_block->compressed_size = compress_nocrc_sub_block( + sub_block, + sub_block->compressed_ptr, + sub_block->compressed_size_bound, + method + ); + // checksum it + sub_block->xsum = toku_x1764_memory(sub_block->compressed_ptr, sub_block->compressed_size); +} + +void * +compress_worker(void *arg) { + struct workset *ws = (struct workset *) arg; + while (1) { + struct compress_work *w = (struct compress_work *) workset_get(ws); + if (w == NULL) + break; + compress_sub_block(w->sub_block, w->method); + } + workset_release_ref(ws); + return arg; +} + +size_t +compress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], char *uncompressed_ptr, char *compressed_ptr, int num_cores, struct toku_thread_pool *pool, enum toku_compression_method method) { + char *compressed_base_ptr = compressed_ptr; + size_t compressed_len; + + // This is a complex way to write a parallel loop. Cilk would be better. 
+ + if (n_sub_blocks == 1) { + // single sub-block + sub_block[0].uncompressed_ptr = uncompressed_ptr; + sub_block[0].compressed_ptr = compressed_ptr; + compress_sub_block(&sub_block[0], method); + compressed_len = sub_block[0].compressed_size; + } else { + // multiple sub-blocks + int T = num_cores; // T = min(num_cores, n_sub_blocks) - 1 + if (T > n_sub_blocks) + T = n_sub_blocks; + if (T > 0) + T = T - 1; // threads in addition to the running thread + + struct workset ws; + ZERO_STRUCT(ws); + workset_init(&ws); + + struct compress_work work[n_sub_blocks]; + workset_lock(&ws); + for (int i = 0; i < n_sub_blocks; i++) { + sub_block[i].uncompressed_ptr = uncompressed_ptr; + sub_block[i].compressed_ptr = compressed_ptr; + compress_work_init(&work[i], method, &sub_block[i]); + workset_put_locked(&ws, &work[i].base); + uncompressed_ptr += sub_block[i].uncompressed_size; + compressed_ptr += sub_block[i].compressed_size_bound; + } + workset_unlock(&ws); + + // compress the sub-blocks + if (0) printf("%s:%d T=%d N=%d\n", __FUNCTION__, __LINE__, T, n_sub_blocks); + toku_thread_pool_run(pool, 0, &T, compress_worker, &ws); + workset_add_ref(&ws, T); + compress_worker(&ws); + + // wait for all of the work to complete + workset_join(&ws); + workset_destroy(&ws); + + // squeeze out the holes not used by the compress bound + compressed_ptr = compressed_base_ptr + sub_block[0].compressed_size; + for (int i = 1; i < n_sub_blocks; i++) { + memmove(compressed_ptr, sub_block[i].compressed_ptr, sub_block[i].compressed_size); + compressed_ptr += sub_block[i].compressed_size; + } + + compressed_len = compressed_ptr - compressed_base_ptr; + } + return compressed_len; +} + +// initialize the decompression work +void +decompress_work_init(struct decompress_work *dw, + void *compress_ptr, uint32_t compress_size, + void *uncompress_ptr, uint32_t uncompress_size, + uint32_t xsum) { + dw->compress_ptr = compress_ptr; + dw->compress_size = compress_size; + dw->uncompress_ptr = uncompress_ptr; 
+ dw->uncompress_size = uncompress_size; + dw->xsum = xsum; + dw->error = 0; +} + +int verbose_decompress_sub_block = 1; + +// decompress one block +int +decompress_sub_block(void *compress_ptr, uint32_t compress_size, void *uncompress_ptr, uint32_t uncompress_size, uint32_t expected_xsum) { + int result = 0; + + // verify checksum + uint32_t xsum = toku_x1764_memory(compress_ptr, compress_size); + if (xsum != expected_xsum) { + if (verbose_decompress_sub_block) fprintf(stderr, "%s:%d xsum %u expected %u\n", __FUNCTION__, __LINE__, xsum, expected_xsum); + result = EINVAL; + } else { + // decompress + toku_decompress((Bytef *) uncompress_ptr, uncompress_size, (Bytef *) compress_ptr, compress_size); + } + return result; +} + +// decompress blocks until there is no more work to do +void * +decompress_worker(void *arg) { + struct workset *ws = (struct workset *) arg; + while (1) { + struct decompress_work *dw = (struct decompress_work *) workset_get(ws); + if (dw == NULL) + break; + dw->error = decompress_sub_block(dw->compress_ptr, dw->compress_size, dw->uncompress_ptr, dw->uncompress_size, dw->xsum); + } + workset_release_ref(ws); + return arg; +} + +int +decompress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], unsigned char *compressed_data, unsigned char *uncompressed_data, int num_cores, struct toku_thread_pool *pool) { + int r; + + if (n_sub_blocks == 1) { + r = decompress_sub_block(compressed_data, sub_block[0].compressed_size, uncompressed_data, sub_block[0].uncompressed_size, sub_block[0].xsum); + } else { + // compute the number of additional threads needed for decompressing this node + int T = num_cores; // T = min(#cores, #blocks) - 1 + if (T > n_sub_blocks) + T = n_sub_blocks; + if (T > 0) + T = T - 1; // threads in addition to the running thread + + // init the decompression work set + struct workset ws; + ZERO_STRUCT(ws); + workset_init(&ws); + + // initialize the decompression work and add to the work set + struct decompress_work 
decompress_work[n_sub_blocks]; + workset_lock(&ws); + for (int i = 0; i < n_sub_blocks; i++) { + decompress_work_init(&decompress_work[i], compressed_data, sub_block[i].compressed_size, uncompressed_data, sub_block[i].uncompressed_size, sub_block[i].xsum); + workset_put_locked(&ws, &decompress_work[i].base); + + uncompressed_data += sub_block[i].uncompressed_size; + compressed_data += sub_block[i].compressed_size; + } + workset_unlock(&ws); + + // decompress the sub-blocks + if (0) printf("%s:%d Cores=%d Blocks=%d T=%d\n", __FUNCTION__, __LINE__, num_cores, n_sub_blocks, T); + toku_thread_pool_run(pool, 0, &T, decompress_worker, &ws); + workset_add_ref(&ws, T); + decompress_worker(&ws); + + // cleanup + workset_join(&ws); + workset_destroy(&ws); + + r = 0; + for (int i = 0; i < n_sub_blocks; i++) { + r = decompress_work[i].error; + if (r != 0) + break; + } + } + + return r; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/sub_block.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/sub_block.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/sub_block.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/sub_block.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,214 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include "ft/serialize/compress.h" + +// TODO: Clean this abstraciton up +static const int max_sub_blocks = 8; +static const int target_sub_block_size = 512 * 1024; +static const int max_basement_nodes = 32; +static const int max_basement_node_uncompressed_size = 256 * 1024; +static const int max_basement_node_compressed_size = 64 * 1024; + +struct sub_block { + void *uncompressed_ptr; + uint32_t uncompressed_size; + + void *compressed_ptr; + uint32_t compressed_size; // real compressed size + uint32_t compressed_size_bound; // estimated compressed size + + uint32_t xsum; // sub block checksum +}; +typedef struct sub_block *SUB_BLOCK; + +struct stored_sub_block { + uint32_t uncompressed_size; + uint32_t compressed_size; + uint32_t xsum; +}; + +void sub_block_init(SUB_BLOCK); +SUB_BLOCK sub_block_creat(void); + +// get the size of the compression header +size_t +sub_block_header_size(int n_sub_blocks); + +void +set_compressed_size_bound(struct sub_block *se, enum toku_compression_method method); + +// get the sum of the sub block compressed bound sizes +size_t +get_sum_compressed_size_bound(int n_sub_blocks, struct sub_block sub_block[], enum toku_compression_method method); + +// get the sum of the sub block uncompressed sizes +size_t +get_sum_uncompressed_size(int n_sub_blocks, struct sub_block sub_block[]); + +// Choose n_sub_blocks and sub_block_size such that the product is >= total_size and the sub_block_size is at +// least >= the target_sub_block_size. 
+int +choose_sub_block_size(int total_size, int n_sub_blocks_limit, int *sub_block_size_ret, int *n_sub_blocks_ret); + +int +choose_basement_node_size(int total_size, int *sub_block_size_ret, int *n_sub_blocks_ret); + +void +set_all_sub_block_sizes(int total_size, int sub_block_size, int n_sub_blocks, struct sub_block sub_block[]); + +// find the index of the first sub block that contains the offset +// Returns the index if found, else returns -1 +int +get_sub_block_index(int n_sub_blocks, struct sub_block sub_block[], size_t offset); + +#include "workset.h" + +struct compress_work { + struct work base; + enum toku_compression_method method; + struct sub_block *sub_block; +}; + +void +compress_work_init(struct compress_work *w, enum toku_compression_method method, struct sub_block *sub_block); + +uint32_t +compress_nocrc_sub_block( + struct sub_block *sub_block, + void* sb_compressed_ptr, + uint32_t cs_bound, + enum toku_compression_method method + ); + +void +compress_sub_block(struct sub_block *sub_block, enum toku_compression_method method); + +void * +compress_worker(void *arg); + +size_t +compress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], char *uncompressed_ptr, char *compressed_ptr, int num_cores, struct toku_thread_pool *pool, enum toku_compression_method method); + +struct decompress_work { + struct work base; + void *compress_ptr; + void *uncompress_ptr; + uint32_t compress_size; + uint32_t uncompress_size; + uint32_t xsum; + int error; +}; + +// initialize the decompression work +void +decompress_work_init(struct decompress_work *dw, + void *compress_ptr, uint32_t compress_size, + void *uncompress_ptr, uint32_t uncompress_size, + uint32_t xsum); + +// decompress one block +int +decompress_sub_block(void *compress_ptr, uint32_t compress_size, void *uncompress_ptr, uint32_t uncompress_size, uint32_t expected_xsum); + +// decompress blocks until there is no more work to do +void * +decompress_worker(void *arg); + +// decompress all sub 
blocks from the compressed_data buffer to the uncompressed_data buffer +// Returns 0 if success, otherwise an error +int +decompress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], unsigned char *compressed_data, unsigned char *uncompressed_data, int num_cores, struct toku_thread_pool *pool); + +extern int verbose_decompress_sub_block; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/wbuf.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/wbuf.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/wbuf.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/wbuf.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,262 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. 
+ Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include +#include + +#include "portability/toku_htonl.h" + +#include "util/bytestring.h" +#include "util/x1764.h" + +/* When serializing a value, write it into a buffer. */ +/* This code requires that the buffer be big enough to hold whatever you put into it. */ +/* This abstraction doesn't do a good job of hiding its internals. + * Why? The performance of this code is important, and we want to inline stuff */ +//Why is size here an int instead of DISKOFF like in the initializer? 
+struct wbuf { + unsigned char *buf; + unsigned int size; + unsigned int ndone; + struct x1764 checksum; // The checksum state +}; + +static inline void wbuf_nocrc_init (struct wbuf *w, void *buf, unsigned int size) { + w->buf = (unsigned char *) buf; + w->size = size; + w->ndone = 0; +} + +static inline void wbuf_init (struct wbuf *w, void *buf, unsigned int size) { + wbuf_nocrc_init(w, buf, size); + toku_x1764_init(&w->checksum); +} + +static inline size_t wbuf_get_woffset(struct wbuf *w) { + return w->ndone; +} + +/* Write a character. */ +static inline void wbuf_nocrc_char (struct wbuf *w, unsigned char ch) { + assert(w->ndonesize); + w->buf[w->ndone++]=ch; +} + +/* Write a character. */ +static inline void wbuf_nocrc_uint8_t (struct wbuf *w, uint8_t ch) { + assert(w->ndonesize); + w->buf[w->ndone++]=ch; +} + +static inline void wbuf_char (struct wbuf *w, unsigned char ch) { + wbuf_nocrc_char (w, ch); + toku_x1764_add(&w->checksum, &w->buf[w->ndone-1], 1); +} + +//Write an int that MUST be in network order regardless of disk order +static void wbuf_network_int (struct wbuf *w, int32_t i) __attribute__((__unused__)); +static void wbuf_network_int (struct wbuf *w, int32_t i) { + assert(w->ndone + 4 <= w->size); + *(uint32_t*)(&w->buf[w->ndone]) = toku_htonl(i); + toku_x1764_add(&w->checksum, &w->buf[w->ndone], 4); + w->ndone += 4; +} + +static inline void wbuf_nocrc_int (struct wbuf *w, int32_t i) { +#if 0 + wbuf_nocrc_char(w, i>>24); + wbuf_nocrc_char(w, i>>16); + wbuf_nocrc_char(w, i>>8); + wbuf_nocrc_char(w, i>>0); +#else + assert(w->ndone + 4 <= w->size); + #if 0 + w->buf[w->ndone+0] = i>>24; + w->buf[w->ndone+1] = i>>16; + w->buf[w->ndone+2] = i>>8; + w->buf[w->ndone+3] = i>>0; + #else + *(uint32_t*)(&w->buf[w->ndone]) = toku_htod32(i); + #endif + w->ndone += 4; +#endif +} + +static inline void wbuf_int (struct wbuf *w, int32_t i) { + wbuf_nocrc_int(w, i); + toku_x1764_add(&w->checksum, &w->buf[w->ndone-4], 4); +} + +static inline void wbuf_nocrc_uint 
(struct wbuf *w, uint32_t i) { + wbuf_nocrc_int(w, (int32_t)i); +} + +static inline void wbuf_uint (struct wbuf *w, uint32_t i) { + wbuf_int(w, (int32_t)i); +} + +static inline uint8_t* wbuf_nocrc_reserve_literal_bytes(struct wbuf *w, uint32_t nbytes) { + assert(w->ndone + nbytes <= w->size); + uint8_t * dest = w->buf + w->ndone; + w->ndone += nbytes; + return dest; +} + +static inline void wbuf_nocrc_literal_bytes(struct wbuf *w, const void *bytes_bv, uint32_t nbytes) { + const unsigned char *bytes = (const unsigned char *) bytes_bv; +#if 0 + { int i; for (i=0; indone + nbytes <= w->size); + memcpy(w->buf + w->ndone, bytes, (size_t)nbytes); + w->ndone += nbytes; +#endif +} + +static inline void wbuf_literal_bytes(struct wbuf *w, const void *bytes_bv, uint32_t nbytes) { + wbuf_nocrc_literal_bytes(w, bytes_bv, nbytes); + toku_x1764_add(&w->checksum, &w->buf[w->ndone-nbytes], nbytes); +} + +static void wbuf_nocrc_bytes (struct wbuf *w, const void *bytes_bv, uint32_t nbytes) { + wbuf_nocrc_uint(w, nbytes); + wbuf_nocrc_literal_bytes(w, bytes_bv, nbytes); +} + +static void wbuf_bytes (struct wbuf *w, const void *bytes_bv, uint32_t nbytes) { + wbuf_uint(w, nbytes); + wbuf_literal_bytes(w, bytes_bv, nbytes); +} + +static void wbuf_nocrc_ulonglong (struct wbuf *w, uint64_t ull) { + wbuf_nocrc_uint(w, (uint32_t)(ull>>32)); + wbuf_nocrc_uint(w, (uint32_t)(ull&0xFFFFFFFF)); +} + +static void wbuf_ulonglong (struct wbuf *w, uint64_t ull) { + wbuf_uint(w, (uint32_t)(ull>>32)); + wbuf_uint(w, (uint32_t)(ull&0xFFFFFFFF)); +} + +static inline void wbuf_nocrc_uint64_t(struct wbuf *w, uint64_t ull) { + wbuf_nocrc_ulonglong(w, ull); +} + + +static inline void wbuf_uint64_t(struct wbuf *w, uint64_t ull) { + wbuf_ulonglong(w, ull); +} + +static inline void wbuf_nocrc_bool (struct wbuf *w, bool b) { + wbuf_nocrc_uint8_t(w, (uint8_t)(b ? 
1 : 0)); +} + +static inline void wbuf_nocrc_BYTESTRING (struct wbuf *w, BYTESTRING v) { + wbuf_nocrc_bytes(w, v.data, v.len); +} + +static inline void wbuf_BYTESTRING (struct wbuf *w, BYTESTRING v) { + wbuf_bytes(w, v.data, v.len); +} + +static inline void wbuf_uint8_t (struct wbuf *w, uint8_t v) { + wbuf_char(w, v); +} + +static inline void wbuf_nocrc_uint32_t (struct wbuf *w, uint32_t v) { + wbuf_nocrc_uint(w, v); +} + +static inline void wbuf_uint32_t (struct wbuf *w, uint32_t v) { + wbuf_uint(w, v); +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/workset.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/workset.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/serialize/workset.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/serialize/workset.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,189 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include +#include + +// The work struct is the base class for work to be done by some threads +struct work { + struct toku_list next; +}; + +// The workset struct contains the set of work to be done by some threads +struct workset { + toku_mutex_t lock; + struct toku_list worklist; // a list of work + int refs; // number of workers that have a reference on the workset + toku_cond_t worker_wait; // a condition variable used to wait for all of the worker to release their reference on the workset +}; + +static inline void +workset_init(struct workset *ws) { + toku_mutex_init(&ws->lock, NULL); + toku_list_init(&ws->worklist); + ws->refs = 1; // the calling thread gets a reference + toku_cond_init(&ws->worker_wait, NULL); +} + +static inline void +workset_destroy(struct workset *ws) { + invariant(toku_list_empty(&ws->worklist)); + toku_cond_destroy(&ws->worker_wait); + toku_mutex_destroy(&ws->lock); +} + +static inline void +workset_lock(struct workset *ws) { + toku_mutex_lock(&ws->lock); +} + +static inline void +workset_unlock(struct workset *ws) { + toku_mutex_unlock(&ws->lock); +} + +// Put work in the workset. Assume the workset is already locked. 
+static inline void +workset_put_locked(struct workset *ws, struct work *w) { + toku_list_push(&ws->worklist, &w->next); +} + +// Put work in the workset +static inline void +workset_put(struct workset *ws, struct work *w) { + workset_lock(ws); + workset_put_locked(ws, w); + workset_unlock(ws); +} + +// Get work from the workset +static inline struct work * +workset_get(struct workset *ws) { + workset_lock(ws); + struct work *w = NULL; + if (!toku_list_empty(&ws->worklist)) { + struct toku_list *l = toku_list_pop_head(&ws->worklist); + w = toku_list_struct(l, struct work, next); + } + workset_unlock(ws); + return w; +} + +// Add references to the workset +static inline void +workset_add_ref(struct workset *ws, int refs) { + workset_lock(ws); + ws->refs += refs; + workset_unlock(ws); +} + +// Release a reference on the workset +static inline void +workset_release_ref(struct workset *ws) { + workset_lock(ws); + if (--ws->refs == 0) { + toku_cond_broadcast(&ws->worker_wait); + } + workset_unlock(ws); +} + +// Wait until all of the worker threads have released their reference on the workset +static inline void +workset_join(struct workset *ws) { + workset_lock(ws); + while (ws->refs != 0) { + toku_cond_wait(&ws->worker_wait, &ws->lock); + } + workset_unlock(ws); +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/sub_block.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/sub_block.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/sub_block.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/sub_block.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,442 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that 
the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. 
- - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include "compress.h" -#include "sub_block.h" -#include "quicklz.h" - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -SUB_BLOCK sub_block_creat(void) { - SUB_BLOCK XMALLOC(sb); - sub_block_init(sb); - return sb; -} -void sub_block_init(SUB_BLOCK sub_block) { - sub_block->uncompressed_ptr = 0; - sub_block->uncompressed_size = 0; - - sub_block->compressed_ptr = 0; - sub_block->compressed_size_bound = 0; - sub_block->compressed_size = 0; - - sub_block->xsum = 0; -} - -// get the size of the compression header -size_t -sub_block_header_size(int n_sub_blocks) { - return sizeof (uint32_t) + n_sub_blocks * sizeof (struct stored_sub_block); -} - -void -set_compressed_size_bound(struct sub_block *se, enum toku_compression_method method) { - se->compressed_size_bound = toku_compress_bound(method, se->uncompressed_size); -} - -// get the sum of the sub block compressed sizes -size_t -get_sum_compressed_size_bound(int n_sub_blocks, struct sub_block sub_block[], enum toku_compression_method method) { - size_t compressed_size_bound = 0; - for (int i = 0; i < n_sub_blocks; i++) { - sub_block[i].compressed_size_bound = toku_compress_bound(method, sub_block[i].uncompressed_size); - compressed_size_bound += sub_block[i].compressed_size_bound; - } - return compressed_size_bound; -} - -// get the sum of the sub block uncompressed sizes -size_t -get_sum_uncompressed_size(int n_sub_blocks, struct sub_block sub_block[]) { - size_t uncompressed_size = 0; - for (int i = 0; i < n_sub_blocks; i++) - uncompressed_size += sub_block[i].uncompressed_size; - return uncompressed_size; -} - -// round up n -static inline int -alignup32(int a, int b) { - return ((a+b-1) / b) * b; -} - -// Choose n_sub_blocks and sub_block_size such that the product is >= total_size and the sub_block_size is at -// least >= the target_sub_block_size. 
-int -choose_sub_block_size(int total_size, int n_sub_blocks_limit, int *sub_block_size_ret, int *n_sub_blocks_ret) { - if (total_size < 0 || n_sub_blocks_limit < 1) - return EINVAL; - - const int alignment = 32; - - int n_sub_blocks, sub_block_size; - n_sub_blocks = total_size / target_sub_block_size; - if (n_sub_blocks <= 1) { - if (total_size > 0 && n_sub_blocks_limit > 0) - n_sub_blocks = 1; - sub_block_size = total_size; - } else { - if (n_sub_blocks > n_sub_blocks_limit) // limit the number of sub-blocks - n_sub_blocks = n_sub_blocks_limit; - sub_block_size = alignup32(total_size / n_sub_blocks, alignment); - while (sub_block_size * n_sub_blocks < total_size) // round up the sub-block size until big enough - sub_block_size += alignment; - } - - *sub_block_size_ret = sub_block_size; - *n_sub_blocks_ret = n_sub_blocks; - - return 0; -} - -// Choose the right size of basement nodes. For now, just align up to -// 256k blocks and hope it compresses well enough. -int -choose_basement_node_size(int total_size, int *sub_block_size_ret, int *n_sub_blocks_ret) { - if (total_size < 0) - return EINVAL; - - *n_sub_blocks_ret = (total_size + max_basement_node_uncompressed_size - 1) / max_basement_node_uncompressed_size; - *sub_block_size_ret = max_basement_node_uncompressed_size; - - return 0; -} - -void -set_all_sub_block_sizes(int total_size, int sub_block_size, int n_sub_blocks, struct sub_block sub_block[]) { - int size_left = total_size; - int i; - for (i = 0; i < n_sub_blocks-1; i++) { - sub_block[i].uncompressed_size = sub_block_size; - size_left -= sub_block_size; - } - if (i == 0 || size_left > 0) - sub_block[i].uncompressed_size = size_left; -} - -// find the index of the first sub block that contains offset -// Returns the sub block index, else returns -1 -int -get_sub_block_index(int n_sub_blocks, struct sub_block sub_block[], size_t offset) { - size_t start_offset = 0; - for (int i = 0; i < n_sub_blocks; i++) { - size_t size = sub_block[i].uncompressed_size; - 
if (offset < start_offset + size) - return i; - start_offset += size; - } - return -1; -} - -#include "workset.h" - -void -compress_work_init(struct compress_work *w, enum toku_compression_method method, struct sub_block *sub_block) { - w->method = method; - w->sub_block = sub_block; -} - -// -// takes the uncompressed contents of sub_block -// and compresses them into sb_compressed_ptr -// cs_bound is the compressed size bound -// Returns the size of the compressed data -// -uint32_t -compress_nocrc_sub_block( - struct sub_block *sub_block, - void* sb_compressed_ptr, - uint32_t cs_bound, - enum toku_compression_method method - ) -{ - // compress it - Bytef *uncompressed_ptr = (Bytef *) sub_block->uncompressed_ptr; - Bytef *compressed_ptr = (Bytef *) sb_compressed_ptr; - uLongf uncompressed_len = sub_block->uncompressed_size; - uLongf real_compressed_len = cs_bound; - toku_compress(method, - compressed_ptr, &real_compressed_len, - uncompressed_ptr, uncompressed_len); - return real_compressed_len; -} - -void -compress_sub_block(struct sub_block *sub_block, enum toku_compression_method method) { - sub_block->compressed_size = compress_nocrc_sub_block( - sub_block, - sub_block->compressed_ptr, - sub_block->compressed_size_bound, - method - ); - // checksum it - sub_block->xsum = toku_x1764_memory(sub_block->compressed_ptr, sub_block->compressed_size); -} - -void * -compress_worker(void *arg) { - struct workset *ws = (struct workset *) arg; - while (1) { - struct compress_work *w = (struct compress_work *) workset_get(ws); - if (w == NULL) - break; - compress_sub_block(w->sub_block, w->method); - } - workset_release_ref(ws); - return arg; -} - -size_t -compress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], char *uncompressed_ptr, char *compressed_ptr, int num_cores, struct toku_thread_pool *pool, enum toku_compression_method method) { - char *compressed_base_ptr = compressed_ptr; - size_t compressed_len; - - // This is a complex way to write a parallel 
loop. Cilk would be better. - - if (n_sub_blocks == 1) { - // single sub-block - sub_block[0].uncompressed_ptr = uncompressed_ptr; - sub_block[0].compressed_ptr = compressed_ptr; - compress_sub_block(&sub_block[0], method); - compressed_len = sub_block[0].compressed_size; - } else { - // multiple sub-blocks - int T = num_cores; // T = min(num_cores, n_sub_blocks) - 1 - if (T > n_sub_blocks) - T = n_sub_blocks; - if (T > 0) - T = T - 1; // threads in addition to the running thread - - struct workset ws; - ZERO_STRUCT(ws); - workset_init(&ws); - - struct compress_work work[n_sub_blocks]; - workset_lock(&ws); - for (int i = 0; i < n_sub_blocks; i++) { - sub_block[i].uncompressed_ptr = uncompressed_ptr; - sub_block[i].compressed_ptr = compressed_ptr; - compress_work_init(&work[i], method, &sub_block[i]); - workset_put_locked(&ws, &work[i].base); - uncompressed_ptr += sub_block[i].uncompressed_size; - compressed_ptr += sub_block[i].compressed_size_bound; - } - workset_unlock(&ws); - - // compress the sub-blocks - if (0) printf("%s:%d T=%d N=%d\n", __FUNCTION__, __LINE__, T, n_sub_blocks); - toku_thread_pool_run(pool, 0, &T, compress_worker, &ws); - workset_add_ref(&ws, T); - compress_worker(&ws); - - // wait for all of the work to complete - workset_join(&ws); - workset_destroy(&ws); - - // squeeze out the holes not used by the compress bound - compressed_ptr = compressed_base_ptr + sub_block[0].compressed_size; - for (int i = 1; i < n_sub_blocks; i++) { - memmove(compressed_ptr, sub_block[i].compressed_ptr, sub_block[i].compressed_size); - compressed_ptr += sub_block[i].compressed_size; - } - - compressed_len = compressed_ptr - compressed_base_ptr; - } - return compressed_len; -} - -// initialize the decompression work -void -decompress_work_init(struct decompress_work *dw, - void *compress_ptr, uint32_t compress_size, - void *uncompress_ptr, uint32_t uncompress_size, - uint32_t xsum) { - dw->compress_ptr = compress_ptr; - dw->compress_size = compress_size; - 
dw->uncompress_ptr = uncompress_ptr; - dw->uncompress_size = uncompress_size; - dw->xsum = xsum; - dw->error = 0; -} - -int verbose_decompress_sub_block = 1; - -// decompress one block -int -decompress_sub_block(void *compress_ptr, uint32_t compress_size, void *uncompress_ptr, uint32_t uncompress_size, uint32_t expected_xsum) { - int result = 0; - - // verify checksum - uint32_t xsum = toku_x1764_memory(compress_ptr, compress_size); - if (xsum != expected_xsum) { - if (verbose_decompress_sub_block) fprintf(stderr, "%s:%d xsum %u expected %u\n", __FUNCTION__, __LINE__, xsum, expected_xsum); - result = EINVAL; - } else { - // decompress - toku_decompress((Bytef *) uncompress_ptr, uncompress_size, (Bytef *) compress_ptr, compress_size); - } - return result; -} - -// decompress blocks until there is no more work to do -void * -decompress_worker(void *arg) { - struct workset *ws = (struct workset *) arg; - while (1) { - struct decompress_work *dw = (struct decompress_work *) workset_get(ws); - if (dw == NULL) - break; - dw->error = decompress_sub_block(dw->compress_ptr, dw->compress_size, dw->uncompress_ptr, dw->uncompress_size, dw->xsum); - } - workset_release_ref(ws); - return arg; -} - -int -decompress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], unsigned char *compressed_data, unsigned char *uncompressed_data, int num_cores, struct toku_thread_pool *pool) { - int r; - - if (n_sub_blocks == 1) { - r = decompress_sub_block(compressed_data, sub_block[0].compressed_size, uncompressed_data, sub_block[0].uncompressed_size, sub_block[0].xsum); - } else { - // compute the number of additional threads needed for decompressing this node - int T = num_cores; // T = min(#cores, #blocks) - 1 - if (T > n_sub_blocks) - T = n_sub_blocks; - if (T > 0) - T = T - 1; // threads in addition to the running thread - - // init the decompression work set - struct workset ws; - ZERO_STRUCT(ws); - workset_init(&ws); - - // initialize the decompression work and add to the 
work set - struct decompress_work decompress_work[n_sub_blocks]; - workset_lock(&ws); - for (int i = 0; i < n_sub_blocks; i++) { - decompress_work_init(&decompress_work[i], compressed_data, sub_block[i].compressed_size, uncompressed_data, sub_block[i].uncompressed_size, sub_block[i].xsum); - workset_put_locked(&ws, &decompress_work[i].base); - - uncompressed_data += sub_block[i].uncompressed_size; - compressed_data += sub_block[i].compressed_size; - } - workset_unlock(&ws); - - // decompress the sub-blocks - if (0) printf("%s:%d Cores=%d Blocks=%d T=%d\n", __FUNCTION__, __LINE__, num_cores, n_sub_blocks, T); - toku_thread_pool_run(pool, 0, &T, decompress_worker, &ws); - workset_add_ref(&ws, T); - decompress_worker(&ws); - - // cleanup - workset_join(&ws); - workset_destroy(&ws); - - r = 0; - for (int i = 0; i < n_sub_blocks; i++) { - r = decompress_work[i].error; - if (r != 0) - break; - } - } - - return r; -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/sub_block.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/sub_block.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/sub_block.h 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/sub_block.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,217 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_SUB_BLOCK_H -#define TOKU_SUB_BLOCK_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include "compress.h" -#include "fttypes.h" - - -static const int max_sub_blocks = 8; -static const int target_sub_block_size = 512*1024; -static const int max_basement_nodes = 32; -static const int max_basement_node_uncompressed_size = 256*1024; -static const int max_basement_node_compressed_size = 64*1024; - -struct sub_block { - void *uncompressed_ptr; - uint32_t uncompressed_size; - - void *compressed_ptr; - uint32_t compressed_size; // real compressed size - uint32_t compressed_size_bound; // estimated compressed size - - uint32_t xsum; // sub block checksum -}; - -struct stored_sub_block { - uint32_t uncompressed_size; - uint32_t compressed_size; - uint32_t xsum; -}; - -void sub_block_init(SUB_BLOCK); -SUB_BLOCK sub_block_creat(void); - -// get the size of the compression header -size_t -sub_block_header_size(int n_sub_blocks); - -void -set_compressed_size_bound(struct sub_block *se, enum toku_compression_method method); - -// get the sum of the sub block compressed bound sizes -size_t -get_sum_compressed_size_bound(int n_sub_blocks, struct sub_block sub_block[], enum toku_compression_method method); - -// get the sum of the sub block uncompressed sizes -size_t -get_sum_uncompressed_size(int n_sub_blocks, struct sub_block sub_block[]); - -// Choose n_sub_blocks and sub_block_size such that the product is >= total_size and the sub_block_size is at -// least >= the target_sub_block_size. 
-int -choose_sub_block_size(int total_size, int n_sub_blocks_limit, int *sub_block_size_ret, int *n_sub_blocks_ret); - -int -choose_basement_node_size(int total_size, int *sub_block_size_ret, int *n_sub_blocks_ret); - -void -set_all_sub_block_sizes(int total_size, int sub_block_size, int n_sub_blocks, struct sub_block sub_block[]); - -// find the index of the first sub block that contains the offset -// Returns the index if found, else returns -1 -int -get_sub_block_index(int n_sub_blocks, struct sub_block sub_block[], size_t offset); - -#include "workset.h" - -struct compress_work { - struct work base; - enum toku_compression_method method; - struct sub_block *sub_block; -}; - -void -compress_work_init(struct compress_work *w, enum toku_compression_method method, struct sub_block *sub_block); - -uint32_t -compress_nocrc_sub_block( - struct sub_block *sub_block, - void* sb_compressed_ptr, - uint32_t cs_bound, - enum toku_compression_method method - ); - -void -compress_sub_block(struct sub_block *sub_block, enum toku_compression_method method); - -void * -compress_worker(void *arg); - -size_t -compress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], char *uncompressed_ptr, char *compressed_ptr, int num_cores, struct toku_thread_pool *pool, enum toku_compression_method method); - -struct decompress_work { - struct work base; - void *compress_ptr; - void *uncompress_ptr; - uint32_t compress_size; - uint32_t uncompress_size; - uint32_t xsum; - int error; -}; - -// initialize the decompression work -void -decompress_work_init(struct decompress_work *dw, - void *compress_ptr, uint32_t compress_size, - void *uncompress_ptr, uint32_t uncompress_size, - uint32_t xsum); - -// decompress one block -int -decompress_sub_block(void *compress_ptr, uint32_t compress_size, void *uncompress_ptr, uint32_t uncompress_size, uint32_t expected_xsum); - -// decompress blocks until there is no more work to do -void * -decompress_worker(void *arg); - -// decompress all sub 
blocks from the compressed_data buffer to the uncompressed_data buffer -// Returns 0 if success, otherwise an error -int -decompress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], unsigned char *compressed_data, unsigned char *uncompressed_data, int num_cores, struct toku_thread_pool *pool); - -extern int verbose_decompress_sub_block; - - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/sub_block_map.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/sub_block_map.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/sub_block_map.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/sub_block_map.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,127 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef _TOKU_SUB_BLOCK_MAP_H -#define _TOKU_SUB_BLOCK_MAP_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. 
- -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -// Map objects to a sequence of sub block -struct sub_block_map { - uint32_t idx; - uint32_t offset; - uint32_t size; -}; - -enum { - stored_sub_block_map_size = sizeof (struct sub_block_map), // size of a sub-block map on disk -}; - -static inline void -sub_block_map_init(struct sub_block_map *sbmap, uint32_t idx, uint32_t offset, uint32_t size) { - sbmap->idx = idx; - sbmap->offset = offset; - sbmap->size = size; -} - -static inline void -sub_block_map_serialize(struct sub_block_map *sbmap, struct wbuf *wbuf) { - wbuf_nocrc_int(wbuf, sbmap->idx); - wbuf_nocrc_int(wbuf, sbmap->offset); - wbuf_nocrc_int(wbuf, sbmap->size); -} - -static inline void -sub_block_map_deserialize(struct sub_block_map *sbmap, struct rbuf *rbuf) { - sbmap->idx = rbuf_int(rbuf); - sbmap->offset = rbuf_int(rbuf); - sbmap->size = rbuf_int(rbuf); -} - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tdb_logprint.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tdb_logprint.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tdb_logprint.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tdb_logprint.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,125 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -/* Dump the log from stdin to stdout. 
*/ - -#include - -static void newmain (int count) { - int i; - uint32_t version; - int r = toku_read_and_print_logmagic(stdin, &version); - for (i=0; i!=count; i++) { - r = toku_logprint_one_record(stdout, stdin); - if (r==EOF) break; - if (r!=0) { - fflush(stdout); - fprintf(stderr, "Problem in log err=%d\n", r); - exit(1); - } - } -} - -int main (int argc, char *const argv[]) { - int count=-1; - while (argc>1) { - if (strcmp(argv[1], "--oldcode")==0) { - fprintf(stderr,"Old code no longer works.\n"); - exit(1); - } else { - count = atoi(argv[1]); - } - argc--; argv++; - } - newmain(count); - return 0; -} - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tdb-recover.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tdb-recover.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tdb-recover.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tdb-recover.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,136 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -/* Recover an env. The logs are in argv[1]. The new database is created in the cwd. 
*/ - -// Test: -// cd ../src/tests/tmpdir -// ../../../ft/recover ../dir.test_log2.c.tdb - -#include "ft-ops.h" -#include "recover.h" - -static int recovery_main(int argc, const char *const argv[]); - -int -main(int argc, const char *const argv[]) { - { - int rr = toku_ft_layer_init(); - assert(rr==0); - } - int r = recovery_main(argc, argv); - toku_ft_layer_destroy(); - return r; -} - -int recovery_main (int argc, const char *const argv[]) { - const char *data_dir, *log_dir; - if (argc==3) { - data_dir = argv[1]; - log_dir = argv[2]; - } else if (argc==2) { - data_dir = log_dir = argv[1]; - } else { - printf("Usage: %s [ ]\n", argv[0]); - return(1); - } - - int r = tokudb_recover(NULL, - NULL_prepared_txn_callback, - NULL_keep_cachetable_callback, - NULL_logger, - data_dir, log_dir, NULL, NULL, NULL, NULL, 0); - if (r!=0) { - fprintf(stderr, "Recovery failed\n"); - return(1); - } - return 0; -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/benchmark-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/benchmark-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/benchmark-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/benchmark-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -119,8 +119,8 @@ static void setup (void) { int r; unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); - r = toku_open_ft_handle(fname, 1, &t, nodesize, basementnodesize, compression_method, ct, NULL_TXN, toku_builtin_compare_fun); assert(r==0); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); + r = toku_open_ft_handle(fname, 1, &t, nodesize, basementnodesize, compression_method, ct, nullptr, toku_builtin_compare_fun); assert(r==0); } static void toku_shutdown (void) { diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/block_allocator_strategy_test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/block_allocator_strategy_test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/block_allocator_strategy_test.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/block_allocator_strategy_test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,176 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +#include "ft/tests/test.h" + +#include "ft/serialize/block_allocator_strategy.h" + +static const uint64_t alignment = 4096; + +static void test_first_vs_best_fit(void) { + struct block_allocator::blockpair pairs[] = { + block_allocator::blockpair(1 * alignment, 6 * alignment), + // hole between 7x align -> 8x align + block_allocator::blockpair(8 * alignment, 4 * alignment), + // hole between 12x align -> 16x align + block_allocator::blockpair(16 * alignment, 1 * alignment), + block_allocator::blockpair(17 * alignment, 2 * alignment), + // hole between 19 align -> 21x align + block_allocator::blockpair(21 * alignment, 2 * alignment), + }; + const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]); + + block_allocator::blockpair *bp; + + // first fit + bp = block_allocator_strategy::first_fit(pairs, n_blocks, 100, alignment); + assert(bp == &pairs[0]); + bp = block_allocator_strategy::first_fit(pairs, n_blocks, 4096, alignment); + assert(bp == &pairs[0]); + bp = block_allocator_strategy::first_fit(pairs, n_blocks, 3 * 4096, alignment); + assert(bp == &pairs[1]); + bp = block_allocator_strategy::first_fit(pairs, n_blocks, 5 * 4096, alignment); + assert(bp == nullptr); + + // best fit + bp = block_allocator_strategy::best_fit(pairs, n_blocks, 100, alignment); + assert(bp == &pairs[0]); + bp = block_allocator_strategy::best_fit(pairs, n_blocks, 4100, alignment); + assert(bp == &pairs[3]); + bp = block_allocator_strategy::best_fit(pairs, n_blocks, 3 * 4096, alignment); + assert(bp == &pairs[1]); + bp = block_allocator_strategy::best_fit(pairs, n_blocks, 5 * 4096, alignment); + assert(bp == nullptr); +} + +static void test_padded_fit(void) { + struct block_allocator::blockpair pairs[] = { + block_allocator::blockpair(1 * alignment, 1 * alignment), + // 4096 byte hole after bp[0] + block_allocator::blockpair(3 * alignment, 1 * alignment), + // 8192 byte hole after bp[1] + block_allocator::blockpair(6 * alignment, 1 * alignment), + // 16384 byte hole after bp[2] + 
block_allocator::blockpair(11 * alignment, 1 * alignment), + // 32768 byte hole after bp[3] + block_allocator::blockpair(17 * alignment, 1 * alignment), + // 116kb hole after bp[4] + block_allocator::blockpair(113 * alignment, 1 * alignment), + // 256kb hole after bp[5] + block_allocator::blockpair(371 * alignment, 1 * alignment), + }; + const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]); + + block_allocator::blockpair *bp; + + // padding for a 100 byte allocation will be < than standard alignment, + // so it should fit in the first 4096 byte hole. + bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 4000, alignment); + assert(bp == &pairs[0]); + + // Even padded, a 12kb alloc will fit in a 16kb hole + bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 3 * alignment, alignment); + assert(bp == &pairs[2]); + + // would normally fit in the 116kb hole but the padding will bring it over + bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 116 * alignment, alignment); + assert(bp == &pairs[5]); + + bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 127 * alignment, alignment); + assert(bp == &pairs[5]); +} + +int test_main(int argc, const char *argv[]) { + (void) argc; + (void) argv; + + test_first_vs_best_fit(); + test_padded_fit(); + + return 0; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/block_allocator_test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/block_allocator_test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/block_allocator_test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/block_allocator_test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,75 +88,48 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
- #include "test.h" -static void ba_alloc_at (BLOCK_ALLOCATOR ba, uint64_t size, uint64_t offset) { - block_allocator_validate(ba); - block_allocator_alloc_block_at(ba, size*512, offset*512); - block_allocator_validate(ba); -} - -static void ba_alloc (BLOCK_ALLOCATOR ba, uint64_t size, uint64_t *answer) { - block_allocator_validate(ba); +static void ba_alloc(block_allocator *ba, uint64_t size, uint64_t *answer) { + ba->validate(); uint64_t actual_answer; - block_allocator_alloc_block(ba, 512*size, &actual_answer); - block_allocator_validate(ba); + const uint64_t heat = random() % 2; + ba->alloc_block(512 * size, heat, &actual_answer); + ba->validate(); + assert(actual_answer%512==0); *answer = actual_answer/512; } -static void ba_free (BLOCK_ALLOCATOR ba, uint64_t offset) { - block_allocator_validate(ba); - block_allocator_free_block(ba, offset*512); - block_allocator_validate(ba); +static void ba_free(block_allocator *ba, uint64_t offset) { + ba->validate(); + ba->free_block(offset * 512); + ba->validate(); } -static void -ba_check_l (BLOCK_ALLOCATOR ba, uint64_t blocknum_in_layout_order, uint64_t expected_offset, uint64_t expected_size) -{ +static void ba_check_l(block_allocator *ba, uint64_t blocknum_in_layout_order, + uint64_t expected_offset, uint64_t expected_size) { uint64_t actual_offset, actual_size; - int r = block_allocator_get_nth_block_in_layout_order(ba, blocknum_in_layout_order, &actual_offset, &actual_size); + int r = ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size); assert(r==0); assert(expected_offset*512 == actual_offset); assert(expected_size *512 == actual_size); } -static void -ba_check_none (BLOCK_ALLOCATOR ba, uint64_t blocknum_in_layout_order) -{ +static void ba_check_none(block_allocator *ba, uint64_t blocknum_in_layout_order) { uint64_t actual_offset, actual_size; - int r = block_allocator_get_nth_block_in_layout_order(ba, blocknum_in_layout_order, &actual_offset, &actual_size); + int r = 
ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size); assert(r==-1); } // Simple block allocator test -static void -test_ba0 (void) { - BLOCK_ALLOCATOR ba; - uint64_t b0, b1; - create_block_allocator(&ba, 100*512, 1*512); - assert(block_allocator_allocated_limit(ba)==100*512); - ba_alloc_at(ba, 50, 100); - assert(block_allocator_allocated_limit(ba)==150*512); - ba_alloc_at(ba, 25, 150); - ba_alloc (ba, 10, &b0); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, 100, 50); - ba_check_l (ba, 2, 150, 25); - ba_check_l (ba, 3, b0, 10); - ba_check_none (ba, 4); - assert(b0==175); - ba_free(ba, 150); - ba_alloc_at(ba, 10, 150); - ba_alloc(ba, 10, &b0); - assert(b0==160); - ba_alloc(ba, 10, &b0); - ba_alloc(ba, 113, &b1); - assert(113*512==block_allocator_block_size(ba, b1 *512)); - assert(10 *512==block_allocator_block_size(ba, b0 *512)); - assert(50 *512==block_allocator_block_size(ba, 100*512)); +static void test_ba0(block_allocator::allocation_strategy strategy) { + block_allocator allocator; + block_allocator *ba = &allocator; + ba->create(100*512, 1*512); + ba->set_strategy(strategy); + assert(ba->allocated_limit()==100*512); uint64_t b2, b3, b4, b5, b6, b7; ba_alloc(ba, 100, &b2); @@ -183,27 +156,28 @@ ba_free(ba, b4); ba_alloc(ba, 100, &b4); - destroy_block_allocator(&ba); - assert(ba==0); + ba->destroy(); } // Manually to get coverage of all the code in the block allocator. 
static void -test_ba1 (int n_initial) { - BLOCK_ALLOCATOR ba; - create_block_allocator(&ba, 0*512, 1*512); - int i; +test_ba1(block_allocator::allocation_strategy strategy, int n_initial) { + block_allocator allocator; + block_allocator *ba = &allocator; + ba->create(0*512, 1*512); + ba->set_strategy(strategy); + int n_blocks=0; uint64_t blocks[1000]; - for (i=0; i<1000; i++) { - if (i0) { + if (n_blocks > 0) { int blocknum = random()%n_blocks; //printf("F[%d]%ld\n", blocknum, blocks[blocknum]); ba_free(ba, blocks[blocknum]); @@ -213,19 +187,21 @@ } } - destroy_block_allocator(&ba); - assert(ba==0); + ba->destroy(); } // Check to see if it is first fit or best fit. static void test_ba2 (void) { - BLOCK_ALLOCATOR ba; + block_allocator allocator; + block_allocator *ba = &allocator; uint64_t b[6]; enum { BSIZE = 1024 }; - create_block_allocator(&ba, 100*512, BSIZE*512); - assert(block_allocator_allocated_limit(ba)==100*512); + ba->create(100*512, BSIZE*512); + ba->set_strategy(block_allocator::BA_STRATEGY_FIRST_FIT); + assert(ba->allocated_limit()==100*512); + ba_check_l (ba, 0, 0, 100); ba_check_none (ba, 1); @@ -234,16 +210,16 @@ ba_check_l (ba, 1, BSIZE, 100); ba_check_none (ba, 2); - ba_alloc (ba, BSIZE+100, &b[1]); + ba_alloc (ba, BSIZE + 100, &b[1]); ba_check_l (ba, 0, 0, 100); ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_none (ba, 3); ba_alloc (ba, 100, &b[2]); ba_check_l (ba, 0, 0, 100); ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 3, 4*BSIZE, 100); ba_check_none (ba, 4); @@ -252,7 +228,7 @@ ba_alloc (ba, 100, &b[5]); ba_check_l (ba, 0, 0, 100); ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 3, 4*BSIZE, 100); ba_check_l (ba, 4, 5*BSIZE, 100); ba_check_l (ba, 5, 6*BSIZE, 100); @@ -262,7 +238,7 @@ ba_free 
(ba, 4*BSIZE); ba_check_l (ba, 0, 0, 100); ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 3, 5*BSIZE, 100); ba_check_l (ba, 4, 6*BSIZE, 100); ba_check_l (ba, 5, 7*BSIZE, 100); @@ -273,7 +249,7 @@ assert(b2==4*BSIZE); ba_check_l (ba, 0, 0, 100); ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 3, 4*BSIZE, 100); ba_check_l (ba, 4, 5*BSIZE, 100); ba_check_l (ba, 5, 6*BSIZE, 100); @@ -283,7 +259,7 @@ ba_free (ba, BSIZE); ba_free (ba, 5*BSIZE); ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 1, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 2, 4*BSIZE, 100); ba_check_l (ba, 3, 6*BSIZE, 100); ba_check_l (ba, 4, 7*BSIZE, 100); @@ -301,7 +277,7 @@ assert(b5==5*BSIZE); ba_check_l (ba, 0, 0, 100); ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 3, 4*BSIZE, 100); ba_check_l (ba, 4, 5*BSIZE, 100); ba_check_l (ba, 5, 6*BSIZE, 100); @@ -318,7 +294,7 @@ assert(b8==10*BSIZE); ba_check_l (ba, 0, 0, 100); ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 3, 4*BSIZE, 100); ba_check_l (ba, 4, 5*BSIZE, 100); ba_check_l (ba, 5, 6*BSIZE, 100); @@ -344,15 +320,23 @@ ba_alloc(ba, 100, &b11); assert(b11==5*BSIZE); - destroy_block_allocator(&ba); + ba->destroy(); } int test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { - test_ba0(); - test_ba1(0); - test_ba1(10); - test_ba1(20); + enum block_allocator::allocation_strategy strategies[] = { + block_allocator::BA_STRATEGY_FIRST_FIT, + block_allocator::BA_STRATEGY_BEST_FIT, + block_allocator::BA_STRATEGY_PADDED_FIT, + block_allocator::BA_STRATEGY_HEAT_ZONE, + }; + for (size_t i = 0; i < sizeof(strategies) / sizeof(strategies[0]); 
i++) { + test_ba0(strategies[i]); + test_ba1(strategies[i], 0); + test_ba1(strategies[i], 10); + test_ba1(strategies[i], 20); + } test_ba2(); return 0; } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/bnc-insert-benchmark.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/bnc-insert-benchmark.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/bnc-insert-benchmark.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/bnc-insert-benchmark.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -127,9 +127,9 @@ *p = (rand() & 0xff); } } - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; - int r = xids_create_child(xids_0, &xids_123, (TXNID)123); + int r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); NONLEAF_CHILDINFO bnc; @@ -137,6 +137,9 @@ struct timeval t[2]; gettimeofday(&t[0], NULL); + toku::comparator cmp; + cmp.create(long_key_cmp, nullptr); + for (unsigned int i = 0; i < repeat; ++i) { bnc = toku_create_empty_nl(); for (; toku_bnc_nbytesinbuf(bnc) <= nodesize; ++cur) { @@ -144,7 +147,7 @@ &keys[cur % 1024], sizeof keys[cur % 1024], vals[cur % 1024], eltsize - (sizeof keys[cur % 1024]), FT_NONE, next_dummymsn(), xids_123, true, - NULL, long_key_cmp); assert_zero(r); + cmp); assert_zero(r); } nbytesinserted += toku_bnc_nbytesinbuf(bnc); destroy_nonleaf_childinfo(bnc); @@ -157,6 +160,8 @@ long long unsigned eltrate = (long) (cur / dt); printf("%0.03lf MB/sec\n", mbrate); printf("%llu elts/sec\n", eltrate); + + cmp.destroy(); } int diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-4357.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-4357.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-4357.cc 2014-08-03 
12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-4357.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -115,7 +115,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-4365.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-4365.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-4365.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-4365.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -136,7 +136,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-5097.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-5097.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-5097.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-5097.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -169,7 +169,7 @@ check_flush = false; dirty_flush_called = false; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); evictor_test_helpers::disable_ev_thread(&ct->ev); // disable eviction thread toku_os_recursive_delete(TOKU_TEST_FILENAME); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-5978-2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-5978-2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-5978-2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-5978-2.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -210,7 +210,7 @@ int r; toku_pair_list_set_lock_size(2); // set two bucket mutexes CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-5978.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-5978.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-5978.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-5978.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -227,7 +227,7 @@ int r; toku_pair_list_set_lock_size(2); // set two bucket mutexes CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-all-write.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-all-write.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-all-write.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-all-write.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -116,7 +116,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-checkpointer-class.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-checkpointer-class.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-checkpointer-class.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-checkpointer-class.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +89,7 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "test.h" -#include "cachetable-internal.h" +#include "cachetable/cachetable-internal.h" #include "cachetable-test.h" // @@ -112,6 +112,14 @@ uint32_t k); }; +static void init_cachefile(CACHEFILE cf, int which_cf, bool for_checkpoint) { + memset(cf, 0, sizeof(*cf)); + create_dummy_functions(cf); + cf->fileid = { 0, (unsigned) which_cf }; + cf->filenum = { (unsigned) which_cf }; + cf->for_checkpoint = for_checkpoint; +} + //------------------------------------------------------------------------------ // test_begin_checkpoint() - // @@ -135,33 +143,28 @@ // 2. Call checkpoint with ONE cachefile. //cachefile cf; struct cachefile cf; - cf.next = NULL; - cf.for_checkpoint = false; - m_cp.m_cf_list->m_active_head = &cf; - create_dummy_functions(&cf); + init_cachefile(&cf, 0, false); + m_cp.m_cf_list->add_cf_unlocked(&cf); m_cp.begin_checkpoint(); assert(m_cp.m_checkpoint_num_files == 1); assert(cf.for_checkpoint == true); + m_cp.m_cf_list->remove_cf(&cf); // 3. Call checkpoint with MANY cachefiles. 
const uint32_t count = 3; struct cachefile cfs[count]; - m_cp.m_cf_list->m_active_head = &cfs[0]; for (uint32_t i = 0; i < count; ++i) { - cfs[i].for_checkpoint = false; + init_cachefile(&cfs[i], i, false); create_dummy_functions(&cfs[i]); - if (i == count - 1) { - cfs[i].next = NULL; - } else { - cfs[i].next = &cfs[i + 1]; - } + m_cp.m_cf_list->add_cf_unlocked(&cfs[i]); } m_cp.begin_checkpoint(); assert(m_cp.m_checkpoint_num_files == count); for (uint32_t i = 0; i < count; ++i) { assert(cfs[i].for_checkpoint == true); + cfl.remove_cf(&cfs[i]); } ctbl.list.destroy(); m_cp.destroy(); @@ -195,10 +198,8 @@ // struct cachefile cf; cf.cachetable = &ctbl; - memset(&cf, 0, sizeof(cf)); - cf.next = NULL; - cf.for_checkpoint = true; - m_cp.m_cf_list->m_active_head = &cf; + init_cachefile(&cf, 0, true); + m_cp.m_cf_list->add_cf_unlocked(&cf); create_dummy_functions(&cf); CACHEKEY k; @@ -258,6 +259,7 @@ ctbl.list.destroy(); m_cp.destroy(); + cfl.remove_cf(&cf); cfl.destroy(); } @@ -337,14 +339,11 @@ cfl.init(); struct cachefile cf; - memset(&cf, 0, sizeof(cf)); - cf.next = NULL; - cf.for_checkpoint = true; - create_dummy_functions(&cf); + init_cachefile(&cf, 0, true); ZERO_STRUCT(m_cp); m_cp.init(&ctbl.list, NULL, &ctbl.ev, &cfl); - m_cp.m_cf_list->m_active_head = &cf; + m_cp.m_cf_list->add_cf_unlocked(&cf); // 2. Add data before running checkpoint. 
const uint32_t count = 6; @@ -394,6 +393,7 @@ assert(pp); m_cp.m_list->evict_completely(pp); } + cfl.remove_cf(&cf); m_cp.destroy(); ctbl.list.destroy(); cfl.destroy(); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-pending.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-pending.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-pending.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-pending.cc 2014-10-08 13:19:51.000000000 +0000 @@ -30,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -93,7 +93,7 @@ #include #include #include "cachetable-test.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #include static int N; // how many items in the table @@ -187,7 +187,7 @@ if (verbose) { printf("%s:%d n=%d\n", __FUNCTION__, __LINE__, N); fflush(stdout); } const int test_limit = N; int r; - toku_cachetable_create(&ct, test_limit*sizeof(int), ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit*sizeof(int), ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; r = unlink(fname1); if (r!=0) CKERR2(get_error_errno(), ENOENT); r = toku_cachetable_openf(&cf, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-pinned-nodes.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-pinned-nodes.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-pinned-nodes.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-pinned-nodes.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -152,7 +152,7 @@ const int test_limit = 20; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-prefetched-nodes.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-prefetched-nodes.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-prefetched-nodes.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-prefetched-nodes.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -154,7 +154,7 @@ const int test_limit = 20; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -93,7 +93,7 @@ #include -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static const int item_size = 1; @@ -145,7 +145,7 @@ const int test_limit = n; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-checkpoint2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-checkpoint2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-checkpoint2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-checkpoint2.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -147,7 +147,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-checkpoint.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-checkpoint.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-checkpoint.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-checkpoint.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -147,7 +147,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -143,7 +143,7 @@ int r; CACHETABLE ct; toku_mutex_init(&attr_mutex, NULL); - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-empty-cachetable.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-empty-cachetable.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-empty-cachetable.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-empty-cachetable.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -99,7 +99,7 @@ const int test_limit = 1000; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_set_cleaner_period(ct, 1); const char *fname1 = TOKU_TEST_FILENAME; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-everything-pinned.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-everything-pinned.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-everything-pinned.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-everything-pinned.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -111,7 +111,7 @@ const int test_limit = 1000; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_set_cleaner_period(ct, 1); const char *fname1 = TOKU_TEST_FILENAME; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-nothing-needs-flushing.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-nothing-needs-flushing.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-nothing-needs-flushing.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-nothing-needs-flushing.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -111,7 +111,7 @@ const int test_limit = 1000; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_set_cleaner_period(ct, 1); const char *fname1 = TOKU_TEST_FILENAME; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-same-fullhash.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-same-fullhash.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-same-fullhash.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-same-fullhash.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -119,7 +119,7 @@ const int test_limit = 1000; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); my_cleaner_callback_called = false; const char *fname1 = TOKU_TEST_FILENAME; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-simple.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-simple.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-simple.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-simple.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -119,7 +119,7 @@ const int test_limit = 1000; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_set_cleaner_period(ct, 1); my_cleaner_callback_called = false; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-clock-all-pinned.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-clock-all-pinned.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-clock-all-pinned.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-clock-all-pinned.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -96,7 +96,7 @@ int test_limit = 6; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction2.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -186,7 +186,7 @@ const int test_limit = 16; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction3.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction3.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -202,7 +202,7 @@ const int test_limit = 20; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); evictor_test_helpers::set_hysteresis_limits(&ct->ev, test_limit, 100*test_limit); evictor_test_helpers::disable_ev_thread(&ct->ev); const char *fname1 = TOKU_TEST_FILENAME; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction4.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction4.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction4.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction4.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -181,7 +181,7 @@ num_entries = 0; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -143,7 +143,7 @@ num_entries = 0; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-clone-checkpoint.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-clone-checkpoint.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-clone-checkpoint.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-clone-checkpoint.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -145,7 +145,7 @@ const int test_limit = 200; int r; ct = NULL; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-clone-partial-fetch.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-clone-partial-fetch.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-clone-partial-fetch.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-clone-partial-fetch.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -144,7 +144,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-clone-partial-fetch-pinned-node.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-clone-partial-fetch-pinned-node.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-clone-partial-fetch-pinned-node.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-clone-partial-fetch-pinned-node.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -139,7 +139,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-clone-pin-nonblocking.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-clone-pin-nonblocking.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-clone-pin-nonblocking.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-clone-pin-nonblocking.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -126,7 +126,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-clone-unpin-remove.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-clone-unpin-remove.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-clone-unpin-remove.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-clone-unpin-remove.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -137,7 +137,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-count-pinned-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-count-pinned-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-count-pinned-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-count-pinned-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -97,7 +97,7 @@ const int test_limit = 2*n; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-debug-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-debug-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-debug-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-debug-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -96,7 +96,7 @@ const int test_limit = n; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-eviction-close-test2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-eviction-close-test2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-eviction-close-test2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-eviction-close-test2.cc 2014-10-08 13:19:51.000000000 +0000 @@ -31,7 +31,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -168,7 +168,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-eviction-close-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-eviction-close-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-eviction-close-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-eviction-close-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -31,7 +31,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -155,7 +155,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-eviction-getandpin-test2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-eviction-getandpin-test2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-eviction-getandpin-test2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-eviction-getandpin-test2.cc 2014-10-08 13:19:51.000000000 +0000 @@ -31,7 +31,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -130,7 +130,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); evictor_test_helpers::disable_ev_thread(&ct->ev); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-eviction-getandpin-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-eviction-getandpin-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-eviction-getandpin-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-eviction-getandpin-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -31,7 +31,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -123,7 +123,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); evictor_test_helpers::disable_ev_thread(&ct->ev); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-evictor-class.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-evictor-class.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-evictor-class.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-evictor-class.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +89,7 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "test.h" -#include "cachetable-internal.h" +#include "cachetable/cachetable-internal.h" class evictor_unit_test { public: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-fd-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-fd-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-fd-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-fd-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -98,7 +98,7 @@ const int test_limit = 1; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_os_recursive_delete(TOKU_TEST_FILENAME); r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU); assert_zero(r); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-fetch-inducing-evictor.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-fetch-inducing-evictor.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-fetch-inducing-evictor.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-fetch-inducing-evictor.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -113,7 +113,7 @@ const int test_limit = 7; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); evictor_test_helpers::set_hysteresis_limits(&ct->ev, test_limit, test_limit); evictor_test_helpers::disable_ev_thread(&ct->ev); const char *fname1 = TOKU_TEST_FILENAME; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-flush-during-cleaner.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-flush-during-cleaner.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-flush-during-cleaner.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-flush-during-cleaner.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -117,7 +117,7 @@ const int test_limit = 400; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_set_cleaner_period(ct, 1); const char *fname1 = TOKU_TEST_FILENAME; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-flush-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-flush-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-flush-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-flush-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -97,7 +97,7 @@ const int test_limit = 2*n; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_os_recursive_delete(TOKU_TEST_FILENAME); r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU); assert_zero(r); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-getandpin-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-getandpin-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-getandpin-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-getandpin-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -134,7 +134,7 @@ const int test_limit = 1024*1024; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-kibbutz_and_flush_cachefile.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-kibbutz_and_flush_cachefile.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-kibbutz_and_flush_cachefile.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-kibbutz_and_flush_cachefile.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -114,7 +114,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-partial-fetch.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-partial-fetch.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-partial-fetch.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-partial-fetch.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -166,7 +166,7 @@ int r; CACHETABLE ct; bool doing_prefetch = false; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; @@ -215,7 +215,7 @@ // close and reopen cachefile so we can do some simple prefetch tests toku_cachefile_close(&f1, false, ZERO_LSN); toku_cachetable_close(&ct); - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); // // verify that a prefetch of the node will succeed diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-pin-checkpoint.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-pin-checkpoint.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-pin-checkpoint.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-pin-checkpoint.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -413,7 +413,7 @@ int r; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-pin-nonblocking-checkpoint-clean.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-pin-nonblocking-checkpoint-clean.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-pin-nonblocking-checkpoint-clean.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-pin-nonblocking-checkpoint-clean.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -100,7 +100,7 @@ const int test_limit = 20; int r; ct = NULL; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); f1 = NULL; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-prefetch2-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-prefetch2-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-prefetch2-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-prefetch2-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -32,7 +32,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -122,7 +122,7 @@ const int test_limit = 1; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-checkpoint-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-checkpoint-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-checkpoint-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-checkpoint-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -32,7 +32,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,7 +95,7 @@ #include #include "cachetable-test.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" const int item_size = 1; @@ -153,7 +153,7 @@ CACHETABLE ct; CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-close-leak-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-close-leak-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-close-leak-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-close-leak-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -31,7 +31,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -139,7 +139,7 @@ const int test_limit = 1; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-close-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-close-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-close-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-close-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -31,7 +31,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -141,7 +141,7 @@ expect_pf = false; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-flowcontrol-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-flowcontrol-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-flowcontrol-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-flowcontrol-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -32,7 +32,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -93,7 +93,7 @@ #include "test.h" -#include "cachetable-internal.h" +#include "cachetable/cachetable-internal.h" static int flush_calls = 0; static int flush_evict_calls = 0; @@ -152,7 +152,7 @@ static void cachetable_prefetch_flowcontrol_test (int cachetable_size_limit) { int r; CACHETABLE ct; - toku_cachetable_create(&ct, cachetable_size_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, cachetable_size_limit, ZERO_LSN, nullptr); evictor_test_helpers::set_hysteresis_limits(&ct->ev, cachetable_size_limit, cachetable_size_limit); evictor_test_helpers::disable_ev_thread(&ct->ev); const char *fname1 = TOKU_TEST_FILENAME; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-getandpin-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-getandpin-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-getandpin-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-getandpin-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -31,7 +31,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -162,7 +162,7 @@ const int test_limit = 2; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-maybegetandpin-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-maybegetandpin-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-maybegetandpin-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-maybegetandpin-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -31,7 +31,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -119,7 +119,7 @@ const int test_limit = 1; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-put-checkpoint.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-put-checkpoint.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-put-checkpoint.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-put-checkpoint.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -545,7 +545,7 @@ int r; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-put-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-put-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-put-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-put-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -96,7 +96,7 @@ const int test_limit = 2*n; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-rwlock-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-rwlock-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-rwlock-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-rwlock-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-clone2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-clone2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-clone2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-clone2.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -137,7 +137,7 @@ const int test_limit = 200; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-clone.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-clone.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-clone.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-clone.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -150,7 +150,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-close.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-close.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-close.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-close.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -145,7 +145,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; @@ -214,7 +214,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; @@ -245,7 +245,7 @@ const int test_limit = 1000; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); char fname1[strlen(TOKU_TEST_FILENAME) + sizeof("_1")]; strcpy(fname1, TOKU_TEST_FILENAME); @@ -333,7 +333,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); char fname1[strlen(TOKU_TEST_FILENAME) + sizeof("_1")]; strcpy(fname1, TOKU_TEST_FILENAME); diff -Nru 
mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-maybe-get-pin.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-maybe-get-pin.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-maybe-get-pin.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-maybe-get-pin.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -100,7 +100,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -139,7 +139,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-cheap.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-cheap.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-cheap.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-cheap.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -125,7 +125,7 @@ struct unlockers unlockers = {true, unlock_dummy, NULL, NULL}; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-dep-nodes.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-dep-nodes.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-dep-nodes.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-dep-nodes.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -158,7 +158,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-nonblocking.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-nonblocking.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-nonblocking.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-nonblocking.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -147,7 +147,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-nonblocking-cheap.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-nonblocking-cheap.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-nonblocking-cheap.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-nonblocking-cheap.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -164,7 +164,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-put-dep-nodes.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-put-dep-nodes.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-put-dep-nodes.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-put-dep-nodes.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -173,7 +173,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-read-pin.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-read-pin.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-read-pin.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-read-pin.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -158,7 +158,7 @@ int r; void *ret; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-read-pin-nonblocking.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-read-pin-nonblocking.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-read-pin-nonblocking.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-read-pin-nonblocking.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -152,7 +152,7 @@ int r; void *ret; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-unpin-remove-checkpoint.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-unpin-remove-checkpoint.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-unpin-remove-checkpoint.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-unpin-remove-checkpoint.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -114,7 +114,7 @@ const int test_limit = 120; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-verify.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-verify.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-simple-verify.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-simple-verify.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,7 +95,7 @@ const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -118,7 +118,7 @@ static void test_cachetable_create(void) { CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); toku_cachetable_close(&ct); } @@ -172,7 +172,7 @@ void *vv,*vv2; const char *fname = TOKU_TEST_FILENAME; if (verbose) printf("creating cachetable\n"); - toku_cachetable_create(&t, 1, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&t, 1, ZERO_LSN, nullptr); toku_os_recursive_delete(fname); r = toku_cachetable_openf(&f, t, fname, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r==0); @@ -257,7 +257,7 @@ unlink(fname1); unlink(fname2); - toku_cachetable_create(&t, 4, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&t, 4, ZERO_LSN, nullptr); r = toku_cachetable_openf(&f1, t, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r==0); r = link(fname1, fname2); assert(r==0); r = toku_cachetable_openf(&f2, t, fname2, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r==0); @@ -325,7 +325,7 @@ int dirty; long long pinned; long entry_size; int r; - toku_cachetable_create(&t, 4, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&t, 4, ZERO_LSN, nullptr); const char *fname = TOKU_TEST_FILENAME; toku_os_recursive_delete(fname); @@ -455,7 +455,7 @@ int n = 3; long size = 1; - toku_cachetable_create(&t, n*size, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&t, n*size, ZERO_LSN, nullptr); const char *fname = TOKU_TEST_FILENAME; unlink(fname); @@ -509,7 +509,7 @@ const int n = 8; long long size = 1*1024*1024; - toku_cachetable_create(&t, n*size, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&t, n*size, ZERO_LSN, nullptr); const char *fname = TOKU_TEST_FILENAME; unlink(fname); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-test.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-test.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-test.h 2014-08-03 12:00:34.000000000 +0000 +++ 
mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-test.h 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,10 +86,11 @@ under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#include "cachetable-internal.h" +#include "cachetable/cachetable-internal.h" // // Dummy callbacks for checkpointing diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-unpin-and-remove-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-unpin-and-remove-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-unpin-and-remove-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-unpin-and-remove-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -116,7 +116,7 @@ int i; CACHETABLE ct; - toku_cachetable_create(&ct, table_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, table_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; @@ -172,7 +172,7 @@ int i; CACHETABLE ct; - toku_cachetable_create(&ct, table_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, table_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-unpin-remove-and-checkpoint.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-unpin-remove-and-checkpoint.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-unpin-remove-and-checkpoint.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-unpin-remove-and-checkpoint.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -114,7 +114,7 @@ const int test_limit = 12; int r; ct = NULL; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-unpin-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-unpin-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-unpin-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-unpin-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -97,7 +97,7 @@ const int test_limit = 2*n; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; @@ -145,7 +145,7 @@ int r; CACHETABLE ct; int test_limit = 4; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-writer-thread-limit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-writer-thread-limit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/cachetable-writer-thread-limit.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/cachetable-writer-thread-limit.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -125,7 +125,7 @@ test_limit = 6; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/CMakeLists.txt mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/CMakeLists.txt --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/CMakeLists.txt 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/CMakeLists.txt 2014-10-08 13:19:51.000000000 +0000 @@ -101,6 +101,17 @@ set_property(TEST ft/upgrade_test_simple APPEND PROPERTY ENVIRONMENT "TOKUDB_DATA=${TOKUDB_DATA}") + # should be a file GLOB and a loop + declare_custom_tests(test-upgrade-recovery-logs) + add_ft_test_aux(test-upgrade-recovery-logs-24-clean test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-24-clean) + add_ft_test_aux(test-upgrade-recovery-logs-24-dirty test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-24-dirty) + add_ft_test_aux(test-upgrade-recovery-logs-25-clean test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-25-clean) + add_ft_test_aux(test-upgrade-recovery-logs-25-dirty test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-25-dirty) + add_ft_test_aux(test-upgrade-recovery-logs-26-clean test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-26-clean) + add_ft_test_aux(test-upgrade-recovery-logs-26-dirty test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-26-dirty) + add_ft_test_aux(test-upgrade-recovery-logs-27-clean test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-27-clean) + add_ft_test_aux(test-upgrade-recovery-logs-27-dirty test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-27-dirty) + ## give some tests, that time out normally, 1 hour to complete set(long_tests ft/ftloader-test-extractor-3a diff -Nru 
mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/comparator-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/comparator-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/comparator-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/comparator-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -112,14 +112,31 @@ // create with d1, make sure it gets used cmp.create(magic_compare, &d1); expected_desc = &d1; - c = cmp.compare(&dbt_a, &dbt_b); + c = cmp(&dbt_a, &dbt_b); invariant(c == MAGIC); // set desc to d2, make sure it gets used - cmp.set_descriptor(&d2); + toku::comparator cmp2; + cmp2.create(magic_compare, &d2); + cmp.inherit(cmp2); expected_desc = &d2; - c = cmp.compare(&dbt_a, &dbt_b); + c = cmp(&dbt_a, &dbt_b); invariant(c == MAGIC); + cmp2.destroy(); + + // go back to using d1, but using the create_from API + toku::comparator cmp3, cmp4; + cmp3.create(magic_compare, &d1); // cmp3 has d1 + cmp4.create_from(cmp3); // cmp4 should get d1 from cmp3 + expected_desc = &d1; + c = cmp3(&dbt_a, &dbt_b); + invariant(c == MAGIC); + c = cmp4(&dbt_a, &dbt_b); + invariant(c == MAGIC); + cmp3.destroy(); + cmp4.destroy(); + + cmp.destroy(); } static int dont_compare_me_bro(DB *db, const DBT *a, const DBT *b) { @@ -137,20 +154,22 @@ // should never be called and thus the dbt never actually read. 
DBT arbitrary_dbt; - c = cmp.compare(&arbitrary_dbt, toku_dbt_positive_infinity()); + c = cmp(&arbitrary_dbt, toku_dbt_positive_infinity()); invariant(c < 0); - c = cmp.compare(toku_dbt_negative_infinity(), &arbitrary_dbt); + c = cmp(toku_dbt_negative_infinity(), &arbitrary_dbt); invariant(c < 0); - c = cmp.compare(toku_dbt_positive_infinity(), &arbitrary_dbt); + c = cmp(toku_dbt_positive_infinity(), &arbitrary_dbt); invariant(c > 0); - c = cmp.compare(&arbitrary_dbt, toku_dbt_negative_infinity()); + c = cmp(&arbitrary_dbt, toku_dbt_negative_infinity()); invariant(c > 0); - c = cmp.compare(toku_dbt_negative_infinity(), toku_dbt_negative_infinity()); + c = cmp(toku_dbt_negative_infinity(), toku_dbt_negative_infinity()); invariant(c == 0); - c = cmp.compare(toku_dbt_positive_infinity(), toku_dbt_positive_infinity()); + c = cmp(toku_dbt_positive_infinity(), toku_dbt_positive_infinity()); invariant(c == 0); + + cmp.destroy(); } int main(void) { diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/compress-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/compress-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/compress-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/compress-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -30,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -91,7 +91,7 @@ #ident "$Id$" #include "test.h" -#include "compress.h" +#include "serialize/compress.h" static void test_compress_buf_method (unsigned char *buf, int i, enum toku_compression_method m) { int bound = toku_compress_bound(m, i); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/dbufio-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/dbufio-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/dbufio-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/dbufio-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,7 +88,7 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "dbufio.h" +#include "loader/dbufio.h" #include #include #include diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/dbufio-test-destroy.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/dbufio-test-destroy.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/dbufio-test-destroy.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/dbufio-test-destroy.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,7 +88,7 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "dbufio.h" +#include "loader/dbufio.h" #include #include #include diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/dmt-test2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/dmt-test2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/dmt-test2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/dmt-test2.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/dmt-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/dmt-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/dmt-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/dmt-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/fifo-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/fifo-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/fifo-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/fifo-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -94,90 +94,95 @@ #include "test.h" static void -test_fifo_create (void) { - int r; - FIFO f; - - f = 0; - r = toku_fifo_create(&f); - assert(r == 0); assert(f != 0); +test_create (void) { + message_buffer msg_buffer; + msg_buffer.create(); + msg_buffer.destroy(); +} + +static char *buildkey(size_t len) { + char *XMALLOC_N(len, k); + memset(k, 0, len); + return k; +} - toku_fifo_free(&f); - assert(f == 0); +static char *buildval(size_t len) { + char *XMALLOC_N(len, v); + memset(v, ~len, len); + return v; } static void -test_fifo_enq (int n) { - int r; - FIFO f; +test_enqueue(int n) { MSN startmsn = ZERO_MSN; - f = 0; - r = toku_fifo_create(&f); - assert(r == 0); assert(f != 0); - - char *thekey = 0; int thekeylen; - char *theval = 0; int thevallen; - - // this was a function but icc cant handle it -#define buildkey(len) { \ - thekeylen = len+1; \ - XREALLOC_N(thekeylen, thekey); \ - memset(thekey, len, thekeylen); \ - } - -#define buildval(len) { \ - thevallen = len+2; \ - XREALLOC_N(thevallen, theval); \ - memset(theval, ~len, thevallen); \ - } + message_buffer msg_buffer; + msg_buffer.create(); for (int i=0; isize == thekeylen); assert(memcmp(msg.kdbt()->data, thekey, msg.kdbt()->size) == 0); + assert((int) msg.vdbt()->size == thevallen); assert(memcmp(msg.vdbt()->data, theval, msg.vdbt()->size) == 0); + assert(i % 256 == (int)type); + assert((TXNID)i == toku_xids_get_innermost_xid(msg.xids())); + i += 1; + toku_free(thekey); + toku_free(theval); + return 0; + } + } checkit(startmsn, verbose); + msg_buffer.iterate(checkit); + assert(checkit.i == n); - toku_fifo_free(&f); - assert(f == 0); + msg_buffer.destroy(); } int test_main(int argc, const char *argv[]) { default_parse_args(argc, argv); initialize_dummymsn(); - test_fifo_create(); - test_fifo_enq(4); - test_fifo_enq(512); + test_create(); + test_enqueue(4); + test_enqueue(512); return 0; } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ft-bfe-query.cc 
mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ft-bfe-query.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ft-bfe-query.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ft-bfe-query.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -103,7 +103,6 @@ static void test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { int r; - ft_h->compare_fun = int64_key_cmp; FT_CURSOR XMALLOC(cursor); FTNODE dn = NULL; PAIR_ATTR attr; @@ -115,12 +114,12 @@ cursor->right_is_pos_infty = true; cursor->disable_prefetching = false; - struct ftnode_fetch_extra bfe; + ftnode_fetch_extra bfe; // quick test to see that we have the right behavior when we set // disable_prefetching to true cursor->disable_prefetching = true; - fill_bfe_for_prefetch(&bfe, ft_h, cursor); + bfe.create_for_prefetch( ft_h, cursor); FTNODE_DISK_DATA ndd = NULL; r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); @@ -132,14 +131,14 @@ assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_ON_DISK); - destroy_bfe_for_prefetch(&bfe); + bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); // now enable prefetching again cursor->disable_prefetching = false; - fill_bfe_for_prefetch(&bfe, ft_h, cursor); + bfe.create_for_prefetch( ft_h, cursor); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); @@ -154,14 +153,14 @@ assert(BP_STATE(dn,0) == PT_AVAIL); assert(BP_STATE(dn,1) == PT_AVAIL); assert(BP_STATE(dn,2) == PT_AVAIL); - destroy_bfe_for_prefetch(&bfe); + bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); uint64_t left_key = 150; toku_fill_dbt(&cursor->range_lock_left_key, &left_key, 
sizeof(uint64_t)); cursor->left_is_neg_infty = false; - fill_bfe_for_prefetch(&bfe, ft_h, cursor); + bfe.create_for_prefetch( ft_h, cursor); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); @@ -176,14 +175,14 @@ assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_AVAIL); assert(BP_STATE(dn,2) == PT_AVAIL); - destroy_bfe_for_prefetch(&bfe); + bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); uint64_t right_key = 151; toku_fill_dbt(&cursor->range_lock_right_key, &right_key, sizeof(uint64_t)); cursor->right_is_pos_infty = false; - fill_bfe_for_prefetch(&bfe, ft_h, cursor); + bfe.create_for_prefetch( ft_h, cursor); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); @@ -198,13 +197,13 @@ assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_AVAIL); assert(BP_STATE(dn,2) == PT_ON_DISK); - destroy_bfe_for_prefetch(&bfe); + bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); left_key = 100000; right_key = 100000; - fill_bfe_for_prefetch(&bfe, ft_h, cursor); + bfe.create_for_prefetch( ft_h, cursor); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); @@ -219,13 +218,13 @@ assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_AVAIL); - destroy_bfe_for_prefetch(&bfe); + bfe.destroy(); toku_free(ndd); toku_ftnode_free(&dn); left_key = 100; right_key = 100; - fill_bfe_for_prefetch(&bfe, ft_h, cursor); + bfe.create_for_prefetch( ft_h, cursor); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); @@ -240,7 +239,7 @@ assert(BP_STATE(dn,0) == PT_AVAIL); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_ON_DISK); - 
destroy_bfe_for_prefetch(&bfe); + bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); @@ -250,7 +249,6 @@ static void test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) { int r; - ft_h->compare_fun = int64_key_cmp; FT_CURSOR XMALLOC(cursor); FTNODE dn = NULL; FTNODE_DISK_DATA ndd = NULL; @@ -262,15 +260,14 @@ cursor->left_is_neg_infty = true; cursor->right_is_pos_infty = true; - struct ftnode_fetch_extra bfe; - uint64_t left_key = 150; uint64_t right_key = 151; DBT left, right; toku_fill_dbt(&left, &left_key, sizeof(left_key)); toku_fill_dbt(&right, &right_key, sizeof(right_key)); - fill_bfe_for_subset_read( - &bfe, + + ftnode_fetch_extra bfe; + bfe.create_for_subset_read( ft_h, NULL, &left, @@ -372,7 +369,7 @@ // source_ft.fd=fd; sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 1; @@ -384,10 +381,10 @@ uint64_t key2 = 200; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(sn.n_children-1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], &key1, sizeof(key1)); - toku_memdup_dbt(&sn.childkeys[1], &key2, sizeof(key2)); - sn.totalchildkeylens = sizeof(key1) + sizeof(key2); + DBT pivotkeys[2]; + toku_fill_dbt(&pivotkeys[0], &key1, sizeof(key1)); + toku_fill_dbt(&pivotkeys[1], &key2, sizeof(key2)); + sn.pivotkeys.create_from_dbts(pivotkeys, 2); BP_BLOCKNUM(&sn, 0).b = 30; BP_BLOCKNUM(&sn, 1).b = 35; BP_BLOCKNUM(&sn, 2).b = 40; @@ -398,19 +395,19 @@ set_BNC(&sn, 1, toku_create_empty_nl()); set_BNC(&sn, 2, toku_create_empty_nl()); //Create XIDS - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; XIDS xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_123, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_123, &xids_234, (TXNID)234); CKERR(r); // 
data in the buffers does not matter in this test //Cleanup: - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); FT_HANDLE XMALLOC(ft); FT XCALLOC(ft_h); @@ -422,24 +419,25 @@ 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); + ft_h->cmp.create(int64_key_cmp, nullptr); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA ndd = NULL; @@ -449,16 +447,11 @@ test_prefetch_read(fd, ft, ft_h); test_subset_read(fd, ft, ft_h); - toku_free(sn.childkeys[0].data); - toku_free(sn.childkeys[1].data); - destroy_nonleaf_childinfo(BNC(&sn, 0)); - destroy_nonleaf_childinfo(BNC(&sn, 1)); - destroy_nonleaf_childinfo(BNC(&sn, 2)); - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + 
ft_h->blocktable.destroy(); + ft_h->cmp.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ft-clock-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ft-clock-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ft-clock-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ft-clock-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -90,7 +90,7 @@ #include "test.h" - +#include "ft/cursor.h" enum ftnode_verify_type { read_all=1, @@ -146,9 +146,8 @@ static void test1(int fd, FT ft_h, FTNODE *dn) { int r; - struct ftnode_fetch_extra bfe_all; - ft_h->compare_fun = string_key_cmp; - fill_bfe_for_full_read(&bfe_all, ft_h); + ftnode_fetch_extra bfe_all; + bfe_all.create_for_full_read(ft_h); FTNODE_DISK_DATA ndd = NULL; r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_all); bool is_leaf = ((*dn)->height == 0); @@ -218,19 +217,17 @@ static void test2(int fd, FT ft_h, FTNODE *dn) { - struct ftnode_fetch_extra bfe_subset; DBT left, right; DB dummy_db; memset(&dummy_db, 0, sizeof(dummy_db)); memset(&left, 0, sizeof(left)); memset(&right, 0, sizeof(right)); - ft_search_t search_t; + ft_search search; - ft_h->compare_fun = string_key_cmp; - fill_bfe_for_subset_read( - &bfe_subset, + ftnode_fetch_extra bfe_subset; + bfe_subset.create_for_subset_read( ft_h, - ft_search_init(&search_t, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr), + ft_search_init(&search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr), &left, &right, true, @@ -238,6 +235,7 @@ false, false ); + FTNODE_DISK_DATA ndd = NULL; int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_subset); 
assert(r==0); @@ -272,18 +270,15 @@ static void test3_leaf(int fd, FT ft_h, FTNODE *dn) { - struct ftnode_fetch_extra bfe_min; DBT left, right; DB dummy_db; memset(&dummy_db, 0, sizeof(dummy_db)); memset(&left, 0, sizeof(left)); memset(&right, 0, sizeof(right)); - ft_h->compare_fun = string_key_cmp; - fill_bfe_for_min_read( - &bfe_min, - ft_h - ); + ftnode_fetch_extra bfe_min; + bfe_min.create_for_min_read(ft_h); + FTNODE_DISK_DATA ndd = NULL; int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_min); assert(r==0); @@ -309,20 +304,17 @@ // source_ft.fd=fd; sn.max_msn_applied_to_node_on_disk.msn = 0; - char *hello_string; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 1; sn.n_children = 2; sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; - hello_string = toku_strdup("hello"); MALLOC_N(2, sn.bp); - MALLOC_N(1, sn.childkeys); - toku_fill_dbt(&sn.childkeys[0], hello_string, 6); - sn.totalchildkeylens = 6; + DBT pivotkey; + sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "hello", 6), 1); BP_BLOCKNUM(&sn, 0).b = 30; BP_BLOCKNUM(&sn, 1).b = 35; BP_STATE(&sn,0) = PT_AVAIL; @@ -330,21 +322,26 @@ set_BNC(&sn, 0, toku_create_empty_nl()); set_BNC(&sn, 1, toku_create_empty_nl()); //Create XIDS - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; XIDS xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_123, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_123, &xids_234, (TXNID)234); CKERR(r); - toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, NULL, string_key_cmp); - toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, NULL, string_key_cmp); - 
toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, NULL, string_key_cmp); + toku::comparator cmp; + cmp.create(string_key_cmp, nullptr); + + toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, cmp); + toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, cmp); + toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp); + //Cleanup: - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); + cmp.destroy(); FT_HANDLE XMALLOC(ft); FT XCALLOC(ft_h); @@ -356,25 +353,26 @@ 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); + ft_h->cmp.create(string_key_cmp, nullptr); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA ndd = NULL; @@ -384,16 +382,13 @@ test1(fd, ft_h, &dn); test2(fd, ft_h, &dn); - toku_free(hello_string); - destroy_nonleaf_childinfo(BNC(&sn, 
0)); - destroy_nonleaf_childinfo(BNC(&sn, 1)); - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_destroy_ftnode_internals(&sn); toku_free(ndd); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); toku_free(ft_h->h); + ft_h->cmp.destroy(); toku_free(ft_h); toku_free(ft); @@ -411,7 +406,7 @@ sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -419,9 +414,8 @@ sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], "b", 2); - sn.totalchildkeylens = 2; + DBT pivotkey; + sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1); BP_STATE(&sn,0) = PT_AVAIL; BP_STATE(&sn,1) = PT_AVAIL; set_BLB(&sn, 0, toku_create_empty_bn()); @@ -442,23 +436,23 @@ 16); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, 
&size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA ndd = NULL; @@ -468,17 +462,10 @@ test1(fd, ft_h, &dn); test3_leaf(fd, ft_h,&dn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ftloader-error-injector.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ftloader-error-injector.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ftloader-error-injector.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ftloader-error-injector.h 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#ifndef FTLOADER_ERROR_INJECTOR_H -#define FTLOADER_ERROR_INJECTOR_H - #include static toku_mutex_t event_mutex = TOKU_MUTEX_INITIALIZER; @@ -224,5 +223,3 @@ } return realloc(p, n); } - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ftloader-test-bad-generate.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ftloader-test-bad-generate.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ftloader-test-bad-generate.cc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ftloader-test-bad-generate.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,8 +94,8 @@ #define DONT_DEPRECATE_MALLOC #define DONT_DEPRECATE_WRITES #include "test.h" -#include "ftloader.h" -#include "ftloader-internal.h" +#include "loader/loader.h" +#include "loader/loader-internal.h" #include "ftloader-error-injector.h" #include "memory.h" #include @@ -183,7 +183,7 @@ // feed rowsets to the extractor for (int i = 0; i < nrowsets; i++) { - r = queue_enq(loader->primary_rowset_queue, rowset[i], 1, NULL); + r = toku_queue_enq(loader->primary_rowset_queue, rowset[i], 1, NULL); assert(r == 0); } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ftloader-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ftloader-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ftloader-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ftloader-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -94,7 +94,7 @@ #include #include #include -#include "ftloader-internal.h" +#include "loader/loader-internal.h" #include "memory.h" #include @@ -186,7 +186,7 @@ static void test_merge (void) { { int avals[]={1,2,3,4,5}; - int *bvals = NULL; //icc won't let us use a zero-sized array explicitly or by [] = {} construction. + int *bvals = NULL; test_merge_internal(avals, 5, bvals, 0, false); test_merge_internal(bvals, 0, avals, 5, false); } @@ -336,7 +336,7 @@ int r; CACHETABLE ct; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); TOKUTXN const null_txn = NULL; FT_HANDLE t = NULL; @@ -350,7 +350,7 @@ size_t userdata = 0; int i; for (i=0; i @@ -415,7 +415,7 @@ // feed rowsets to the extractor for (int i = 0; i < nrowsets; i++) { - r = queue_enq(loader->primary_rowset_queue, rowset[i], 1, NULL); + r = toku_queue_enq(loader->primary_rowset_queue, rowset[i], 1, NULL); assert(r == 0); } r = toku_ft_loader_finish_extractor(loader); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ftloader-test-extractor-errors.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ftloader-test-extractor-errors.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ftloader-test-extractor-errors.cc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ftloader-test-extractor-errors.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -95,8 +95,8 @@ #define DONT_DEPRECATE_MALLOC #define DONT_DEPRECATE_WRITES #include "test.h" -#include "ftloader.h" -#include "ftloader-internal.h" +#include "loader/loader.h" +#include "loader/loader-internal.h" #include "ftloader-error-injector.h" #include "memory.h" #include @@ -201,7 +201,7 @@ // feed rowsets to the extractor for (int i = 0; i < nrowsets; i++) { - r = queue_enq(loader->primary_rowset_queue, rowset[i], 1, NULL); + r = toku_queue_enq(loader->primary_rowset_queue, rowset[i], 1, NULL); assert(r == 0); } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ftloader-test-merge-files-dbufio.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ftloader-test-merge-files-dbufio.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ftloader-test-merge-files-dbufio.cc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ftloader-test-merge-files-dbufio.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,7 +95,7 @@ #define DONT_DEPRECATE_MALLOC #include "test.h" -#include "ftloader-internal.h" +#include "loader/loader-internal.h" #include static int event_count, event_count_trigger; @@ -346,7 +346,7 @@ struct consumer_thunk *cthunk = (struct consumer_thunk *)ctv; while (1) { void *item; - int r = queue_deq(cthunk->q, &item, NULL, NULL); + int r = toku_queue_deq(cthunk->q, &item, NULL, NULL); if (r==EOF) return NULL; assert(r==0); struct rowset *rowset = (struct rowset *)item; @@ -423,7 +423,7 @@ ft_loader_set_fractal_workers_count_from_c(bl); QUEUE q; - { int r = queue_create(&q, 1000); assert(r==0); } + { int r = toku_queue_create(&q, 1000); assert(r==0); } DBUFIO_FILESET bfs; const int MERGE_BUF_SIZE = 100000; // bigger than 64K so that we will trigger malloc issues. 
{ int r = create_dbufio_fileset(&bfs, N_SOURCES, fds, MERGE_BUF_SIZE, false); assert(r==0); } @@ -474,7 +474,7 @@ panic_dbufio_fileset(bfs, r); } { - int r = queue_eof(q); + int r = toku_queue_eof(q); assert(r==0); } @@ -501,7 +501,7 @@ } } { - int r = queue_destroy(q); + int r = toku_queue_destroy(q); assert(r==0); } toku_ft_loader_internal_destroy(bl, false); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ftloader-test-open.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ftloader-test-open.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ftloader-test-open.cc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ftloader-test-open.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,8 +94,8 @@ #define DONT_DEPRECATE_MALLOC #include "test.h" -#include "ftloader.h" -#include "ftloader-internal.h" +#include "loader/loader.h" +#include "loader/loader-internal.h" #include "memory.h" #include diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ftloader-test-vm.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ftloader-test-vm.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ftloader-test-vm.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ftloader-test-vm.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -90,7 +90,7 @@ #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." #include "test.h" -#include "cachetable.h" +#include "cachetable/cachetable.h" #include /* Test for #2755. The ft_loader is using too much VM. */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ftloader-test-writer.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ftloader-test-writer.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ftloader-test-writer.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ftloader-test-writer.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -93,7 +93,7 @@ #include "test.h" -#include "ftloader-internal.h" +#include "loader/loader-internal.h" #include #include @@ -129,7 +129,7 @@ int r; CACHETABLE ct; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); TOKUTXN const null_txn = NULL; FT_HANDLE t = NULL; @@ -215,20 +215,20 @@ ft_loader_fi_close_all(&bl.file_infos); QUEUE q; - r = queue_create(&q, 0xFFFFFFFF); // infinite queue. + r = toku_queue_create(&q, 0xFFFFFFFF); // infinite queue. assert(r==0); r = merge_files(&fs, &bl, 0, dest_db, compare_ints, 0, q); CKERR(r); assert(fs.n_temp_files==0); QUEUE q2; - r = queue_create(&q2, 0xFFFFFFFF); // infinite queue. + r = toku_queue_create(&q2, 0xFFFFFFFF); // infinite queue. 
assert(r==0); size_t num_found = 0; size_t found_size_est = 0; while (1) { void *v; - r = queue_deq(q, &v, NULL, NULL); + r = toku_queue_deq(q, &v, NULL, NULL); if (r==EOF) break; struct rowset *rs = (struct rowset *)v; if (verbose) printf("v=%p\n", v); @@ -243,16 +243,16 @@ num_found += rs->n_rows; - r = queue_enq(q2, v, 0, NULL); + r = toku_queue_enq(q2, v, 0, NULL); assert(r==0); } assert((int)num_found == n); assert(found_size_est == size_est); - r = queue_eof(q2); + r = toku_queue_eof(q2); assert(r==0); - r = queue_destroy(q); + r = toku_queue_destroy(q); assert(r==0); DESCRIPTOR_S desc; @@ -265,7 +265,7 @@ r = toku_loader_write_ft_from_q_in_C(&bl, &desc, fd, 1000, q2, size_est, 0, 0, 0, TOKU_DEFAULT_COMPRESSION_METHOD, 16); assert(r==0); - r = queue_destroy(q2); + r = toku_queue_destroy(q2); assert_zero(r); destroy_merge_fileset(&fs); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ftloader-test-writer-errors.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ftloader-test-writer-errors.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ftloader-test-writer-errors.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ftloader-test-writer-errors.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,7 +95,7 @@ #define DONT_DEPRECATE_MALLOC #include "test.h" -#include "ftloader-internal.h" +#include "loader/loader-internal.h" #include "ftloader-error-injector.h" #include @@ -159,20 +159,20 @@ ft_loader_fi_close_all(&bl.file_infos); QUEUE q; - r = queue_create(&q, 0xFFFFFFFF); // infinite queue. + r = toku_queue_create(&q, 0xFFFFFFFF); // infinite queue. 
assert(r==0); r = merge_files(&fs, &bl, 0, dest_db, compare_ints, 0, q); CKERR(r); assert(fs.n_temp_files==0); QUEUE q2; - r = queue_create(&q2, 0xFFFFFFFF); // infinite queue. + r = toku_queue_create(&q2, 0xFFFFFFFF); // infinite queue. assert(r==0); size_t num_found = 0; size_t found_size_est = 0; while (1) { void *v; - r = queue_deq(q, &v, NULL, NULL); + r = toku_queue_deq(q, &v, NULL, NULL); if (r==EOF) break; struct rowset *rs = (struct rowset *)v; if (verbose) printf("v=%p\n", v); @@ -187,16 +187,16 @@ num_found += rs->n_rows; - r = queue_enq(q2, v, 0, NULL); + r = toku_queue_enq(q2, v, 0, NULL); assert(r==0); } assert((int)num_found == n); if (!expect_error) assert(found_size_est == size_est); - r = queue_eof(q2); + r = toku_queue_eof(q2); assert(r==0); - r = queue_destroy(q); + r = toku_queue_destroy(q); assert(r==0); DESCRIPTOR_S desc; @@ -225,7 +225,7 @@ ft_loader_destroy_poll_callback(&bl.poll_callback); ft_loader_lock_destroy(&bl); - r = queue_destroy(q2); + r = toku_queue_destroy(q2); assert(r==0); destroy_merge_fileset(&fs); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ft-serialize-benchmark.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ft-serialize-benchmark.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ft-serialize-benchmark.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ft-serialize-benchmark.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -144,7 +144,7 @@ sn->max_msn_applied_to_node_on_disk.msn = 0; sn->flags = 0x11223344; - sn->thisnodename.b = 20; + sn->blocknum.b = 20; sn->layout_version = FT_LAYOUT_VERSION; sn->layout_version_original = FT_LAYOUT_VERSION; sn->height = 0; @@ -152,8 +152,7 @@ sn->dirty = 1; sn->oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn->n_children, sn->bp); - MALLOC_N(sn->n_children-1, sn->childkeys); - sn->totalchildkeylens = 0; + sn->pivotkeys.create_empty(); for (int i = 0; i < sn->n_children; ++i) { BP_STATE(sn,i) = PT_AVAIL; set_BLB(sn, i, toku_create_empty_bn()); @@ -181,8 +180,8 @@ ); } if (ck < 7) { - toku_memdup_dbt(&sn->childkeys[ck], &k, sizeof k); - sn->totalchildkeylens += sizeof k; + DBT pivotkey; + sn->pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), ck); } } @@ -196,26 +195,26 @@ 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); + ft_h->cmp.create(long_key_cmp, nullptr); ft->ft = ft_h; - ft_h->compare_fun = long_key_cmp; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -248,9 +247,9 @@ total_start.tv_sec = 
total_start.tv_usec = 0; total_end.tv_sec = total_end.tv_usec = 0; - struct ftnode_fetch_extra bfe; + ftnode_fetch_extra bfe; for (int i = 0; i < deser_runs; i++) { - fill_bfe_for_full_read(&bfe, ft_h); + bfe.create_for_full_read(ft_h); gettimeofday(&t[0], NULL); FTNODE_DISK_DATA ndd2 = NULL; r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe); @@ -278,8 +277,9 @@ toku_ftnode_free(&sn); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); + ft_h->cmp.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); @@ -299,7 +299,7 @@ // source_ft.fd=fd; sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 1; @@ -307,18 +307,19 @@ sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(sn.n_children-1, sn.childkeys); - sn.totalchildkeylens = 0; + sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { BP_BLOCKNUM(&sn, i).b = 30 + (i*5); BP_STATE(&sn,i) = PT_AVAIL; set_BNC(&sn, i, toku_create_empty_nl()); } //Create XIDS - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); + toku::comparator cmp; + cmp.create(long_key_cmp, nullptr); int nperchild = nelts / 8; for (int ck = 0; ck < sn.n_children; ++ck) { long k; @@ -334,17 +335,18 @@ } memset(&buf[c], 0, valsize - c); - toku_bnc_insert_msg(bnc, &k, sizeof k, buf, valsize, FT_NONE, next_dummymsn(), xids_123, true, NULL, long_key_cmp); + toku_bnc_insert_msg(bnc, &k, sizeof k, buf, valsize, FT_NONE, 
next_dummymsn(), xids_123, true, cmp); } if (ck < 7) { - toku_memdup_dbt(&sn.childkeys[ck], &k, sizeof k); - sn.totalchildkeylens += sizeof k; + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), ck); } } //Cleanup: - xids_destroy(&xids_0); - xids_destroy(&xids_123); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + cmp.destroy(); FT_HANDLE XMALLOC(ft); FT XCALLOC(ft_h); @@ -356,26 +358,26 @@ 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); + ft_h->cmp.create(long_key_cmp, nullptr); ft->ft = ft_h; - ft_h->compare_fun = long_key_cmp; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -390,8 +392,8 @@ dt *= 1000; printf("serialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft_h); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_h); gettimeofday(&t[0], NULL); FTNODE_DISK_DATA ndd2 = NULL; r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe); @@ -408,19 +410,12 @@ ); 
toku_ftnode_free(&dn); + toku_destroy_ftnode_internals(&sn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; ++i) { - destroy_nonleaf_childinfo(BNC(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); - - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); toku_free(ft_h->h); + ft_h->cmp.destroy(); toku_free(ft_h); toku_free(ft); toku_free(ndd); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ft-serialize-sub-block-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ft-serialize-sub-block-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ft-serialize-sub-block-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ft-serialize-sub-block-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -112,7 +112,7 @@ unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); error = toku_open_ft_handle(fname, true, &ft, nodesize, basementnodesize, compression_method, ct, null_txn, toku_builtin_compare_fun); assert(error == 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ft-serialize-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ft-serialize-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ft-serialize-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ft-serialize-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -164,16 +164,15 @@ static void setup_dn(enum ftnode_verify_type bft, int fd, FT ft_h, FTNODE *dn, FTNODE_DISK_DATA* ndd) { int r; - ft_h->compare_fun = string_key_cmp; if (bft == read_all) { - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft_h); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_h); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe); assert(r==0); } else if (bft == read_compressed || bft == read_none) { - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, ft_h); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft_h); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe); assert(r==0); // assert all bp's are compressed or on disk. @@ -200,7 +199,7 @@ // that it is available // then run partial eviction to get it compressed PAIR_ATTR attr; - fill_bfe_for_full_read(&bfe, ft_h); + bfe.create_for_full_read(ft_h); assert(toku_ftnode_pf_req_callback(*dn, &bfe)); r = toku_ftnode_pf_callback(*dn, *ndd, &bfe, fd, &attr); assert(r==0); @@ -222,7 +221,7 @@ } } // now decompress them - fill_bfe_for_full_read(&bfe, ft_h); + bfe.create_for_full_read(ft_h); assert(toku_ftnode_pf_req_callback(*dn, &bfe)); PAIR_ATTR attr; r = toku_ftnode_pf_callback(*dn, *ndd, &bfe, fd, &attr); @@ -271,7 +270,7 @@ sn.max_msn_applied_to_node_on_disk = PRESERIALIZE_MSN_ON_DISK; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -279,9 +278,8 @@ sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], "b", 2); - sn.totalchildkeylens = 2; + DBT pivotkey; + sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1); BP_STATE(&sn,0) = 
PT_AVAIL; BP_STATE(&sn,1) = PT_AVAIL; set_BLB(&sn, 0, toku_create_empty_bn()); @@ -303,24 +301,24 @@ TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; @@ -330,7 +328,7 @@ setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); assert(dn->layout_version_original ==FT_LAYOUT_VERSION); @@ -346,7 +344,6 @@ elts[1].init("b", "bval"); elts[2].init("x", "xval"); const uint32_t npartitions = dn->n_children; - assert(dn->totalchildkeylens==(2*(npartitions-1))); uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { assert(BLB_MAX_MSN_APPLIED(dn, bn).msn == POSTSERIALIZE_MSN_ON_DISK.msn); @@ -363,7 +360,7 @@ assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le)); assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0); if (bn < npartitions-1) { - assert(strcmp((char*)dn->childkeys[bn].data, elts[last_i].keyp) <= 0); + 
assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, elts[last_i].keyp) <= 0); } // TODO for later, get a key comparison here as well last_i++; @@ -372,19 +369,12 @@ } assert(last_i == 3); } - toku_ftnode_free(&dn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_ftnode_free(&dn); + toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); @@ -405,7 +395,7 @@ sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -414,8 +404,7 @@ sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(sn.n_children-1, sn.childkeys); - sn.totalchildkeylens = (sn.n_children-1)*sizeof(int); + sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { BP_STATE(&sn,i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); @@ -430,7 +419,8 @@ uint32_t keylen; void* curr_key; BLB_DATA(&sn, i)->fetch_key_and_len(0, &keylen, &curr_key); - toku_memdup_dbt(&sn.childkeys[i], curr_key, keylen); + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, curr_key, keylen), i); } } @@ -445,23 +435,23 @@ TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + 
ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; @@ -471,7 +461,7 @@ setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); assert(dn->layout_version_original ==FT_LAYOUT_VERSION); @@ -489,7 +479,6 @@ } } const uint32_t npartitions = dn->n_children; - assert(dn->totalchildkeylens==(keylens*(npartitions-1))); uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { assert(dest_ndd[bn].start > 0); @@ -506,7 +495,7 @@ assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); if (bn < npartitions-1) { - assert(strcmp((char*)dn->childkeys[bn].data, les[last_i].keyp) <= 0); + assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, les[last_i].keyp) <= 0); } // TODO for later, get a key comparison here as well last_i++; @@ -517,17 +506,10 @@ } toku_ftnode_free(&dn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - toku_free(sn.childkeys); - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); + toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - 
toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); @@ -546,7 +528,7 @@ sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -555,8 +537,7 @@ sn.oldest_referenced_xid_known = TXNID_NONE; XMALLOC_N(sn.n_children, sn.bp); - XMALLOC_N(sn.n_children-1, sn.childkeys); - sn.totalchildkeylens = (sn.n_children-1)*sizeof(int); + sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { BP_STATE(&sn,i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); @@ -580,23 +561,23 @@ 16); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -606,7 +587,7 @@ setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); 
assert(dn->layout_version_original ==FT_LAYOUT_VERSION); @@ -621,7 +602,6 @@ } } const uint32_t npartitions = dn->n_children; - assert(dn->totalchildkeylens==(sizeof(int)*(npartitions-1))); uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { assert(dest_ndd[bn].start > 0); @@ -638,7 +618,7 @@ assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); if (bn < npartitions-1) { - uint32_t *CAST_FROM_VOIDP(pivot, dn->childkeys[bn].data); + uint32_t *CAST_FROM_VOIDP(pivot, dn->pivotkeys.get_pivot(bn).data); void* tmp = les[last_i].keyp; uint32_t *CAST_FROM_VOIDP(item, tmp); assert(*pivot >= *item); @@ -654,17 +634,10 @@ } toku_ftnode_free(&dn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); @@ -687,7 +660,7 @@ sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -696,8 +669,7 @@ sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(sn.n_children-1, sn.childkeys); - sn.totalchildkeylens = (sn.n_children-1)*8; + sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { BP_STATE(&sn,i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); @@ -724,23 +696,23 @@ 16); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int 
r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -750,7 +722,7 @@ setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); assert(dn->layout_version_original ==FT_LAYOUT_VERSION); @@ -771,7 +743,6 @@ } const uint32_t npartitions = dn->n_children; assert(npartitions == nrows); - assert(dn->totalchildkeylens==(key_size*(npartitions-1))); uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { assert(dest_ndd[bn].start > 0); @@ -788,7 +759,7 @@ assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); if (bn < npartitions-1) { - assert(strcmp((char*)dn->childkeys[bn].data, (char*)(les[last_i].keyp)) <= 0); + assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, (char*)(les[last_i].keyp)) <= 0); } // TODO for later, get a key comparison here as well last_i++; @@ -800,17 +771,10 @@ } toku_ftnode_free(&dn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - 
destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); @@ -831,7 +795,7 @@ sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -839,14 +803,14 @@ sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(sn.n_children-1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], "A", 2); - toku_memdup_dbt(&sn.childkeys[1], "a", 2); - toku_memdup_dbt(&sn.childkeys[2], "a", 2); - toku_memdup_dbt(&sn.childkeys[3], "b", 2); - toku_memdup_dbt(&sn.childkeys[4], "b", 2); - toku_memdup_dbt(&sn.childkeys[5], "x", 2); - sn.totalchildkeylens = (sn.n_children-1)*2; + DBT pivotkeys[6]; + toku_fill_dbt(&pivotkeys[0], "A", 2); + toku_fill_dbt(&pivotkeys[1], "a", 2); + toku_fill_dbt(&pivotkeys[2], "a", 2); + toku_fill_dbt(&pivotkeys[3], "b", 2); + toku_fill_dbt(&pivotkeys[4], "b", 2); + toku_fill_dbt(&pivotkeys[5], "x", 2); + sn.pivotkeys.create_from_dbts(pivotkeys, 6); for (int i = 0; i < sn.n_children; ++i) { BP_STATE(&sn,i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); @@ -868,23 +832,23 @@ 16); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, 
b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; @@ -893,7 +857,7 @@ setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); assert(dn->layout_version_original ==FT_LAYOUT_VERSION); @@ -909,7 +873,6 @@ elts[1].init("b", "bval"); elts[2].init("x", "xval"); const uint32_t npartitions = dn->n_children; - assert(dn->totalchildkeylens==(2*(npartitions-1))); uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { assert(dest_ndd[bn].start > 0); @@ -925,7 +888,7 @@ assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le)); assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0); if (bn < npartitions-1) { - assert(strcmp((char*)dn->childkeys[bn].data, (char*)(elts[last_i].keyp)) <= 0); + assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, (char*)(elts[last_i].keyp)) <= 0); } // TODO for later, get a key comparison here as well last_i++; @@ -934,19 +897,12 @@ } assert(last_i == 3); } - toku_ftnode_free(&dn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_ftnode_free(&dn); + toku_destroy_ftnode_internals(&sn); - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - 
toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); @@ -966,7 +922,7 @@ sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -974,11 +930,11 @@ sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(sn.n_children-1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], "A", 2); - toku_memdup_dbt(&sn.childkeys[1], "A", 2); - toku_memdup_dbt(&sn.childkeys[2], "A", 2); - sn.totalchildkeylens = (sn.n_children-1)*2; + DBT pivotkeys[3]; + toku_fill_dbt(&pivotkeys[0], "A", 2); + toku_fill_dbt(&pivotkeys[1], "A", 2); + toku_fill_dbt(&pivotkeys[2], "A", 2); + sn.pivotkeys.create_from_dbts(pivotkeys, 3); for (int i = 0; i < sn.n_children; ++i) { BP_STATE(&sn,i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); @@ -996,23 +952,23 @@ 16); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + 
assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } @@ -1022,7 +978,7 @@ setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); assert(dn->layout_version_original ==FT_LAYOUT_VERSION); @@ -1031,7 +987,6 @@ assert(dn->n_children == 1); { const uint32_t npartitions = dn->n_children; - assert(dn->totalchildkeylens==(2*(npartitions-1))); for (uint32_t i = 0; i < npartitions; ++i) { assert(dest_ndd[i].start > 0); assert(dest_ndd[i].size > 0); @@ -1041,19 +996,12 @@ assert(BLB_DATA(dn, i)->num_klpairs() == 0); } } + toku_ftnode_free(&dn); + toku_destroy_ftnode_internals(&sn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); - - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); @@ -1076,7 +1024,7 @@ // source_ft.fd=fd; sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 1; @@ -1084,9 +1032,8 @@ sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(2, sn.bp); - MALLOC_N(1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], "hello", 6); - sn.totalchildkeylens = 6; + DBT pivotkey; + sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "hello", 6), 1); BP_BLOCKNUM(&sn, 0).b = 30; BP_BLOCKNUM(&sn, 1).b = 35; BP_STATE(&sn,0) = PT_AVAIL; @@ -1094,21 +1041,26 @@ set_BNC(&sn, 0, toku_create_empty_nl()); set_BNC(&sn, 1, toku_create_empty_nl()); //Create XIDS - 
XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; XIDS xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_123, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_123, &xids_234, (TXNID)234); CKERR(r); - toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, NULL, string_key_cmp); - toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, NULL, string_key_cmp); - toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, NULL, string_key_cmp); + toku::comparator cmp; + cmp.create(string_key_cmp, nullptr); + + toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, cmp); + toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, cmp); + toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp); + //Cleanup: - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); + cmp.destroy(); FT_HANDLE XMALLOC(ft); FT XCALLOC(ft_h); @@ -1120,25 +1072,26 @@ 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); + ft_h->cmp.create(string_key_cmp, nullptr); ft->ft = ft_h; - toku_blocktable_create_new(&ft_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(ft_h->blocktable, &b, ft_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(ft_h->blocktable, b, 100, &offset, ft_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + 
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(ft_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; @@ -1147,37 +1100,32 @@ setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); assert(dn->layout_version_original ==FT_LAYOUT_VERSION); assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION); assert(dn->height == 1); assert(dn->n_children==2); - assert(strcmp((char*)dn->childkeys[0].data, "hello")==0); - assert(dn->childkeys[0].size==6); - assert(dn->totalchildkeylens==6); + assert(strcmp((char*)dn->pivotkeys.get_pivot(0).data, "hello")==0); + assert(dn->pivotkeys.get_pivot(0).size==6); assert(BP_BLOCKNUM(dn,0).b==30); assert(BP_BLOCKNUM(dn,1).b==35); - FIFO src_fifo_1 = BNC(&sn, 0)->buffer; - FIFO src_fifo_2 = BNC(&sn, 1)->buffer; - FIFO dest_fifo_1 = BNC(dn, 0)->buffer; - FIFO dest_fifo_2 = BNC(dn, 1)->buffer; + message_buffer *src_msg_buffer1 = &BNC(&sn, 0)->msg_buffer; + message_buffer *src_msg_buffer2 = &BNC(&sn, 1)->msg_buffer; + message_buffer *dest_msg_buffer1 = &BNC(dn, 0)->msg_buffer; + message_buffer *dest_msg_buffer2 = &BNC(dn, 1)->msg_buffer; - assert(toku_are_fifos_same(src_fifo_1, dest_fifo_1)); - assert(toku_are_fifos_same(src_fifo_2, dest_fifo_2)); + assert(src_msg_buffer1->equals(dest_msg_buffer1)); + assert(src_msg_buffer2->equals(dest_msg_buffer2)); toku_ftnode_free(&dn); + toku_destroy_ftnode_internals(&sn); - toku_free(sn.childkeys[0].data); - destroy_nonleaf_childinfo(BNC(&sn, 0)); - destroy_nonleaf_childinfo(BNC(&sn, 1)); - toku_free(sn.bp); - 
toku_free(sn.childkeys); - - toku_block_free(ft_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&ft_h->blocktable); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); + ft_h->cmp.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ft-test0.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ft-test0.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ft-test0.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ft-test0.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -92,7 +92,6 @@ #include "test.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static void test0 (void) { FT_HANDLE t; @@ -101,7 +100,7 @@ const char *fname = TOKU_TEST_FILENAME; if (verbose) printf("%s:%d test0\n", __FILE__, __LINE__); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); if (verbose) printf("%s:%d test0\n", __FILE__, __LINE__); unlink(fname); r = toku_open_ft_handle(fname, 1, &t, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ft-test1.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ft-test1.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ft-test1.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ft-test1.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -92,7 +92,6 @@ #include "test.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static void test1 (void) { FT_HANDLE t; @@ -101,7 +100,7 @@ const char *fname = TOKU_TEST_FILENAME; DBT k,v; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); unlink(fname); r = toku_open_ft_handle(fname, 1, &t, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ft-test2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ft-test2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ft-test2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ft-test2.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -92,7 +92,6 @@ #include "test.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static void test2 (int limit) { FT_HANDLE t; @@ -102,7 +101,7 @@ const char *fname = TOKU_TEST_FILENAME; if (verbose) printf("%s:%d checking\n", __FILE__, __LINE__); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); unlink(fname); r = toku_open_ft_handle(fname, 1, &t, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); if (verbose) printf("%s:%d did setup\n", __FILE__, __LINE__); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ft-test3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ft-test3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ft-test3.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ft-test3.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -97,7 +97,6 @@ static const enum toku_compression_method compression_method = TOKU_DEFAULT_COMPRESSION_METHOD; static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static void test3 (int nodesize, int basementnodesize, int count) { FT_HANDLE t; @@ -106,7 +105,7 @@ int i; CACHETABLE ct; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); gettimeofday(&t0, 0); unlink(fname); r = toku_open_ft_handle(fname, 1, &t, nodesize, basementnodesize, compression_method, ct, null_txn, toku_builtin_compare_fun); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ft-test4.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ft-test4.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ft-test4.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ft-test4.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,7 +95,6 @@ static const char *fname = TOKU_TEST_FILENAME; static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static void test4 (int nodesize, int count) { FT_HANDLE t; @@ -106,7 +105,7 @@ gettimeofday(&t0, 0); unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &t, nodesize, nodesize / 8, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); for (i=0; i 0) + verbose--; + continue; + } + if (strcmp(argv[i], "--clean") == 0) { + do_shutdown = true; + continue; + } + if (strcmp(argv[i], "--dirty") == 0) { + do_shutdown = false; + continue; + } + } + char testdir[256]; + sprintf(testdir, "upgrade-recovery-logs-%d-%s", TOKU_LOG_VERSION, do_shutdown ? 
"clean" : "dirty"); + generate_recovery_log(testdir, do_shutdown); + return 0; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/is_empty.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/is_empty.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/is_empty.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/is_empty.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -92,7 +92,7 @@ #include "test.h" #include "toku_os.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #define FILENAME "test0.ft" diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/keyrange.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/keyrange.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/keyrange.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/keyrange.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -96,7 +96,6 @@ #include static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static const char *fname = TOKU_TEST_FILENAME; static CACHETABLE ct; @@ -111,7 +110,7 @@ static void open_ft_and_ct (bool unlink_old) { int r; if (unlink_old) unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &t, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/keytest.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/keytest.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/keytest.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/keytest.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,11 +88,12 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ #include "test.h" -#include "key.h" +#include "ft.h" -void -toku_test_keycompare (void) { +static void +test_keycompare (void) { assert(toku_keycompare("a",1, "a",1)==0); assert(toku_keycompare("aa",2, "a",1)>0); assert(toku_keycompare("a",1, "aa",2)<0); @@ -109,7 +110,7 @@ test_main (int argc , const char *argv[]) { default_parse_args(argc, argv); - toku_test_keycompare(); + test_keycompare(); if (verbose) printf("test ok\n"); return 0; } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/le-cursor-provdel.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/le-cursor-provdel.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/le-cursor-provdel.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/le-cursor-provdel.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,15 +91,13 @@ // test the LE_CURSOR next function with provisionally deleted rows -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #include "le-cursor.h" #include "test.h" -static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static int -get_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen UU(), bytevec val UU(), void *extra, bool lock_only) { +get_next_callback(uint32_t keylen, const void *key, uint32_t vallen UU(), const void *val UU(), void *extra, bool lock_only) { DBT *CAST_FROM_VOIDP(key_dbt, extra); if (!lock_only) { toku_dbt_set(keylen, key, key_dbt, NULL); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/le-cursor-right.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/le-cursor-right.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/le-cursor-right.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/le-cursor-right.cc 2014-10-08 13:19:51.000000000 +0000 
@@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,15 +94,14 @@ // - LE_CURSOR somewhere else -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #include "le-cursor.h" #include "test.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static int -get_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen UU(), bytevec val UU(), void *extra, bool lock_only) { +get_next_callback(uint32_t keylen, const void *key, uint32_t vallen UU(), const void *val UU(), void *extra, bool lock_only) { DBT *CAST_FROM_VOIDP(key_dbt, extra); if (!lock_only) { toku_dbt_set(keylen, key, key_dbt, NULL); @@ -196,7 +195,7 @@ int error; CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); FT_HANDLE ft = NULL; error = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_keycompare); @@ -230,7 +229,7 @@ int error; CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); FT_HANDLE ft = NULL; error = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_keycompare); @@ -284,7 +283,7 @@ int error; CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); FT_HANDLE ft = NULL; error = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_keycompare); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/le-cursor-walk.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/le-cursor-walk.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/le-cursor-walk.cc 2014-08-03 12:00:34.000000000 +0000 +++ 
mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/le-cursor-walk.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,16 +91,15 @@ // test the LE_CURSOR next function -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #include "le-cursor.h" #include "test.h" #include static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static int -get_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen UU(), bytevec val UU(), void *extra, bool lock_only) { +get_next_callback(uint32_t keylen, const void *key, uint32_t vallen UU(), const void *val UU(), void *extra, bool lock_only) { DBT *CAST_FROM_VOIDP(key_dbt, extra); if (!lock_only) { toku_dbt_set(keylen, key, key_dbt, NULL); @@ -192,7 +191,7 @@ int error; CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); FT_HANDLE ft = NULL; error = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/list-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/list-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/list-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/list-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logcursor-bad-checksum.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logcursor-bad-checksum.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logcursor-bad-checksum.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logcursor-bad-checksum.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,7 +88,7 @@ #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." -#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" // log a couple of timestamp entries and verify the log by walking diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logcursor-bw.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logcursor-bw.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logcursor-bw.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logcursor-bw.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logcursor-empty-logdir.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logcursor-empty-logdir.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logcursor-empty-logdir.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logcursor-empty-logdir.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -88,7 +88,7 @@ #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." -#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" // a logcursor in an empty directory should not find any log entries diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile-2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile-2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile-2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile-2.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,7 +88,7 @@ #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." -#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" const int N = 2; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile-3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile-3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile-3.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile-3.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,7 +88,7 @@ #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." 
-#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" const int N = 2; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,7 +88,7 @@ #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." -#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" const int N = 2; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logcursor-fw.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logcursor-fw.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logcursor-fw.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logcursor-fw.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logcursor-print.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logcursor-print.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logcursor-print.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logcursor-print.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -89,7 +89,7 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include "test.h" -#include "logcursor.h" +#include "logger/logcursor.h" int test_main(int argc, const char *argv[]) { int r; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logcursor-timestamp.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logcursor-timestamp.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logcursor-timestamp.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logcursor-timestamp.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,7 +88,7 @@ #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." -#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" static uint64_t now(void) { diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logfilemgr-create-destroy.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logfilemgr-create-destroy.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logfilemgr-create-destroy.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logfilemgr-create-destroy.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,8 +88,9 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "test.h" -#include "logfilemgr.h" + +#include "ft/tests/test.h" +#include "ft/logger/logfilemgr.h" int test_main(int argc __attribute__((unused)), const char *argv[] __attribute__((unused))) { int r; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logfilemgr-print.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logfilemgr-print.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/logfilemgr-print.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/logfilemgr-print.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,8 +88,8 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "test.h" -#include "logfilemgr.h" +#include "ft/tests/test.h" +#include "ft/logger/logfilemgr.h" int test_main(int argc __attribute__((unused)), const char *argv[] __attribute__((unused))) { int r; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/log-test2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/log-test2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/log-test2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/log-test2.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/log-test3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/log-test3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/log-test3.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/log-test3.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/log-test4.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/log-test4.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/log-test4.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/log-test4.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/log-test5.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/log-test5.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/log-test5.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/log-test5.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/log-test6.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/log-test6.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/log-test6.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/log-test6.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/log-test7.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/log-test7.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/log-test7.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/log-test7.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/log-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/log-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/log-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/log-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/log-test-maybe-trim.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/log-test-maybe-trim.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/log-test-maybe-trim.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/log-test-maybe-trim.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ // verify that the log file trimmer does not delete the log file containing the // begin checkpoint when the checkpoint log entries span multiple log files. -#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" int diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/make-tree.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/make-tree.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/make-tree.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/make-tree.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -125,8 +125,8 @@ // apply an insert to the leaf node txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u = {.id = { &thekey, &theval }} }; - toku_ft_bn_apply_msg_once(BLB(leafnode,0), &msg, idx, NULL, &gc_info, NULL, NULL); + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); + toku_ft_bn_apply_msg_once(BLB(leafnode,0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); leafnode->max_msn_applied_to_node_on_disk = msn; @@ -152,7 +152,7 @@ unsigned int key = htonl(val); DBT thekey; toku_fill_dbt(&thekey, &key, sizeof key); DBT theval; toku_fill_dbt(&theval, &val, sizeof val); - toku_ft_append_to_child_buffer(ft->ft->compare_fun, NULL, node, childnum, FT_INSERT, msn, xids_get_root_xids(), true, &thekey, &theval); + toku_ft_append_to_child_buffer(ft->ft->cmp, node, childnum, FT_INSERT, msn, toku_xids_get_root_xids(), true, &thekey, &theval); node->max_msn_applied_to_node_on_disk = msn; } } @@ -209,7 +209,7 @@ // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); // create the ft TOKUTXN null_txn = NULL; @@ -222,7 +222,7 @@ FTNODE newroot = make_tree(ft, height, fanout, nperleaf, &seq, &minkey, &maxkey); // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(ft->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); ft->ft->h->max_msn_in_ft = last_dummymsn(); // capture msn of last message injected into tree diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/mempool-115.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/mempool-115.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/mempool-115.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/mempool-115.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek 
Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -123,6 +123,7 @@ idx, key, keysize, + keysize, // old_keylen size_needed, // old_le_size size_needed, &r, @@ -148,7 +149,7 @@ // just copy this code from a previous test // don't care what it does, just want to get a node up and running sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -156,9 +157,8 @@ sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], "b", 2); - sn.totalchildkeylens = 2; + DBT pivotkey; + sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1); BP_STATE(&sn,0) = PT_AVAIL; BP_STATE(&sn,1) = PT_AVAIL; set_BLB(&sn, 0, toku_create_empty_bn()); @@ -167,8 +167,6 @@ le_add_to_bn(BLB_DATA(&sn, 0), 1, "b", 2, "bval", 5); le_add_to_bn(BLB_DATA(&sn, 1), 0, "x", 2, "xval", 5); - - // now this is the test. If I keep getting space for overwrite // like crazy, it should expose the bug bn_data* bnd = BLB_DATA(&sn, 0); @@ -186,15 +184,7 @@ // on. It may be that some algorithm has changed. 
assert(new_size < 5*old_size); - - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_destroy_ftnode_internals(&sn); } }; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/minicron-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/minicron-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/minicron-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/minicron-test.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,272 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include -#include "test.h" -#include "minicron.h" -#include - -#include -#include - -static double -tdiff (struct timeval *a, struct timeval *b) { - return (a->tv_sec-b->tv_sec) + (a->tv_usec-b->tv_usec)*1e-6; -} - -struct timeval starttime; -static double elapsed (void) { - struct timeval now; - gettimeofday(&now, 0); - return tdiff(&now, &starttime); -} - -static int -#ifndef GCOV -__attribute__((__noreturn__)) -#endif -never_run (void *a) { - assert(a==0); - assert(0); -#if defined(GCOV) - return 0; -#endif -} - -// Can we start something with period=0 (the function should never run) and shut it down. -static void* -test1 (void* v) -{ - struct minicron m; - ZERO_STRUCT(m); - int r = toku_minicron_setup(&m, 0, never_run, 0); assert(r==0); - sleep(1); - r = toku_minicron_shutdown(&m); assert(r==0); - return v; -} - -// Can we start something with period=10 and shut it down after 2 seconds (the function should never run) . 
-static void* -test2 (void* v) -{ - struct minicron m; - ZERO_STRUCT(m); - int r = toku_minicron_setup(&m, 10000, never_run, 0); assert(r==0); - sleep(2); - r = toku_minicron_shutdown(&m); assert(r==0); - return v; -} - -struct tenx { - struct timeval tv; - int counter; -}; - -static int -run_5x (void *v) { - struct tenx *CAST_FROM_VOIDP(tx, v); - struct timeval now; - gettimeofday(&now, 0); - double diff = tdiff(&now, &tx->tv); - if (verbose) printf("T=%f tx->counter=%d\n", diff, tx->counter); - // We only verify that the timer was not premature. - // Sometimes it will be delayed, but there's no good way to test it and nothing we can do about it. - if (!(diff>0.5 + tx->counter)) { - printf("T=%f tx->counter=%d\n", diff, tx->counter); - assert(0); - } - tx->counter++; - return 0; -} - -// Start something with period=1 and run it a few times -static void* -test3 (void* v) -{ - struct minicron m; - struct tenx tx; - gettimeofday(&tx.tv, 0); - tx.counter=0; - ZERO_STRUCT(m); - int r = toku_minicron_setup(&m, 1000, run_5x, &tx); assert(r==0); - sleep(5); - r = toku_minicron_shutdown(&m); assert(r==0); - assert(tx.counter>=4 && tx.counter<=5); // after 5 seconds it could have run 4 or 5 times. 
- return v; -} - -static int -run_3sec (void *v) { - if (verbose) printf("start3sec at %.6f\n", elapsed()); - int *CAST_FROM_VOIDP(counter, v); - (*counter)++; - sleep(3); - if (verbose) printf("end3sec at %.6f\n", elapsed()); - return 0; -} - -// make sure that if f is really slow that it doesn't run too many times -static void* -test4 (void *v) { - struct minicron m; - int counter = 0; - ZERO_STRUCT(m); - int r = toku_minicron_setup(&m, 2000, run_3sec, &counter); assert(r==0); - sleep(10); - r = toku_minicron_shutdown(&m); assert(r==0); - assert(counter==3); - return v; -} - -static void* -test5 (void *v) { - struct minicron m; - int counter = 0; - ZERO_STRUCT(m); - int r = toku_minicron_setup(&m, 10000, run_3sec, &counter); assert(r==0); - toku_minicron_change_period(&m, 2000); - sleep(10); - r = toku_minicron_shutdown(&m); assert(r==0); - assert(counter==3); - return v; -} - -static void* -test6 (void *v) { - struct minicron m; - ZERO_STRUCT(m); - int r = toku_minicron_setup(&m, 5000, never_run, 0); assert(r==0); - toku_minicron_change_period(&m, 0); - sleep(7); - r = toku_minicron_shutdown(&m); assert(r==0); - return v; -} - -// test that we actually run once per period, even if the execution is long -static void* -test7 (void *v) { - struct minicron m; - int counter = 0; - ZERO_STRUCT(m); - int r = toku_minicron_setup(&m, 5000, run_3sec, &counter); assert(r==0); - sleep(17); - r = toku_minicron_shutdown(&m); assert(r==0); - assert(counter==3); - return v; -} - -typedef void*(*ptf)(void*); -int -test_main (int argc, const char *argv[]) { - default_parse_args(argc,argv); - gettimeofday(&starttime, 0); - - ptf testfuns[] = {test1, test2, test3, - test4, - test5, - test6, - test7 - }; -#define N (sizeof(testfuns)/sizeof(testfuns[0])) - toku_pthread_t tests[N]; - - unsigned int i; - for (i=0; ift->h->max_msn_in_ft = msn; - FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, 
toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_leaf_apply_msg(ft->ft->compare_fun, ft->ft->update_fun, &ft->ft->cmp_descriptor, leafnode, -1, &msg, &gc_info, nullptr, nullptr); + toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, msg, &gc_info, nullptr, nullptr); { int r = toku_ft_lookup(ft, &thekey, lookup_checkf, &pair); assert(r==0); assert(pair.call_count==1); } - FT_MSG_S badmsg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &badval }} }; - toku_ft_leaf_apply_msg(ft->ft->compare_fun, ft->ft->update_fun, &ft->ft->cmp_descriptor, leafnode, -1, &badmsg, &gc_info, nullptr, nullptr); + ft_msg badmsg(&thekey, &badval, FT_INSERT, msn, toku_xids_get_root_xids()); + toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, badmsg, &gc_info, nullptr, nullptr); // message should be rejected for duplicate msn, row should still have original val { @@ -154,8 +154,8 @@ // now verify that message with proper msn gets through msn = next_dummymsn(); ft->ft->h->max_msn_in_ft = msn; - FT_MSG_S msg2 = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &val2 }} }; - toku_ft_leaf_apply_msg(ft->ft->compare_fun, ft->ft->update_fun, &ft->ft->cmp_descriptor, leafnode, -1, &msg2, &gc_info, nullptr, nullptr); + ft_msg msg2(&thekey, &val2, FT_INSERT, msn, toku_xids_get_root_xids()); + toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, msg2, &gc_info, nullptr, nullptr); // message should be accepted, val should have new value { @@ -166,8 +166,8 @@ // now verify that message with lesser (older) msn is rejected msn.msn = msn.msn - 10; - FT_MSG_S msg3 = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &badval } }}; - toku_ft_leaf_apply_msg(ft->ft->compare_fun, ft->ft->update_fun, &ft->ft->cmp_descriptor, leafnode, -1, &msg3, &gc_info, nullptr, nullptr); + ft_msg msg3(&thekey, &badval, FT_INSERT, msn, toku_xids_get_root_xids()); + 
toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, msg3, &gc_info, nullptr, nullptr); // message should be rejected, val should still have value in pair2 { @@ -202,7 +202,7 @@ // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); // create the ft TOKUTXN null_txn = NULL; @@ -213,7 +213,7 @@ FTNODE newroot = make_node(ft, 0); // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(ft->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // KLUDGE: Unpin the new root so toku_ft_lookup() can pin it. (Pin lock is no longer a recursive // mutex.) Just leaving it unpinned for this test program works because it is the only diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/orthopush-flush.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/orthopush-flush.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/orthopush-flush.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/orthopush-flush.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -94,25 +94,9 @@ #include "ule.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static const char *fname = TOKU_TEST_FILENAME; static txn_gc_info non_mvcc_gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - -static int dummy_cmp(DB *db __attribute__((unused)), - const DBT *a, const DBT *b) { - int c; - if (a->size > b->size) { - c = memcmp(a->data, b->data, b->size); - } else if (a->size < b->size) { - c = memcmp(a->data, b->data, a->size); - } else { - return memcmp(a->data, b->data, a->size); - } - if (c == 0) { - c = a->size - b->size; - } - return c; -} +static toku::comparator dummy_cmp; // generate size random bytes into dest static void @@ -148,7 +132,7 @@ // generate a random message with xids and a key starting with pfx, insert // it in bnc, and save it in output params save and is_fresh_out static void -insert_random_message(NONLEAF_CHILDINFO bnc, FT_MSG_S **save, bool *is_fresh_out, XIDS xids, int pfx) +insert_random_message(NONLEAF_CHILDINFO bnc, ft_msg **save, bool *is_fresh_out, XIDS xids, int pfx) { int keylen = (random() % 128) + 16; int vallen = (random() % 128) + 16; @@ -160,23 +144,15 @@ MSN msn = next_dummymsn(); bool is_fresh = (random() & 0x100) == 0; - DBT *keydbt, *valdbt; - XMALLOC(keydbt); - XMALLOC(valdbt); - toku_fill_dbt(keydbt, key, keylen + (sizeof pfx)); - toku_fill_dbt(valdbt, val, vallen); - FT_MSG_S *XMALLOC(result); - result->type = FT_INSERT; - result->msn = msn; - result->xids = xids; - result->u.id.key = keydbt; - result->u.id.val = valdbt; - *save = result; + DBT keydbt, valdbt; + toku_fill_dbt(&keydbt, key, keylen + (sizeof pfx)); + toku_fill_dbt(&valdbt, val, vallen); + *save = new ft_msg(&keydbt, &valdbt, FT_INSERT, msn, xids); *is_fresh_out = is_fresh; toku_bnc_insert_msg(bnc, key, keylen + (sizeof pfx), val, vallen, FT_INSERT, msn, xids, is_fresh, - NULL, dummy_cmp); + dummy_cmp); } // generate a random message with xids and a key starting with pfx, insert @@ -209,17 +185,12 @@ valdbt = 
&valdbt_s; toku_fill_dbt(keydbt, key, (sizeof *pfxp) + keylen); toku_fill_dbt(valdbt, val, vallen); - FT_MSG_S msg; - msg.type = FT_INSERT; - msg.msn = msn; - msg.xids = xids; - msg.u.id.key = keydbt; - msg.u.id.val = valdbt; *keylenp = keydbt->size; *keyp = toku_xmemdup(keydbt->data, keydbt->size); + ft_msg msg(keydbt, valdbt, FT_INSERT, msn, xids); int64_t numbytes; - toku_le_apply_msg(&msg, NULL, NULL, 0, &non_mvcc_gc_info, save, &numbytes); - toku_ft_bn_apply_msg(t->ft->compare_fun, t->ft->update_fun, NULL, blb, &msg, &non_mvcc_gc_info, NULL, NULL); + toku_le_apply_msg(msg, NULL, NULL, 0, keydbt->size, &non_mvcc_gc_info, save, &numbytes); + toku_ft_bn_apply_msg(t->ft->cmp, t->ft->update_fun, blb, msg, &non_mvcc_gc_info, NULL, NULL); if (msn.msn > blb->max_msn_applied.msn) { blb->max_msn_applied = msn; } @@ -259,21 +230,16 @@ valdbt = &valdbt_s; toku_fill_dbt(keydbt, key, (sizeof *pfxp) + keylen); toku_fill_dbt(valdbt, val, vallen); - FT_MSG_S msg; - msg.type = FT_INSERT; - msg.msn = msn; - msg.xids = xids; - msg.u.id.key = keydbt; - msg.u.id.val = valdbt; *keylenp = keydbt->size; *keyp = toku_xmemdup(keydbt->data, keydbt->size); + ft_msg msg(keydbt, valdbt, FT_INSERT, msn, xids); int64_t numbytes; - toku_le_apply_msg(&msg, NULL, NULL, 0, &non_mvcc_gc_info, save, &numbytes); - toku_ft_bn_apply_msg(t->ft->compare_fun, t->ft->update_fun, NULL, blb1, &msg, &non_mvcc_gc_info, NULL, NULL); + toku_le_apply_msg(msg, NULL, NULL, 0, keydbt->size, &non_mvcc_gc_info, save, &numbytes); + toku_ft_bn_apply_msg(t->ft->cmp, t->ft->update_fun, blb1, msg, &non_mvcc_gc_info, NULL, NULL); if (msn.msn > blb1->max_msn_applied.msn) { blb1->max_msn_applied = msn; } - toku_ft_bn_apply_msg(t->ft->compare_fun, t->ft->update_fun, NULL, blb2, &msg, &non_mvcc_gc_info, NULL, NULL); + toku_ft_bn_apply_msg(t->ft->cmp, t->ft->update_fun, blb2, msg, &non_mvcc_gc_info, NULL, NULL); if (msn.msn > blb2->max_msn_applied.msn) { blb2->max_msn_applied = msn; } @@ -300,7 +266,7 @@ // the update message 
will overwrite the value with something generated // here, and add one to the int pointed to by applied static void -insert_random_update_message(NONLEAF_CHILDINFO bnc, FT_MSG_S **save, bool is_fresh, XIDS xids, int pfx, int *applied, MSN *max_msn) +insert_random_update_message(NONLEAF_CHILDINFO bnc, ft_msg **save, bool is_fresh, XIDS xids, int pfx, int *applied, MSN *max_msn) { int keylen = (random() % 16) + 16; int vallen = (random() % 16) + 16; @@ -313,48 +279,38 @@ update_extra->num_applications = applied; MSN msn = next_dummymsn(); - DBT *keydbt, *valdbt; - XMALLOC(keydbt); - XMALLOC(valdbt); - toku_fill_dbt(keydbt, key, keylen + (sizeof pfx)); - toku_fill_dbt(valdbt, update_extra, sizeof *update_extra); - FT_MSG_S *XMALLOC(result); - result->type = FT_UPDATE; - result->msn = msn; - result->xids = xids; - result->u.id.key = keydbt; - result->u.id.val = valdbt; - *save = result; + DBT keydbt, valdbt; + toku_fill_dbt(&keydbt, key, keylen + (sizeof pfx)); + toku_fill_dbt(&valdbt, update_extra, sizeof *update_extra); + *save = new ft_msg(&keydbt, &valdbt, FT_UPDATE, msn, xids); toku_bnc_insert_msg(bnc, key, keylen + (sizeof pfx), update_extra, sizeof *update_extra, FT_UPDATE, msn, xids, is_fresh, - NULL, dummy_cmp); + dummy_cmp); if (msn.msn > max_msn->msn) { *max_msn = msn; } } -const int M = 1024 * 1024; - // flush from one internal node to another, where both only have one // buffer static void flush_to_internal(FT_HANDLE t) { int r; - FT_MSG_S **MALLOC_N(4096,parent_messages); // 128k / 32 = 4096 - FT_MSG_S **MALLOC_N(4096,child_messages); + ft_msg **MALLOC_N(4096,parent_messages); // 128k / 32 = 4096 + ft_msg **MALLOC_N(4096,child_messages); bool *MALLOC_N(4096,parent_messages_is_fresh); bool *MALLOC_N(4096,child_messages_is_fresh); memset(parent_messages_is_fresh, 0, 4096*(sizeof parent_messages_is_fresh[0])); memset(child_messages_is_fresh, 0, 4096*(sizeof child_messages_is_fresh[0])); - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = 
toku_xids_get_root_xids(); XIDS xids_123, xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_0, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_0, &xids_234, (TXNID)234); CKERR(r); NONLEAF_CHILDINFO child_bnc = toku_create_empty_nl(); @@ -384,41 +340,60 @@ memset(parent_messages_present, 0, sizeof parent_messages_present); memset(child_messages_present, 0, sizeof child_messages_present); - FIFO_ITERATE(child_bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, - { - DBT keydbt; - DBT valdbt; - toku_fill_dbt(&keydbt, key, keylen); - toku_fill_dbt(&valdbt, val, vallen); - int found = 0; - for (i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(NULL, &keydbt, parent_messages[i]->u.id.key) == 0 && - msn.msn == parent_messages[i]->msn.msn) { - assert(parent_messages_present[i] == 0); - assert(found == 0); - assert(dummy_cmp(NULL, &valdbt, parent_messages[i]->u.id.val) == 0); - assert(type == parent_messages[i]->type); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(parent_messages[i]->xids)); - assert(parent_messages_is_fresh[i] == is_fresh); - parent_messages_present[i]++; - found++; - } - } - for (i = 0; i < num_child_messages; ++i) { - if (dummy_cmp(NULL, &keydbt, child_messages[i]->u.id.key) == 0 && - msn.msn == child_messages[i]->msn.msn) { - assert(child_messages_present[i] == 0); - assert(found == 0); - assert(dummy_cmp(NULL, &valdbt, child_messages[i]->u.id.val) == 0); - assert(type == child_messages[i]->type); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(child_messages[i]->xids)); - assert(child_messages_is_fresh[i] == is_fresh); - child_messages_present[i]++; - found++; - } - } - assert(found == 1); - }); + struct checkit_fn { + int num_parent_messages; + ft_msg **parent_messages; + int *parent_messages_present; + bool *parent_messages_is_fresh; + int num_child_messages; + ft_msg 
**child_messages; + int *child_messages_present; + bool *child_messages_is_fresh; + checkit_fn(int np, ft_msg **pm, int *npp, bool *pmf, int nc, ft_msg **cm, int *ncp, bool *cmf) : + num_parent_messages(np), parent_messages(pm), parent_messages_present(npp), parent_messages_is_fresh(pmf), + num_child_messages(nc), child_messages(cm), child_messages_present(ncp), child_messages_is_fresh(cmf) { + } + int operator()(const ft_msg &msg, bool is_fresh) { + DBT keydbt; + DBT valdbt; + toku_fill_dbt(&keydbt, msg.kdbt()->data, msg.kdbt()->size); + toku_fill_dbt(&valdbt, msg.vdbt()->data, msg.vdbt()->size); + int found = 0; + MSN msn = msg.msn(); + enum ft_msg_type type = msg.type(); + XIDS xids = msg.xids(); + for (int k = 0; k < num_parent_messages; ++k) { + if (dummy_cmp(&keydbt, parent_messages[k]->kdbt()) == 0 && + msn.msn == parent_messages[k]->msn().msn) { + assert(parent_messages_present[k] == 0); + assert(found == 0); + assert(dummy_cmp(&valdbt, parent_messages[k]->vdbt()) == 0); + assert(type == parent_messages[k]->type()); + assert(toku_xids_get_innermost_xid(xids) == toku_xids_get_innermost_xid(parent_messages[k]->xids())); + assert(parent_messages_is_fresh[k] == is_fresh); + parent_messages_present[k]++; + found++; + } + } + for (int k = 0; k < num_child_messages; ++k) { + if (dummy_cmp(&keydbt, child_messages[k]->kdbt()) == 0 && + msn.msn == child_messages[k]->msn().msn) { + assert(child_messages_present[k] == 0); + assert(found == 0); + assert(dummy_cmp(&valdbt, child_messages[k]->vdbt()) == 0); + assert(type == child_messages[k]->type()); + assert(toku_xids_get_innermost_xid(xids) == toku_xids_get_innermost_xid(child_messages[k]->xids())); + assert(child_messages_is_fresh[k] == is_fresh); + child_messages_present[k]++; + found++; + } + } + assert(found == 1); + return 0; + } + } checkit(num_parent_messages, parent_messages, parent_messages_present, parent_messages_is_fresh, + num_child_messages, child_messages, child_messages_present, 
child_messages_is_fresh); + child_bnc->msg_buffer.iterate(checkit); for (i = 0; i < num_parent_messages; ++i) { assert(parent_messages_present[i] == 1); @@ -427,23 +402,19 @@ assert(child_messages_present[i] == 1); } - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); for (i = 0; i < num_parent_messages; ++i) { - toku_free(parent_messages[i]->u.id.key->data); - toku_free((DBT *) parent_messages[i]->u.id.key); - toku_free(parent_messages[i]->u.id.val->data); - toku_free((DBT *) parent_messages[i]->u.id.val); - toku_free(parent_messages[i]); + toku_free(parent_messages[i]->kdbt()->data); + toku_free(parent_messages[i]->vdbt()->data); + delete parent_messages[i]; } for (i = 0; i < num_child_messages; ++i) { - toku_free(child_messages[i]->u.id.key->data); - toku_free((DBT *) child_messages[i]->u.id.key); - toku_free(child_messages[i]->u.id.val->data); - toku_free((DBT *) child_messages[i]->u.id.val); - toku_free(child_messages[i]); + toku_free(child_messages[i]->kdbt()->data); + toku_free(child_messages[i]->vdbt()->data); + delete child_messages[i]; } destroy_nonleaf_childinfo(parent_bnc); toku_ftnode_free(&child); @@ -458,22 +429,22 @@ flush_to_internal_multiple(FT_HANDLE t) { int r; - FT_MSG_S **MALLOC_N(4096,parent_messages); // 128k / 32 = 4096 - FT_MSG_S **MALLOC_N(4096,child_messages); + ft_msg **MALLOC_N(4096,parent_messages); // 128k / 32 = 4096 + ft_msg **MALLOC_N(4096,child_messages); bool *MALLOC_N(4096,parent_messages_is_fresh); bool *MALLOC_N(4096,child_messages_is_fresh); memset(parent_messages_is_fresh, 0, 4096*(sizeof parent_messages_is_fresh[0])); memset(child_messages_is_fresh, 0, 4096*(sizeof child_messages_is_fresh[0])); - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123, xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, 
(TXNID)123); CKERR(r); - r = xids_create_child(xids_0, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_0, &xids_234, (TXNID)234); CKERR(r); NONLEAF_CHILDINFO child_bncs[8]; - FT_MSG childkeys[7]; + ft_msg *childkeys[7]; int i; for (i = 0; i < 8; ++i) { child_bncs[i] = toku_create_empty_nl(); @@ -487,7 +458,7 @@ insert_random_message(child_bncs[i%8], &child_messages[i], &child_messages_is_fresh[i], xids_123, i%8); total_size += toku_bnc_memory_used(child_bncs[i%8]); if (i % 8 < 7) { - if (childkeys[i%8] == NULL || dummy_cmp(NULL, child_messages[i]->u.id.key, childkeys[i%8]->u.id.key) > 0) { + if (childkeys[i%8] == NULL || dummy_cmp(child_messages[i]->kdbt(), childkeys[i%8]->kdbt()) > 0) { childkeys[i%8] = child_messages[i]; } } @@ -508,7 +479,7 @@ set_BNC(child, i, child_bncs[i]); BP_STATE(child, i) = PT_AVAIL; if (i < 7) { - toku_clone_dbt(&child->childkeys[i], *childkeys[i]->u.id.key); + child->pivotkeys.insert_at(childkeys[i]->kdbt(), i); } } @@ -525,41 +496,60 @@ memset(child_messages_present, 0, sizeof child_messages_present); for (int j = 0; j < 8; ++j) { - FIFO_ITERATE(child_bncs[j]->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, - { - DBT keydbt; - DBT valdbt; - toku_fill_dbt(&keydbt, key, keylen); - toku_fill_dbt(&valdbt, val, vallen); - int found = 0; - for (i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(NULL, &keydbt, parent_messages[i]->u.id.key) == 0 && - msn.msn == parent_messages[i]->msn.msn) { - assert(parent_messages_present[i] == 0); - assert(found == 0); - assert(dummy_cmp(NULL, &valdbt, parent_messages[i]->u.id.val) == 0); - assert(type == parent_messages[i]->type); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(parent_messages[i]->xids)); - assert(parent_messages_is_fresh[i] == is_fresh); - parent_messages_present[i]++; - found++; - } - } - for (i = 0; i < num_child_messages; ++i) { - if (dummy_cmp(NULL, &keydbt, child_messages[i]->u.id.key) == 0 && - msn.msn == child_messages[i]->msn.msn) { - 
assert(child_messages_present[i] == 0); - assert(found == 0); - assert(dummy_cmp(NULL, &valdbt, child_messages[i]->u.id.val) == 0); - assert(type == child_messages[i]->type); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(child_messages[i]->xids)); - assert(child_messages_is_fresh[i] == is_fresh); - child_messages_present[i]++; - found++; - } - } - assert(found == 1); - }); + struct checkit_fn { + int num_parent_messages; + ft_msg **parent_messages; + int *parent_messages_present; + bool *parent_messages_is_fresh; + int num_child_messages; + ft_msg **child_messages; + int *child_messages_present; + bool *child_messages_is_fresh; + checkit_fn(int np, ft_msg **pm, int *npp, bool *pmf, int nc, ft_msg **cm, int *ncp, bool *cmf) : + num_parent_messages(np), parent_messages(pm), parent_messages_present(npp), parent_messages_is_fresh(pmf), + num_child_messages(nc), child_messages(cm), child_messages_present(ncp), child_messages_is_fresh(cmf) { + } + int operator()(const ft_msg &msg, bool is_fresh) { + DBT keydbt; + DBT valdbt; + toku_fill_dbt(&keydbt, msg.kdbt()->data, msg.kdbt()->size); + toku_fill_dbt(&valdbt, msg.vdbt()->data, msg.vdbt()->size); + int found = 0; + MSN msn = msg.msn(); + enum ft_msg_type type = msg.type(); + XIDS xids = msg.xids(); + for (int _i = 0; _i < num_parent_messages; ++_i) { + if (dummy_cmp(&keydbt, parent_messages[_i]->kdbt()) == 0 && + msn.msn == parent_messages[_i]->msn().msn) { + assert(parent_messages_present[_i] == 0); + assert(found == 0); + assert(dummy_cmp(&valdbt, parent_messages[_i]->vdbt()) == 0); + assert(type == parent_messages[_i]->type()); + assert(toku_xids_get_innermost_xid(xids) == toku_xids_get_innermost_xid(parent_messages[_i]->xids())); + assert(parent_messages_is_fresh[_i] == is_fresh); + parent_messages_present[_i]++; + found++; + } + } + for (int _i = 0; _i < num_child_messages; ++_i) { + if (dummy_cmp(&keydbt, child_messages[_i]->kdbt()) == 0 && + msn.msn == child_messages[_i]->msn().msn) { + 
assert(child_messages_present[_i] == 0); + assert(found == 0); + assert(dummy_cmp(&valdbt, child_messages[_i]->vdbt()) == 0); + assert(type == child_messages[_i]->type()); + assert(toku_xids_get_innermost_xid(xids) == toku_xids_get_innermost_xid(child_messages[_i]->xids())); + assert(child_messages_is_fresh[_i] == is_fresh); + child_messages_present[_i]++; + found++; + } + } + assert(found == 1); + return 0; + } + } checkit(num_parent_messages, parent_messages, parent_messages_present, parent_messages_is_fresh, + num_child_messages, child_messages, child_messages_present, child_messages_is_fresh); + child_bncs[j]->msg_buffer.iterate(checkit); } for (i = 0; i < num_parent_messages; ++i) { @@ -569,23 +559,19 @@ assert(child_messages_present[i] == 1); } - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); for (i = 0; i < num_parent_messages; ++i) { - toku_free(parent_messages[i]->u.id.key->data); - toku_free((DBT *) parent_messages[i]->u.id.key); - toku_free(parent_messages[i]->u.id.val->data); - toku_free((DBT *) parent_messages[i]->u.id.val); - toku_free(parent_messages[i]); + toku_free(parent_messages[i]->kdbt()->data); + toku_free(parent_messages[i]->vdbt()->data); + delete parent_messages[i]; } for (i = 0; i < num_child_messages; ++i) { - toku_free(child_messages[i]->u.id.key->data); - toku_free((DBT *) child_messages[i]->u.id.key); - toku_free(child_messages[i]->u.id.val->data); - toku_free((DBT *) child_messages[i]->u.id.val); - toku_free(child_messages[i]); + toku_free(child_messages[i]->kdbt()->data); + toku_free(child_messages[i]->vdbt()->data); + delete child_messages[i]; } destroy_nonleaf_childinfo(parent_bnc); toku_ftnode_free(&child); @@ -607,7 +593,7 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { int r; - FT_MSG_S **MALLOC_N(4096,parent_messages); // 128k / 32 = 4096 + ft_msg 
**MALLOC_N(4096,parent_messages); // 128k / 32 = 4096 LEAFENTRY* child_messages = NULL; XMALLOC_N(4096,child_messages); void** key_pointers = NULL; @@ -619,11 +605,11 @@ int *MALLOC_N(4096,parent_messages_applied); memset(parent_messages_applied, 0, 4096*(sizeof parent_messages_applied[0])); - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123, xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_0, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_0, &xids_234, (TXNID)234); CKERR(r); BASEMENTNODE child_blbs[8]; @@ -653,7 +639,7 @@ total_size += child_blbs[i%8]->data_buffer.get_memory_size(); if (i % 8 < 7) { DBT keydbt; - if (childkeys[i%8].size == 0 || dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) > 0) { + if (childkeys[i%8].size == 0 || dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) > 0) { toku_fill_dbt(&childkeys[i%8], key_pointers[i], keylens[i]); } } @@ -663,7 +649,7 @@ for (i = 0; i < num_child_messages; ++i) { DBT keydbt; if (i % 8 < 7) { - assert(dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) <= 0); + assert(dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) <= 0); } } @@ -679,13 +665,13 @@ int num_parent_messages = i; for (i = 0; i < 7; ++i) { - toku_clone_dbt(&child->childkeys[i], childkeys[i]); + child->pivotkeys.insert_at(&childkeys[i], i); } if (make_leaf_up_to_date) { for (i = 0; i < num_parent_messages; ++i) { if (!parent_messages_is_fresh[i]) { - toku_ft_leaf_apply_msg(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL); + toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child, -1, *parent_messages[i], &non_mvcc_gc_info, NULL, NULL); } } for (i = 0; i < 8; ++i) { @@ -717,15 
+703,16 @@ BP_STATE(parentnode, 0) = PT_AVAIL; parentnode->max_msn_applied_to_node_on_disk = max_parent_msn; struct ancestors ancestors = { .node = parentnode, .childnum = 0, .next = NULL }; - const struct pivot_bounds infinite_bounds = { .lower_bound_exclusive = NULL, .upper_bound_inclusive = NULL }; bool msgs_applied; - toku_apply_ancestors_messages_to_node(t, child, &ancestors, &infinite_bounds, &msgs_applied, -1); + toku_apply_ancestors_messages_to_node(t, child, &ancestors, pivot_bounds::infinite_bounds(), &msgs_applied, -1); - FIFO_ITERATE(parent_bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, - { - key = key; keylen = keylen; val = val; vallen = vallen; type = type; msn = msn; xids = xids; - assert(!is_fresh); - }); + struct checkit_fn { + int operator()(const ft_msg &UU(msg), bool is_fresh) { + assert(!is_fresh); + return 0; + } + } checkit; + parent_bnc->msg_buffer.iterate(checkit); invariant(parent_bnc->fresh_message_tree.size() + parent_bnc->stale_message_tree.size() == (uint32_t) num_parent_messages); @@ -763,10 +750,10 @@ } int found = 0; for (i = num_parent_messages - 1; i >= 0; --i) { - if (dummy_cmp(NULL, &keydbt, parent_messages[i]->u.id.key) == 0) { + if (dummy_cmp(&keydbt, parent_messages[i]->kdbt()) == 0) { if (found == 0) { - struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(e, parent_messages[i]->u.id.val->data); - assert(dummy_cmp(NULL, &valdbt, &e->new_val) == 0); + struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(e, parent_messages[i]->vdbt()->data); + assert(dummy_cmp(&valdbt, &e->new_val) == 0); found++; } assert(parent_messages_present[i] == 0); @@ -782,9 +769,9 @@ toku_fill_dbt(&childkeydbt, key_pointers[i], keylens[i]); toku_fill_dbt(&childvaldbt, valp, vallen); } - if (dummy_cmp(NULL, &keydbt, &childkeydbt) == 0) { + if (dummy_cmp(&keydbt, &childkeydbt) == 0) { if (found == 0) { - assert(dummy_cmp(NULL, &valdbt, &childvaldbt) == 0); + assert(dummy_cmp(&valdbt, &childvaldbt) == 0); found++; } 
assert(child_messages_present[i] == 0); @@ -801,18 +788,16 @@ assert(child_messages_present[i] == 1); } - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); for (i = 0; i < num_parent_messages; ++i) { - toku_free(parent_messages[i]->u.id.key->data); - toku_free((DBT *) parent_messages[i]->u.id.key); - struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(extra, parent_messages[i]->u.id.val->data); + toku_free(parent_messages[i]->kdbt()->data); + struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(extra, parent_messages[i]->vdbt()->data); toku_free(extra->new_val.data); - toku_free(parent_messages[i]->u.id.val->data); - toku_free((DBT *) parent_messages[i]->u.id.val); - toku_free(parent_messages[i]); + toku_free(parent_messages[i]->vdbt()->data); + delete parent_messages[i]; } for (i = 0; i < num_child_messages; ++i) { toku_free(child_messages[i]); @@ -837,7 +822,7 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { int r; - FT_MSG_S **MALLOC_N(4096,parent_messages); // 128k / 32 = 4k + ft_msg **MALLOC_N(4096,parent_messages); // 128k / 32 = 4k LEAFENTRY* child_messages = NULL; XMALLOC_N(4096,child_messages); void** key_pointers = NULL; @@ -849,11 +834,11 @@ int *MALLOC_N(4096,parent_messages_applied); memset(parent_messages_applied, 0, 4096*(sizeof parent_messages_applied[0])); - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123, xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_0, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_0, &xids_234, (TXNID)234); CKERR(r); BASEMENTNODE child_blbs[8]; @@ -879,7 +864,7 @@ insert_random_message_to_bn(t, child_blbs[i%8], &key_pointers[i], &keylens[i], &child_messages[i], xids_123, i%8); total_size += 
child_blbs[i%8]->data_buffer.get_memory_size(); DBT keydbt; - if (childkeys[i%8].size == 0 || dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) > 0) { + if (childkeys[i%8].size == 0 || dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) > 0) { toku_fill_dbt(&childkeys[i%8], key_pointers[i], keylens[i]); } } @@ -887,7 +872,7 @@ for (i = 0; i < num_child_messages; ++i) { DBT keydbt; - assert(dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) <= 0); + assert(dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) <= 0); } { @@ -902,14 +887,14 @@ int num_parent_messages = i; for (i = 0; i < 7; ++i) { - toku_clone_dbt(&child->childkeys[i], childkeys[i]); + child->pivotkeys.insert_at(&childkeys[i], i); } if (make_leaf_up_to_date) { for (i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(NULL, parent_messages[i]->u.id.key, &childkeys[7]) <= 0 && + if (dummy_cmp(parent_messages[i]->kdbt(), &childkeys[7]) <= 0 && !parent_messages_is_fresh[i]) { - toku_ft_leaf_apply_msg(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL); + toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child, -1, *parent_messages[i], &non_mvcc_gc_info, NULL, NULL); } } for (i = 0; i < 8; ++i) { @@ -923,7 +908,7 @@ for (i = 0; i < num_parent_messages; ++i) { if (make_leaf_up_to_date && - dummy_cmp(NULL, parent_messages[i]->u.id.key, &childkeys[7]) <= 0 && + dummy_cmp(parent_messages[i]->kdbt(), &childkeys[7]) <= 0 && !parent_messages_is_fresh[i]) { assert(parent_messages_applied[i] == 1); } else { @@ -940,30 +925,39 @@ parentnode->max_msn_applied_to_node_on_disk = max_parent_msn; struct ancestors ancestors = { .node = parentnode, .childnum = 0, .next = NULL }; DBT lbe, ubi; - const struct pivot_bounds bounds = { - .lower_bound_exclusive = toku_init_dbt(&lbe), - .upper_bound_inclusive = toku_clone_dbt(&ubi, 
childkeys[7]) - }; + toku_init_dbt(&lbe); + toku_clone_dbt(&ubi, childkeys[7]); + const pivot_bounds bounds(lbe, ubi); bool msgs_applied; - toku_apply_ancestors_messages_to_node(t, child, &ancestors, &bounds, &msgs_applied, -1); + toku_apply_ancestors_messages_to_node(t, child, &ancestors, bounds, &msgs_applied, -1); - FIFO_ITERATE(parent_bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, - { - val = val; vallen = vallen; type = type; msn = msn; xids = xids; - DBT keydbt; - toku_fill_dbt(&keydbt, key, keylen); - if (dummy_cmp(NULL, &keydbt, &childkeys[7]) > 0) { - for (i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(NULL, &keydbt, parent_messages[i]->u.id.key) == 0 && - msn.msn == parent_messages[i]->msn.msn) { - assert(is_fresh == parent_messages_is_fresh[i]); - break; - } - } - } else { - assert(!is_fresh); - } - }); + struct checkit_fn { + DBT *childkeys; + int num_parent_messages; + ft_msg **parent_messages; + bool *parent_messages_is_fresh; + checkit_fn(DBT *ck, int np, ft_msg **pm, bool *pmf) : + childkeys(ck), num_parent_messages(np), parent_messages(pm), parent_messages_is_fresh(pmf) { + } + int operator()(const ft_msg &msg, bool is_fresh) { + DBT keydbt; + toku_fill_dbt(&keydbt, msg.kdbt()->data, msg.kdbt()->size); + MSN msn = msg.msn(); + if (dummy_cmp(&keydbt, &childkeys[7]) > 0) { + for (int _i = 0; _i < num_parent_messages; ++_i) { + if (dummy_cmp(&keydbt, parent_messages[_i]->kdbt()) == 0 && + msn.msn == parent_messages[_i]->msn().msn) { + assert(is_fresh == parent_messages_is_fresh[_i]); + break; + } + } + } else { + assert(!is_fresh); + } + return 0; + } + } checkit(childkeys, num_parent_messages, parent_messages, parent_messages_is_fresh); + parent_bnc->msg_buffer.iterate(checkit); toku_ftnode_free(&parentnode); @@ -974,25 +968,23 @@ assert(total_messages <= num_parent_messages + num_child_messages); for (i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(NULL, parent_messages[i]->u.id.key, &childkeys[7]) <= 0) { + if 
(dummy_cmp(parent_messages[i]->kdbt(), &childkeys[7]) <= 0) { assert(parent_messages_applied[i] == 1); } else { assert(parent_messages_applied[i] == 0); } } - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); for (i = 0; i < num_parent_messages; ++i) { - toku_free(parent_messages[i]->u.id.key->data); - toku_free((DBT *) parent_messages[i]->u.id.key); - struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(extra, parent_messages[i]->u.id.val->data); + toku_free(parent_messages[i]->kdbt()->data); + struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(extra, parent_messages[i]->vdbt()->data); toku_free(extra->new_val.data); - toku_free(parent_messages[i]->u.id.val->data); - toku_free((DBT *) parent_messages[i]->u.id.val); - toku_free(parent_messages[i]); + toku_free(parent_messages[i]->vdbt()->data); + delete parent_messages[i]; } for (i = 0; i < num_child_messages; ++i) { toku_free(child_messages[i]); @@ -1019,7 +1011,7 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { int r; - FT_MSG_S **MALLOC_N(4096,parent_messages); // 128k / 32 = 4k + ft_msg **MALLOC_N(4096,parent_messages); // 128k / 32 = 4k LEAFENTRY* child_messages = NULL; XMALLOC_N(4096,child_messages); void** key_pointers = NULL; @@ -1031,11 +1023,11 @@ int *MALLOC_N(4096,parent_messages_applied); memset(parent_messages_applied, 0, 4096*(sizeof parent_messages_applied[0])); - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123, xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_0, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_0, &xids_234, (TXNID)234); CKERR(r); BASEMENTNODE child1_blbs[8], child2_blbs[8]; @@ -1070,7 +1062,7 @@ total_size += child1_blbs[i%8]->data_buffer.get_memory_size(); if (i % 
8 < 7) { DBT keydbt; - if (child1keys[i%8].size == 0 || dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &child1keys[i%8]) > 0) { + if (child1keys[i%8].size == 0 || dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &child1keys[i%8]) > 0) { toku_fill_dbt(&child1keys[i%8], key_pointers[i], keylens[i]); toku_fill_dbt(&child2keys[i%8], key_pointers[i], keylens[i]); } @@ -1081,8 +1073,8 @@ for (i = 0; i < num_child_messages; ++i) { DBT keydbt; if (i % 8 < 7) { - assert(dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &child1keys[i%8]) <= 0); - assert(dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &child2keys[i%8]) <= 0); + assert(dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &child1keys[i%8]) <= 0); + assert(dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &child2keys[i%8]) <= 0); } } @@ -1098,15 +1090,15 @@ int num_parent_messages = i; for (i = 0; i < 7; ++i) { - toku_clone_dbt(&child1->childkeys[i], child1keys[i]); - toku_clone_dbt(&child2->childkeys[i], child2keys[i]); + child1->pivotkeys.insert_at(&child1keys[i], i); + child2->pivotkeys.insert_at(&child2keys[i], i); } if (make_leaf_up_to_date) { for (i = 0; i < num_parent_messages; ++i) { if (!parent_messages_is_fresh[i]) { - toku_ft_leaf_apply_msg(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child1, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL); - toku_ft_leaf_apply_msg(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child2, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL); + toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child1, -1, *parent_messages[i], &non_mvcc_gc_info, NULL, NULL); + toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child2, -1, *parent_messages[i], &non_mvcc_gc_info, NULL, NULL); } } for (i = 0; i < 8; ++i) { @@ -1130,15 +1122,16 @@ BP_STATE(parentnode, 0) = PT_AVAIL; parentnode->max_msn_applied_to_node_on_disk = max_parent_msn; struct 
ancestors ancestors = { .node = parentnode, .childnum = 0, .next = NULL }; - const struct pivot_bounds infinite_bounds = { .lower_bound_exclusive = NULL, .upper_bound_inclusive = NULL }; bool msgs_applied; - toku_apply_ancestors_messages_to_node(t, child2, &ancestors, &infinite_bounds, &msgs_applied, -1); + toku_apply_ancestors_messages_to_node(t, child2, &ancestors, pivot_bounds::infinite_bounds(), &msgs_applied, -1); - FIFO_ITERATE(parent_bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, - { - key = key; keylen = keylen; val = val; vallen = vallen; type = type; msn = msn; xids = xids; - assert(!is_fresh); - }); + struct checkit_fn { + int operator()(const ft_msg &UU(msg), bool is_fresh) { + assert(!is_fresh); + return 0; + } + } checkit; + parent_bnc->msg_buffer.iterate(checkit); invariant(parent_bnc->fresh_message_tree.size() + parent_bnc->stale_message_tree.size() == (uint32_t) num_parent_messages); @@ -1170,23 +1163,21 @@ toku_fill_dbt(&key2dbt, keyp, keylen); toku_fill_dbt(&val2dbt, valp, vallen); } - assert(dummy_cmp(NULL, &key1dbt, &key2dbt) == 0); - assert(dummy_cmp(NULL, &val1dbt, &val2dbt) == 0); + assert(dummy_cmp(&key1dbt, &key2dbt) == 0); + assert(dummy_cmp(&val1dbt, &val2dbt) == 0); } } - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); for (i = 0; i < num_parent_messages; ++i) { - toku_free(parent_messages[i]->u.id.key->data); - toku_free((DBT *) parent_messages[i]->u.id.key); - struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(extra, parent_messages[i]->u.id.val->data); + toku_free(parent_messages[i]->kdbt()->data); + struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(extra, parent_messages[i]->vdbt()->data); toku_free(extra->new_val.data); - toku_free(parent_messages[i]->u.id.val->data); - toku_free((DBT *) parent_messages[i]->u.id.val); - toku_free(parent_messages[i]); + 
toku_free(parent_messages[i]->vdbt()->data); + delete parent_messages[i]; } for (i = 0; i < num_child_messages; ++i) { toku_free(key_pointers[i]); @@ -1219,14 +1210,32 @@ } } +static int cmp_fn(DB *db __attribute__((unused)), + const DBT *a, const DBT *b) { + int c; + if (a->size > b->size) { + c = memcmp(a->data, b->data, b->size); + } else if (a->size < b->size) { + c = memcmp(a->data, b->data, a->size); + } else { + return memcmp(a->data, b->data, a->size); + } + if (c == 0) { + c = a->size - b->size; + } + return c; +} + int test_main (int argc, const char *argv[]) { parse_args(argc, argv); + dummy_cmp.create(cmp_fn, nullptr); + initialize_dummymsn(); int r; CACHETABLE ct; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); unlink(fname); FT_HANDLE t; r = toku_open_ft_handle(fname, 1, &t, 128*1024, 4096, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); @@ -1256,5 +1265,7 @@ r = toku_close_ft_handle_nolsn(t, 0); assert(r==0); toku_cachetable_close(&ct); + dummy_cmp.destroy(); + return 0; } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/pqueue-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/pqueue-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/pqueue-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/pqueue-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -91,8 +91,8 @@ #include "test.h" -#include "ftloader-internal.h" -#include "pqueue.h" +#include "loader/loader-internal.h" +#include "loader/pqueue.h" int found_dup = -1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/queue-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/queue-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/queue-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/queue-test.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,185 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#include -#include "toku_os.h" -#include -#include -#include -#include -#include -#include "queue.h" - -static int verbose=1; - -static int count_0 = 0; -static uint64_t e_max_weight=0, d_max_weight = 0; // max weight seen by enqueue thread and dequeue thread respectively. - -static void *start_0 (void *arg) { - QUEUE q = (QUEUE)arg; - void *item; - uint64_t weight; - long count = 0; - while (1) { - uint64_t this_max_weight; - int r=queue_deq(q, &item, &weight, &this_max_weight); - if (r==EOF) break; - assert(r==0); - if (this_max_weight>d_max_weight) d_max_weight=this_max_weight; - long v = (long)item; - //printf("D(%ld)=%ld %ld\n", v, this_max_weight, d_max_weight); - assert(v==count); - count_0++; - count++; - } - return NULL; -} - -static void enq (QUEUE q, long v, uint64_t weight) { - uint64_t this_max_weight; - int r = queue_enq(q, (void*)v, (weight==0)?0:1, &this_max_weight); - assert(r==0); - if (this_max_weight>e_max_weight) e_max_weight=this_max_weight; - //printf("E(%ld)=%ld %ld\n", v, this_max_weight, e_max_weight); -} - -static void queue_test_0 (uint64_t weight) -// Test a queue that can hold WEIGHT items. 
-{ - //printf("\n"); - count_0 = 0; - e_max_weight = 0; - d_max_weight = 0; - QUEUE q; - int r; - r = queue_create(&q, weight); assert(r==0); - toku_pthread_t thread; - r = toku_pthread_create(&thread, NULL, start_0, q); assert(r==0); - enq(q, 0L, weight); - enq(q, 1L, weight); - enq(q, 2L, weight); - enq(q, 3L, weight); - sleep(1); - enq(q, 4L, weight); - enq(q, 5L, weight); - r = queue_eof(q); assert(r==0); - void *result; - r = toku_pthread_join(thread, &result); assert(r==0); - assert(result==NULL); - assert(count_0==6); - r = queue_destroy(q); - assert(d_max_weight <= weight); - assert(e_max_weight <= weight); -} - - -static void parse_args (int argc, const char *argv[]) { - const char *progname=argv[0]; - argc--; argv++; - while (argc>0) { - if (strcmp(argv[0],"-v")==0) { - verbose++; - } else if (strcmp(argv[0],"-q")==0) { - verbose--; - } else { - fprintf(stderr, "Usage:\n %s [-v] [-q]\n", progname); - exit(1); - } - argc--; argv++; - } - if (verbose<0) verbose=0; -} - -int main (int argc, const char *argv[]) { - parse_args(argc, argv); - queue_test_0(0LL); - queue_test_0(1LL); - queue_test_0(2LL); - return 0; -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/quicklz-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/quicklz-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/quicklz-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/quicklz-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -30,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -91,7 +91,7 @@ #ident "$Id$" #include "test.h" -#include "quicklz.h" +#include "serialize/quicklz.h" static void test_qlz_random_i (int i) { if (verbose) printf("i=%d\n", i); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-bad-last-entry.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-bad-last-entry.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-bad-last-entry.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-bad-last-entry.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -146,7 +146,7 @@ else break; // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-cbegin.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-cbegin.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-cbegin.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-cbegin.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -119,7 +119,7 @@ r = close(devnul); assert(r==0); - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-cbegin-cend.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-cbegin-cend.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-cbegin-cend.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-cbegin-cend.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -113,7 +113,7 @@ r = toku_logger_close(&logger); assert(r == 0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-cbegin-cend-hello.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-cbegin-cend-hello.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-cbegin-cend-hello.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-cbegin-cend-hello.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -128,7 +128,7 @@ r = close(devnul); assert(r==0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-cend-cbegin.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-cend-cbegin.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-cend-cbegin.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-cend-cbegin.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -121,7 +121,7 @@ } // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-datadir-is-file.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-datadir-is-file.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-datadir-is-file.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-datadir-is-file.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -130,7 +130,7 @@ strncat(buf, testfile, TOKU_PATH_MAX); r = system(buf); CKERR(r); } - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-empty.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-empty.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-empty.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-empty.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -116,7 +116,7 @@ } // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-fopen-missing-file.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-fopen-missing-file.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-fopen-missing-file.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-fopen-missing-file.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -109,7 +109,7 @@ toku_log_begin_checkpoint(logger, &beginlsn, true, 0, 0); toku_log_end_checkpoint(logger, NULL, true, beginlsn, 0, 0, 0); - BYTESTRING iname = { (uint32_t) strlen("missing_tokudb_file"), (char *) "missing_tokudb_file" }; + BYTESTRING iname = { (uint32_t) strlen("missing_tokuft_file"), (char *) "missing_tokuft_file" }; FILENUM filenum = {42}; uint32_t treeflags = 0; toku_log_fopen(logger, NULL, true, iname, filenum, treeflags); @@ -122,7 +122,7 @@ r = close(devnul); assert(r==0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-hello.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-hello.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-hello.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-hello.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -122,7 +122,7 @@ r = close(devnul); assert(r==0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-lsn-error-during-forward-scan.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-lsn-error-during-forward-scan.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-lsn-error-during-forward-scan.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-lsn-error-during-forward-scan.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -152,7 +152,7 @@ toku_recover_set_callback(recover_callback_at_turnaround, NULL); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-no-datadir.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-no-datadir.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-no-datadir.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-no-datadir.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -116,7 +116,7 @@ r = close(devnul); assert(r==0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, "/junk", TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-no-log.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-no-log.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-no-log.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-no-log.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -108,7 +108,7 @@ r = close(devnul); assert(r==0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-no-logdir.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-no-logdir.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-no-logdir.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-no-logdir.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -102,7 +102,7 @@ r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU); assert(r == 0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, NULL, NULL, 0, 0, 0, NULL, 0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-test5123.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-test5123.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/recovery-test5123.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/recovery-test5123.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -92,7 +92,7 @@ #include "test.h" #include "toku_os.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #include "test-ft-txns.h" diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/shortcut.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/shortcut.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/shortcut.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/shortcut.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -108,7 +108,7 @@ unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); r = toku_ft_cursor(ft, &cursor, NULL, false, false); assert(r==0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/subblock-test-checksum.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/subblock-test-checksum.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/subblock-test-checksum.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/subblock-test-checksum.cc 2014-10-08 13:19:51.000000000 +0000 @@ -30,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,8 +91,8 @@ #include "test.h" -#include "compress.h" -#include "sub_block.h" +#include "serialize/compress.h" +#include "serialize/sub_block.h" #include #include diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/subblock-test-compression.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/subblock-test-compression.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/subblock-test-compression.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/subblock-test-compression.cc 2014-10-08 13:19:51.000000000 +0000 @@ -30,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -95,7 +95,7 @@ #include #include -#include "sub_block.h" +#include "serialize/sub_block.h" static void test_sub_block_compression(void *buf, int total_size, int my_max_sub_blocks, int n_cores, enum toku_compression_method method) { diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/subblock-test-index.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/subblock-test-index.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/subblock-test-index.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/subblock-test-index.cc 2014-10-08 13:19:51.000000000 +0000 @@ -30,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,7 +95,7 @@ #include #include -#include "sub_block.h" +#include "serialize/sub_block.h" static void test_sub_block_index(void) { diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/subblock-test-size.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/subblock-test-size.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/subblock-test-size.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/subblock-test-size.cc 2014-10-08 13:19:51.000000000 +0000 @@ -30,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -95,7 +95,7 @@ #include #include -#include "sub_block.h" +#include "serialize/sub_block.h" static void test_sub_block_size(int total_size) { diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test1308a.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test1308a.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test1308a.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test1308a.cc 2014-10-08 13:19:51.000000000 +0000 @@ -30,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test3681.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test3681.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test3681.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test3681.cc 2014-10-08 13:19:51.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,19 +95,18 @@ // * Thread 1 calls apply_msg_to_in_memory_leaves, calls get_and_pin_if_in_memory, tries to get a read lock on the root node and blocks on the rwlock because there is a write request on the lock. 
-#include "checkpoint.h" +#include "cachetable/checkpoint.h" #include "test.h" CACHETABLE ct; FT_HANDLE t; -static DB * const null_db = 0; static TOKUTXN const null_txn = 0; volatile bool done = false; static void setup (void) { - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); const char *fname = TOKU_TEST_FILENAME; unlink(fname); { int r = toku_open_ft_handle(fname, 1, &t, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test3856.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test3856.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test3856.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test3856.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -90,7 +90,7 @@ // it used to be the case that we copied the left and right keys of a // range to be prelocked but never freed them, this test checks that they -// are freed (as of this time, this happens in destroy_bfe_for_prefetch) +// are freed (as of this time, this happens in ftnode_fetch_extra::destroy()) #include "test.h" @@ -99,7 +99,6 @@ static const char *fname = TOKU_TEST_FILENAME; static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static int const nodesize = 1<<12, basementnodesize = 1<<9; static const enum toku_compression_method compression_method = TOKU_DEFAULT_COMPRESSION_METHOD; static int const count = 1000; @@ -111,7 +110,7 @@ } static int -found(ITEMLEN UU(keylen), bytevec key, ITEMLEN UU(vallen), bytevec UU(val), void *UU(extra), bool lock_only) +found(uint32_t UU(keylen), const void *key, uint32_t UU(vallen), const void *UU(val), void *UU(extra), bool lock_only) { assert(key != NULL && !lock_only); return 0; @@ -123,7 +122,7 @@ CACHETABLE ct; FT_HANDLE t; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); unlink(fname); int r = toku_open_ft_handle(fname, 1, &t, nodesize, basementnodesize, compression_method, ct, null_txn, string_cmp); assert(r==0); @@ -137,7 +136,7 @@ r = toku_close_ft_handle_nolsn(t, 0); assert(r == 0); toku_cachetable_close(&ct); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &t, nodesize, basementnodesize, compression_method, ct, null_txn, string_cmp); assert(r == 0); for (int n = 0; n < count/100; ++n) { diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test3884.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test3884.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test3884.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test3884.cc 2014-10-08 13:19:51.000000000 
+0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -90,7 +90,7 @@ // it used to be the case that we copied the left and right keys of a // range to be prelocked but never freed them, this test checks that they -// are freed (as of this time, this happens in destroy_bfe_for_prefetch) +// are freed (as of this time, this happens in ftnode_fetch_extra::destroy()) #include "test.h" @@ -111,7 +111,6 @@ #define dummy_msn_3884 ((MSN) { (uint64_t) 3884 * MIN_MSN.msn }) static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static const char *fname = TOKU_TEST_FILENAME; static void @@ -154,12 +153,11 @@ setup_ftnode_header(struct ftnode *node) { node->flags = 0x11223344; - node->thisnodename.b = 20; + node->blocknum.b = 20; node->layout_version = FT_LAYOUT_VERSION; node->layout_version_original = FT_LAYOUT_VERSION; node->height = 0; node->dirty = 1; - node->totalchildkeylens = 0; node->oldest_referenced_xid_known = TXNID_NONE; } @@ -169,12 +167,12 @@ node->n_children = n_children; node->max_msn_applied_to_node_on_disk = msn; MALLOC_N(node->n_children, node->bp); - MALLOC_N(node->n_children - 1, node->childkeys); for (int bn = 0; bn < node->n_children; ++bn) { BP_STATE(node, bn) = PT_AVAIL; set_BLB(node, bn, toku_create_empty_bn()); BLB_MAX_MSN_APPLIED(node, bn) = msn; } + node->pivotkeys.create_empty(); } static void @@ -186,7 +184,7 @@ } // -// Maximum node size according to the BRT: 1024 (expected node size after split) +// Maximum node size according to the FT: 1024 (expected node size after split) // Maximum basement node size: 256 // Actual node size before split: 2048 // Actual basement node size before split: 256 @@ -210,15 +208,15 @@ insert_dummy_value(&sn, bn, k, i); } if (bn < sn.n_children - 1) { - toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k); - sn.totalchildkeylens += (sizeof k); + DBT pivotkey; + 
sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn); } } unlink(fname); CACHETABLE ct; FT_HANDLE ft; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); FTNODE nodea, nodeb; @@ -233,15 +231,12 @@ r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0); toku_cachetable_close(&ct); - if (splitk.data) { - toku_free(splitk.data); - } - + toku_destroy_dbt(&splitk); toku_destroy_ftnode_internals(&sn); } // -// Maximum node size according to the BRT: 1024 (expected node size after split) +// Maximum node size according to the FT: 1024 (expected node size after split) // Maximum basement node size: 256 (except the last) // Actual node size before split: 4095 // Actual basement node size before split: 256 (except the last, of size 2K) @@ -270,8 +265,8 @@ k = bn * eltsperbn + i; big_val_size += insert_dummy_value(&sn, bn, k, i); } - toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k); - sn.totalchildkeylens += (sizeof k); + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn); } else { k = bn * eltsperbn; // we want this to be as big as the rest of our data and a @@ -288,7 +283,7 @@ unlink(fname); CACHETABLE ct; FT_HANDLE ft; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); FTNODE nodea, nodeb; @@ -300,16 +295,13 @@ r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0); toku_cachetable_close(&ct); - if (splitk.data) { - toku_free(splitk.data); - } - + toku_destroy_dbt(&splitk); toku_destroy_ftnode_internals(&sn); } // -// Maximum node size according to the BRT: 1024 (expected node size after split) +// Maximum node size 
according to the FT: 1024 (expected node size after split) // Maximum basement node size: 256 (except the last) // Actual node size before split: 4095 // Actual basement node size before split: 256 (except the last, of size 2K) @@ -339,8 +331,8 @@ k = bn * eltsperbn + i; big_val_size += insert_dummy_value(&sn, bn, k, i); } - toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k); - sn.totalchildkeylens += (sizeof k); + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn); } else { k = bn * eltsperbn; // we want this to be slightly smaller than all the rest of @@ -360,7 +352,7 @@ unlink(fname); CACHETABLE ct; FT_HANDLE ft; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); FTNODE nodea, nodeb; @@ -372,10 +364,7 @@ r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0); toku_cachetable_close(&ct); - if (splitk.data) { - toku_free(splitk.data); - } - + toku_destroy_dbt(&splitk); toku_destroy_ftnode_internals(&sn); } @@ -405,8 +394,8 @@ totalbytes += insert_dummy_value(&sn, bn, k, i-1); } if (bn < sn.n_children - 1) { - toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k); - sn.totalchildkeylens += (sizeof k); + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn); } } { // now add the first element @@ -424,7 +413,7 @@ unlink(fname); CACHETABLE ct; FT_HANDLE ft; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); FTNODE nodea, nodeb; @@ -436,10 +425,7 @@ r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0); toku_cachetable_close(&ct); - if (splitk.data) { - toku_free(splitk.data); - } - + 
toku_destroy_dbt(&splitk); toku_destroy_ftnode_internals(&sn); } @@ -476,15 +462,15 @@ } } if (bn < sn.n_children - 1) { - toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k); - sn.totalchildkeylens += (sizeof k); + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn); } } unlink(fname); CACHETABLE ct; FT_HANDLE ft; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); FTNODE nodea, nodeb; @@ -496,14 +482,11 @@ r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0); toku_cachetable_close(&ct); - if (splitk.data) { - toku_free(splitk.data); - } - + toku_destroy_dbt(&splitk); toku_destroy_ftnode_internals(&sn); } -// Maximum node size according to the BRT: 1024 (expected node size after split) +// Maximum node size according to the FT: 1024 (expected node size after split) // Maximum basement node size: 256 // Actual node size before split: 2048 // Actual basement node size before split: 256 @@ -530,15 +513,15 @@ insert_dummy_value(&sn, bn, k, i); } if (bn < sn.n_children - 1) { - toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k); - sn.totalchildkeylens += (sizeof k); + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn); } } unlink(fname); CACHETABLE ct; FT_HANDLE ft; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); FTNODE nodea, nodeb; @@ -553,10 +536,7 @@ r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0); toku_cachetable_close(&ct); - if (splitk.data) { - toku_free(splitk.data); - } - + toku_destroy_dbt(&splitk); toku_destroy_ftnode_internals(&sn); } diff -Nru 
mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test4115.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test4115.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test4115.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test4115.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -96,7 +96,6 @@ #include static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; const char *fname = TOKU_TEST_FILENAME; CACHETABLE ct; @@ -117,7 +116,7 @@ static void open_ft_and_ct (bool unlink_old) { int r; if (unlink_old) unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &t, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); toku_ft_set_bt_compare(t, dont_allow_prefix); } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test4244.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test4244.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test4244.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test4244.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -96,7 +96,6 @@ #include static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -110,7 +109,7 @@ int r; - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink(fname); r = toku_open_ft_handle(fname, 1, &t, NODESIZE, NODESIZE/2, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); @@ -148,8 +147,8 @@ // then node_internal should be huge // we pin it and verify that it is not FTNODE node; - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, t->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(t->ft); toku_pin_ftnode( t->ft, node_internal, diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-assert.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-assert.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-assert.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-assert.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-bjm.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-bjm.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-bjm.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-bjm.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +89,7 @@ #ident "Copyright (c) 2011-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "background_job_manager.h" +#include "cachetable/background_job_manager.h" #include "test.h" diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test_block_allocator_merge.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test_block_allocator_merge.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test_block_allocator_merge.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test_block_allocator_merge.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,236 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." -#include "../block_allocator.h" -#include -#include -// Test the merger. 
- -int verbose = 0; - -static void -print_array (uint64_t n, const struct block_allocator_blockpair a[/*n*/]) { - printf("{"); - for (uint64_t i=0; ioffset < b->offset) return -1; - if (a->offset > b->offset) return +1; - return 0; -} - -static void -test_merge (uint64_t an, const struct block_allocator_blockpair a[/*an*/], - uint64_t bn, const struct block_allocator_blockpair b[/*bn*/]) { - if (verbose>1) { printf("a:"); print_array(an, a); } - if (verbose>1) { printf("b:"); print_array(bn, b); } - struct block_allocator_blockpair *MALLOC_N(an+bn, q); - struct block_allocator_blockpair *MALLOC_N(an+bn, m); - if (q==0 || m==0) { - fprintf(stderr, "malloc failed, continuing\n"); - goto malloc_failed; - } - for (uint64_t i=0; i1) { printf("q:"); print_array(an+bn, q); } - if (verbose) printf("merge\n"); - block_allocator_merge_blockpairs_into(an, m, bn, b); - if (verbose) printf("compare\n"); - if (verbose>1) { printf("m:"); print_array(an+bn, m); } - for (uint64_t i=0; i #include "ft-flusher.h" #include "ft-flusher-internal.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -184,7 +183,7 @@ toku_flusher_thread_set_callback(flusher_callback, &after_child_pin); - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink("foo1.ft_handle"); r = toku_open_ft_handle("foo1.ft_handle", 1, &t, NODESIZE, NODESIZE/2, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); @@ -228,8 +227,8 @@ ); FTNODE node = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); toku_pin_ftnode( t->ft, node_root, @@ -283,7 +282,7 @@ // // now pin the root, verify that we have a message in there, and that it is clean // - fill_bfe_for_full_read(&bfe, c_ft->ft); 
+ bfe.create_for_full_read(c_ft->ft); toku_pin_ftnode( c_ft->ft, node_root, diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-merge.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-merge.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-merge.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-merge.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -96,10 +96,9 @@ #include #include "ft-flusher.h" #include "ft-flusher-internal.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -175,7 +174,7 @@ toku_flusher_thread_set_callback(flusher_callback, &state); - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink("foo2.ft_handle"); unlink("bar2.ft_handle"); // note the basement node size is 5 times the node size @@ -246,8 +245,8 @@ toku_unpin_ftnode(t->ft, node); - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); toku_pin_ftnode_with_dep_nodes( t->ft, node_root, @@ -306,7 +305,7 @@ // // now pin the root, verify that the state is what we expect // - fill_bfe_for_full_read(&bfe, c_ft->ft); + bfe.create_for_full_read(c_ft->ft); toku_pin_ftnode_with_dep_nodes( c_ft->ft, node_root, diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-rebalance.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-rebalance.cc --- 
mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-rebalance.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-rebalance.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -96,10 +96,9 @@ #include #include "ft-flusher.h" #include "ft-flusher-internal.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -175,7 +174,7 @@ toku_flusher_thread_set_callback(flusher_callback, &state); - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink("foo3.ft_handle"); unlink("bar3.ft_handle"); // note the basement node size is 5 times the node size @@ -266,8 +265,8 @@ toku_unpin_ftnode(t->ft, node); - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); toku_pin_ftnode( t->ft, node_root, @@ -322,7 +321,7 @@ // // now pin the root, verify that the state is what we expect // - fill_bfe_for_full_read(&bfe, c_ft->ft); + bfe.create_for_full_read(c_ft->ft); toku_pin_ftnode( c_ft->ft, node_root, diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-split.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-split.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-split.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-split.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -96,10 +96,9 @@ #include #include "ft-flusher.h" #include "ft-flusher-internal.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -184,7 +183,7 @@ toku_flusher_thread_set_callback(flusher_callback, &after_split); - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink("foo4.ft_handle"); unlink("bar4.ft_handle"); // note the basement node size is 5 times the node size @@ -242,8 +241,8 @@ ); FTNODE node = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); toku_pin_ftnode( t->ft, node_root, @@ -298,7 +297,7 @@ // // now pin the root, verify that we have a message in there, and that it is clean // - fill_bfe_for_full_read(&bfe, c_ft->ft); + bfe.create_for_full_read(c_ft->ft); toku_pin_ftnode( c_ft->ft, node_root, diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-del-inorder.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-del-inorder.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-del-inorder.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-del-inorder.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -95,7 +95,6 @@ static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -109,7 +108,7 @@ int r; - toku_cachetable_create(&ct, 16*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 16*1024, ZERO_LSN, nullptr); unlink(fname); r = toku_open_ft_handle(fname, 1, &t, NODESIZE, NODESIZE, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-dirty-flushes-on-cleaner.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-dirty-flushes-on-cleaner.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-dirty-flushes-on-cleaner.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-dirty-flushes-on-cleaner.cc 2014-10-08 13:19:51.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,11 +94,10 @@ #include #include "ft-flusher.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -132,7 +131,7 @@ int r; - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink(fname); r = toku_open_ft_handle(fname, 1, &ft, NODESIZE, NODESIZE/2, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); @@ -238,8 +237,8 @@ // now lock and release the leaf node to make sure it is what we expect it to be. 
FTNODE node = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, ft->ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft->ft); toku_pin_ftnode_with_dep_nodes( ft->ft, node_leaf, @@ -269,7 +268,7 @@ // node is in memory and another is // on disk // - fill_bfe_for_min_read(&bfe, ft->ft); + bfe.create_for_min_read(ft->ft); toku_pin_ftnode_with_dep_nodes( ft->ft, node_leaf, @@ -290,7 +289,7 @@ // // now let us induce a clean on the internal node // - fill_bfe_for_min_read(&bfe, ft->ft); + bfe.create_for_min_read(ft->ft); toku_pin_ftnode_with_dep_nodes( ft->ft, node_internal, @@ -315,7 +314,7 @@ ); // verify that node_internal's buffer is empty - fill_bfe_for_min_read(&bfe, ft->ft); + bfe.create_for_min_read(ft->ft); toku_pin_ftnode_with_dep_nodes( ft->ft, node_internal, diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-dump-ft.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-dump-ft.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-dump-ft.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-dump-ft.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -94,7 +94,6 @@ #include "test.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; int test_main(int argc, const char *argv[]) { @@ -106,7 +105,7 @@ FILE *f = fopen("test-dump-ft.out", "w"); unlink(n); assert(f); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(n, 1, &t, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); int i; for (i=0; i<10000; i++) { diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-flushes-on-cleaner.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-flushes-on-cleaner.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-flushes-on-cleaner.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-flushes-on-cleaner.cc 2014-10-08 13:19:51.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,11 +94,10 @@ #include #include "ft-flusher.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -132,7 +131,7 @@ int r; - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink(fname); r = toku_open_ft_handle(fname, 1, &ft, NODESIZE, NODESIZE/2, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); @@ -244,8 +243,8 @@ assert_zero(r); // now lock and release the leaf node to make sure it is what we expect it to be. 
FTNODE node = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, ft->ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft->ft); toku_pin_ftnode( ft->ft, node_leaf, @@ -281,7 +280,7 @@ // but only one should have broadcast message // applied. // - fill_bfe_for_full_read(&bfe, ft->ft); + bfe.create_for_full_read(ft->ft); } else { // @@ -290,7 +289,7 @@ // node is in memory and another is // on disk // - fill_bfe_for_min_read(&bfe, ft->ft); + bfe.create_for_min_read(ft->ft); } toku_pin_ftnode( ft->ft, @@ -315,7 +314,7 @@ // // now let us induce a clean on the internal node // - fill_bfe_for_min_read(&bfe, ft->ft); + bfe.create_for_min_read(ft->ft); toku_pin_ftnode( ft->ft, node_internal, @@ -338,7 +337,7 @@ ); // verify that node_internal's buffer is empty - fill_bfe_for_min_read(&bfe, ft->ft); + bfe.create_for_min_read(ft->ft); toku_pin_ftnode( ft->ft, node_internal, diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-ft-overflow.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-ft-overflow.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-ft-overflow.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-ft-overflow.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -97,7 +97,6 @@ static const char *fname = TOKU_TEST_FILENAME; static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static void test_overflow (void) { @@ -106,7 +105,7 @@ uint32_t nodesize = 1<<20; int r; unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &t, nodesize, nodesize / 8, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); DBT k,v; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-ft-txns.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-ft-txns.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-ft-txns.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-ft-txns.h 2014-10-08 13:19:51.000000000 +0000 @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TEST_FT_TXNS_H -#define TEST_FT_TXNS_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,6 +87,8 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -136,7 +136,7 @@ CKERR(r); DB_ENV *CAST_FROM_VOIDP(ctv, (void *) &ct); // Use intermediate to avoid compiler warning. 
- r = tokudb_recover(ctv, + r = tokuft_recover(ctv, NULL_prepared_txn_callback, xid_lsn_keep_cachetable_callback, logger, @@ -179,5 +179,3 @@ int r = toku_logger_close(loggerp); CKERR(r); } - -#endif /* TEST_FT_TXNS_H */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test.h 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,6 +86,8 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
@@ -99,15 +101,19 @@ #include #include -#include "ft.h" -#include "key.h" -#include "block_table.h" -#include "log-internal.h" -#include "logger.h" -#include "fttypes.h" -#include "ft-ops.h" -#include "cachetable.h" -#include "cachetable-internal.h" +#include "ft/serialize/block_allocator.h" +#include "ft/serialize/block_table.h" +#include "ft/cachetable/cachetable.h" +#include "ft/cachetable/cachetable-internal.h" +#include "ft/cursor.h" +#include "ft/ft.h" +#include "ft/ft-ops.h" +#include "ft/serialize/ft-serialize.h" +#include "ft/serialize/ft_node-serialize.h" +#include "ft/logger/log-internal.h" +#include "ft/logger/logger.h" +#include "ft/node.h" +#include "util/bytestring.h" #define CKERR(r) ({ int __r = r; if (__r!=0) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, __r, strerror(r)); assert(__r==0); }) #define CKERR2(r,r2) do { if (r!=r2) fprintf(stderr, "%s:%d error %d %s, expected %d\n", __FILE__, __LINE__, r, strerror(r), r2); assert(r==r2); } while (0) @@ -118,15 +124,17 @@ fflush(stderr); \ } while (0) -const ITEMLEN len_ignore = 0xFFFFFFFF; +const uint32_t len_ignore = 0xFFFFFFFF; +static const prepared_txn_callback_t NULL_prepared_txn_callback __attribute__((__unused__)) = NULL; +static const keep_cachetable_callback_t NULL_keep_cachetable_callback __attribute__((__unused__)) = NULL; +static const TOKULOGGER NULL_logger __attribute__((__unused__)) = NULL; // dummymsn needed to simulate msn because test messages are injected at a lower level than toku_ft_root_put_msg() #define MIN_DUMMYMSN ((MSN) {(uint64_t)1<<62}) static MSN dummymsn; static int dummymsn_initialized = 0; - static void initialize_dummymsn(void) { if (dummymsn_initialized == 0) { @@ -150,14 +158,14 @@ struct check_pair { - ITEMLEN keylen; // A keylen equal to 0xFFFFFFFF means don't check the keylen or the key. - bytevec key; // A NULL key means don't check the key. - ITEMLEN vallen; // Similarly for vallen and null val. 
- bytevec val; + uint32_t keylen; // A keylen equal to 0xFFFFFFFF means don't check the keylen or the key. + const void *key; // A NULL key means don't check the key. + uint32_t vallen; // Similarly for vallen and null val. + const void *val; int call_count; }; static int -lookup_checkf (ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *pair_v, bool lock_only) { +lookup_checkf (uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *pair_v, bool lock_only) { if (!lock_only) { struct check_pair *pair = (struct check_pair *) pair_v; if (key!=NULL) { @@ -182,8 +190,8 @@ { DBT k; toku_fill_dbt(&k, keystring, strlen(keystring) + 1); - struct check_pair pair = {(ITEMLEN) (1+strlen(keystring)), keystring, - (ITEMLEN) (1+strlen(valstring)), valstring, + struct check_pair pair = {(uint32_t) (1+strlen(keystring)), keystring, + (uint32_t) (1+strlen(valstring)), valstring, 0}; int r = toku_ft_lookup(t, &k, lookup_checkf, &pair); assert(r==0); @@ -195,7 +203,7 @@ { DBT k; toku_fill_dbt(&k, keystring, strlen(keystring) + 1); - struct check_pair pair = {(ITEMLEN) (1+strlen(keystring)), keystring, + struct check_pair pair = {(uint32_t) (1+strlen(keystring)), keystring, 0, 0, 0}; int r = toku_ft_lookup(t, &k, lookup_checkf, &pair); @@ -392,4 +400,3 @@ toku_ft_layer_destroy(); return r; } - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-hot-with-bounds.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-hot-with-bounds.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-hot-with-bounds.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-hot-with-bounds.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -96,10 +96,9 @@ #include #include "ft-flusher.h" #include "ft-flusher-internal.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -113,7 +112,7 @@ int r; - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink(TOKU_TEST_FILENAME); r = toku_open_ft_handle(TOKU_TEST_FILENAME, 1, &t, NODESIZE, NODESIZE/2, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); @@ -181,8 +180,8 @@ // the root, one in each buffer, let's verify this. FTNODE node = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); toku_pin_ftnode( t->ft, node_root, @@ -211,7 +210,7 @@ // at this point, we have should have flushed // only the middle buffer, let's verify this. node = NULL; - fill_bfe_for_min_read(&bfe, t->ft); + bfe.create_for_min_read(t->ft); toku_pin_ftnode( t->ft, node_root, diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-inc-split.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-inc-split.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-inc-split.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-inc-split.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -120,7 +120,6 @@ static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -137,7 +136,7 @@ int i; int r; - toku_cachetable_create(&ct, 16*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 16*1024, ZERO_LSN, nullptr); unlink(fname); r = toku_open_ft_handle(fname, 1, &t, NODESIZE, NODESIZE, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-leafentry-child-txn.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-leafentry-child-txn.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-leafentry-child-txn.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-leafentry-child-txn.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,10 +91,9 @@ #include #include "test.h" -#include "fttypes.h" -#include "ule.h" -#include "ule-internal.h" +#include "ft/ule.h" +#include "ft/ule-internal.h" static void init_empty_ule(ULE ule) { ule->num_cuxrs = 0; @@ -111,17 +110,6 @@ ule->uxrs[index].xid = xid; } -static FT_MSG_S -msg_init(enum ft_msg_type type, XIDS xids, - DBT *key, DBT *val) { - FT_MSG_S msg; - msg.type = type; - msg.xids = xids; - msg.u.id.key = key; - msg.u.id.val = val; - return msg; -} - //Test all the different things that can happen to a //committed leafentry (logical equivalent of a committed insert). 
static void @@ -144,14 +132,14 @@ // test case where we apply a message and the innermost child_id // is the same as the innermost committed TXNID - XIDS root_xids = xids_get_root_xids(); + XIDS root_xids = toku_xids_get_root_xids(); TXNID root_txnid = 1000; TXNID child_id = 10; XIDS msg_xids_1; XIDS msg_xids_2; - r = xids_create_child(root_xids, &msg_xids_1, root_txnid); + r = toku_xids_create_child(root_xids, &msg_xids_1, root_txnid); assert(r==0); - r = xids_create_child(msg_xids_1, &msg_xids_2, child_id); + r = toku_xids_create_child(msg_xids_1, &msg_xids_2, child_id); assert(r==0); init_empty_ule(&ule_initial); @@ -161,45 +149,49 @@ add_committed_entry(&ule_initial, &val, 10); // now do the application of xids to the ule - FT_MSG_S msg; // do a commit - msg = msg_init(FT_COMMIT_ANY, msg_xids_2, &key, &val); - test_msg_modify_ule(&ule_initial, &msg); - assert(ule->num_cuxrs == 2); - assert(ule->uxrs[0].xid == TXNID_NONE); - assert(ule->uxrs[1].xid == 10); - assert(ule->uxrs[0].valp == &val_data_one); - assert(ule->uxrs[1].valp == &val_data_two); + { + ft_msg msg(&key, &val, FT_COMMIT_ANY, ZERO_MSN, msg_xids_2); + test_msg_modify_ule(&ule_initial, msg); + assert(ule->num_cuxrs == 2); + assert(ule->uxrs[0].xid == TXNID_NONE); + assert(ule->uxrs[1].xid == 10); + assert(ule->uxrs[0].valp == &val_data_one); + assert(ule->uxrs[1].valp == &val_data_two); + } // do an abort - msg = msg_init(FT_ABORT_ANY, msg_xids_2, &key, &val); - test_msg_modify_ule(&ule_initial, &msg); - assert(ule->num_cuxrs == 2); - assert(ule->uxrs[0].xid == TXNID_NONE); - assert(ule->uxrs[1].xid == 10); - assert(ule->uxrs[0].valp == &val_data_one); - assert(ule->uxrs[1].valp == &val_data_two); + { + ft_msg msg(&key, &val, FT_ABORT_ANY, ZERO_MSN, msg_xids_2); + test_msg_modify_ule(&ule_initial, msg); + assert(ule->num_cuxrs == 2); + assert(ule->uxrs[0].xid == TXNID_NONE); + assert(ule->uxrs[1].xid == 10); + assert(ule->uxrs[0].valp == &val_data_one); + assert(ule->uxrs[1].valp == &val_data_two); + 
} // do an insert val.data = &val_data_three; - msg = msg_init(FT_INSERT, msg_xids_2, &key, &val); - test_msg_modify_ule(&ule_initial, &msg); - // now that message applied, verify that things are good - assert(ule->num_cuxrs == 2); - assert(ule->num_puxrs == 2); - assert(ule->uxrs[0].xid == TXNID_NONE); - assert(ule->uxrs[1].xid == 10); - assert(ule->uxrs[2].xid == 1000); - assert(ule->uxrs[3].xid == 10); - assert(ule->uxrs[0].valp == &val_data_one); - assert(ule->uxrs[1].valp == &val_data_two); - assert(ule->uxrs[2].type == XR_PLACEHOLDER); - assert(ule->uxrs[3].valp == &val_data_three); - - - xids_destroy(&msg_xids_2); - xids_destroy(&msg_xids_1); - xids_destroy(&root_xids); + { + ft_msg msg(&key, &val, FT_INSERT, ZERO_MSN, msg_xids_2); + test_msg_modify_ule(&ule_initial, msg); + // now that message applied, verify that things are good + assert(ule->num_cuxrs == 2); + assert(ule->num_puxrs == 2); + assert(ule->uxrs[0].xid == TXNID_NONE); + assert(ule->uxrs[1].xid == 10); + assert(ule->uxrs[2].xid == 1000); + assert(ule->uxrs[3].xid == 10); + assert(ule->uxrs[0].valp == &val_data_one); + assert(ule->uxrs[1].valp == &val_data_two); + assert(ule->uxrs[2].type == XR_PLACEHOLDER); + assert(ule->uxrs[3].valp == &val_data_three); + } + + toku_xids_destroy(&msg_xids_2); + toku_xids_destroy(&msg_xids_1); + toku_xids_destroy(&root_xids); } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-leafentry-nested.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-leafentry-nested.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-leafentry-nested.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-leafentry-nested.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -91,10 +91,9 @@ #include #include "test.h" -#include "fttypes.h" -#include "ule.h" -#include "ule-internal.h" +#include "ft/ule.h" +#include "ft/ule-internal.h" enum {MAX_SIZE = 256}; static XIDS nested_xids[MAX_TRANSACTION_RECORDS]; @@ -213,7 +212,7 @@ static void test_ule_packs_to_nothing (ULE ule) { LEAFENTRY le; - int r = le_pack(ule, NULL, 0, NULL, 0, 0, &le, nullptr); + int r = le_pack(ule, NULL, 0, NULL, 0, 0, 0, &le, nullptr); assert(r==0); assert(le==NULL); } @@ -319,7 +318,7 @@ size_t memsize; LEAFENTRY le; - int r = le_pack(&ule, nullptr, 0, nullptr, 0, 0, &le, nullptr); + int r = le_pack(&ule, nullptr, 0, nullptr, 0, 0, 0, &le, nullptr); assert(r==0); assert(le!=NULL); memsize = le_memsize_from_ule(&ule); @@ -329,7 +328,7 @@ verify_ule_equal(&ule, &tmp_ule); LEAFENTRY tmp_le; size_t tmp_memsize; - r = le_pack(&tmp_ule, nullptr, 0, nullptr, 0, 0, &tmp_le, nullptr); + r = le_pack(&tmp_ule, nullptr, 0, nullptr, 0, 0, 0, &tmp_le, nullptr); tmp_memsize = le_memsize_from_ule(&tmp_ule); assert(r==0); assert(tmp_memsize == memsize); @@ -377,7 +376,7 @@ size_t memsize; LEAFENTRY le; - int r = le_pack(&ule, nullptr, 0, nullptr, 0, 0, &le, nullptr); + int r = le_pack(&ule, nullptr, 0, nullptr, 0, 0, 0, &le, nullptr); assert(r==0); assert(le!=NULL); memsize = le_memsize_from_ule(&ule); @@ -387,7 +386,7 @@ verify_ule_equal(&ule, &tmp_ule); LEAFENTRY tmp_le; size_t tmp_memsize; - r = le_pack(&tmp_ule, nullptr, 0, nullptr, 0, 0, &tmp_le, nullptr); + r = le_pack(&tmp_ule, nullptr, 0, nullptr, 0, 0, 0, &tmp_le, nullptr); tmp_memsize = le_memsize_from_ule(&tmp_ule); assert(r==0); assert(tmp_memsize == memsize); @@ -442,13 +441,13 @@ } static void -test_le_apply(ULE ule_initial, FT_MSG msg, ULE ule_expected) { +test_le_apply(ULE ule_initial, const ft_msg &msg, ULE ule_expected) { int r; LEAFENTRY le_initial; LEAFENTRY le_expected; LEAFENTRY le_result; - r = le_pack(ule_initial, nullptr, 0, nullptr, 0, 0, &le_initial, nullptr); + r = le_pack(ule_initial, 
nullptr, 0, nullptr, 0, 0, 0, &le_initial, nullptr); CKERR(r); size_t result_memsize = 0; @@ -458,6 +457,7 @@ le_initial, nullptr, 0, + 0, &gc_info, &le_result, &ignoreme); @@ -467,7 +467,7 @@ } size_t expected_memsize = 0; - r = le_pack(ule_expected, nullptr, 0, nullptr, 0, 0, &le_expected, nullptr); + r = le_pack(ule_expected, nullptr, 0, nullptr, 0, 0, 0, &le_expected, nullptr); CKERR(r); if (le_expected) { expected_memsize = leafentry_memsize(le_expected); @@ -495,17 +495,6 @@ .uxrs = (UXR_S *)ule_committed_delete.uxrs_static }; -static FT_MSG_S -msg_init(enum ft_msg_type type, XIDS xids, - DBT *key, DBT *val) { - FT_MSG_S msg; - msg.type = type; - msg.xids = xids; - msg.u.id.key = key; - msg.u.id.val = val; - return msg; -} - static uint32_t next_nesting_level(uint32_t current) { uint32_t rval = current + 1; @@ -530,13 +519,13 @@ } static void -generate_provpair_for(ULE ule, FT_MSG msg) { +generate_provpair_for(ULE ule, const ft_msg &msg) { uint32_t level; - XIDS xids = msg->xids; + XIDS xids = msg.xids(); ule->uxrs = ule->uxrs_static; ule->num_cuxrs = 1; - ule->num_puxrs = xids_get_num_xids(xids); + ule->num_puxrs = toku_xids_get_num_xids(xids); uint32_t num_uxrs = ule->num_cuxrs + ule->num_puxrs; ule->uxrs[0].type = XR_DELETE; ule->uxrs[0].vallen = 0; @@ -546,12 +535,12 @@ ule->uxrs[level].type = XR_PLACEHOLDER; ule->uxrs[level].vallen = 0; ule->uxrs[level].valp = NULL; - ule->uxrs[level].xid = xids_get_xid(xids, level-1); + ule->uxrs[level].xid = toku_xids_get_xid(xids, level-1); } ule->uxrs[num_uxrs - 1].type = XR_INSERT; - ule->uxrs[num_uxrs - 1].vallen = msg->u.id.val->size; - ule->uxrs[num_uxrs - 1].valp = msg->u.id.val->data; - ule->uxrs[num_uxrs - 1].xid = xids_get_innermost_xid(xids); + ule->uxrs[num_uxrs - 1].vallen = msg.vdbt()->size; + ule->uxrs[num_uxrs - 1].valp = msg.vdbt()->data; + ule->uxrs[num_uxrs - 1].xid = toku_xids_get_innermost_xid(xids); } //Test all the different things that can happen to a @@ -559,7 +548,6 @@ static void 
test_le_empty_apply(void) { ULE_S ule_initial = ule_committed_delete; - FT_MSG_S msg; DBT key; DBT val; @@ -584,34 +572,41 @@ //Abort/commit of an empty le is an empty le ULE_S ule_expected = ule_committed_delete; - msg = msg_init(FT_COMMIT_ANY, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); - msg = msg_init(FT_COMMIT_BROADCAST_TXN, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); - - msg = msg_init(FT_ABORT_ANY, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); - msg = msg_init(FT_ABORT_BROADCAST_TXN, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); + { + ft_msg msg(&key, &val, FT_COMMIT_ANY, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } + { + ft_msg msg(&key, &val, FT_COMMIT_BROADCAST_TXN, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } + { + ft_msg msg(&key, &val, FT_ABORT_ANY, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } + { + ft_msg msg(&key, &val, FT_ABORT_BROADCAST_TXN, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } } { //delete of an empty le is an empty le ULE_S ule_expected = ule_committed_delete; - msg = msg_init(FT_DELETE_ANY, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); + ft_msg msg(&key, &val, FT_DELETE_ANY, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); } { - msg = msg_init(FT_INSERT, msg_xids, &key, &val); + ft_msg msg(&key, &val, FT_INSERT, ZERO_MSN, msg_xids); ULE_S ule_expected; - generate_provpair_for(&ule_expected, &msg); - test_le_apply(&ule_initial, &msg, &ule_expected); + generate_provpair_for(&ule_expected, msg); + test_le_apply(&ule_initial, msg, &ule_expected); } { - msg = msg_init(FT_INSERT_NO_OVERWRITE, msg_xids, &key, &val); + ft_msg msg(&key, &val, FT_INSERT_NO_OVERWRITE, ZERO_MSN, msg_xids); ULE_S ule_expected; - generate_provpair_for(&ule_expected, &msg); - 
test_le_apply(&ule_initial, &msg, &ule_expected); + generate_provpair_for(&ule_expected, msg); + test_le_apply(&ule_initial, msg, &ule_expected); } } } @@ -619,36 +614,36 @@ } static void -generate_provdel_for(ULE ule, FT_MSG msg) { +generate_provdel_for(ULE ule, const ft_msg &msg) { uint32_t level; - XIDS xids = msg->xids; + XIDS xids = msg.xids(); ule->num_cuxrs = 1; - ule->num_puxrs = xids_get_num_xids(xids); + ule->num_puxrs = toku_xids_get_num_xids(xids); uint32_t num_uxrs = ule->num_cuxrs + ule->num_puxrs; ule->uxrs[0].type = XR_INSERT; - ule->uxrs[0].vallen = msg->u.id.val->size; - ule->uxrs[0].valp = msg->u.id.val->data; + ule->uxrs[0].vallen = msg.vdbt()->size; + ule->uxrs[0].valp = msg.vdbt()->data; ule->uxrs[0].xid = TXNID_NONE; for (level = ule->num_cuxrs; level < ule->num_cuxrs + ule->num_puxrs - 1; level++) { ule->uxrs[level].type = XR_PLACEHOLDER; ule->uxrs[level].vallen = 0; ule->uxrs[level].valp = NULL; - ule->uxrs[level].xid = xids_get_xid(xids, level-1); + ule->uxrs[level].xid = toku_xids_get_xid(xids, level-1); } ule->uxrs[num_uxrs - 1].type = XR_DELETE; ule->uxrs[num_uxrs - 1].vallen = 0; ule->uxrs[num_uxrs - 1].valp = NULL; - ule->uxrs[num_uxrs - 1].xid = xids_get_innermost_xid(xids); + ule->uxrs[num_uxrs - 1].xid = toku_xids_get_innermost_xid(xids); } static void -generate_both_for(ULE ule, DBT *oldval, FT_MSG msg) { +generate_both_for(ULE ule, DBT *oldval, const ft_msg &msg) { uint32_t level; - XIDS xids = msg->xids; + XIDS xids = msg.xids(); ule->num_cuxrs = 1; - ule->num_puxrs = xids_get_num_xids(xids); + ule->num_puxrs = toku_xids_get_num_xids(xids); uint32_t num_uxrs = ule->num_cuxrs + ule->num_puxrs; ule->uxrs[0].type = XR_INSERT; ule->uxrs[0].vallen = oldval->size; @@ -658,12 +653,12 @@ ule->uxrs[level].type = XR_PLACEHOLDER; ule->uxrs[level].vallen = 0; ule->uxrs[level].valp = NULL; - ule->uxrs[level].xid = xids_get_xid(xids, level-1); + ule->uxrs[level].xid = toku_xids_get_xid(xids, level-1); } ule->uxrs[num_uxrs - 1].type = 
XR_INSERT; - ule->uxrs[num_uxrs - 1].vallen = msg->u.id.val->size; - ule->uxrs[num_uxrs - 1].valp = msg->u.id.val->data; - ule->uxrs[num_uxrs - 1].xid = xids_get_innermost_xid(xids); + ule->uxrs[num_uxrs - 1].vallen = msg.vdbt()->size; + ule->uxrs[num_uxrs - 1].valp = msg.vdbt()->data; + ule->uxrs[num_uxrs - 1].xid = toku_xids_get_innermost_xid(xids); } //Test all the different things that can happen to a @@ -672,7 +667,6 @@ test_le_committed_apply(void) { ULE_S ule_initial; ule_initial.uxrs = ule_initial.uxrs_static; - FT_MSG_S msg; DBT key; DBT val; @@ -695,23 +689,30 @@ if (nesting_level > 0) { //Commit/abort will not change a committed le ULE_S ule_expected = ule_initial; - msg = msg_init(FT_COMMIT_ANY, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); - msg = msg_init(FT_COMMIT_BROADCAST_TXN, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); - - msg = msg_init(FT_ABORT_ANY, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); - msg = msg_init(FT_ABORT_BROADCAST_TXN, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); + { + ft_msg msg(&key, &val, FT_COMMIT_ANY, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } + { + ft_msg msg(&key, &val, FT_COMMIT_BROADCAST_TXN, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } + { + ft_msg msg(&key, &val, FT_ABORT_ANY, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } + { + ft_msg msg(&key, &val, FT_ABORT_BROADCAST_TXN, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } } { - msg = msg_init(FT_DELETE_ANY, msg_xids, &key, &val); + ft_msg msg(&key, &val, FT_DELETE_ANY, ZERO_MSN, msg_xids); ULE_S ule_expected; ule_expected.uxrs = ule_expected.uxrs_static; - generate_provdel_for(&ule_expected, &msg); - test_le_apply(&ule_initial, &msg, &ule_expected); + generate_provdel_for(&ule_expected, msg); + test_le_apply(&ule_initial, msg, 
&ule_expected); } { @@ -720,11 +721,11 @@ fillrandom(valbuf2, valsize2); DBT val2; toku_fill_dbt(&val2, valbuf2, valsize2); - msg = msg_init(FT_INSERT, msg_xids, &key, &val2); + ft_msg msg(&key, &val2, FT_INSERT, ZERO_MSN, msg_xids); ULE_S ule_expected; ule_expected.uxrs = ule_expected.uxrs_static; - generate_both_for(&ule_expected, &val, &msg); - test_le_apply(&ule_initial, &msg, &ule_expected); + generate_both_for(&ule_expected, &val, msg); + test_le_apply(&ule_initial, msg, &ule_expected); } { //INSERT_NO_OVERWRITE will not change a committed insert @@ -734,8 +735,8 @@ fillrandom(valbuf2, valsize2); DBT val2; toku_fill_dbt(&val2, valbuf2, valsize2); - msg = msg_init(FT_INSERT_NO_OVERWRITE, msg_xids, &key, &val2); - test_le_apply(&ule_initial, &msg, &ule_expected); + ft_msg msg(&key, &val2, FT_INSERT_NO_OVERWRITE, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); } } } @@ -749,7 +750,7 @@ static bool ule_worth_running_garbage_collection(ULE ule, TXNID oldest_referenced_xid_known) { LEAFENTRY le; - int r = le_pack(ule, nullptr, 0, nullptr, 0, 0, &le, nullptr); CKERR(r); + int r = le_pack(ule, nullptr, 0, nullptr, 0, 0, 0, &le, nullptr); CKERR(r); invariant_notnull(le); txn_gc_info gc_info(nullptr, oldest_referenced_xid_known, oldest_referenced_xid_known, true); bool worth_running = toku_le_worth_running_garbage_collection(le, &gc_info); @@ -854,7 +855,6 @@ } static void test_le_optimize(void) { - FT_MSG_S msg; DBT key; DBT val; ULE_S ule_initial; @@ -868,11 +868,11 @@ TXNID optimize_txnid = 1000; memset(&key, 0, sizeof(key)); memset(&val, 0, sizeof(val)); - XIDS root_xids = xids_get_root_xids(); + XIDS root_xids = toku_xids_get_root_xids(); XIDS msg_xids; - int r = xids_create_child(root_xids, &msg_xids, optimize_txnid); + int r = toku_xids_create_child(root_xids, &msg_xids, optimize_txnid); assert(r==0); - msg = msg_init(FT_OPTIMIZE, msg_xids, &key, &val); + ft_msg msg(&key, &val, FT_OPTIMIZE, ZERO_MSN, msg_xids); // // create the key @@ 
-897,8 +897,8 @@ ule_expected.uxrs[0].vallen = valsize; ule_expected.uxrs[0].valp = valbuf; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); // // add another committed entry and ensure no effect @@ -915,8 +915,8 @@ ule_expected.uxrs[1].vallen = 0; ule_expected.uxrs[1].valp = NULL; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); // // now test when there is one provisional, three cases, after, equal, and before FT_OPTIMIZE's transaction @@ -928,20 +928,20 @@ ule_expected.num_cuxrs = 1; ule_expected.num_puxrs = 1; ule_expected.uxrs[1].xid = 1500; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); ule_initial.uxrs[1].xid = 1000; ule_expected.uxrs[1].xid = 1000; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); ule_initial.uxrs[1].xid = 500; ule_expected.uxrs[1].xid = 500; ule_expected.num_cuxrs = 2; ule_expected.num_puxrs = 0; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); // // now test cases with two provisional @@ -962,13 +962,13 @@ ule_expected.uxrs[2].vallen = valsize; ule_expected.uxrs[2].valp = valbuf; ule_expected.uxrs[1].xid = 1200; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); ule_initial.uxrs[1].xid = 1000; ule_expected.uxrs[1].xid = 1000; - 
test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); ule_initial.uxrs[1].xid = 800; ule_expected.uxrs[1].xid = 800; @@ -977,12 +977,12 @@ ule_expected.uxrs[1].type = ule_initial.uxrs[2].type; ule_expected.uxrs[1].valp = ule_initial.uxrs[2].valp; ule_expected.uxrs[1].vallen = ule_initial.uxrs[2].vallen; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); - xids_destroy(&msg_xids); - xids_destroy(&root_xids); + toku_xids_destroy(&msg_xids); + toku_xids_destroy(&root_xids); } //TODO: #1125 tests: @@ -1020,9 +1020,9 @@ static void init_xids(void) { uint32_t i; - nested_xids[0] = xids_get_root_xids(); + nested_xids[0] = toku_xids_get_root_xids(); for (i = 1; i < MAX_TRANSACTION_RECORDS; i++) { - int r = xids_create_child(nested_xids[i-1], &nested_xids[i], i * 37 + random() % 36); + int r = toku_xids_create_child(nested_xids[i-1], &nested_xids[i], i * 37 + random() % 36); assert(r==0); } } @@ -1031,7 +1031,7 @@ destroy_xids(void) { uint32_t i; for (i = 0; i < MAX_TRANSACTION_RECORDS; i++) { - xids_destroy(&nested_xids[i]); + toku_xids_destroy(&nested_xids[i]); } } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test_logcursor.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test_logcursor.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test_logcursor.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test_logcursor.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -90,9 +90,8 @@ #include #include -#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" -#include "fttypes.h" #if defined(HAVE_LIMITS_H) # include @@ -105,7 +104,6 @@ const int FSYNC = 1; const int NO_FSYNC = 0; -const int envflags = DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN; const char *namea="a.db"; const char *nameb="b.db"; const char *a="a"; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-merges-on-cleaner.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-merges-on-cleaner.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-merges-on-cleaner.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-merges-on-cleaner.cc 2014-10-08 13:19:51.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,10 +94,9 @@ #include #include "ft-flusher.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -131,7 +130,7 @@ int r; - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink(fname); r = toku_open_ft_handle(fname, 1, &ft, NODESIZE, NODESIZE/2, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); @@ -230,8 +229,8 @@ r = toku_ft_lookup(ft, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair); assert(r==0); - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, ft->ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft->ft); toku_pin_ftnode( ft->ft, node_internal, @@ -253,7 +252,7 @@ ); // verify that node_internal's buffer is empty - fill_bfe_for_min_read(&bfe, ft->ft); + bfe.create_for_min_read(ft->ft); 
toku_pin_ftnode( ft->ft, node_internal, diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test_oexcl.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test_oexcl.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test_oexcl.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test_oexcl.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-oldest-referenced-xid-flush.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-oldest-referenced-xid-flush.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-oldest-referenced-xid-flush.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-oldest-referenced-xid-flush.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -131,7 +131,7 @@ FT_HANDLE t; BLOCKNUM grandchild_leaf_blocknum, child_nonleaf_blocknum, root_blocknum; - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink("foo1.ft_handle"); r = toku_open_ft_handle("foo1.ft_handle", 1, &t, NODESIZE, NODESIZE/2, TOKU_DEFAULT_COMPRESSION_METHOD, ct, nullptr, toku_builtin_compare_fun); assert(r==0); @@ -167,8 +167,8 @@ // first verify the child FTNODE node = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); toku_pin_ftnode( t->ft, child_nonleaf_blocknum, diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-pick-child-to-flush.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-pick-child-to-flush.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-pick-child-to-flush.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-pick-child-to-flush.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -97,10 +97,9 @@ #include "ft-flusher.h" #include "ft-flusher-internal.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -165,7 +164,7 @@ BLOCKNUM node_leaf[2]; int r; - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink(fname); r = toku_open_ft_handle(fname, 1, &t, NODESIZE, NODESIZE/2, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); @@ -245,7 +244,7 @@ // what we say and flushes the child we pick FTNODE node = NULL; toku_pin_node_with_min_bfe(&node, node_internal, t); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); assert(node->n_children == 2); assert(!node->dirty); assert(toku_bnc_n_entries(node->bp[0].ptr.u.nonleaf) > 0); @@ -268,7 +267,7 @@ assert(num_flushes_called == 1); toku_pin_node_with_min_bfe(&node, node_internal, t); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); assert(node->dirty); assert(node->n_children == 2); // child 0 should have empty buffer because it flushed @@ -287,7 +286,7 @@ toku_pin_node_with_min_bfe(&node, node_internal, t); assert(node->dirty); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); assert(node->n_children == 2); // both buffers should be empty now assert(toku_bnc_n_entries(node->bp[0].ptr.u.nonleaf) == 0); @@ -305,7 +304,7 @@ toku_pin_node_with_min_bfe(&node, node_internal, t); assert(node->dirty); // nothing was flushed, but since we were trying to flush to a leaf, both become dirty - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); assert(node->n_children == 2); // both buffers should be empty now assert(toku_bnc_n_entries(node->bp[0].ptr.u.nonleaf) == 0); @@ -326,7 +325,7 @@ // use a for 
loop so to get us down both paths for (int i = 0; i < 2; i++) { toku_pin_node_with_min_bfe(&node, node_root, t); - toku_assert_entire_node_in_memory(node); // entire root is in memory + toku_ftnode_assert_fully_in_memory(node); // entire root is in memory curr_child_to_flush = i; num_flushes_called = 0; toku_ft_flush_some_child(t->ft, node, &fa); @@ -376,7 +375,7 @@ //now let's do the same test as above toku_pin_node_with_min_bfe(&node, node_root, t); - toku_assert_entire_node_in_memory(node); // entire root is in memory + toku_ftnode_assert_fully_in_memory(node); // entire root is in memory curr_child_to_flush = 0; num_flushes_called = 0; toku_ft_flush_some_child(t->ft, node, &fa); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test_rightmost_leaf_seqinsert_heuristic.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test_rightmost_leaf_seqinsert_heuristic.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test_rightmost_leaf_seqinsert_heuristic.cc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test_rightmost_leaf_seqinsert_heuristic.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2014 Tokutek, Inc. 
DISCLAIMER: @@ -90,7 +90,7 @@ #include "test.h" -#include +#include #include // Each FT maintains a sequential insert heuristic to determine if its @@ -110,7 +110,7 @@ FT_HANDLE ft_handle; CACHETABLE ct; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(name, 1, &ft_handle, 4*1024*1024, 64*1024, TOKU_DEFAULT_COMPRESSION_METHOD, ct, NULL, diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test_rightmost_leaf_split_merge.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test_rightmost_leaf_split_merge.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test_rightmost_leaf_split_merge.cc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test_rightmost_leaf_split_merge.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2014 Tokutek, Inc. DISCLAIMER: @@ -90,8 +90,9 @@ #include "test.h" -#include +#include #include +#include // Promotion tracks the rightmost blocknum in the FT when a message // is successfully promoted to a non-root leaf node on the right extreme. 
@@ -109,7 +110,7 @@ FT_HANDLE ft_handle; CACHETABLE ct; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(name, 1, &ft_handle, 4*1024*1024, 64*1024, TOKU_DEFAULT_COMPRESSION_METHOD, ct, NULL, @@ -142,13 +143,13 @@ BLOCKNUM root_blocknum = ft->h->root_blocknum; FTNODE root_node; - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); toku_pin_ftnode(ft, root_blocknum, toku_cachetable_hash(ft->cf, ft->h->root_blocknum), &bfe, PL_WRITE_EXPENSIVE, &root_node, true); // root blocknum should be consistent - invariant(root_node->thisnodename.b == ft->h->root_blocknum.b); + invariant(root_node->blocknum.b == ft->h->root_blocknum.b); // root should have split at least once, and it should now be at height 1 invariant(root_node->n_children > 1); invariant(root_node->height == 1); @@ -179,7 +180,7 @@ toku_pin_ftnode(ft, rightmost_blocknum_before_merge, toku_cachetable_hash(ft->cf, rightmost_blocknum_before_merge), &bfe, PL_WRITE_EXPENSIVE, &rightmost_leaf, true); - invariant(get_node_reactivity(ft, rightmost_leaf) == RE_FUSIBLE); + invariant(toku_ftnode_get_reactivity(ft, rightmost_leaf) == RE_FUSIBLE); toku_unpin_ftnode(ft, rightmost_leaf); // - merge the rightmost child now that it's fusible diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test_toku_malloc_plain_free.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test_toku_malloc_plain_free.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test_toku_malloc_plain_free.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test_toku_malloc_plain_free.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-txn-child-manager.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-txn-child-manager.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-txn-child-manager.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-txn-child-manager.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -92,7 +92,7 @@ #include "test.h" #include "toku_os.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #include "test-ft-txns.h" diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-upgrade-recovery-logs.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-upgrade-recovery-logs.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/test-upgrade-recovery-logs.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/test-upgrade-recovery-logs.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,193 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +// Test that recovery works correctly on a recovery log in a log directory. 
+ +#include "test.h" +#include + +static void run_recovery(const char *testdir) { + int r; + + int log_version; + char shutdown[32+1]; + r = sscanf(testdir, "upgrade-recovery-logs-%d-%32s", &log_version, shutdown); + assert(r == 2); + + char **logfiles = nullptr; + int n_logfiles = 0; + r = toku_logger_find_logfiles(testdir, &logfiles, &n_logfiles); + CKERR(r); + assert(n_logfiles > 0); + + FILE *f = fopen(logfiles[n_logfiles-1], "r"); + assert(f); + uint32_t real_log_version; + r = toku_read_logmagic(f, &real_log_version); + CKERR(r); + assert((uint32_t)log_version == (uint32_t)real_log_version); + r = fclose(f); + CKERR(r); + + toku_logger_free_logfiles(logfiles, n_logfiles); + + // test needs recovery + r = tokuft_needs_recovery(testdir, false); + if (strcmp(shutdown, "clean") == 0) { + CKERR(r); // clean does not need recovery + } else if (strncmp(shutdown, "dirty", 5) == 0) { + CKERR2(r, 1); // dirty needs recovery + } else { + CKERR(EINVAL); + } + + // test maybe upgrade log + LSN lsn_of_clean_shutdown; + bool upgrade_in_progress; + r = toku_maybe_upgrade_log(testdir, testdir, &lsn_of_clean_shutdown, &upgrade_in_progress); + if (strcmp(shutdown, "dirty") == 0 && log_version <= 24) { + CKERR2(r, TOKUDB_UPGRADE_FAILURE); // we dont support dirty upgrade from versions <= 24 + return; + } else { + CKERR(r); + } + + if (!verbose) { + // redirect stderr + int devnul = open(DEV_NULL_FILE, O_WRONLY); + assert(devnul >= 0); + int rr = toku_dup2(devnul, fileno(stderr)); + assert(rr == fileno(stderr)); + rr = close(devnul); + assert(rr == 0); + } + + // run recovery + if (r == 0) { + r = tokuft_recover(NULL, + NULL_prepared_txn_callback, + NULL_keep_cachetable_callback, + NULL_logger, testdir, testdir, 0, 0, 0, NULL, 0); + CKERR(r); + } +} + +int test_main(int argc, const char *argv[]) { + int i = 0; + for (i = 1; i < argc; i++) { + if (strcmp(argv[i], "-v") == 0) { + verbose++; + continue; + } + if (strcmp(argv[i], "-q") == 0) { + if (verbose > 0) + verbose--; + 
continue; + } + break; + } + if (i < argc) { + const char *full_test_dir = argv[i]; + const char *test_dir = basename((char *)full_test_dir); + if (strcmp(full_test_dir, test_dir) != 0) { + int r; + char cmd[32 + strlen(full_test_dir) + strlen(test_dir)]; + sprintf(cmd, "rm -rf %s", test_dir); + r = system(cmd); + CKERR(r); + sprintf(cmd, "cp -r %s %s", full_test_dir, test_dir); + r = system(cmd); + CKERR(r); + } + run_recovery(test_dir); + } + return 0; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/upgrade_test_simple.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/upgrade_test_simple.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/upgrade_test_simple.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/upgrade_test_simple.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -97,13 +97,12 @@ #include "ft-flusher.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = NULL; -static DB * const null_db = NULL; static int -noop_getf(ITEMLEN UU(keylen), bytevec UU(key), ITEMLEN UU(vallen), bytevec UU(val), void *extra, bool UU(lock_only)) +noop_getf(uint32_t UU(keylen), const void *UU(key), uint32_t UU(vallen), const void *UU(val), void *extra, bool UU(lock_only)) { int *CAST_FROM_VOIDP(calledp, extra); (*calledp)++; @@ -176,7 +175,7 @@ FT_HANDLE t; CACHETABLE ct; - toku_cachetable_create(&ct, 16*(1<<20), ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 16*(1<<20), ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 0, &t, diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/verify-bad-msn.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/verify-bad-msn.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/verify-bad-msn.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/verify-bad-msn.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -127,9 +127,9 @@ MSN msn = next_dummymsn(); // apply an insert to the leaf node - FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // Create bad tree (don't do following): // leafnode->max_msn_applied_to_node = msn; @@ -156,7 +156,7 @@ unsigned int key = htonl(val); DBT thekey; toku_fill_dbt(&thekey, &key, sizeof key); DBT theval; toku_fill_dbt(&theval, &val, sizeof val); - toku_ft_append_to_child_buffer(ft->ft->compare_fun, NULL, node, childnum, FT_INSERT, msn, xids_get_root_xids(), true, &thekey, &theval); + toku_ft_append_to_child_buffer(ft->ft->cmp, node, childnum, FT_INSERT, msn, toku_xids_get_root_xids(), true, &thekey, &theval); // Create bad tree (don't do following): // node->max_msn_applied_to_node = msn; @@ -212,7 +212,7 @@ // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); // create the ft TOKUTXN null_txn = NULL; @@ -225,7 +225,7 @@ FTNODE newroot = make_tree(ft, height, fanout, nperleaf, &seq, &minkey, &maxkey); // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(ft->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // Create bad tree (don't do following): // newroot->max_msn_applied_to_node = last_dummymsn(); // capture msn of last message injected into tree diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/verify-bad-pivots.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/verify-bad-pivots.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/verify-bad-pivots.cc 2014-08-03 12:00:34.000000000 +0000 
+++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/verify-bad-pivots.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -115,9 +115,9 @@ // apply an insert to the leaf node MSN msn = next_dummymsn(); - FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; @@ -182,7 +182,7 @@ // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); // create the ft TOKUTXN null_txn = NULL; @@ -195,7 +195,7 @@ FTNODE newroot = make_tree(ft, height, fanout, nperleaf, &seq, &minkey, &maxkey); // discard the old root block - toku_ft_set_new_root_blocknum(ft->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // unpin the new root toku_unpin_ftnode(ft->ft, newroot); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/verify-dup-in-leaf.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/verify-dup-in-leaf.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/verify-dup-in-leaf.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/verify-dup-in-leaf.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -116,9 +116,9 @@ // apply an insert to the leaf node MSN msn = next_dummymsn(); - FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; @@ -140,7 +140,7 @@ // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); // create the ft TOKUTXN null_txn = NULL; @@ -155,7 +155,7 @@ populate_leaf(newroot, htonl(2), 2); // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(ft->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // unpin the new root toku_unpin_ftnode(ft->ft, newroot); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/verify-dup-pivots.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/verify-dup-pivots.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/verify-dup-pivots.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/verify-dup-pivots.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -115,9 +115,9 @@ // apply an insert to the leaf node MSN msn = next_dummymsn(); - FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; @@ -185,7 +185,7 @@ // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); // create the ft TOKUTXN null_txn = NULL; @@ -199,7 +199,7 @@ // discard the old root block // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(ft->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // unpin the new root toku_unpin_ftnode(ft->ft, newroot); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/verify-misrouted-msgs.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/verify-misrouted-msgs.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/verify-misrouted-msgs.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/verify-misrouted-msgs.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -116,9 +116,9 @@ // apply an insert to the leaf node MSN msn = next_dummymsn(); - FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode,0), &msg, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode,0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; @@ -144,7 +144,7 @@ DBT thekey; toku_fill_dbt(&thekey, &key, sizeof key); DBT theval; toku_fill_dbt(&theval, &val, sizeof val); MSN msn = next_dummymsn(); - toku_ft_append_to_child_buffer(ft->ft->compare_fun, NULL, node, childnum, FT_INSERT, msn, xids_get_root_xids(), true, &thekey, &theval); + toku_ft_append_to_child_buffer(ft->ft->cmp, node, childnum, FT_INSERT, msn, toku_xids_get_root_xids(), true, &thekey, &theval); } } @@ -197,7 +197,7 @@ // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); // create the ft TOKUTXN null_txn = NULL; @@ -211,7 +211,7 @@ // discard the old root block // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(ft->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // unpin the new root toku_unpin_ftnode(ft->ft, newroot); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/verify-unsorted-leaf.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/verify-unsorted-leaf.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/verify-unsorted-leaf.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/verify-unsorted-leaf.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -118,9 +118,9 @@ // apply an insert to the leaf node MSN msn = next_dummymsn(); - FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; @@ -142,7 +142,7 @@ // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); // create the ft TOKUTXN null_txn = NULL; @@ -156,7 +156,7 @@ populate_leaf(newroot, htonl(1), 2); // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(ft->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // unpin the new root toku_unpin_ftnode(ft->ft, newroot); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/verify-unsorted-pivots.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/verify-unsorted-pivots.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/verify-unsorted-pivots.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/verify-unsorted-pivots.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -115,9 +115,9 @@ // apply an insert to the leaf node MSN msn = next_dummymsn(); - FT_MSG_S msg = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_msg_once(BLB(leafnode, 0), &msg, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; @@ -182,7 +182,7 @@ // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); // create the ft TOKUTXN null_txn = NULL; @@ -195,7 +195,7 @@ FTNODE newroot = make_tree(ft, height, fanout, nperleaf, &seq, &minkey, &maxkey); // discard the old root block - toku_ft_set_new_root_blocknum(ft->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // unpin the new root toku_unpin_ftnode(ft->ft, newroot); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/xid_lsn_independent.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/xid_lsn_independent.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/xid_lsn_independent.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/xid_lsn_independent.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -92,7 +92,7 @@ #include "test.h" #include "toku_os.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #define ENVDIR TOKU_TEST_FILENAME #include "test-ft-txns.h" diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ybt-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ybt-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tests/ybt-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tests/ybt-test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -111,11 +111,11 @@ toku_init_dbt(&t0); toku_init_dbt(&t1); { - bytevec temp1 = "hello"; + const void *temp1 = "hello"; toku_dbt_set(6, temp1, &t0, &v0); } { - bytevec temp2 = "foo"; + const void *temp2 = "foo"; toku_dbt_set( 4, temp2, &t1, &v1); } assert(t0.size==6); @@ -124,7 +124,7 @@ assert(strcmp((char*)t1.data, "foo")==0); { - bytevec temp3 = "byebye"; + const void *temp3 = "byebye"; toku_dbt_set(7, temp3, &t1, &v0); /* Use v0, not v1 */ } // This assertion would be wrong, since v0 may have been realloc'd, and t0.data may now point @@ -141,7 +141,7 @@ t0.flags = DB_DBT_USERMEM; t0.ulen = 0; { - bytevec temp4 = "hello"; + const void *temp4 = "hello"; toku_dbt_set(6, temp4, &t0, 0); } assert(t0.data==0); @@ -152,7 +152,7 @@ t0.flags = DB_DBT_REALLOC; cleanup(&v0); { - bytevec temp5 = "internationalization"; + const void *temp5 = "internationalization"; toku_dbt_set(21, temp5, &t0, &v0); } assert(v0.data==0); /* Didn't change v0 */ @@ -160,7 +160,7 @@ assert(strcmp((char*)t0.data, "internationalization")==0); { - bytevec temp6 = "provincial"; + const void *temp6 = "provincial"; toku_dbt_set(11, temp6, &t0, &v0); } assert(t0.size==11); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tokuconst.h 
mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tokuconst.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tokuconst.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tokuconst.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,108 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKUCONST_H -#define TOKUCONST_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -/* The number of transaction ids stored in the xids structure is - * represented by an 8-bit value. The value 255 is reserved. - * The constant MAX_NESTED_TRANSACTIONS is one less because - * one slot in the packed leaf entry is used for the implicit - * root transaction (id 0). 
- */ - - -enum {MAX_NESTED_TRANSACTIONS = 253}; -enum {MAX_TRANSACTION_RECORDS = MAX_NESTED_TRANSACTIONS + 1}; - - -#endif - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tokuftdump.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tokuftdump.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/tokuftdump.cc 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/tokuftdump.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,701 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -// Dump a fractal tree file - -#include "cachetable.h" -#include "ft.h" -#include "fttypes.h" -#include "ft-internal.h" -#include -#include -#include -#include -#include -#include - -static int do_dump_data = 1; -static int do_interactive = 0; -static int do_header = 0; -static int do_fragmentation = 0; -static int do_garbage = 0; -static int do_translation_table = 0; -static int do_rootnode = 0; -static int do_tsv = 0; - -static const char *arg0; -static const char *fname; - -static void format_time(const uint64_t time_int, char *buf) { - time_t timer = (time_t) time_int; - ctime_r(&timer, buf); - assert(buf[24] == '\n'); - buf[24] = 0; -} - -static void print_item(bytevec val, ITEMLEN len) { - printf("\""); - ITEMLEN i; - for (i=0; idbt.size); - simple_hex_dump((unsigned char*) d->dbt.data, d->dbt.size); - printf("\n"); -} - -static void open_header(int fd, FT *header, CACHEFILE cf) { - FT ft = NULL; - int r; - r = toku_deserialize_ft_from (fd, MAX_LSN, &ft); - if (r != 0) { - fprintf(stderr, "%s: can not deserialize from %s error %d\n", arg0, fname, r); - exit(1); - } - assert_zero(r); - ft->cf = cf; - *header = ft; -} - -static void dump_header(FT ft) { - char timestr[26]; - printf("ft:\n"); - printf(" layout_version=%d\n", ft->h->layout_version); - printf(" layout_version_original=%d\n", ft->h->layout_version_original); - printf(" layout_version_read_from_disk=%d\n", ft->layout_version_read_from_disk); - printf(" build_id=%d\n", ft->h->build_id); - printf(" build_id_original=%d\n", ft->h->build_id_original); - format_time(ft->h->time_of_creation, timestr); - printf(" time_of_creation= %" PRIu64 " %s\n", ft->h->time_of_creation, timestr); - format_time(ft->h->time_of_last_modification, timestr); - printf(" time_of_last_modification=%" PRIu64 " %s\n", ft->h->time_of_last_modification, timestr); - printf(" dirty=%d\n", ft->h->dirty); - printf(" checkpoint_count=%" PRId64 "\n", ft->h->checkpoint_count); - printf(" checkpoint_lsn=%" PRId64 "\n", 
ft->h->checkpoint_lsn.lsn); - printf(" nodesize=%u\n", ft->h->nodesize); - printf(" basementnodesize=%u\n", ft->h->basementnodesize); - printf(" compression_method=%u\n", (unsigned) ft->h->compression_method); - printf(" unnamed_root=%" PRId64 "\n", ft->h->root_blocknum.b); - printf(" flags=%u\n", ft->h->flags); - dump_descriptor(&ft->descriptor); - printf(" estimated numrows=%" PRId64 "\n", ft->in_memory_stats.numrows); - printf(" estimated numbytes=%" PRId64 "\n", ft->in_memory_stats.numbytes); -} - -static int print_le(const void* key, const uint32_t keylen, const LEAFENTRY &le, const uint32_t idx UU(), void *const ai UU()) { - print_klpair(stdout, key, keylen, le); - printf("\n"); - return 0; -} - -static void dump_node(int fd, BLOCKNUM blocknum, FT h) { - FTNODE n; - struct ftnode_fetch_extra bfe; - FTNODE_DISK_DATA ndd = NULL; - fill_bfe_for_full_read(&bfe, h); - int r = toku_deserialize_ftnode_from (fd, blocknum, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); - assert_zero(r); - assert(n!=0); - printf("ftnode\n"); - DISKOFF disksize, diskoffset; - toku_translate_blocknum_to_offset_size(h->blocktable, blocknum, &diskoffset, &disksize); - printf(" diskoffset =%" PRId64 "\n", diskoffset); - printf(" disksize =%" PRId64 "\n", disksize); - printf(" serialize_size =%u\n", toku_serialize_ftnode_size(n)); - printf(" flags =%u\n", n->flags); - printf(" thisnodename=%" PRId64 "\n", n->thisnodename.b); - //printf(" log_lsn =%lld\n", n->log_lsn.lsn); // The log_lsn is a memory-only value. 
- printf(" height =%d\n", n->height); - printf(" layout_version=%d\n", n->layout_version); - printf(" layout_version_original=%d\n", n->layout_version_original); - printf(" layout_version_read_from_disk=%d\n", n->layout_version_read_from_disk); - printf(" build_id=%d\n", n->build_id); - printf(" max_msn_applied_to_node_on_disk=%" PRId64 " (0x%" PRIx64 ")\n", n->max_msn_applied_to_node_on_disk.msn, n->max_msn_applied_to_node_on_disk.msn); - printf("io time %lf decompress time %lf deserialize time %lf\n", - tokutime_to_seconds(bfe.io_time), - tokutime_to_seconds(bfe.decompress_time), - tokutime_to_seconds(bfe.deserialize_time) - ); - - printf(" n_children=%d\n", n->n_children); - printf(" total_childkeylens=%u\n", n->totalchildkeylens); - - printf(" pivots:\n"); - for (int i=0; in_children-1; i++) { - const DBT *piv = &n->childkeys[i]; - printf(" pivot %2d:", i); - if (n->flags) - printf(" flags=%x ", n->flags); - print_item(piv->data, piv->size); - printf("\n"); - } - printf(" children:\n"); - for (int i=0; in_children; i++) { - printf(" child %d: ", i); - if (n->height > 0) { - printf("%" PRId64 "\n", BP_BLOCKNUM(n, i).b); - NONLEAF_CHILDINFO bnc = BNC(n, i); - unsigned int n_bytes = toku_bnc_nbytesinbuf(bnc); - int n_entries = toku_bnc_n_entries(bnc); - if (n_bytes > 0 || n_entries > 0) { - printf(" buffer contains %u bytes (%d items)\n", n_bytes, n_entries); - } - if (do_dump_data) { - FIFO_ITERATE(bnc->buffer, key, keylen, data, datalen, typ, msn, xids, UU(is_fresh), - { - printf(" msn=%" PRIu64 " (0x%" PRIx64 ") ", msn.msn, msn.msn); - printf(" TYPE="); - switch ((enum ft_msg_type)typ) { - case FT_NONE: printf("NONE"); goto ok; - case FT_INSERT: printf("INSERT"); goto ok; - case FT_INSERT_NO_OVERWRITE: printf("INSERT_NO_OVERWRITE"); goto ok; - case FT_DELETE_ANY: printf("DELETE_ANY"); goto ok; - case FT_ABORT_ANY: printf("ABORT_ANY"); goto ok; - case FT_COMMIT_ANY: printf("COMMIT_ANY"); goto ok; - case FT_COMMIT_BROADCAST_ALL: printf("COMMIT_BROADCAST_ALL"); 
goto ok; - case FT_COMMIT_BROADCAST_TXN: printf("COMMIT_BROADCAST_TXN"); goto ok; - case FT_ABORT_BROADCAST_TXN: printf("ABORT_BROADCAST_TXN"); goto ok; - case FT_OPTIMIZE: printf("OPTIMIZE"); goto ok; - case FT_OPTIMIZE_FOR_UPGRADE: printf("OPTIMIZE_FOR_UPGRADE"); goto ok; - case FT_UPDATE: printf("UPDATE"); goto ok; - case FT_UPDATE_BROADCAST_ALL: printf("UPDATE_BROADCAST_ALL"); goto ok; - } - printf("HUH?"); - ok: - printf(" xid="); - xids_fprintf(stdout, xids); - printf(" "); - print_item(key, keylen); - if (datalen>0) { - printf(" "); - print_item(data, datalen); - } - printf("\n"); - } - ); - } - } else { - printf(" n_bytes_in_buffer= %" PRIu64 "", BLB_DATA(n, i)->get_disk_size()); - printf(" items_in_buffer=%u\n", BLB_DATA(n, i)->num_klpairs()); - if (do_dump_data) { - BLB_DATA(n, i)->iterate(NULL); - } - } - } - toku_ftnode_free(&n); - toku_free(ndd); -} - -static void dump_block_translation(FT h, uint64_t offset) { - toku_blocknum_dump_translation(h->blocktable, make_blocknum(offset)); -} - -static void dump_fragmentation(int UU(f), FT h, int tsv) { - int64_t used_space; - int64_t total_space; - toku_blocktable_internal_fragmentation(h->blocktable, &total_space, &used_space); - int64_t fragsizes = total_space - used_space; - - if (tsv) { - printf("%" PRId64 "\t%" PRId64 "\t%" PRId64 "\t%.1f\n", used_space, total_space, fragsizes, - 100. * ((double)fragsizes / (double)(total_space))); - } else { - printf("used_size\t%" PRId64 "\n", used_space); - printf("total_size\t%" PRId64 "\n", total_space); - printf("fragsizes\t%" PRId64 "\n", fragsizes); - printf("fragmentation\t%.1f\n", 100. 
* ((double)fragsizes / (double)(total_space))); - } -} - -typedef struct { - int fd; - FT h; - uint64_t blocksizes; - uint64_t leafsizes; - uint64_t leafblocks; -} frag_help_extra; - -static int nodesizes_helper(BLOCKNUM b, int64_t size, int64_t UU(address), void *extra) { - frag_help_extra *CAST_FROM_VOIDP(info, extra); - FTNODE n; - FTNODE_DISK_DATA ndd = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, info->h); - int r = toku_deserialize_ftnode_from(info->fd, b, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); - if (r==0) { - info->blocksizes += size; - if (n->height == 0) { - info->leafsizes += size; - info->leafblocks++; - } - toku_ftnode_free(&n); - toku_free(ndd); - } - return 0; -} - -static void dump_nodesizes(int fd, FT h) { - frag_help_extra info; - memset(&info, 0, sizeof(info)); - info.fd = fd; - info.h = h; - toku_blocktable_iterate(h->blocktable, TRANSLATION_CHECKPOINTED, - nodesizes_helper, &info, true, true); - printf("leafblocks\t%" PRIu64 "\n", info.leafblocks); - printf("blocksizes\t%" PRIu64 "\n", info.blocksizes); - printf("leafsizes\t%" PRIu64 "\n", info.leafsizes); -} - -static void dump_garbage_stats(int fd, FT ft) { - assert(fd == toku_cachefile_get_fd(ft->cf)); - uint64_t total_space = 0; - uint64_t used_space = 0; - toku_ft_get_garbage(ft, &total_space, &used_space); - printf("garbage total size\t%" PRIu64 "\n", total_space); - printf("garbage used size\t%" PRIu64 "\n", used_space); -} - -typedef struct __dump_node_extra { - int fd; - FT h; -} dump_node_extra; - -static int dump_node_wrapper(BLOCKNUM b, int64_t UU(size), int64_t UU(address), void *extra) { - dump_node_extra *CAST_FROM_VOIDP(info, extra); - dump_node(info->fd, b, info->h); - return 0; -} - -static uint32_t get_unaligned_uint32(unsigned char *p) { - uint32_t n; - memcpy(&n, p, sizeof n); - return n; -} - -struct dump_sub_block { - uint32_t compressed_size; - uint32_t uncompressed_size; - uint32_t xsum; -}; - -static void 
sub_block_deserialize(struct dump_sub_block *sb, unsigned char *sub_block_header) { - sb->compressed_size = toku_dtoh32(get_unaligned_uint32(sub_block_header+0)); - sb->uncompressed_size = toku_dtoh32(get_unaligned_uint32(sub_block_header+4)); - sb->xsum = toku_dtoh32(get_unaligned_uint32(sub_block_header+8)); -} - -static void verify_block(unsigned char *cp, uint64_t file_offset, uint64_t size) { - // verify the header checksum - const size_t node_header = 8 + sizeof (uint32_t) + sizeof (uint32_t) + sizeof (uint32_t); - - printf("%.8s layout_version=%u %u build=%d\n", cp, get_unaligned_uint32(cp+8), get_unaligned_uint32(cp+12), get_unaligned_uint32(cp+16)); - - unsigned char *sub_block_header = &cp[node_header]; - uint32_t n_sub_blocks = toku_dtoh32(get_unaligned_uint32(&sub_block_header[0])); - uint32_t header_length = node_header + n_sub_blocks * sizeof (struct dump_sub_block); - header_length += sizeof (uint32_t); // CRC - if (header_length > size) { - printf("header length too big: %u\n", header_length); - return; - } - uint32_t header_xsum = toku_x1764_memory(cp, header_length); - uint32_t expected_xsum = toku_dtoh32(get_unaligned_uint32(&cp[header_length])); - if (header_xsum != expected_xsum) { - printf("header checksum failed: %u %u\n", header_xsum, expected_xsum); - return; - } - - // deserialize the sub block header - struct dump_sub_block sub_block[n_sub_blocks]; - sub_block_header += sizeof (uint32_t); - for (uint32_t i = 0 ; i < n_sub_blocks; i++) { - sub_block_deserialize(&sub_block[i], sub_block_header); - sub_block_header += sizeof (struct dump_sub_block); - } - - // verify the sub block header - uint32_t offset = header_length + 4; - for (uint32_t i = 0 ; i < n_sub_blocks; i++) { - uint32_t xsum = toku_x1764_memory(cp + offset, sub_block[i].compressed_size); - printf("%u: %u %u %u", i, sub_block[i].compressed_size, sub_block[i].uncompressed_size, sub_block[i].xsum); - if (xsum != sub_block[i].xsum) - printf(" fail %u offset %" PRIu64, xsum, 
file_offset + offset); - printf("\n"); - offset += sub_block[i].compressed_size; - } - if (offset != size) - printf("offset %u expected %" PRIu64 "\n", offset, size); -} - -static void dump_block(int fd, BLOCKNUM blocknum, FT h) { - DISKOFF offset, size; - toku_translate_blocknum_to_offset_size(h->blocktable, blocknum, &offset, &size); - printf("%" PRId64 " at %" PRId64 " size %" PRId64 "\n", blocknum.b, offset, size); - - unsigned char *CAST_FROM_VOIDP(vp, toku_malloc(size)); - uint64_t r = pread(fd, vp, size, offset); - if (r == (uint64_t)size) { - verify_block(vp, offset, size); - } - toku_free(vp); -} - -static void dump_file(int fd, uint64_t offset, uint64_t size, FILE *outfp) { - unsigned char *XMALLOC_N(size, vp); - uint64_t r = pread(fd, vp, size, offset); - if (r == size) { - if (outfp == stdout) { - hex_dump(vp, offset, size); - } else { - size_t wrote = fwrite(vp, size, 1, outfp); - assert(wrote == 1); - } - } - toku_free(vp); -} - -static void set_file(int fd, uint64_t offset, unsigned char newc) { - toku_os_pwrite(fd, &newc, sizeof newc, offset); -} - -static int readline(char *line, int maxline) { - int i = 0; - int c; - while ((c = getchar()) != EOF && c != '\n' && i < maxline) { - line[i++] = (char)c; - } - line[i++] = 0; - return c == EOF ? 
EOF : i; -} - -static int split_fields(char *line, char *fields[], int maxfields) { - int i; - for (i=0; i"); fflush(stdout); - enum { maxline = 64}; - char line[maxline+1]; - int r = readline(line, maxline); - if (r == EOF) - break; - const int maxfields = 4; - char *fields[maxfields]; - int nfields = split_fields(line, fields, maxfields); - if (nfields == 0) - continue; - if (strcmp(fields[0], "help") == 0) { - interactive_help(); - } else if (strcmp(fields[0], "header") == 0) { - toku_ft_free(ft); - open_header(fd, &ft, cf); - dump_header(ft); - } else if (strcmp(fields[0], "block") == 0 && nfields == 2) { - BLOCKNUM blocknum = make_blocknum(getuint64(fields[1])); - dump_block(fd, blocknum, ft); - } else if (strcmp(fields[0], "node") == 0 && nfields == 2) { - BLOCKNUM off = make_blocknum(getuint64(fields[1])); - dump_node(fd, off, ft); - } else if (strcmp(fields[0], "dumpdata") == 0 && nfields == 2) { - do_dump_data = strtol(fields[1], NULL, 10); - } else if (strcmp(fields[0], "block_translation") == 0 || strcmp(fields[0], "bx") == 0) { - uint64_t offset = 0; - if (nfields == 2) - offset = getuint64(fields[1]); - dump_block_translation(ft, offset); - } else if (strcmp(fields[0], "fragmentation") == 0) { - dump_fragmentation(fd, ft, do_tsv); - } else if (strcmp(fields[0], "nodesizes") == 0) { - dump_nodesizes(fd, ft); - } else if (strcmp(fields[0], "garbage") == 0) { - dump_garbage_stats(fd, ft); - } else if (strcmp(fields[0], "file") == 0 && nfields >= 3) { - uint64_t offset = getuint64(fields[1]); - uint64_t size = getuint64(fields[2]); - FILE *outfp = stdout; - if (nfields >= 4) - outfp = fopen(fields[3], "w"); - dump_file(fd, offset, size, outfp); - } else if (strcmp(fields[0], "setfile") == 0 && nfields == 3) { - uint64_t offset = getuint64(fields[1]); - unsigned char newc = getuint64(fields[2]); - set_file(fd, offset, newc); - } else if (strcmp(fields[0], "quit") == 0 || strcmp(fields[0], "q") == 0) { - break; - } - } -} - -static int usage(void) { - 
fprintf(stderr, "Usage: %s ", arg0); - fprintf(stderr, "--interactive "); - fprintf(stderr, "--nodata "); - fprintf(stderr, "--dumpdata 0|1 "); - fprintf(stderr, "--header "); - fprintf(stderr, "--rootnode "); - fprintf(stderr, "--fragmentation "); - fprintf(stderr, "--garbage "); - fprintf(stderr, "--tsv "); - fprintf(stderr, "--translation-table "); - fprintf(stderr, "--tsv "); - fprintf(stderr, "ftfilename \n"); - return 1; -} - -int main (int argc, const char *const argv[]) { - arg0 = argv[0]; - argc--; argv++; - while (argc>0) { - if (strcmp(argv[0], "--interactive") == 0 || strcmp(argv[0], "--i") == 0) { - do_interactive = 1; - } else if (strcmp(argv[0], "--nodata") == 0) { - do_dump_data = 0; - } else if (strcmp(argv[0], "--dumpdata") == 0 && argc > 1) { - argc--; argv++; - do_dump_data = atoi(argv[0]); - } else if (strcmp(argv[0], "--header") == 0) { - do_header = 1; - } else if (strcmp(argv[0], "--rootnode") == 0) { - do_rootnode = 1; - } else if (strcmp(argv[0], "--fragmentation") == 0) { - do_fragmentation = 1; - } else if (strcmp(argv[0], "--garbage") == 0) { - do_garbage = 1; - } else if (strcmp(argv[0], "--tsv") == 0) { - do_tsv = 1; - } else if (strcmp(argv[0], "--translation-table") == 0) { - do_translation_table = 1; - } else if (strcmp(argv[0], "--help") == 0 || strcmp(argv[0], "-?") == 0 || strcmp(argv[0], "-h") == 0) { - return usage(); - } else { - break; - } - argc--; argv++; - } - if (argc != 1) - return usage(); - - int r = toku_ft_layer_init(); - assert_zero(r); - - fname = argv[0]; - int fd = open(fname, O_RDWR + O_BINARY); - if (fd < 0) { - fprintf(stderr, "%s: can not open %s errno %d\n", arg0, fname, errno); - return 1; - } - - // create a cachefile for the header - CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 1<<25, (LSN){0}, 0); - - CACHEFILE cf = NULL; - r = toku_cachetable_openfd (&cf, ct, fd, fname); - assert_zero(r); - - FT ft = NULL; - open_header(fd, &ft, cf); - - if (do_interactive) { - run_iteractive_loop(fd, ft, cf); - 
} else { - if (do_header) { - dump_header(ft); - } - if (do_rootnode) { - dump_node(fd, ft->h->root_blocknum, ft); - } - if (do_fragmentation) { - dump_fragmentation(fd, ft, do_tsv); - } - if (do_translation_table) { - toku_dump_translation_table_pretty(stdout, ft->blocktable); - } - if (do_garbage) { - dump_garbage_stats(fd, ft); - } - if (!do_header && !do_rootnode && !do_fragmentation && !do_translation_table && !do_garbage) { - printf("Block translation:"); - - toku_dump_translation_table(stdout, ft->blocktable); - - struct __dump_node_extra info; - info.fd = fd; - info.h = ft; - toku_blocktable_iterate(ft->blocktable, TRANSLATION_CHECKPOINTED, - dump_node_wrapper, &info, true, true); - } - } - toku_cachefile_close(&cf, false, ZERO_LSN); - toku_cachetable_close(&ct); - toku_ft_free(ft); - toku_ft_layer_destroy(); - return 0; -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/rollback-apply.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/rollback-apply.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/rollback-apply.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/rollback-apply.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,308 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include + +#include "ft/logger/log-internal.h" +#include "ft/txn/rollback-apply.h" + +static void poll_txn_progress_function(TOKUTXN txn, uint8_t is_commit, uint8_t stall_for_checkpoint) { + if (txn->progress_poll_fun) { + TOKU_TXN_PROGRESS_S progress = { + .entries_total = txn->roll_info.num_rollentries, + .entries_processed = txn->roll_info.num_rollentries_processed, + .is_commit = is_commit, + .stalled_on_checkpoint = stall_for_checkpoint}; + txn->progress_poll_fun(&progress, txn->progress_poll_fun_extra); + } +} + +int toku_commit_rollback_item (TOKUTXN txn, struct roll_entry *item, LSN lsn) { + int r=0; + rolltype_dispatch_assign(item, toku_commit_, r, txn, lsn); + txn->roll_info.num_rollentries_processed++; + if (txn->roll_info.num_rollentries_processed % 1024 == 0) { + poll_txn_progress_function(txn, true, false); + } + return r; +} + +int toku_abort_rollback_item (TOKUTXN txn, struct roll_entry *item, LSN lsn) { + int r=0; + rolltype_dispatch_assign(item, toku_rollback_, r, txn, lsn); + txn->roll_info.num_rollentries_processed++; + if (txn->roll_info.num_rollentries_processed % 1024 == 0) { + poll_txn_progress_function(txn, false, false); + } + return r; +} + +int note_ft_used_in_txns_parent(const FT &ft, uint32_t UU(index), TOKUTXN const child); +int note_ft_used_in_txns_parent(const FT &ft, uint32_t UU(index), TOKUTXN const child) { + TOKUTXN parent = child->parent; + toku_txn_maybe_note_ft(parent, ft); + return 0; +} + +static int apply_txn(TOKUTXN txn, LSN lsn, apply_rollback_item func) { + int r = 0; + // do the commit/abort calls and free everything + // we do the commit/abort calls in reverse order too. 
+ struct roll_entry *item; + //printf("%s:%d abort\n", __FILE__, __LINE__); + + BLOCKNUM next_log = ROLLBACK_NONE; + + bool is_current = false; + if (txn_has_current_rollback_log(txn)) { + next_log = txn->roll_info.current_rollback; + is_current = true; + } + else if (txn_has_spilled_rollback_logs(txn)) { + next_log = txn->roll_info.spilled_rollback_tail; + } + + uint64_t last_sequence = txn->roll_info.num_rollback_nodes; + bool found_head = false; + while (next_log.b != ROLLBACK_NONE.b) { + ROLLBACK_LOG_NODE log; + //pin log + toku_get_and_pin_rollback_log(txn, next_log, &log); + toku_rollback_verify_contents(log, txn->txnid, last_sequence - 1); + + toku_maybe_prefetch_previous_rollback_log(txn, log); + + last_sequence = log->sequence; + if (func) { + while ((item=log->newest_logentry)) { + log->newest_logentry = item->prev; + r = func(txn, item, lsn); + if (r!=0) return r; + } + } + if (next_log.b == txn->roll_info.spilled_rollback_head.b) { + assert(!found_head); + found_head = true; + assert(log->sequence == 0); + } + next_log = log->previous; + { + //Clean up transaction structure to prevent + //toku_txn_close from double-freeing + if (is_current) { + txn->roll_info.current_rollback = ROLLBACK_NONE; + is_current = false; + } + else { + txn->roll_info.spilled_rollback_tail = next_log; + } + if (found_head) { + assert(next_log.b == ROLLBACK_NONE.b); + txn->roll_info.spilled_rollback_head = next_log; + } + } + bool give_back = false; + // each txn tries to give back at most one rollback log node + // to the cache. + if (next_log.b == ROLLBACK_NONE.b) { + give_back = txn->logger->rollback_cache.give_rollback_log_node( + txn, + log + ); + } + if (!give_back) { + toku_rollback_log_unpin_and_remove(txn, log); + } + } + return r; +} + +//Commit each entry in the rollback log. +//If the transaction has a parent, it just promotes its information to its parent. 
+int toku_rollback_commit(TOKUTXN txn, LSN lsn) { + int r=0; + if (txn->parent!=0) { + // First we must put a rollinclude entry into the parent if we spilled + + if (txn_has_spilled_rollback_logs(txn)) { + uint64_t num_nodes = txn->roll_info.num_rollback_nodes; + if (txn_has_current_rollback_log(txn)) { + num_nodes--; //Don't count the in-progress rollback log. + } + toku_logger_save_rollback_rollinclude(txn->parent, txn->txnid, num_nodes, + txn->roll_info.spilled_rollback_head, + txn->roll_info.spilled_rollback_tail); + //Remove ownership from child. + txn->roll_info.spilled_rollback_head = ROLLBACK_NONE; + txn->roll_info.spilled_rollback_tail = ROLLBACK_NONE; + } + // if we're commiting a child rollback, put its entries into the parent + // by pinning both child and parent and then linking the child log entry + // list to the end of the parent log entry list. + if (txn_has_current_rollback_log(txn)) { + //Pin parent log + toku_txn_lock(txn->parent); + ROLLBACK_LOG_NODE parent_log; + toku_get_and_pin_rollback_log_for_new_entry(txn->parent, &parent_log); + + //Pin child log + ROLLBACK_LOG_NODE child_log; + toku_get_and_pin_rollback_log(txn, txn->roll_info.current_rollback, &child_log); + toku_rollback_verify_contents(child_log, txn->txnid, txn->roll_info.num_rollback_nodes - 1); + + // Append the list to the front of the parent. + if (child_log->oldest_logentry) { + // There are some entries, so link them in. 
+ child_log->oldest_logentry->prev = parent_log->newest_logentry; + if (!parent_log->oldest_logentry) { + parent_log->oldest_logentry = child_log->oldest_logentry; + } + parent_log->newest_logentry = child_log->newest_logentry; + parent_log->rollentry_resident_bytecount += child_log->rollentry_resident_bytecount; + txn->parent->roll_info.rollentry_raw_count += txn->roll_info.rollentry_raw_count; + child_log->rollentry_resident_bytecount = 0; + } + if (parent_log->oldest_logentry==NULL) { + parent_log->oldest_logentry = child_log->oldest_logentry; + } + child_log->newest_logentry = child_log->oldest_logentry = 0; + // Put all the memarena data into the parent. + if (child_log->rollentry_arena.total_size_in_use() > 0) { + // If there are no bytes to move, then just leave things alone, and let the memory be reclaimed on txn is closed. + child_log->rollentry_arena.move_memory(&parent_log->rollentry_arena); + } + // each txn tries to give back at most one rollback log node + // to the cache. All other rollback log nodes for this child + // transaction are included in the parent's rollback log, + // so this is the only node we can give back to the cache + bool give_back = txn->logger->rollback_cache.give_rollback_log_node( + txn, + child_log + ); + if (!give_back) { + toku_rollback_log_unpin_and_remove(txn, child_log); + } + txn->roll_info.current_rollback = ROLLBACK_NONE; + + toku_maybe_spill_rollbacks(txn->parent, parent_log); + toku_rollback_log_unpin(txn->parent, parent_log); + assert(r == 0); + toku_txn_unlock(txn->parent); + } + + // Note the open FTs, the omts must be merged + r = txn->open_fts.iterate(txn); + assert(r==0); + + //If this transaction needs an fsync (if it commits) + //save that in the parent. Since the commit really happens in the root txn. 
+ txn->parent->force_fsync_on_commit |= txn->force_fsync_on_commit; + txn->parent->roll_info.num_rollentries += txn->roll_info.num_rollentries; + } else { + r = apply_txn(txn, lsn, toku_commit_rollback_item); + assert(r==0); + } + + return r; +} + +int toku_rollback_abort(TOKUTXN txn, LSN lsn) { + int r; + r = apply_txn(txn, lsn, toku_abort_rollback_item); + assert(r==0); + return r; +} + +int toku_rollback_discard(TOKUTXN txn) { + txn->roll_info.current_rollback = ROLLBACK_NONE; + return 0; +} + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/rollback-apply.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/rollback-apply.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/rollback-apply.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/rollback-apply.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,101 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +typedef int(*apply_rollback_item)(TOKUTXN txn, struct roll_entry *item, LSN lsn); +int toku_commit_rollback_item (TOKUTXN txn, struct roll_entry *item, LSN lsn); +int toku_abort_rollback_item (TOKUTXN txn, struct roll_entry *item, LSN lsn); + +int toku_rollback_commit(TOKUTXN txn, LSN lsn); +int toku_rollback_abort(TOKUTXN txn, LSN lsn); +int toku_rollback_discard(TOKUTXN txn); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/rollback.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/rollback.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/rollback.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/rollback.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,387 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. 
+ +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include + +#include + +#include "ft/serialize/block_table.h" +#include "ft/ft.h" +#include "ft/logger/log-internal.h" +#include "ft/txn/rollback-ct-callbacks.h" + +static void rollback_unpin_remove_callback(CACHEKEY* cachekey, bool for_checkpoint, void* extra) { + FT CAST_FROM_VOIDP(ft, extra); + ft->blocktable.free_blocknum(cachekey, ft, for_checkpoint); +} + +void toku_rollback_log_unpin_and_remove(TOKUTXN txn, ROLLBACK_LOG_NODE log) { + int r; + CACHEFILE cf = txn->logger->rollback_cachefile; + FT CAST_FROM_VOIDP(ft, toku_cachefile_get_userdata(cf)); + r = toku_cachetable_unpin_and_remove (cf, log->ct_pair, rollback_unpin_remove_callback, ft); + assert(r == 0); +} + +int +toku_find_xid_by_xid (const TXNID &xid, const TXNID &xidfind) { + if (xidxidfind) return +1; + return 0; +} + +// TODO: fix this name +// toku_rollback_malloc +void *toku_malloc_in_rollback(ROLLBACK_LOG_NODE log, size_t size) { + return log->rollentry_arena.malloc_from_arena(size); +} + +// TODO: fix this name +// toku_rollback_memdup +void *toku_memdup_in_rollback(ROLLBACK_LOG_NODE log, const void *v, size_t len) { + void *r = toku_malloc_in_rollback(log, len); + memcpy(r, v, len); + return r; +} + +static inline PAIR_ATTR make_rollback_pair_attr(long size) { + PAIR_ATTR result={ + .size = size, + .nonleaf_size = 0, + .leaf_size = 0, + .rollback_size = size, + .cache_pressure_size = 0, + .is_valid = true + }; + return result; +} + +PAIR_ATTR +rollback_memory_size(ROLLBACK_LOG_NODE log) { + size_t size = sizeof(*log); + if (&log->rollentry_arena) { + size += log->rollentry_arena.total_footprint(); + } + return make_rollback_pair_attr(size); +} + +static void toku_rollback_node_save_ct_pair(CACHEKEY UU(key), void *value_data, PAIR p) { + ROLLBACK_LOG_NODE CAST_FROM_VOIDP(log, value_data); + log->ct_pair = p; +} + +// +// initializes an empty rollback log node +// Does not touch the blocknum, that is the +// responsibility of the caller +// +void rollback_empty_log_init(ROLLBACK_LOG_NODE log) 
{ + // Having a txnid set to TXNID_NONE is how we determine if the + // rollback log node is empty or in use. + log->txnid.parent_id64 = TXNID_NONE; + log->txnid.child_id64 = TXNID_NONE; + + log->layout_version = FT_LAYOUT_VERSION; + log->layout_version_original = FT_LAYOUT_VERSION; + log->layout_version_read_from_disk = FT_LAYOUT_VERSION; + log->dirty = true; + log->sequence = 0; + log->previous = make_blocknum(0); + log->oldest_logentry = NULL; + log->newest_logentry = NULL; + log->rollentry_arena.create(0); + log->rollentry_resident_bytecount = 0; +} + +static void rollback_initialize_for_txn( + ROLLBACK_LOG_NODE log, + TOKUTXN txn, + BLOCKNUM previous + ) +{ + log->txnid = txn->txnid; + log->sequence = txn->roll_info.num_rollback_nodes++; + log->previous = previous; + log->oldest_logentry = NULL; + log->newest_logentry = NULL; + log->rollentry_arena.create(1024); + log->rollentry_resident_bytecount = 0; + log->dirty = true; +} + +// TODO: fix this name +void make_rollback_log_empty(ROLLBACK_LOG_NODE log) { + log->rollentry_arena.destroy(); + rollback_empty_log_init(log); +} + +// create and pin a new rollback log node. 
chain it to the other rollback nodes +// by providing a previous blocknum and assigning the new rollback log +// node the next sequence number +static void rollback_log_create ( + TOKUTXN txn, + BLOCKNUM previous, + ROLLBACK_LOG_NODE *result + ) +{ + ROLLBACK_LOG_NODE XMALLOC(log); + rollback_empty_log_init(log); + + CACHEFILE cf = txn->logger->rollback_cachefile; + FT CAST_FROM_VOIDP(ft, toku_cachefile_get_userdata(cf)); + rollback_initialize_for_txn(log, txn, previous); + ft->blocktable.allocate_blocknum(&log->blocknum, ft); + const uint32_t hash = toku_cachetable_hash(ft->cf, log->blocknum); + *result = log; + toku_cachetable_put(cf, log->blocknum, hash, + log, rollback_memory_size(log), + get_write_callbacks_for_rollback_log(ft), + toku_rollback_node_save_ct_pair); + txn->roll_info.current_rollback = log->blocknum; +} + +void toku_rollback_log_unpin(TOKUTXN txn, ROLLBACK_LOG_NODE log) { + int r; + CACHEFILE cf = txn->logger->rollback_cachefile; + r = toku_cachetable_unpin( + cf, + log->ct_pair, + (enum cachetable_dirty)log->dirty, + rollback_memory_size(log) + ); + assert(r == 0); +} + +//Requires: log is pinned +// log is current +//After: +// Maybe there is no current after (if it spilled) +void toku_maybe_spill_rollbacks(TOKUTXN txn, ROLLBACK_LOG_NODE log) { + if (log->rollentry_resident_bytecount > txn->logger->write_block_size) { + assert(log->blocknum.b == txn->roll_info.current_rollback.b); + //spill + if (!txn_has_spilled_rollback_logs(txn)) { + //First spilled. Copy to head. + txn->roll_info.spilled_rollback_head = txn->roll_info.current_rollback; + } + //Unconditionally copy to tail. Old tail does not need to be cached anymore. 
+ txn->roll_info.spilled_rollback_tail = txn->roll_info.current_rollback; + + txn->roll_info.current_rollback = ROLLBACK_NONE; + } +} + +int find_filenum (const FT &h, const FT &hfind); +int find_filenum (const FT &h, const FT &hfind) { + FILENUM fnum = toku_cachefile_filenum(h->cf); + FILENUM fnumfind = toku_cachefile_filenum(hfind->cf); + if (fnum.fileidfnumfind.fileid) return +1; + return 0; +} + +//Notify a transaction that it has touched an ft. +void toku_txn_maybe_note_ft (TOKUTXN txn, FT ft) { + toku_txn_lock(txn); + FT ftv; + uint32_t idx; + int r = txn->open_fts.find_zero(ft, &ftv, &idx); + if (r == 0) { + // already there + assert(ftv == ft); + goto exit; + } + r = txn->open_fts.insert_at(ft, idx); + assert_zero(r); + // TODO(leif): if there's anything that locks the reflock and then + // the txn lock, this may deadlock, because it grabs the reflock. + toku_ft_add_txn_ref(ft); +exit: + toku_txn_unlock(txn); +} + +// Return the number of bytes that went into the rollback data structure (the uncompressed count if there is compression) +int toku_logger_txn_rollback_stats(TOKUTXN txn, struct txn_stat *txn_stat) +{ + toku_txn_lock(txn); + txn_stat->rollback_raw_count = txn->roll_info.rollentry_raw_count; + txn_stat->rollback_num_entries = txn->roll_info.num_rollentries; + toku_txn_unlock(txn); + return 0; +} + +void toku_maybe_prefetch_previous_rollback_log(TOKUTXN txn, ROLLBACK_LOG_NODE log) { + //Currently processing 'log'. Prefetch the next (previous) log node. 
+ + BLOCKNUM name = log->previous; + int r = 0; + if (name.b != ROLLBACK_NONE.b) { + CACHEFILE cf = txn->logger->rollback_cachefile; + uint32_t hash = toku_cachetable_hash(cf, name); + FT CAST_FROM_VOIDP(h, toku_cachefile_get_userdata(cf)); + bool doing_prefetch = false; + r = toku_cachefile_prefetch(cf, name, hash, + get_write_callbacks_for_rollback_log(h), + toku_rollback_fetch_callback, + toku_rollback_pf_req_callback, + toku_rollback_pf_callback, + h, + &doing_prefetch); + assert(r == 0); + } +} + +void toku_rollback_verify_contents(ROLLBACK_LOG_NODE log, + TXNID_PAIR txnid, uint64_t sequence) +{ + assert(log->txnid.parent_id64 == txnid.parent_id64); + assert(log->txnid.child_id64 == txnid.child_id64); + assert(log->sequence == sequence); +} + +void toku_get_and_pin_rollback_log(TOKUTXN txn, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *log) { + void * value; + CACHEFILE cf = txn->logger->rollback_cachefile; + FT CAST_FROM_VOIDP(h, toku_cachefile_get_userdata(cf)); + uint32_t hash = toku_cachetable_hash(cf, blocknum); + int r = toku_cachetable_get_and_pin_with_dep_pairs(cf, blocknum, hash, + &value, NULL, + get_write_callbacks_for_rollback_log(h), + toku_rollback_fetch_callback, + toku_rollback_pf_req_callback, + toku_rollback_pf_callback, + PL_WRITE_CHEAP, // lock_type + h, + 0, NULL, NULL + ); + assert(r == 0); + ROLLBACK_LOG_NODE CAST_FROM_VOIDP(pinned_log, value); + assert(pinned_log->blocknum.b == blocknum.b); + *log = pinned_log; +} + +void toku_get_and_pin_rollback_log_for_new_entry (TOKUTXN txn, ROLLBACK_LOG_NODE *log) { + ROLLBACK_LOG_NODE pinned_log = NULL; + invariant(txn->state == TOKUTXN_LIVE || txn->state == TOKUTXN_PREPARING); // hot indexing may call this function for prepared transactions + if (txn_has_current_rollback_log(txn)) { + toku_get_and_pin_rollback_log(txn, txn->roll_info.current_rollback, &pinned_log); + toku_rollback_verify_contents(pinned_log, txn->txnid, txn->roll_info.num_rollback_nodes - 1); + } else { + // For each transaction, we try 
to acquire the first rollback log + // from the rollback log node cache, so that we avoid + // putting something new into the cachetable. However, + // if transaction has spilled rollbacks, that means we + // have already done a lot of work for this transaction, + // and subsequent rollback log nodes are created + // and put into the cachetable. The idea is for + // transactions that don't do a lot of work to (hopefully) + // get a rollback log node from a cache, as opposed to + // taking the more expensive route of creating a new one. + if (!txn_has_spilled_rollback_logs(txn)) { + txn->logger->rollback_cache.get_rollback_log_node(txn, &pinned_log); + if (pinned_log != NULL) { + rollback_initialize_for_txn( + pinned_log, + txn, + txn->roll_info.spilled_rollback_tail + ); + txn->roll_info.current_rollback = pinned_log->blocknum; + } + } + if (pinned_log == NULL) { + rollback_log_create(txn, txn->roll_info.spilled_rollback_tail, &pinned_log); + } + } + assert(pinned_log->txnid.parent_id64 == txn->txnid.parent_id64); + assert(pinned_log->txnid.child_id64 == txn->txnid.child_id64); + assert(pinned_log->blocknum.b != ROLLBACK_NONE.b); + *log = pinned_log; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/rollback-ct-callbacks.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/rollback-ct-callbacks.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/rollback-ct-callbacks.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/rollback-ct-callbacks.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,315 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * 
Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. 
+ + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include + +#include "portability/memory.h" +#include "portability/toku_portability.h" + +#include "ft/serialize/block_table.h" +#include "ft/ft-internal.h" +#include "ft/serialize/ft_node-serialize.h" +#include "ft/txn/rollback.h" +#include "ft/txn/rollback-ct-callbacks.h" + +#include "util/memarena.h" + +// Address used as a sentinel. Otherwise unused. +static struct serialized_rollback_log_node cloned_rollback; + +// Cleanup the rollback memory +static void +rollback_log_destroy(ROLLBACK_LOG_NODE log) { + make_rollback_log_empty(log); + toku_free(log); +} + +// flush an unused log to disk, by allocating a size 0 blocknum in +// the blocktable +static void +toku_rollback_flush_unused_log( + ROLLBACK_LOG_NODE log, + BLOCKNUM logname, + int fd, + FT ft, + bool write_me, + bool keep_me, + bool for_checkpoint, + bool is_clone + ) +{ + if (write_me) { + DISKOFF offset; + ft->blocktable.realloc_on_disk(logname, 0, &offset, ft, fd, for_checkpoint, INT_MAX); + } + if (!keep_me && !is_clone) { + toku_free(log); + } +} + +// flush a used log to disk by serializing and writing the node out +static void +toku_rollback_flush_used_log ( + ROLLBACK_LOG_NODE log, + SERIALIZED_ROLLBACK_LOG_NODE serialized, + int fd, + FT ft, + bool write_me, + bool keep_me, + bool for_checkpoint, + bool is_clone + ) +{ + + if (write_me) { + int r = toku_serialize_rollback_log_to(fd, log, serialized, is_clone, ft, for_checkpoint); + assert(r == 0); + } + if (!keep_me) { + if (is_clone) { + toku_serialized_rollback_log_destroy(serialized); + } + else { + rollback_log_destroy(log); + } + } +} + +// Write something out. Keep trying even if partial writes occur. +// On error: Return negative with errno set. +// On success return nbytes. 
+void toku_rollback_flush_callback ( + CACHEFILE UU(cachefile), + int fd, + BLOCKNUM logname, + void *rollback_v, + void** UU(disk_data), + void *extraargs, + PAIR_ATTR size, + PAIR_ATTR* new_size, + bool write_me, + bool keep_me, + bool for_checkpoint, + bool is_clone + ) +{ + ROLLBACK_LOG_NODE log = nullptr; + SERIALIZED_ROLLBACK_LOG_NODE serialized = nullptr; + bool is_unused = false; + if (is_clone) { + is_unused = (rollback_v == &cloned_rollback); + CAST_FROM_VOIDP(serialized, rollback_v); + } + else { + CAST_FROM_VOIDP(log, rollback_v); + is_unused = rollback_log_is_unused(log); + } + *new_size = size; + FT ft; + CAST_FROM_VOIDP(ft, extraargs); + if (is_unused) { + toku_rollback_flush_unused_log( + log, + logname, + fd, + ft, + write_me, + keep_me, + for_checkpoint, + is_clone + ); + } + else { + toku_rollback_flush_used_log( + log, + serialized, + fd, + ft, + write_me, + keep_me, + for_checkpoint, + is_clone + ); + } +} + +int toku_rollback_fetch_callback (CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM logname, uint32_t fullhash UU(), + void **rollback_pv, void** UU(disk_data), PAIR_ATTR *sizep, int * UU(dirtyp), void *extraargs) { + int r; + FT CAST_FROM_VOIDP(h, extraargs); + assert(h->cf == cachefile); + ROLLBACK_LOG_NODE *result = (ROLLBACK_LOG_NODE*)rollback_pv; + r = toku_deserialize_rollback_log_from(fd, logname, result, h); + if (r==0) { + (*result)->ct_pair = p; + *sizep = rollback_memory_size(*result); + } + return r; +} + +void toku_rollback_pe_est_callback( + void* rollback_v, + void* UU(disk_data), + long* bytes_freed_estimate, + enum partial_eviction_cost *cost, + void* UU(write_extraargs) + ) +{ + assert(rollback_v != NULL); + *bytes_freed_estimate = 0; + *cost = PE_CHEAP; +} + +// callback for partially evicting a cachetable entry +int toku_rollback_pe_callback ( + void *rollback_v, + PAIR_ATTR old_attr, + void* UU(extraargs), + void (*finalize)(PAIR_ATTR new_attr, void * extra), + void *finalize_extra + ) +{ + assert(rollback_v != NULL); + 
finalize(old_attr, finalize_extra); + return 0; +} + +// partial fetch is never required for a rollback log node +bool toku_rollback_pf_req_callback(void* UU(ftnode_pv), void* UU(read_extraargs)) { + return false; +} + +// a rollback node should never be partial fetched, +// because we always say it is not required. +// (pf req callback always returns false) +int toku_rollback_pf_callback(void* UU(ftnode_pv), void* UU(disk_data), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* UU(sizep)) { + assert(false); + return 0; +} + +// the cleaner thread should never choose a rollback node for cleaning +int toku_rollback_cleaner_callback ( + void* UU(ftnode_pv), + BLOCKNUM UU(blocknum), + uint32_t UU(fullhash), + void* UU(extraargs) + ) +{ + assert(false); + return 0; +} + +void toku_rollback_clone_callback( + void* value_data, + void** cloned_value_data, + long* clone_size, + PAIR_ATTR* new_attr, + bool UU(for_checkpoint), + void* UU(write_extraargs) + ) +{ + ROLLBACK_LOG_NODE CAST_FROM_VOIDP(log, value_data); + SERIALIZED_ROLLBACK_LOG_NODE serialized = nullptr; + if (!rollback_log_is_unused(log)) { + XMALLOC(serialized); + toku_serialize_rollback_log_to_memory_uncompressed(log, serialized); + *cloned_value_data = serialized; + *clone_size = sizeof(struct serialized_rollback_log_node) + serialized->len; + } + else { + *cloned_value_data = &cloned_rollback; + *clone_size = sizeof(cloned_rollback); + } + // clear the dirty bit, because the node has been cloned + log->dirty = 0; + new_attr->is_valid = false; +} + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/rollback-ct-callbacks.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/rollback-ct-callbacks.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/rollback-ct-callbacks.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/rollback-ct-callbacks.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,134 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ 
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. 
+ +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
+#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include "ft/cachetable/cachetable.h" + +void toku_rollback_flush_callback(CACHEFILE cachefile, int fd, BLOCKNUM logname, void *rollback_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool UU(is_clone)); +int toku_rollback_fetch_callback(CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM logname, uint32_t fullhash, void **rollback_pv, void** UU(disk_data), PAIR_ATTR *sizep, int * UU(dirtyp), void *extraargs); +void toku_rollback_pe_est_callback( + void* rollback_v, + void* UU(disk_data), + long* bytes_freed_estimate, + enum partial_eviction_cost *cost, + void* UU(write_extraargs) + ); +int toku_rollback_pe_callback ( + void *rollback_v, + PAIR_ATTR old_attr, + void* UU(extraargs), + void (*finalize)(PAIR_ATTR new_attr, void * extra), + void *finalize_extra + ); +bool toku_rollback_pf_req_callback(void* UU(ftnode_pv), void* UU(read_extraargs)) ; +int toku_rollback_pf_callback(void* UU(ftnode_pv), void* UU(disk_data), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* UU(sizep)); +void toku_rollback_clone_callback(void* value_data, void** cloned_value_data, long* clone_size, PAIR_ATTR* new_attr, bool for_checkpoint, void* write_extraargs); + +int toku_rollback_cleaner_callback ( + void* UU(ftnode_pv), + BLOCKNUM UU(blocknum), + uint32_t UU(fullhash), + void* UU(extraargs) + ); + +static inline CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_rollback_log(FT ft) { + CACHETABLE_WRITE_CALLBACK wc; + wc.flush_callback = toku_rollback_flush_callback; + wc.pe_est_callback = toku_rollback_pe_est_callback; + wc.pe_callback = toku_rollback_pe_callback; + wc.cleaner_callback = 
toku_rollback_cleaner_callback; + wc.clone_callback = toku_rollback_clone_callback; + wc.checkpoint_complete_callback = nullptr; + wc.write_extraargs = ft; + return wc; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/rollback.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/rollback.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/rollback.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/rollback.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,199 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. 
+ +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include "ft/cachetable/cachetable.h" +#include "ft/serialize/sub_block.h" +#include "ft/txn/txn.h" + +#include "util/memarena.h" + +typedef struct rollback_log_node *ROLLBACK_LOG_NODE; +typedef struct serialized_rollback_log_node *SERIALIZED_ROLLBACK_LOG_NODE; + +void toku_poll_txn_progress_function(TOKUTXN txn, uint8_t is_commit, uint8_t stall_for_checkpoint); + +// these functions assert internally that they succeed + +// get a rollback node this txn may use for a new entry. if there +// is a current rollback node to use, pin it, otherwise create one. 
+void toku_get_and_pin_rollback_log_for_new_entry(TOKUTXN txn, ROLLBACK_LOG_NODE *log); + +// get a specific rollback by blocknum +void toku_get_and_pin_rollback_log(TOKUTXN txn, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *log); + +// unpin a rollback node from the cachetable +void toku_rollback_log_unpin(TOKUTXN txn, ROLLBACK_LOG_NODE log); + +// assert that the given log's txnid and sequence match the ones given +void toku_rollback_verify_contents(ROLLBACK_LOG_NODE log, TXNID_PAIR txnid, uint64_t sequence); + +// if there is a previous rollback log for the given log node, prefetch it +void toku_maybe_prefetch_previous_rollback_log(TOKUTXN txn, ROLLBACK_LOG_NODE log); + +// unpin and remove a rollback log from the cachetable +void toku_rollback_log_unpin_and_remove(TOKUTXN txn, ROLLBACK_LOG_NODE log); + +void *toku_malloc_in_rollback(ROLLBACK_LOG_NODE log, size_t size); +void *toku_memdup_in_rollback(ROLLBACK_LOG_NODE log, const void *v, size_t len); + +// given a transaction and a log node, and if the log is too full, +// set the current rollback log to ROLLBACK_NONE and move the current +// node onto the tail of the rollback node chain. further insertions +// into the rollback log for this transaction will force the creation +// of a new rollback log. +// +// this never unpins the rollback log if a spill occurs. the caller +// is responsible for ensuring the given rollback node is unpinned +// if necessary. +void toku_maybe_spill_rollbacks(TOKUTXN txn, ROLLBACK_LOG_NODE log); + +void toku_txn_maybe_note_ft (TOKUTXN txn, struct ft *ft); +int toku_logger_txn_rollback_stats(TOKUTXN txn, struct txn_stat *txn_stat); + +int toku_find_xid_by_xid (const TXNID &xid, const TXNID &xidfind); + +PAIR_ATTR rollback_memory_size(ROLLBACK_LOG_NODE log); + +// A high-level rollback log is made up of a chain of rollback log nodes. +// Each rollback log node is represented (separately) in the cachetable by +// this structure. 
Each portion of the rollback log chain has a block num +// and a hash to identify it. +struct rollback_log_node { + int layout_version; + int layout_version_original; + int layout_version_read_from_disk; + uint32_t build_id; // build_id (svn rev number) of software that wrote this node to disk + int dirty; + // to which transaction does this node belong? + TXNID_PAIR txnid; + // sequentially, where in the rollback log chain is this node? + // the sequence is between 0 and totalnodes-1 + uint64_t sequence; + BLOCKNUM blocknum; // on which block does this node live? + // which block number is the previous in the chain of rollback nodes + // that make up this rollback log? + BLOCKNUM previous; + struct roll_entry *oldest_logentry; + struct roll_entry *newest_logentry; + memarena rollentry_arena; + size_t rollentry_resident_bytecount; // How many bytes for the rollentries that are stored in main memory. + PAIR ct_pair; +}; + +struct serialized_rollback_log_node { + char *data; + uint32_t len; + int n_sub_blocks; + BLOCKNUM blocknum; + struct sub_block sub_block[max_sub_blocks]; +}; +typedef struct serialized_rollback_log_node *SERIALIZED_ROLLBACK_LOG_NODE; + +static inline void +toku_static_serialized_rollback_log_destroy(SERIALIZED_ROLLBACK_LOG_NODE log) { + toku_free(log->data); +} + +static inline void +toku_serialized_rollback_log_destroy(SERIALIZED_ROLLBACK_LOG_NODE log) { + toku_static_serialized_rollback_log_destroy(log); + toku_free(log); +} + +void rollback_empty_log_init(ROLLBACK_LOG_NODE log); +void make_rollback_log_empty(ROLLBACK_LOG_NODE log); + +static inline bool rollback_log_is_unused(ROLLBACK_LOG_NODE log) { + return (log->txnid.parent_id64 == TXNID_NONE); +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/rollback_log_node_cache.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/rollback_log_node_cache.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/rollback_log_node_cache.cc 1970-01-01 00:00:00.000000000 +0000 +++ 
mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/rollback_log_node_cache.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,162 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 
11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include + +#include +#include + +#include "txn/rollback_log_node_cache.h" + +void rollback_log_node_cache::init (uint32_t max_num_avail_nodes) { + XMALLOC_N(max_num_avail_nodes, m_avail_blocknums); + m_max_num_avail = max_num_avail_nodes; + m_first = 0; + m_num_avail = 0; + toku_pthread_mutexattr_t attr; + toku_mutexattr_init(&attr); + toku_mutexattr_settype(&attr, TOKU_MUTEX_ADAPTIVE); + toku_mutex_init(&m_mutex, &attr); + toku_mutexattr_destroy(&attr); +} + +void rollback_log_node_cache::destroy() { + toku_mutex_destroy(&m_mutex); + toku_free(m_avail_blocknums); +} + +// returns true if rollback log node was successfully added, +// false otherwise +bool rollback_log_node_cache::give_rollback_log_node(TOKUTXN txn, ROLLBACK_LOG_NODE log){ + bool retval = false; + toku_mutex_lock(&m_mutex); + if (m_num_avail < m_max_num_avail) { + retval = true; + uint32_t index = m_first + m_num_avail; + if (index >= m_max_num_avail) { + index -= m_max_num_avail; + } + m_avail_blocknums[index].b = log->blocknum.b; + m_num_avail++; + } + toku_mutex_unlock(&m_mutex); + // + // now unpin the rollback log node + // + if (retval) { + make_rollback_log_empty(log); + toku_rollback_log_unpin(txn, log); + } + return retval; +} + +// if a rollback log node is available, will set log to it, +// otherwise, will set log to NULL and caller is on his own +// for getting a rollback log node +void rollback_log_node_cache::get_rollback_log_node(TOKUTXN txn, ROLLBACK_LOG_NODE* log){ + BLOCKNUM b = ROLLBACK_NONE; + toku_mutex_lock(&m_mutex); + if (m_num_avail > 0) { + b.b = m_avail_blocknums[m_first].b; + 
m_num_avail--; + if (++m_first >= m_max_num_avail) { + m_first = 0; + } + } + toku_mutex_unlock(&m_mutex); + if (b.b != ROLLBACK_NONE.b) { + toku_get_and_pin_rollback_log(txn, b, log); + invariant(rollback_log_is_unused(*log)); + } else { + *log = NULL; + } +} + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/rollback_log_node_cache.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/rollback_log_node_cache.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/rollback_log_node_cache.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/rollback_log_node_cache.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,117 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. 
+ +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include "ft/txn/rollback.h" + +class rollback_log_node_cache { +public: + void init (uint32_t max_num_avail_nodes); + void destroy(); + // returns true if rollback log node was successfully added, + // false otherwise + bool give_rollback_log_node(TOKUTXN txn, ROLLBACK_LOG_NODE log); + // if a rollback log node is available, will set log to it, + // otherwise, will set log to NULL and caller is on his own + // for getting a rollback log node + void get_rollback_log_node(TOKUTXN txn, ROLLBACK_LOG_NODE* log); + +private: + BLOCKNUM* m_avail_blocknums; + uint32_t m_first; + uint32_t m_num_avail; + uint32_t m_max_num_avail; + toku_mutex_t m_mutex; +}; + +ENSURE_POD(rollback_log_node_cache); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/roll.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/roll.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/roll.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/roll.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,634 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +/* rollback and rollforward routines. 
*/ + + +#include + +#include "ft/ft.h" +#include "ft/ft-ops.h" +#include "ft/log_header.h" +#include "ft/logger/log-internal.h" +#include "ft/txn/xids.h" +#include "ft/txn/rollback-apply.h" + +// functionality provided by roll.c is exposed by an autogenerated +// header file, logheader.h +// +// this (poorly) explains the absense of "roll.h" + +// these flags control whether or not we send commit messages for +// various operations + +// When a transaction is committed, should we send a FT_COMMIT message +// for each FT_INSERT message sent earlier by the transaction? +#define TOKU_DO_COMMIT_CMD_INSERT 0 + +// When a transaction is committed, should we send a FT_COMMIT message +// for each FT_DELETE_ANY message sent earlier by the transaction? +#define TOKU_DO_COMMIT_CMD_DELETE 1 + +// When a transaction is committed, should we send a FT_COMMIT message +// for each FT_UPDATE message sent earlier by the transaction? +#define TOKU_DO_COMMIT_CMD_UPDATE 0 + +int +toku_commit_fdelete (FILENUM filenum, + TOKUTXN txn, + LSN UU(oplsn)) //oplsn is the lsn of the commit +{ + int r; + CACHEFILE cf; + CACHETABLE ct = txn->logger->ct; + + // Try to get the cachefile for this filenum. A missing file on recovery + // is not an error, but a missing file outside of recovery is. + r = toku_cachefile_of_filenum(ct, filenum, &cf); + if (r == ENOENT) { + assert(txn->for_recovery); + r = 0; + goto done; + } + assert_zero(r); + + // bug fix for #4718 + // bug was introduced in with fix for #3590 + // Before Maxwell (and fix for #3590), + // the recovery log was fsynced after the xcommit was loged but + // before we processed rollback entries and before we released + // the row locks (in the lock tree). Due to performance concerns, + // the fsync was moved to after the release of row locks, which comes + // after processing rollback entries. As a result, we may be unlinking a file + // here as part of a transactoin that may abort if we do not fsync the log. + // So, we fsync the log here. 
+ if (txn->logger) { + toku_logger_fsync_if_lsn_not_fsynced(txn->logger, txn->do_fsync_lsn); + } + + // Mark the cachefile as unlink on close. There are two ways for close + // to be eventually called on the cachefile: + // + // - when this txn completes, it will release a reference on the + // ft and close it, UNLESS it was pinned by checkpoint + // - if the cf was pinned by checkpoint, an unpin will release the + // final reference and call close. it must be the final reference + // since this txn has exclusive access to dictionary (by the + // directory row lock for its dname) and we would not get this + // far if there were other live handles. + toku_cachefile_unlink_on_close(cf); +done: + return r; +} + +int +toku_rollback_fdelete (FILENUM UU(filenum), + TOKUTXN UU(txn), + LSN UU(oplsn)) //oplsn is the lsn of the abort +{ + //Rolling back an fdelete is an no-op. + return 0; +} + +int +toku_commit_fcreate (FILENUM UU(filenum), + BYTESTRING UU(bs_fname), + TOKUTXN UU(txn), + LSN UU(oplsn)) +{ + return 0; +} + +int +toku_rollback_fcreate (FILENUM filenum, + BYTESTRING UU(bs_fname), + TOKUTXN txn, + LSN UU(oplsn)) +{ + int r; + CACHEFILE cf; + CACHETABLE ct = txn->logger->ct; + + // Try to get the cachefile for this filenum. A missing file on recovery + // is not an error, but a missing file outside of recovery is. + r = toku_cachefile_of_filenum(ct, filenum, &cf); + if (r == ENOENT) { + r = 0; + goto done; + } + assert_zero(r); + + // Mark the cachefile as unlink on close. There are two ways for close + // to be eventually called on the cachefile: + // + // - when this txn completes, it will release a reference on the + // ft and close it, UNLESS it was pinned by checkpoint + // - if the cf was pinned by checkpoint, an unpin will release the + // final reference and call close. 
it must be the final reference + // since this txn has exclusive access to dictionary (by the + // directory row lock for its dname) and we would not get this + // far if there were other live handles. + toku_cachefile_unlink_on_close(cf); +done: + return 0; +} + +int find_ft_from_filenum (const FT &ft, const FILENUM &filenum); +int find_ft_from_filenum (const FT &ft, const FILENUM &filenum) { + FILENUM thisfnum = toku_cachefile_filenum(ft->cf); + if (thisfnum.fileidfilenum.fileid) return +1; + return 0; +} + +// Input arg reset_root_xid_that_created true means that this operation has changed the definition of this dictionary. +// (Example use is for schema change committed with txn that inserted cmdupdatebroadcast message.) +// The oplsn argument is ZERO_LSN for normal operation. When this function is called for recovery, it has the LSN of +// the operation (insert, delete, update, etc). +static int do_insertion (enum ft_msg_type type, FILENUM filenum, BYTESTRING key, BYTESTRING *data, TOKUTXN txn, LSN oplsn, + bool reset_root_xid_that_created) { + int r = 0; + //printf("%s:%d committing insert %s %s\n", __FILE__, __LINE__, key.data, data.data); + FT ft = nullptr; + r = txn->open_fts.find_zero(filenum, &ft, NULL); + if (r == DB_NOTFOUND) { + assert(txn->for_recovery); + r = 0; + goto done; + } + assert(r==0); + + if (oplsn.lsn != 0) { // if we are executing the recovery algorithm + LSN treelsn = toku_ft_checkpoint_lsn(ft); + if (oplsn.lsn <= treelsn.lsn) { // if operation was already applied to tree ... + r = 0; // ... do not apply it again. + goto done; + } + } + + DBT key_dbt,data_dbt; + XIDS xids; + xids = toku_txn_get_xids(txn); + { + const DBT *kdbt = key.len > 0 ? toku_fill_dbt(&key_dbt, key.data, key.len) : + toku_init_dbt(&key_dbt); + const DBT *vdbt = data ? 
toku_fill_dbt(&data_dbt, data->data, data->len) : + toku_init_dbt(&data_dbt); + ft_msg msg(kdbt, vdbt, type, ZERO_MSN, xids); + + TXN_MANAGER txn_manager = toku_logger_get_txn_manager(txn->logger); + txn_manager_state txn_state_for_gc(txn_manager); + + TXNID oldest_referenced_xid_estimate = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager); + txn_gc_info gc_info(&txn_state_for_gc, + oldest_referenced_xid_estimate, + // no messages above us, we can implicitly promote uxrs based on this xid + oldest_referenced_xid_estimate, + !txn->for_recovery); + toku_ft_root_put_msg(ft, msg, &gc_info); + if (reset_root_xid_that_created) { + TXNID new_root_xid_that_created = toku_xids_get_outermost_xid(xids); + toku_reset_root_xid_that_created(ft, new_root_xid_that_created); + } + } +done: + return r; +} + + +static int do_nothing_with_filenum(TOKUTXN UU(txn), FILENUM UU(filenum)) { + return 0; +} + + +int toku_commit_cmdinsert (FILENUM filenum, BYTESTRING UU(key), TOKUTXN txn, LSN UU(oplsn)) { +#if TOKU_DO_COMMIT_CMD_INSERT + return do_insertion (FT_COMMIT_ANY, filenum, key, 0, txn, oplsn, false); +#else + return do_nothing_with_filenum(txn, filenum); +#endif +} + +int +toku_rollback_cmdinsert (FILENUM filenum, + BYTESTRING key, + TOKUTXN txn, + LSN oplsn) +{ + return do_insertion (FT_ABORT_ANY, filenum, key, 0, txn, oplsn, false); +} + +int +toku_commit_cmdupdate(FILENUM filenum, + BYTESTRING UU(key), + TOKUTXN txn, + LSN UU(oplsn)) +{ +#if TOKU_DO_COMMIT_CMD_UPDATE + return do_insertion(FT_COMMIT_ANY, filenum, key, 0, txn, oplsn, false); +#else + return do_nothing_with_filenum(txn, filenum); +#endif +} + +int +toku_rollback_cmdupdate(FILENUM filenum, + BYTESTRING key, + TOKUTXN txn, + LSN oplsn) +{ + return do_insertion(FT_ABORT_ANY, filenum, key, 0, txn, oplsn, false); +} + +int +toku_commit_cmdupdatebroadcast(FILENUM filenum, + bool is_resetting_op, + TOKUTXN txn, + LSN oplsn) +{ + // if is_resetting_op, reset root_xid_that_created in + // relevant ft. 
+ bool reset_root_xid_that_created = (is_resetting_op ? true : false); + const enum ft_msg_type msg_type = (is_resetting_op + ? FT_COMMIT_BROADCAST_ALL + : FT_COMMIT_BROADCAST_TXN); + BYTESTRING nullkey = { 0, NULL }; + return do_insertion(msg_type, filenum, nullkey, 0, txn, oplsn, reset_root_xid_that_created); +} + +int +toku_rollback_cmdupdatebroadcast(FILENUM filenum, + bool UU(is_resetting_op), + TOKUTXN txn, + LSN oplsn) +{ + BYTESTRING nullkey = { 0, NULL }; + return do_insertion(FT_ABORT_BROADCAST_TXN, filenum, nullkey, 0, txn, oplsn, false); +} + +int +toku_commit_cmddelete (FILENUM filenum, + BYTESTRING key, + TOKUTXN txn, + LSN oplsn) +{ +#if TOKU_DO_COMMIT_CMD_DELETE + return do_insertion (FT_COMMIT_ANY, filenum, key, 0, txn, oplsn, false); +#else + key = key; oplsn = oplsn; + return do_nothing_with_filenum(txn, filenum); +#endif +} + +int +toku_rollback_cmddelete (FILENUM filenum, + BYTESTRING key, + TOKUTXN txn, + LSN oplsn) +{ + return do_insertion (FT_ABORT_ANY, filenum, key, 0, txn, oplsn, false); +} + +static int +toku_apply_rollinclude (TXNID_PAIR xid, + uint64_t num_nodes, + BLOCKNUM spilled_head, + BLOCKNUM spilled_tail, + TOKUTXN txn, + LSN oplsn, + apply_rollback_item func) { + int r = 0; + struct roll_entry *item; + + BLOCKNUM next_log = spilled_tail; + uint64_t last_sequence = num_nodes; + + bool found_head = false; + assert(next_log.b != ROLLBACK_NONE.b); + while (next_log.b != ROLLBACK_NONE.b) { + //pin log + ROLLBACK_LOG_NODE log; + toku_get_and_pin_rollback_log(txn, next_log, &log); + toku_rollback_verify_contents(log, xid, last_sequence - 1); + last_sequence = log->sequence; + + toku_maybe_prefetch_previous_rollback_log(txn, log); + + while ((item=log->newest_logentry)) { + log->newest_logentry = item->prev; + r = func(txn, item, oplsn); + if (r!=0) return r; + } + if (next_log.b == spilled_head.b) { + assert(!found_head); + found_head = true; + assert(log->sequence == 0); + } + next_log = log->previous; + { + //Clean up transaction 
structure to prevent + //toku_txn_close from double-freeing + spilled_tail = next_log; + if (found_head) { + assert(next_log.b == ROLLBACK_NONE.b); + spilled_head = next_log; + } + } + toku_rollback_log_unpin_and_remove(txn, log); + } + return r; +} + +int +toku_commit_rollinclude (TXNID_PAIR xid, + uint64_t num_nodes, + BLOCKNUM spilled_head, + BLOCKNUM spilled_tail, + TOKUTXN txn, + LSN oplsn) { + int r; + r = toku_apply_rollinclude(xid, num_nodes, + spilled_head, + spilled_tail, + txn, oplsn, + toku_commit_rollback_item); + return r; +} + +int +toku_rollback_rollinclude (TXNID_PAIR xid, + uint64_t num_nodes, + BLOCKNUM spilled_head, + BLOCKNUM spilled_tail, + TOKUTXN txn, + LSN oplsn) { + int r; + r = toku_apply_rollinclude(xid, num_nodes, + spilled_head, + spilled_tail, + txn, oplsn, + toku_abort_rollback_item); + return r; +} + +int +toku_commit_load (FILENUM old_filenum, + BYTESTRING UU(new_iname), + TOKUTXN txn, + LSN UU(oplsn)) +{ + int r; + CACHEFILE old_cf; + CACHETABLE ct = txn->logger->ct; + + // To commit a dictionary load, we delete the old file + // + // Try to get the cachefile for the old filenum. A missing file on recovery + // is not an error, but a missing file outside of recovery is. + r = toku_cachefile_of_filenum(ct, old_filenum, &old_cf); + if (r == ENOENT) { + invariant(txn->for_recovery); + r = 0; + goto done; + } + lazy_assert(r == 0); + + // bug fix for #4718 + // bug was introduced in with fix for #3590 + // Before Maxwell (and fix for #3590), + // the recovery log was fsynced after the xcommit was loged but + // before we processed rollback entries and before we released + // the row locks (in the lock tree). Due to performance concerns, + // the fsync was moved to after the release of row locks, which comes + // after processing rollback entries. As a result, we may be unlinking a file + // here as part of a transactoin that may abort if we do not fsync the log. + // So, we fsync the log here. 
+ if (txn->logger) { + toku_logger_fsync_if_lsn_not_fsynced(txn->logger, txn->do_fsync_lsn); + } + + // TODO: Zardosht + // Explain why this condition is valid, because I forget. + if (!toku_cachefile_is_unlink_on_close(old_cf)) { + toku_cachefile_unlink_on_close(old_cf); + } +done: + return r; +} + +int +toku_rollback_load (FILENUM UU(old_filenum), + BYTESTRING new_iname, + TOKUTXN txn, + LSN UU(oplsn)) +{ + int r; + CACHEFILE new_cf; + CACHETABLE ct = txn->logger->ct; + + // To rollback a dictionary load, we delete the new file. + // Try to get the cachefile for the new fname. + char *fname_in_env = fixup_fname(&new_iname); + r = toku_cachefile_of_iname_in_env(ct, fname_in_env, &new_cf); + if (r == ENOENT) { + // It's possible the new iname was never created, so just try to + // unlink it if it's there and ignore the error if it's not. + char *fname_in_cwd = toku_cachetable_get_fname_in_cwd(ct, fname_in_env); + r = unlink(fname_in_cwd); + assert(r == 0 || get_error_errno() == ENOENT); + toku_free(fname_in_cwd); + r = 0; + } else { + assert_zero(r); + toku_cachefile_unlink_on_close(new_cf); + } + toku_free(fname_in_env); + return r; +} + +//2954 +int +toku_commit_hot_index (FILENUMS UU(hot_index_filenums), + TOKUTXN UU(txn), + LSN UU(oplsn)) +{ + // nothing + return 0; +} + +int +toku_rollback_hot_index (FILENUMS UU(hot_index_filenums), + TOKUTXN UU(txn), + LSN UU(oplsn)) +{ + return 0; +} + +int +toku_commit_dictionary_redirect (FILENUM UU(old_filenum), + FILENUM UU(new_filenum), + TOKUTXN UU(txn), + LSN UU(oplsn)) //oplsn is the lsn of the commit +{ + //Redirect only has meaning during normal operation (NOT during recovery). + if (!txn->for_recovery) { + //NO-OP + } + return 0; +} + +int +toku_rollback_dictionary_redirect (FILENUM old_filenum, + FILENUM new_filenum, + TOKUTXN txn, + LSN UU(oplsn)) //oplsn is the lsn of the abort +{ + int r = 0; + //Redirect only has meaning during normal operation (NOT during recovery). 
+ if (!txn->for_recovery) { + CACHEFILE new_cf = NULL; + r = toku_cachefile_of_filenum(txn->logger->ct, new_filenum, &new_cf); + assert(r == 0); + FT CAST_FROM_VOIDP(new_ft, toku_cachefile_get_userdata(new_cf)); + + CACHEFILE old_cf = NULL; + r = toku_cachefile_of_filenum(txn->logger->ct, old_filenum, &old_cf); + assert(r == 0); + FT CAST_FROM_VOIDP(old_ft, toku_cachefile_get_userdata(old_cf)); + + //Redirect back from new to old. + r = toku_dictionary_redirect_abort(old_ft, new_ft, txn); + assert(r==0); + } + return r; +} + +int +toku_commit_change_fdescriptor(FILENUM filenum, + BYTESTRING UU(old_descriptor), + TOKUTXN txn, + LSN UU(oplsn)) +{ + return do_nothing_with_filenum(txn, filenum); +} + +int +toku_rollback_change_fdescriptor(FILENUM filenum, + BYTESTRING old_descriptor, + TOKUTXN txn, + LSN UU(oplsn)) +{ + CACHEFILE cf; + int r; + r = toku_cachefile_of_filenum(txn->logger->ct, filenum, &cf); + if (r == ENOENT) { //Missing file on recovered transaction is not an error + assert(txn->for_recovery); + r = 0; + goto done; + } + // file must be open, because the txn that created it opened it and + // noted it, + assert(r == 0); + + FT ft; + ft = NULL; + r = txn->open_fts.find_zero(filenum, &ft, NULL); + assert(r == 0); + + DESCRIPTOR_S d; + toku_fill_dbt(&d.dbt, old_descriptor.data, old_descriptor.len); + toku_ft_update_descriptor(ft, &d); +done: + return r; +} + + + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/txn.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/txn.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/txn.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/txn.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,816 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 
2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. 
+ + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ + +#include + +#include "ft/cachetable/checkpoint.h" +#include "ft/ft.h" +#include "ft/logger/log-internal.h" +#include "ft/ule.h" +#include "ft/txn/rollback-apply.h" +#include "ft/txn/txn.h" +#include "ft/txn/txn_manager.h" +#include "util/status.h" + +/////////////////////////////////////////////////////////////////////////////////// +// Engine status +// +// Status is intended for display to humans to help understand system behavior. +// It does not need to be perfectly thread-safe. + +static TXN_STATUS_S txn_status; + +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(txn_status, k, c, t, "txn: " l, inc) + +void +txn_status_init(void) { + // Note, this function initializes the keyname, type, and legend fields. + // Value fields are initialized to zero by compiler. + STATUS_INIT(TXN_BEGIN, TXN_BEGIN, PARCOUNT, "begin", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(TXN_READ_BEGIN, TXN_BEGIN_READ_ONLY, PARCOUNT, "begin read only", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(TXN_COMMIT, TXN_COMMITS, PARCOUNT, "successful commits", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(TXN_ABORT, TXN_ABORTS, PARCOUNT, "aborts", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + txn_status.initialized = true; +} + +void txn_status_destroy(void) { + for (int i = 0; i < TXN_STATUS_NUM_ROWS; ++i) { + if (txn_status.status[i].type == PARCOUNT) { + destroy_partitioned_counter(txn_status.status[i].value.parcount); + } + } +} + +#undef STATUS_INIT + +#define STATUS_INC(x, d) increment_partitioned_counter(txn_status.status[x].value.parcount, d) + +void +toku_txn_get_status(TXN_STATUS s) { + *s = txn_status; +} + +void +toku_txn_lock(TOKUTXN txn) +{ + toku_mutex_lock(&txn->txn_lock); +} + +void +toku_txn_unlock(TOKUTXN txn) +{ + toku_mutex_unlock(&txn->txn_lock); +} + +uint64_t +toku_txn_get_root_id(TOKUTXN txn) +{ + return txn->txnid.parent_id64; +} + +bool txn_declared_read_only(TOKUTXN txn) { + return txn->declared_read_only; +} + +int +toku_txn_begin_txn ( + 
DB_TXN *container_db_txn, + TOKUTXN parent_tokutxn, + TOKUTXN *tokutxn, + TOKULOGGER logger, + TXN_SNAPSHOT_TYPE snapshot_type, + bool read_only + ) +{ + int r = toku_txn_begin_with_xid( + parent_tokutxn, + tokutxn, + logger, + TXNID_PAIR_NONE, + snapshot_type, + container_db_txn, + false, // for_recovery + read_only + ); + return r; +} + + +static void +txn_create_xids(TOKUTXN txn, TOKUTXN parent) { + XIDS xids; + XIDS parent_xids; + if (parent == NULL) { + parent_xids = toku_xids_get_root_xids(); + } else { + parent_xids = parent->xids; + } + toku_xids_create_unknown_child(parent_xids, &xids); + TXNID finalized_xid = (parent == NULL) ? txn->txnid.parent_id64 : txn->txnid.child_id64; + toku_xids_finalize_with_child(xids, finalized_xid); + txn->xids = xids; +} + +// Allocate and initialize a txn +static void toku_txn_create_txn(TOKUTXN *txn_ptr, TOKUTXN parent, TOKULOGGER logger, TXN_SNAPSHOT_TYPE snapshot_type, DB_TXN *container_db_txn, bool for_checkpoint, bool read_only); + +int +toku_txn_begin_with_xid ( + TOKUTXN parent, + TOKUTXN *txnp, + TOKULOGGER logger, + TXNID_PAIR xid, + TXN_SNAPSHOT_TYPE snapshot_type, + DB_TXN *container_db_txn, + bool for_recovery, + bool read_only + ) +{ + int r = 0; + TOKUTXN txn; + // check for case where we are trying to + // create too many nested transactions + if (!read_only && parent && !toku_xids_can_create_child(parent->xids)) { + r = EINVAL; + goto exit; + } + if (read_only && parent) { + invariant(txn_declared_read_only(parent)); + } + toku_txn_create_txn(&txn, parent, logger, snapshot_type, container_db_txn, for_recovery, read_only); + // txnid64, snapshot_txnid64 + // will be set in here. 
+ if (for_recovery) { + if (parent == NULL) { + invariant(xid.child_id64 == TXNID_NONE); + toku_txn_manager_start_txn_for_recovery( + txn, + logger->txn_manager, + xid.parent_id64 + ); + } + else { + parent->child_manager->start_child_txn_for_recovery(txn, parent, xid); + } + } + else { + assert(xid.parent_id64 == TXNID_NONE); + assert(xid.child_id64 == TXNID_NONE); + if (parent == NULL) { + toku_txn_manager_start_txn( + txn, + logger->txn_manager, + snapshot_type, + read_only + ); + } + else { + parent->child_manager->start_child_txn(txn, parent); + toku_txn_manager_handle_snapshot_create_for_child_txn( + txn, + logger->txn_manager, + snapshot_type + ); + } + } + if (!read_only) { + // this call will set txn->xids + txn_create_xids(txn, parent); + } + *txnp = txn; +exit: + return r; +} + +DB_TXN * +toku_txn_get_container_db_txn (TOKUTXN tokutxn) { + DB_TXN * container = tokutxn->container_db_txn; + return container; +} + +void toku_txn_set_container_db_txn (TOKUTXN tokutxn, DB_TXN*container) { + tokutxn->container_db_txn = container; +} + +static void invalidate_xa_xid (TOKU_XA_XID *xid) { + TOKU_ANNOTATE_NEW_MEMORY(xid, sizeof(*xid)); // consider it to be all invalid for valgrind + xid->formatID = -1; // According to the XA spec, -1 means "invalid data" +} + +static void toku_txn_create_txn ( + TOKUTXN *tokutxn, + TOKUTXN parent_tokutxn, + TOKULOGGER logger, + TXN_SNAPSHOT_TYPE snapshot_type, + DB_TXN *container_db_txn, + bool for_recovery, + bool read_only + ) +{ + assert(logger->rollback_cachefile); + + omt open_fts; + open_fts.create_no_array(); + + struct txn_roll_info roll_info = { + .num_rollback_nodes = 0, + .num_rollentries = 0, + .num_rollentries_processed = 0, + .rollentry_raw_count = 0, + .spilled_rollback_head = ROLLBACK_NONE, + .spilled_rollback_tail = ROLLBACK_NONE, + .current_rollback = ROLLBACK_NONE, + }; + +static txn_child_manager tcm; + + struct tokutxn new_txn = { + .txnid = {.parent_id64 = TXNID_NONE, .child_id64 = TXNID_NONE }, + 
.snapshot_txnid64 = TXNID_NONE, + .snapshot_type = for_recovery ? TXN_SNAPSHOT_NONE : snapshot_type, + .for_recovery = for_recovery, + .logger = logger, + .parent = parent_tokutxn, + .child = NULL, + .child_manager_s = tcm, + .child_manager = NULL, + .container_db_txn = container_db_txn, + .live_root_txn_list = nullptr, + .xids = NULL, + .snapshot_next = NULL, + .snapshot_prev = NULL, + .begin_was_logged = false, + .declared_read_only = read_only, + .do_fsync = false, + .force_fsync_on_commit = false, + .do_fsync_lsn = ZERO_LSN, + .xa_xid = {0}, + .progress_poll_fun = NULL, + .progress_poll_fun_extra = NULL, + .txn_lock = ZERO_MUTEX_INITIALIZER, + .open_fts = open_fts, + .roll_info = roll_info, + .state_lock = ZERO_MUTEX_INITIALIZER, + .state_cond = ZERO_COND_INITIALIZER, + .state = TOKUTXN_LIVE, + .num_pin = 0, + .client_id = 0, + }; + + TOKUTXN result = NULL; + XMEMDUP(result, &new_txn); + invalidate_xa_xid(&result->xa_xid); + if (parent_tokutxn == NULL) { + result->child_manager = &result->child_manager_s; + result->child_manager->init(result); + } + else { + result->child_manager = parent_tokutxn->child_manager; + } + + toku_mutex_init(&result->txn_lock, nullptr); + + toku_pthread_mutexattr_t attr; + toku_mutexattr_init(&attr); + toku_mutexattr_settype(&attr, TOKU_MUTEX_ADAPTIVE); + toku_mutex_init(&result->state_lock, &attr); + toku_mutexattr_destroy(&attr); + + toku_cond_init(&result->state_cond, nullptr); + + *tokutxn = result; + + if (read_only) { + STATUS_INC(TXN_READ_BEGIN, 1); + } + else { + STATUS_INC(TXN_BEGIN, 1); + } +} + +void +toku_txn_update_xids_in_txn(TOKUTXN txn, TXNID xid) +{ + // these should not have been set yet + invariant(txn->txnid.parent_id64 == TXNID_NONE); + invariant(txn->txnid.child_id64 == TXNID_NONE); + txn->txnid.parent_id64 = xid; + txn->txnid.child_id64 = TXNID_NONE; +} + +//Used on recovery to recover a transaction. 
+int +toku_txn_load_txninfo (TOKUTXN txn, struct txninfo *info) { + txn->roll_info.rollentry_raw_count = info->rollentry_raw_count; + uint32_t i; + for (i = 0; i < info->num_fts; i++) { + FT ft = info->open_fts[i]; + toku_txn_maybe_note_ft(txn, ft); + } + txn->force_fsync_on_commit = info->force_fsync_on_commit; + txn->roll_info.num_rollback_nodes = info->num_rollback_nodes; + txn->roll_info.num_rollentries = info->num_rollentries; + + txn->roll_info.spilled_rollback_head = info->spilled_rollback_head; + txn->roll_info.spilled_rollback_tail = info->spilled_rollback_tail; + txn->roll_info.current_rollback = info->current_rollback; + return 0; +} + +int toku_txn_commit_txn(TOKUTXN txn, int nosync, + TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra) +// Effect: Doesn't close the txn, just performs the commit operations. +// If release_multi_operation_client_lock is true, then unlock that lock (even if an error path is taken) +{ + return toku_txn_commit_with_lsn(txn, nosync, ZERO_LSN, + poll, poll_extra); +} + +struct xcommit_info { + int r; + TOKUTXN txn; +}; + +static void txn_note_commit(TOKUTXN txn) { + // Purpose: + // Delay until any indexer is done pinning this transaction. + // Update status of a transaction from live->committing (or prepared->committing) + // Do so in a thread-safe manner that does not conflict with hot indexing or + // begin checkpoint. + if (toku_txn_is_read_only(txn)) { + // Neither hot indexing nor checkpoint do any work with readonly txns, + // so we can skip taking the txn_manager lock here. 
+ invariant(txn->state==TOKUTXN_LIVE); + txn->state = TOKUTXN_COMMITTING; + goto done; + } + if (txn->state==TOKUTXN_PREPARING) { + invalidate_xa_xid(&txn->xa_xid); + } + // for hot indexing, if hot index is processing + // this transaction in some leafentry, then we cannot change + // the state to commit or abort until + // hot index is done with that leafentry + toku_txn_lock_state(txn); + while (txn->num_pin > 0) { + toku_cond_wait( + &txn->state_cond, + &txn->state_lock + ); + } + txn->state = TOKUTXN_COMMITTING; + toku_txn_unlock_state(txn); +done: + return; +} + +int toku_txn_commit_with_lsn(TOKUTXN txn, int nosync, LSN oplsn, + TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra) +{ + // there should be no child when we commit or abort a TOKUTXN + invariant(txn->child == NULL); + txn_note_commit(txn); + + // Child transactions do not actually 'commit'. They promote their + // changes to parent, so no need to fsync if this txn has a parent. The + // do_sync state is captured in the txn for txn_maybe_fsync_log function + // Additionally, if the transaction was first prepared, we do not need to + // fsync because the prepare caused an fsync of the log. In this case, + // we do not need an additional of the log. We rely on the client running + // recovery to properly recommit this transaction if the commit + // does not make it to disk. In the case of MySQL, that would be the + // binary log. + txn->do_fsync = !txn->parent && (txn->force_fsync_on_commit || (!nosync && txn->roll_info.num_rollentries>0)); + + txn->progress_poll_fun = poll; + txn->progress_poll_fun_extra = poll_extra; + + if (!toku_txn_is_read_only(txn)) { + toku_log_xcommit(txn->logger, &txn->do_fsync_lsn, 0, txn, txn->txnid); + } + // If !txn->begin_was_logged, we could skip toku_rollback_commit + // but it's cheap (only a number of function calls that return immediately) + // since there were no writes. 
Skipping it would mean we would need to be careful + // in case we added any additional required cleanup into those functions in the future. + int r = toku_rollback_commit(txn, oplsn); + STATUS_INC(TXN_COMMIT, 1); + return r; +} + +int toku_txn_abort_txn(TOKUTXN txn, + TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra) +// Effect: Doesn't close the txn, just performs the abort operations. +// If release_multi_operation_client_lock is true, then unlock that lock (even if an error path is taken) +{ + return toku_txn_abort_with_lsn(txn, ZERO_LSN, poll, poll_extra); +} + +static void txn_note_abort(TOKUTXN txn) { + // Purpose: + // Delay until any indexer is done pinning this transaction. + // Update status of a transaction from live->aborting (or prepared->aborting) + // Do so in a thread-safe manner that does not conflict with hot indexing or + // begin checkpoint. + if (toku_txn_is_read_only(txn)) { + // Neither hot indexing nor checkpoint do any work with readonly txns, + // so we can skip taking the state lock here. 
+ invariant(txn->state==TOKUTXN_LIVE); + txn->state = TOKUTXN_ABORTING; + goto done; + } + if (txn->state==TOKUTXN_PREPARING) { + invalidate_xa_xid(&txn->xa_xid); + } + // for hot indexing, if hot index is processing + // this transaction in some leafentry, then we cannot change + // the state to commit or abort until + // hot index is done with that leafentry + toku_txn_lock_state(txn); + while (txn->num_pin > 0) { + toku_cond_wait( + &txn->state_cond, + &txn->state_lock + ); + } + txn->state = TOKUTXN_ABORTING; + toku_txn_unlock_state(txn); +done: + return; +} + +int toku_txn_abort_with_lsn(TOKUTXN txn, LSN oplsn, + TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra) +{ + // there should be no child when we commit or abort a TOKUTXN + invariant(txn->child == NULL); + txn_note_abort(txn); + + txn->progress_poll_fun = poll; + txn->progress_poll_fun_extra = poll_extra; + txn->do_fsync = false; + + if (!toku_txn_is_read_only(txn)) { + toku_log_xabort(txn->logger, &txn->do_fsync_lsn, 0, txn, txn->txnid); + } + // If !txn->begin_was_logged, we could skip toku_rollback_abort + // but it's cheap (only a number of function calls that return immediately) + // since there were no writes. Skipping it would mean we would need to be careful + // in case we added any additional required cleanup into those functions in the future. + int r = toku_rollback_abort(txn, oplsn); + STATUS_INC(TXN_ABORT, 1); + return r; +} + +static void copy_xid (TOKU_XA_XID *dest, TOKU_XA_XID *source) { + TOKU_ANNOTATE_NEW_MEMORY(dest, sizeof(*dest)); + dest->formatID = source->formatID; + dest->gtrid_length = source->gtrid_length; + dest->bqual_length = source->bqual_length; + memcpy(dest->data, source->data, source->gtrid_length+source->bqual_length); +} + +void toku_txn_prepare_txn (TOKUTXN txn, TOKU_XA_XID *xa_xid) { + if (txn->parent || toku_txn_is_read_only(txn)) { + // We do not prepare children. + // + // Readonly transactions do the same if they commit or abort, so + // XA guarantees are free. 
No need to pay for overhead of prepare. + return; + } + assert(txn->state==TOKUTXN_LIVE); + // This state transition must be protected against begin_checkpoint + // Therefore, the caller must have the mo lock held + toku_txn_lock_state(txn); + txn->state = TOKUTXN_PREPARING; + toku_txn_unlock_state(txn); + // Do we need to do an fsync? + txn->do_fsync = (txn->force_fsync_on_commit || txn->roll_info.num_rollentries>0); + copy_xid(&txn->xa_xid, xa_xid); + // This list will go away with #4683, so we wn't need the ydb lock for this anymore. + toku_log_xprepare(txn->logger, &txn->do_fsync_lsn, 0, txn, txn->txnid, xa_xid); +} + +void toku_txn_get_prepared_xa_xid (TOKUTXN txn, TOKU_XA_XID *xid) { + copy_xid(xid, &txn->xa_xid); +} + +int toku_logger_recover_txn (TOKULOGGER logger, struct tokulogger_preplist preplist[/*count*/], long count, /*out*/ long *retp, uint32_t flags) { + return toku_txn_manager_recover_root_txn( + logger->txn_manager, + preplist, + count, + retp, + flags + ); +} + +void toku_txn_maybe_fsync_log(TOKULOGGER logger, LSN do_fsync_lsn, bool do_fsync) { + if (logger && do_fsync) { + toku_logger_fsync_if_lsn_not_fsynced(logger, do_fsync_lsn); + } +} + +void toku_txn_get_fsync_info(TOKUTXN ttxn, bool* do_fsync, LSN* do_fsync_lsn) { + *do_fsync = ttxn->do_fsync; + *do_fsync_lsn = ttxn->do_fsync_lsn; +} + +void toku_txn_close_txn(TOKUTXN txn) { + toku_txn_complete_txn(txn); + toku_txn_destroy_txn(txn); +} + +int remove_txn (const FT &h, const uint32_t UU(idx), TOKUTXN const txn); +int remove_txn (const FT &h, const uint32_t UU(idx), TOKUTXN const UU(txn)) +// Effect: This function is called on every open FT that a transaction used. +// This function removes the transaction from that FT. +{ + toku_ft_remove_txn_ref(h); + + return 0; +} + +// for every ft in txn, remove it. 
+static void note_txn_closing (TOKUTXN txn) { + txn->open_fts.iterate(txn); +} + +void toku_txn_complete_txn(TOKUTXN txn) { + assert(txn->roll_info.spilled_rollback_head.b == ROLLBACK_NONE.b); + assert(txn->roll_info.spilled_rollback_tail.b == ROLLBACK_NONE.b); + assert(txn->roll_info.current_rollback.b == ROLLBACK_NONE.b); + assert(txn->num_pin == 0); + assert(txn->state == TOKUTXN_COMMITTING || txn->state == TOKUTXN_ABORTING || txn->state == TOKUTXN_PREPARING); + if (txn->parent) { + toku_txn_manager_handle_snapshot_destroy_for_child_txn( + txn, + txn->logger->txn_manager, + txn->snapshot_type + ); + txn->parent->child_manager->finish_child_txn(txn); + } + else { + toku_txn_manager_finish_txn(txn->logger->txn_manager, txn); + txn->child_manager->destroy(); + } + // note that here is another place we depend on + // this function being called with the multi operation lock + note_txn_closing(txn); +} + +void toku_txn_destroy_txn(TOKUTXN txn) { + txn->open_fts.destroy(); + if (txn->xids) { + toku_xids_destroy(&txn->xids); + } + toku_mutex_destroy(&txn->txn_lock); + toku_mutex_destroy(&txn->state_lock); + toku_cond_destroy(&txn->state_cond); + toku_free(txn); +} + +XIDS toku_txn_get_xids (TOKUTXN txn) { + if (txn==0) return toku_xids_get_root_xids(); + else return txn->xids; +} + +void toku_txn_force_fsync_on_commit(TOKUTXN txn) { + txn->force_fsync_on_commit = true; +} + +TXNID toku_get_oldest_in_live_root_txn_list(TOKUTXN txn) { + TXNID xid; + if (txn->live_root_txn_list->size()>0) { + int r = txn->live_root_txn_list->fetch(0, &xid); + assert_zero(r); + } + else { + xid = TXNID_NONE; + } + return xid; +} + +bool toku_is_txn_in_live_root_txn_list(const xid_omt_t &live_root_txn_list, TXNID xid) { + TXNID txnid; + bool retval = false; + int r = live_root_txn_list.find_zero(xid, &txnid, nullptr); + if (r==0) { + invariant(txnid == xid); + retval = true; + } + else { + invariant(r==DB_NOTFOUND); + } + return retval; +} + +TOKUTXN_STATE +toku_txn_get_state(TOKUTXN txn) { 
+ return txn->state; +} + +static void +maybe_log_begin_txn_for_write_operation_unlocked(TOKUTXN txn) { + // We now hold the lock. + if (txn->begin_was_logged) { + return; + } + TOKUTXN parent; + parent = txn->parent; + TXNID_PAIR xid; + xid = txn->txnid; + TXNID_PAIR pxid; + pxid = TXNID_PAIR_NONE; + if (parent) { + // Recursively log parent first if necessary. + // Transactions cannot do work if they have children, + // so the lowest level child's lock is sufficient for ancestors. + maybe_log_begin_txn_for_write_operation_unlocked(parent); + pxid = parent->txnid; + } + + toku_log_xbegin(txn->logger, NULL, 0, xid, pxid); + txn->begin_was_logged = true; +} + +void +toku_maybe_log_begin_txn_for_write_operation(TOKUTXN txn) { + toku_txn_lock(txn); + maybe_log_begin_txn_for_write_operation_unlocked(txn); + toku_txn_unlock(txn); +} + +bool +toku_txn_is_read_only(TOKUTXN txn) { + // No need to recursively check children because parents are + // recursively logged before children. + if (!txn->begin_was_logged) { + // Did no work. + invariant(txn->roll_info.num_rollentries == 0); + invariant(txn->do_fsync_lsn.lsn == ZERO_LSN.lsn); + invariant(txn->open_fts.size() == 0); + invariant(txn->num_pin==0); + return true; + } + return false; +} + +// needed for hot indexing +void toku_txn_lock_state(TOKUTXN txn) { + toku_mutex_lock(&txn->state_lock); +} +void toku_txn_unlock_state(TOKUTXN txn){ + toku_mutex_unlock(&txn->state_lock); +} + + +// prevents a client thread from transitioning txn from LIVE|PREPARING -> COMMITTING|ABORTING +// hot indexing may need a transactions to stay in the LIVE|PREPARING state while it processes +// a leafentry. 
+void toku_txn_pin_live_txn_unlocked(TOKUTXN txn) { + assert(txn->state == TOKUTXN_LIVE || txn->state == TOKUTXN_PREPARING); + assert(!toku_txn_is_read_only(txn)); + txn->num_pin++; +} + +// allows a client thread to go back to being able to transition txn +// from LIVE|PREPARING -> COMMITTING|ABORTING +void toku_txn_unpin_live_txn(TOKUTXN txn) { + assert(txn->state == TOKUTXN_LIVE || txn->state == TOKUTXN_PREPARING); + assert(txn->num_pin > 0); + toku_txn_lock_state(txn); + txn->num_pin--; + if (txn->num_pin == 0) { + toku_cond_broadcast(&txn->state_cond); + } + toku_txn_unlock_state(txn); +} + +bool toku_txn_has_spilled_rollback(TOKUTXN txn) { + return txn_has_spilled_rollback_logs(txn); +} + +uint64_t toku_txn_get_client_id(TOKUTXN txn) { + return txn->client_id; +} + +void toku_txn_set_client_id(TOKUTXN txn, uint64_t client_id) { + txn->client_id = client_id; +} + +int toku_txn_reads_txnid(TXNID txnid, TOKUTXN txn) { + int r = 0; + TXNID oldest_live_in_snapshot = toku_get_oldest_in_live_root_txn_list(txn); + if (oldest_live_in_snapshot == TXNID_NONE && txnid < txn->snapshot_txnid64) { + r = TOKUDB_ACCEPT; + } else if (txnid < oldest_live_in_snapshot || txnid == txn->txnid.parent_id64) { + r = TOKUDB_ACCEPT; + } else if (txnid > txn->snapshot_txnid64 || toku_is_txn_in_live_root_txn_list(*txn->live_root_txn_list, txnid)) { + r = 0; + } else { + r = TOKUDB_ACCEPT; + } + return r; +} + +int toku_txn_discard_txn(TOKUTXN txn) { + int r = toku_rollback_discard(txn); + return r; +} + +#include +void __attribute__((__constructor__)) toku_txn_status_helgrind_ignore(void); +void toku_txn_status_helgrind_ignore(void) { + TOKU_VALGRIND_HG_DISABLE_CHECKING(&txn_status, sizeof txn_status); +} + +#undef STATUS_VALUE diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/txn_child_manager.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/txn_child_manager.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/txn_child_manager.cc 1970-01-01 00:00:00.000000000 +0000 
+++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/txn_child_manager.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,196 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id: rollback.cc 49033 2012-10-17 18:48:30Z zardosht $" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include + +#include "ft/logger/log-internal.h" +#include "ft/txn/txn_child_manager.h" + +// +// initialized a txn_child_manager, +// when called, root->txnid.parent_id64 may not yet be set +// +void txn_child_manager::init(TOKUTXN root) { + invariant(root->txnid.child_id64 == TXNID_NONE); + invariant(root->parent == NULL); + m_root = root; + m_last_xid = TXNID_NONE; + ZERO_STRUCT(m_mutex); + + toku_pthread_mutexattr_t attr; + toku_mutexattr_init(&attr); + toku_mutexattr_settype(&attr, TOKU_MUTEX_ADAPTIVE); + toku_mutex_init(&m_mutex, &attr); + toku_mutexattr_destroy(&attr); +} + +void txn_child_manager::destroy() { + toku_mutex_destroy(&m_mutex); +} + +void txn_child_manager::start_child_txn_for_recovery(TOKUTXN child, TOKUTXN parent, TXNID_PAIR txnid) { + invariant(parent->txnid.parent_id64 == m_root->txnid.parent_id64); + invariant(txnid.parent_id64 == m_root->txnid.parent_id64); + + child->txnid = txnid; + toku_mutex_lock(&m_mutex); + if (txnid.child_id64 > m_last_xid) { + m_last_xid = txnid.child_id64; + } + parent->child = child; + toku_mutex_unlock(&m_mutex); +} + +void txn_child_manager::start_child_txn(TOKUTXN child, TOKUTXN parent) { + invariant(parent->txnid.parent_id64 == m_root->txnid.parent_id64); + child->txnid.parent_id64 = m_root->txnid.parent_id64; + toku_mutex_lock(&m_mutex); + + ++m_last_xid; + // Here we ensure that the child_id64 is never equal to the parent_id64 + // We do this to make this feature work more easily with the XIDs + // struct and message application. The XIDs struct stores the parent id + // as the first TXNID, and subsequent TXNIDs store child ids. So, if we + // have a case where the parent id is the same as the child id, we will + // have to do some tricky maneuvering in the message application code + // in ule.cc. So, to lessen the probability of bugs, we ensure that the + // parent id is not the same as the child id. 
+ if (m_last_xid == m_root->txnid.parent_id64) { + ++m_last_xid; + } + child->txnid.child_id64 = m_last_xid; + + parent->child = child; + toku_mutex_unlock(&m_mutex); +} + +void txn_child_manager::finish_child_txn(TOKUTXN child) { + invariant(child->txnid.parent_id64 == m_root->txnid.parent_id64); + toku_mutex_lock(&m_mutex); + child->parent->child = NULL; + toku_mutex_unlock(&m_mutex); +} + +void txn_child_manager::suspend() { + toku_mutex_lock(&m_mutex); +} + +void txn_child_manager::resume() { + toku_mutex_unlock(&m_mutex); +} + +void txn_child_manager::find_tokutxn_by_xid_unlocked(TXNID_PAIR xid, TOKUTXN* result) { + invariant(xid.parent_id64 == m_root->txnid.parent_id64); + TOKUTXN curr_txn = m_root; + while (curr_txn != NULL) { + if (xid.child_id64 == curr_txn->txnid.child_id64) { + *result = curr_txn; + break; + } + curr_txn = curr_txn->child; + } +} + +int txn_child_manager::iterate(txn_mgr_iter_callback cb, void* extra) { + TOKUTXN curr_txn = m_root; + int ret = 0; + toku_mutex_lock(&m_mutex); + while (curr_txn != NULL) { + ret = cb(curr_txn, extra); + if (ret != 0) { + break; + } + curr_txn = curr_txn->child; + } + toku_mutex_unlock(&m_mutex); + return ret; +} + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/txn_child_manager.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/txn_child_manager.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/txn_child_manager.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/txn_child_manager.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,120 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id: txn/rollback.h 49033 2012-10-17 18:48:30Z zardosht $" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and 
provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. 
+ + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +// We should be including ft/txn/txn.h here but that header includes this one, +// so we don't. +#include "portability/toku_pthread.h" + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
+#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +class txn_child_manager { +public: + void init (TOKUTXN root); + void destroy(); + void start_child_txn_for_recovery(TOKUTXN child, TOKUTXN parent, TXNID_PAIR txnid); + void start_child_txn(TOKUTXN child, TOKUTXN parent); + void finish_child_txn(TOKUTXN child); + void suspend(); + void resume(); + void find_tokutxn_by_xid_unlocked(TXNID_PAIR xid, TOKUTXN* result); + int iterate(int (*cb)(TOKUTXN txn, void *extra), void* extra); + +private: + TXNID m_last_xid; + TOKUTXN m_root; + toku_mutex_t m_mutex; + + friend class txn_child_manager_unit_test; +}; + + +ENSURE_POD(txn_child_manager); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/txn.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/txn.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/txn.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/txn.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,435 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include "portability/toku_stdint.h" + +#include "ft/txn/txn_state.h" +#include "ft/serialize/block_table.h" +#include "util/omt.h" + +typedef uint64_t TXNID; + +typedef struct tokutxn *TOKUTXN; + +#define TXNID_NONE_LIVING ((TXNID)0) +#define TXNID_NONE ((TXNID)0) +#define TXNID_MAX ((TXNID)-1) + +typedef struct txnid_pair_s { + TXNID parent_id64; + TXNID child_id64; +} TXNID_PAIR; + +static const TXNID_PAIR TXNID_PAIR_NONE = { .parent_id64 = TXNID_NONE, .child_id64 = TXNID_NONE }; + +// We include the child manager here beacuse it uses the TXNID / TOKUTXN types +#include "ft/txn/txn_child_manager.h" + +/* Log Sequence Number (LSN) + * Make the LSN be a struct instead of an integer so that we get better type checking. */ +typedef struct __toku_lsn { uint64_t lsn; } LSN; +static const LSN ZERO_LSN = { .lsn = 0 }; +static const LSN MAX_LSN = { .lsn = UINT64_MAX }; + +// +// Types of snapshots that can be taken by a tokutxn +// - TXN_SNAPSHOT_NONE: means that there is no snapshot. Reads do not use snapshot reads. +// used for SERIALIZABLE and READ UNCOMMITTED +// - TXN_SNAPSHOT_ROOT: means that all tokutxns use their root transaction's snapshot +// used for REPEATABLE READ +// - TXN_SNAPSHOT_CHILD: means that each child tokutxn creates its own snapshot +// used for READ COMMITTED +// + +typedef enum __TXN_SNAPSHOT_TYPE { + TXN_SNAPSHOT_NONE=0, + TXN_SNAPSHOT_ROOT=1, + TXN_SNAPSHOT_CHILD=2 +} TXN_SNAPSHOT_TYPE; + +typedef toku::omt txn_omt_t; +typedef toku::omt xid_omt_t; +typedef toku::omt rx_omt_t; + +inline bool txn_pair_is_none(TXNID_PAIR txnid) { + return txnid.parent_id64 == TXNID_NONE && txnid.child_id64 == TXNID_NONE; +} + +inline bool txn_needs_snapshot(TXN_SNAPSHOT_TYPE snapshot_type, struct tokutxn *parent) { + // we need a snapshot if the snapshot type is a child or + // if the snapshot type is root and we have no parent. 
+ // Cases that we don't need a snapshot: when snapshot type is NONE + // or when it is ROOT and we have a parent + return (snapshot_type != TXN_SNAPSHOT_NONE && (parent==NULL || snapshot_type == TXN_SNAPSHOT_CHILD)); +} + +struct tokulogger; + +struct txn_roll_info { + // these are number of rollback nodes and rollback entries for this txn. + // + // the current rollback node below has sequence number num_rollback_nodes - 1 + // (because they are numbered 0...num-1). often, the current rollback is + // already set to this block num, which means it exists and is available to + // log some entries. if the current rollback is NONE and the number of + // rollback nodes for this transaction is non-zero, then we will use + // the number of rollback nodes to know which sequence number to assign + // to a new one we create + uint64_t num_rollback_nodes; + uint64_t num_rollentries; + uint64_t num_rollentries_processed; + uint64_t rollentry_raw_count; // the total count of every byte in the transaction and all its children. + + // spilled rollback nodes are rollback nodes that were gorged by this + // transaction, retired, and saved in a list. + + // the spilled rollback head is the block number of the first rollback node + // that makes up the rollback log chain + BLOCKNUM spilled_rollback_head; + + // the spilled rollback is the block number of the last rollback node that + // makes up the rollback log chain. + BLOCKNUM spilled_rollback_tail; + + // the current rollback node block number we may use. if this is ROLLBACK_NONE, + // then we need to create one and set it here before using it. 
+ BLOCKNUM current_rollback; +}; + +struct tokutxn { + // These don't change after create: + + TXNID_PAIR txnid; + + uint64_t snapshot_txnid64; // this is the lsn of the snapshot + const TXN_SNAPSHOT_TYPE snapshot_type; + const bool for_recovery; + struct tokulogger *const logger; + struct tokutxn *const parent; + // The child txn is protected by the child_txn_manager lock + // and by the user contract. The user contract states (and is + // enforced at the ydb layer) that a child txn should not be created + // while another child exists. The txn_child_manager will protect + // other threads from trying to read this value while another + // thread commits/aborts the child + struct tokutxn *child; + + // statically allocated child manager, if this + // txn is a root txn, this manager will be used and set to + // child_manager for this transaction and all of its children + txn_child_manager child_manager_s; + + // child manager for this transaction, all of its children, + // and all of its ancestors + txn_child_manager* child_manager; + + // These don't change but they're created in a way that's hard to make + // strictly const. + DB_TXN *container_db_txn; // reference to DB_TXN that contains this tokutxn + xid_omt_t *live_root_txn_list; // the root txns live when the root ancestor (self if a root) started. + struct XIDS_S *xids; // Represents the xid list + + struct tokutxn *snapshot_next; + struct tokutxn *snapshot_prev; + + bool begin_was_logged; + bool declared_read_only; // true if the txn was declared read only when began + + // These are not read until a commit, prepare, or abort starts, and + // they're "monotonic" (only go false->true) during operation: + bool do_fsync; + bool force_fsync_on_commit; //This transaction NEEDS an fsync once (if) it commits. 
(commit means root txn) + + // Not used until commit, prepare, or abort starts: + LSN do_fsync_lsn; + TOKU_XA_XID xa_xid; // for prepared transactions + TXN_PROGRESS_POLL_FUNCTION progress_poll_fun; + void *progress_poll_fun_extra; + + toku_mutex_t txn_lock; + // Protected by the txn lock: + toku::omt open_fts; // a collection of the fts that we touched. Indexed by filenum. + struct txn_roll_info roll_info; // Info used to manage rollback entries + + // mutex that protects the transition of the state variable + // the rest of the variables are used by the txn code and + // hot indexing to ensure that when hot indexing is processing a + // leafentry, a TOKUTXN cannot dissappear or change state out from + // underneath it + toku_mutex_t state_lock; + toku_cond_t state_cond; + TOKUTXN_STATE state; + uint32_t num_pin; // number of threads (all hot indexes) that want this + // txn to not transition to commit or abort + uint64_t client_id; +}; +typedef struct tokutxn *TOKUTXN; + +void toku_txn_lock(struct tokutxn *txn); +void toku_txn_unlock(struct tokutxn *txn); + +uint64_t toku_txn_get_root_id(struct tokutxn *txn); +bool txn_declared_read_only(struct tokutxn *txn); + +int toku_txn_begin_txn ( + DB_TXN *container_db_txn, + struct tokutxn *parent_tokutxn, + struct tokutxn **tokutxn, + struct tokulogger *logger, + TXN_SNAPSHOT_TYPE snapshot_type, + bool read_only + ); + +DB_TXN * toku_txn_get_container_db_txn (struct tokutxn *tokutxn); +void toku_txn_set_container_db_txn(struct tokutxn *txn, DB_TXN *db_txn); + +// toku_txn_begin_with_xid is called from recovery and has no containing DB_TXN +int toku_txn_begin_with_xid ( + struct tokutxn *parent_tokutxn, + struct tokutxn **tokutxn, + struct tokulogger *logger, + TXNID_PAIR xid, + TXN_SNAPSHOT_TYPE snapshot_type, + DB_TXN *container_db_txn, + bool for_recovery, + bool read_only + ); + +void toku_txn_update_xids_in_txn(struct tokutxn *txn, TXNID xid); + +int toku_txn_load_txninfo (struct tokutxn *txn, struct txninfo *info); 
+ +int toku_txn_commit_txn (struct tokutxn *txn, int nosync, + TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); +int toku_txn_commit_with_lsn(struct tokutxn *txn, int nosync, LSN oplsn, + TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); + +int toku_txn_abort_txn(struct tokutxn *txn, + TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); +int toku_txn_abort_with_lsn(struct tokutxn *txn, LSN oplsn, + TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); + +int toku_txn_discard_txn(struct tokutxn *txn); + +void toku_txn_prepare_txn (struct tokutxn *txn, TOKU_XA_XID *xid); +// Effect: Do the internal work of preparing a transaction (does not log the prepare record). + +void toku_txn_get_prepared_xa_xid(struct tokutxn *txn, TOKU_XA_XID *xa_xid); +// Effect: Fill in the XID information for a transaction. The caller allocates the XID and the function fills in values. + +void toku_txn_maybe_fsync_log(struct tokulogger *logger, LSN do_fsync_lsn, bool do_fsync); + +void toku_txn_get_fsync_info(struct tokutxn *ttxn, bool* do_fsync, LSN* do_fsync_lsn); + +// Complete and destroy a txn +void toku_txn_close_txn(struct tokutxn *txn); + +// Remove a txn from any live txn lists +void toku_txn_complete_txn(struct tokutxn *txn); + +// Free the memory of a txn +void toku_txn_destroy_txn(struct tokutxn *txn); + +struct XIDS_S *toku_txn_get_xids(struct tokutxn *txn); + +// Force fsync on commit +void toku_txn_force_fsync_on_commit(struct tokutxn *txn); + +typedef enum { + TXN_BEGIN, // total number of transactions begun (does not include recovered txns) + TXN_READ_BEGIN, // total number of read only transactions begun (does not include recovered txns) + TXN_COMMIT, // successful commits + TXN_ABORT, + TXN_STATUS_NUM_ROWS +} txn_status_entry; + +typedef struct { + bool initialized; + TOKU_ENGINE_STATUS_ROW_S status[TXN_STATUS_NUM_ROWS]; +} TXN_STATUS_S, *TXN_STATUS; + +void toku_txn_get_status(TXN_STATUS s); + +bool toku_is_txn_in_live_root_txn_list(const xid_omt_t 
&live_root_txn_list, TXNID xid); + +TXNID toku_get_oldest_in_live_root_txn_list(struct tokutxn *txn); + +TOKUTXN_STATE toku_txn_get_state(struct tokutxn *txn); + +struct tokulogger_preplist { + TOKU_XA_XID xid; + DB_TXN *txn; +}; +int toku_logger_recover_txn (struct tokulogger *logger, struct tokulogger_preplist preplist[/*count*/], long count, /*out*/ long *retp, uint32_t flags); + +void toku_maybe_log_begin_txn_for_write_operation(struct tokutxn *txn); + +// Return whether txn (or it's descendents) have done no work. +bool toku_txn_is_read_only(struct tokutxn *txn); + +void toku_txn_lock_state(struct tokutxn *txn); +void toku_txn_unlock_state(struct tokutxn *txn); +void toku_txn_pin_live_txn_unlocked(struct tokutxn *txn); +void toku_txn_unpin_live_txn(struct tokutxn *txn); + +bool toku_txn_has_spilled_rollback(struct tokutxn *txn); + +uint64_t toku_txn_get_client_id(struct tokutxn *txn); +void toku_txn_set_client_id(struct tokutxn *txn, uint64_t client_id); + +// +// This function is used by the leafentry iterators. +// returns TOKUDB_ACCEPT if live transaction context is allowed to read a value +// that is written by transaction with LSN of id +// live transaction context may read value if either id is the root ancestor of context, or if +// id was committed before context's snapshot was taken. 
+// For id to be committed before context's snapshot was taken, the following must be true: +// - id < context->snapshot_txnid64 AND id is not in context's live root transaction list +// For the above to NOT be true: +// - id > context->snapshot_txnid64 OR id is in context's live root transaction list +// +int toku_txn_reads_txnid(TXNID txnid, struct tokutxn *txn); + +void txn_status_init(void); + +void txn_status_destroy(void); + +// For serialize / deserialize + +#include "ft/serialize/wbuf.h" + +static inline void wbuf_TXNID(struct wbuf *wb, TXNID txnid) { + wbuf_ulonglong(wb, txnid); +} + +static inline void wbuf_nocrc_TXNID(struct wbuf *wb, TXNID txnid) { + wbuf_nocrc_ulonglong(wb, txnid); +} + +static inline void wbuf_nocrc_TXNID_PAIR(struct wbuf *wb, TXNID_PAIR txnid) { + wbuf_nocrc_ulonglong(wb, txnid.parent_id64); + wbuf_nocrc_ulonglong(wb, txnid.child_id64); +} + +static inline void wbuf_nocrc_LSN(struct wbuf *wb, LSN lsn) { + wbuf_nocrc_ulonglong(wb, lsn.lsn); +} + +static inline void wbuf_LSN(struct wbuf *wb, LSN lsn) { + wbuf_ulonglong(wb, lsn.lsn); +} + +#include "ft/serialize/rbuf.h" + +static inline void rbuf_TXNID(struct rbuf *rb, TXNID *txnid) { + *txnid = rbuf_ulonglong(rb); +} + +static inline void rbuf_TXNID_PAIR(struct rbuf *rb, TXNID_PAIR *txnid) { + txnid->parent_id64 = rbuf_ulonglong(rb); + txnid->child_id64 = rbuf_ulonglong(rb); +} + +static inline void rbuf_ma_TXNID(struct rbuf *rb, memarena *UU(ma), TXNID *txnid) { + rbuf_TXNID(rb, txnid); +} + +static inline void rbuf_ma_TXNID_PAIR (struct rbuf *r, memarena *ma __attribute__((__unused__)), TXNID_PAIR *txnid) { + rbuf_TXNID_PAIR(r, txnid); +} + +static inline LSN rbuf_LSN(struct rbuf *rb) { + LSN lsn = { .lsn = rbuf_ulonglong(rb) }; + return lsn; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/txn_manager.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/txn_manager.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/txn_manager.cc 1970-01-01 00:00:00.000000000 
+0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/txn_manager.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,1050 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 
11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include + +#include "portability/toku_race_tools.h" + +#include "ft/cachetable/checkpoint.h" +#include "ft/logger/log-internal.h" +#include "ft/ule.h" +#include "ft/txn/txn.h" +#include "ft/txn/txn_manager.h" +#include "ft/txn/rollback.h" +#include "util/omt.h" + +bool garbage_collection_debug = false; + +// internal locking functions, should use this instead of accessing lock directly +static void txn_manager_lock(TXN_MANAGER txn_manager); +static void txn_manager_unlock(TXN_MANAGER txn_manager); + +#if 0 +static bool is_txnid_live(TXN_MANAGER txn_manager, TXNID txnid) { + TOKUTXN result = NULL; + toku_txn_manager_id2txn_unlocked(txn_manager, txnid, &result); + return (result != NULL); +} +#endif + +//Heaviside function to search through an OMT by a TXNID +int find_by_xid (const TOKUTXN &txn, const TXNID &txnidfind); + +static bool is_txnid_live(TXN_MANAGER txn_manager, TXNID txnid) { + TOKUTXN result = NULL; + TXNID_PAIR id = { .parent_id64 = txnid, .child_id64 = TXNID_NONE }; + toku_txn_manager_id2txn_unlocked(txn_manager, id, &result); + return (result != NULL); +} + +static void toku_txn_manager_clone_state_for_gc_unlocked( + TXN_MANAGER txn_manager, + xid_omt_t* snapshot_xids, + rx_omt_t* referenced_xids, + xid_omt_t* live_root_txns + ); + +static void +verify_snapshot_system(TXN_MANAGER txn_manager UU()) { + uint32_t num_snapshot_txnids = txn_manager->num_snapshots; + TXNID snapshot_txnids[num_snapshot_txnids]; + TOKUTXN snapshot_txns[num_snapshot_txnids]; + uint32_t num_live_txns = txn_manager->live_root_txns.size(); + TOKUTXN live_txns[num_live_txns]; + uint32_t 
num_referenced_xid_tuples = txn_manager->referenced_xids.size(); + struct referenced_xid_tuple *referenced_xid_tuples[num_referenced_xid_tuples]; + + // do this to get an omt of snapshot_txnids + xid_omt_t snapshot_txnids_omt; + rx_omt_t referenced_xids_omt; + xid_omt_t live_root_txns_omt; + toku_txn_manager_clone_state_for_gc_unlocked( + txn_manager, + &snapshot_txnids_omt, + &referenced_xids_omt, + &live_root_txns_omt + ); + + int r; + uint32_t i; + uint32_t j; + //set up arrays for easier access + { + TOKUTXN curr_txn = txn_manager->snapshot_head; + uint32_t curr_index = 0; + while (curr_txn != NULL) { + snapshot_txns[curr_index] = curr_txn; + snapshot_txnids[curr_index] = curr_txn->snapshot_txnid64; + curr_txn = curr_txn->snapshot_next; + curr_index++; + } + } + + for (i = 0; i < num_live_txns; i++) { + r = txn_manager->live_root_txns.fetch(i, &live_txns[i]); + assert_zero(r); + } + for (i = 0; i < num_referenced_xid_tuples; i++) { + r = txn_manager->referenced_xids.fetch(i, &referenced_xid_tuples[i]); + assert_zero(r); + } + + { + //Verify snapshot_txnids + for (i = 0; i < num_snapshot_txnids; i++) { + TXNID snapshot_xid = snapshot_txnids[i]; + TOKUTXN snapshot_txn = snapshot_txns[i]; + uint32_t num_live_root_txn_list = snapshot_txn->live_root_txn_list->size(); + TXNID live_root_txn_list[num_live_root_txn_list]; + { + for (j = 0; j < num_live_root_txn_list; j++) { + r = snapshot_txn->live_root_txn_list->fetch(j, &live_root_txn_list[j]); + assert_zero(r); + } + } + { + // Only committed entries have return a youngest. 
+ TXNID youngest = toku_get_youngest_live_list_txnid_for( + snapshot_xid, + snapshot_txnids_omt, + txn_manager->referenced_xids + ); + invariant(youngest == TXNID_NONE); + } + for (j = 0; j < num_live_root_txn_list; j++) { + TXNID live_xid = live_root_txn_list[j]; + invariant(live_xid <= snapshot_xid); + TXNID youngest = toku_get_youngest_live_list_txnid_for( + live_xid, + snapshot_txnids_omt, + txn_manager->referenced_xids + ); + if (is_txnid_live(txn_manager, live_xid)) { + // Only committed entries have return a youngest. + invariant(youngest == TXNID_NONE); + } + else { + invariant(youngest != TXNID_NONE); + // A committed entry might have been read-only, in which case it won't return anything. + // This snapshot reads 'live_xid' so it's youngest cannot be older than snapshot_xid. + invariant(youngest >= snapshot_xid); + } + } + } + } + { + // Verify referenced_xids. + for (i = 0; i < num_referenced_xid_tuples; i++) { + struct referenced_xid_tuple *tuple = referenced_xid_tuples[i]; + invariant(tuple->begin_id < tuple->end_id); + invariant(tuple->references > 0); + + { + //verify neither pair->begin_id nor end_id is in live_list + r = txn_manager->live_root_txns.find_zero(tuple->begin_id, nullptr, nullptr); + invariant(r == DB_NOTFOUND); + r = txn_manager->live_root_txns.find_zero(tuple->end_id, nullptr, nullptr); + invariant(r == DB_NOTFOUND); + } + { + //verify neither pair->begin_id nor end_id is in snapshot_xids + TOKUTXN curr_txn = txn_manager->snapshot_head; + uint32_t curr_index = 0; + while (curr_txn != NULL) { + invariant(tuple->begin_id != curr_txn->txnid.parent_id64); + invariant(tuple->end_id != curr_txn->txnid.parent_id64); + curr_txn = curr_txn->snapshot_next; + curr_index++; + } + } + { + // Verify number of references is correct + uint32_t refs_found = 0; + for (j = 0; j < num_snapshot_txnids; j++) { + TOKUTXN snapshot_txn = snapshot_txns[j]; + if (toku_is_txn_in_live_root_txn_list(*snapshot_txn->live_root_txn_list, tuple->begin_id)) { + 
refs_found++; + } + invariant(!toku_is_txn_in_live_root_txn_list( + *snapshot_txn->live_root_txn_list, + tuple->end_id)); + } + invariant(refs_found == tuple->references); + } + { + // Verify youngest makes sense. + TXNID youngest = toku_get_youngest_live_list_txnid_for( + tuple->begin_id, + snapshot_txnids_omt, + txn_manager->referenced_xids + ); + invariant(youngest != TXNID_NONE); + invariant(youngest > tuple->begin_id); + invariant(youngest < tuple->end_id); + // Youngest must be found, and must be a snapshot txn + r = snapshot_txnids_omt.find_zero(youngest, nullptr, nullptr); + invariant_zero(r); + } + } + } + snapshot_txnids_omt.destroy(); + referenced_xids_omt.destroy(); + live_root_txns_omt.destroy(); +} + +void toku_txn_manager_init(TXN_MANAGER* txn_managerp) { + TXN_MANAGER XCALLOC(txn_manager); + toku_mutex_init(&txn_manager->txn_manager_lock, NULL); + txn_manager->live_root_txns.create(); + txn_manager->live_root_ids.create(); + txn_manager->snapshot_head = NULL; + txn_manager->snapshot_tail = NULL; + txn_manager->num_snapshots = 0; + txn_manager->referenced_xids.create(); + txn_manager->last_xid = 0; + + txn_manager->last_xid_seen_for_recover = TXNID_NONE; + txn_manager->last_calculated_oldest_referenced_xid = TXNID_NONE; + + *txn_managerp = txn_manager; +} + +void toku_txn_manager_destroy(TXN_MANAGER txn_manager) { + toku_mutex_destroy(&txn_manager->txn_manager_lock); + invariant(txn_manager->live_root_txns.size() == 0); + txn_manager->live_root_txns.destroy(); + invariant(txn_manager->live_root_ids.size() == 0); + txn_manager->live_root_ids.destroy(); + invariant(txn_manager->snapshot_head == NULL); + invariant(txn_manager->referenced_xids.size() == 0); + txn_manager->referenced_xids.destroy(); + toku_free(txn_manager); +} + +TXNID +toku_txn_manager_get_oldest_living_xid(TXN_MANAGER txn_manager) { + TOKUTXN rtxn = NULL; + TXNID rval = TXNID_NONE_LIVING; + txn_manager_lock(txn_manager); + + if (txn_manager->live_root_txns.size() > 0) { + int r = 
txn_manager->live_root_txns.fetch(0, &rtxn); + invariant_zero(r); + } + if (rtxn) { + rval = rtxn->txnid.parent_id64; + } + txn_manager_unlock(txn_manager); + return rval; +} + +TXNID toku_txn_manager_get_oldest_referenced_xid_estimate(TXN_MANAGER txn_manager) { + return txn_manager->last_calculated_oldest_referenced_xid; +} + +int live_root_txn_list_iter(const TOKUTXN &live_xid, const uint32_t UU(index), TXNID **const referenced_xids); +int live_root_txn_list_iter(const TOKUTXN &live_xid, const uint32_t UU(index), TXNID **const referenced_xids){ + (*referenced_xids)[index] = live_xid->txnid.parent_id64; + return 0; +} + + +// Create list of root transactions that were live when this txn began. +static inline void +setup_live_root_txn_list(xid_omt_t* live_root_txnid, xid_omt_t* live_root_txn_list) { + if (live_root_txnid->size() > 0) { + live_root_txn_list->clone(*live_root_txnid); + } else { + live_root_txn_list->create_no_array(); + } +} + +//Heaviside function to search through an OMT by a TXNID +int +find_by_xid (const TOKUTXN &txn, const TXNID &txnidfind) { + if (txn->txnid.parent_id64 < txnidfind) return -1; + if (txn->txnid.parent_id64 > txnidfind) return +1; + return 0; +} + +static TXNID +max_xid(TXNID a, TXNID b) { + return a < b ? 
b : a; +} + +static void set_oldest_referenced_xid(TXN_MANAGER txn_manager) { + TXNID oldest_referenced_xid = TXNID_MAX; + int r; + if (txn_manager->live_root_ids.size() > 0) { + r = txn_manager->live_root_ids.fetch(0, &oldest_referenced_xid); + // this function should only be called when we know there is at least + // one live transaction + invariant_zero(r); + } + + if (txn_manager->referenced_xids.size() > 0) { + struct referenced_xid_tuple* tuple; + r = txn_manager->referenced_xids.fetch(0, &tuple); + if (r == 0 && tuple->begin_id < oldest_referenced_xid) { + oldest_referenced_xid = tuple->begin_id; + } + } + if (txn_manager->snapshot_head != NULL) { + TXNID id = txn_manager->snapshot_head->snapshot_txnid64; + if (id < oldest_referenced_xid) { + oldest_referenced_xid = id; + } + } + if (txn_manager->last_xid < oldest_referenced_xid) { + oldest_referenced_xid = txn_manager->last_xid; + } + invariant(oldest_referenced_xid != TXNID_MAX); + txn_manager->last_calculated_oldest_referenced_xid = oldest_referenced_xid; +} + +//Heaviside function to find a TOKUTXN by TOKUTXN (used to find the index) +// template-only function, but must be extern +int find_xid (const TOKUTXN &txn, const TOKUTXN &txnfind); +int +find_xid (const TOKUTXN &txn, const TOKUTXN &txnfind) +{ + if (txn->txnid.parent_id64 < txnfind->txnid.parent_id64) return -1; + if (txn->txnid.parent_id64 > txnfind->txnid.parent_id64) return +1; + return 0; +} + +static inline void txn_manager_create_snapshot_unlocked( + TXN_MANAGER txn_manager, + TOKUTXN txn + ) +{ + txn->snapshot_txnid64 = ++txn_manager->last_xid; + setup_live_root_txn_list(&txn_manager->live_root_ids, txn->live_root_txn_list); + // Add this txn to the global list of txns that have their own snapshots. + // (Note, if a txn is a child that creates its own snapshot, then that child xid + // is the xid stored in the global list.) 
+ if (txn_manager->snapshot_head == NULL) { + invariant(txn_manager->snapshot_tail == NULL); + txn_manager->snapshot_head = txn; + txn_manager->snapshot_tail = txn; + } + else { + txn_manager->snapshot_tail->snapshot_next = txn; + txn->snapshot_prev = txn_manager->snapshot_tail; + txn_manager->snapshot_tail = txn; + } + txn_manager->num_snapshots++; +} + +// template-only function, but must be extern +int find_tuple_by_xid (const struct referenced_xid_tuple &tuple, const TXNID &xidfind); +int +find_tuple_by_xid (const struct referenced_xid_tuple &tuple, const TXNID &xidfind) +{ + if (tuple.begin_id < xidfind) return -1; + if (tuple.begin_id > xidfind) return +1; + return 0; +} + +// template-only function, but must be extern +int referenced_xids_note_snapshot_txn_end_iter(const TXNID &live_xid, const uint32_t UU(index), rx_omt_t *const referenced_xids) + __attribute__((nonnull(3))); +int referenced_xids_note_snapshot_txn_end_iter(const TXNID &live_xid, const uint32_t UU(index), rx_omt_t *const referenced_xids) +{ + int r; + uint32_t idx; + struct referenced_xid_tuple *tuple; + + r = referenced_xids->find_zero(live_xid, &tuple, &idx); + if (r == DB_NOTFOUND) { + goto done; + } + invariant_zero(r); + invariant(tuple->references > 0); + if (--tuple->references == 0) { + r = referenced_xids->delete_at(idx); + lazy_assert_zero(r); + } +done: + return 0; +} + +// When txn ends, update reverse live list. To do that, examine each txn in this (closing) txn's live list. 
+static inline int +note_snapshot_txn_end_by_ref_xids(TXN_MANAGER mgr, const xid_omt_t &live_root_txn_list) { + int r; + r = live_root_txn_list.iterate(&mgr->referenced_xids); + invariant_zero(r); + return r; +} + +typedef struct snapshot_iter_extra { + uint32_t* indexes_to_delete; + uint32_t num_indexes; + xid_omt_t* live_root_txn_list; +} SNAPSHOT_ITER_EXTRA; + +// template-only function, but must be extern +int note_snapshot_txn_end_by_txn_live_list_iter(referenced_xid_tuple* tuple, const uint32_t index, SNAPSHOT_ITER_EXTRA *const sie) + __attribute__((nonnull(3))); +int note_snapshot_txn_end_by_txn_live_list_iter( + referenced_xid_tuple* tuple, + const uint32_t index, + SNAPSHOT_ITER_EXTRA *const sie + ) +{ + int r; + uint32_t idx; + TXNID txnid; + r = sie->live_root_txn_list->find_zero(tuple->begin_id, &txnid, &idx); + if (r == DB_NOTFOUND) { + goto done; + } + invariant_zero(r); + invariant(txnid == tuple->begin_id); + invariant(tuple->references > 0); + if (--tuple->references == 0) { + sie->indexes_to_delete[sie->num_indexes] = index; + sie->num_indexes++; + } +done: + return 0; +} + +static inline int +note_snapshot_txn_end_by_txn_live_list(TXN_MANAGER mgr, xid_omt_t* live_root_txn_list) { + uint32_t size = mgr->referenced_xids.size(); + uint32_t indexes_to_delete[size]; + SNAPSHOT_ITER_EXTRA sie = { .indexes_to_delete = indexes_to_delete, .num_indexes = 0, .live_root_txn_list = live_root_txn_list}; + mgr->referenced_xids.iterate_ptr(&sie); + for (uint32_t i = 0; i < sie.num_indexes; i++) { + uint32_t curr_index = sie.indexes_to_delete[sie.num_indexes-i-1]; + mgr->referenced_xids.delete_at(curr_index); + } + return 0; +} + +static inline void txn_manager_remove_snapshot_unlocked( + TOKUTXN txn, + TXN_MANAGER txn_manager + ) +{ + // Remove from linked list of snapshot txns + if (txn_manager->snapshot_head == txn) { + txn_manager->snapshot_head = txn->snapshot_next; + } + if (txn_manager->snapshot_tail == txn) { + txn_manager->snapshot_tail = 
txn->snapshot_prev; + } + if (txn->snapshot_next) { + txn->snapshot_next->snapshot_prev = txn->snapshot_prev; + } + if (txn->snapshot_prev) { + txn->snapshot_prev->snapshot_next = txn->snapshot_next; + } + txn_manager->num_snapshots--; + uint32_t ref_xids_size = txn_manager->referenced_xids.size(); + uint32_t live_list_size = txn->live_root_txn_list->size(); + if (ref_xids_size > 0 && live_list_size > 0) { + if (live_list_size > ref_xids_size && ref_xids_size < 2000) { + note_snapshot_txn_end_by_txn_live_list(txn_manager, txn->live_root_txn_list); + } + else { + note_snapshot_txn_end_by_ref_xids(txn_manager, *txn->live_root_txn_list); + } + } +} + +static inline void inherit_snapshot_from_parent(TOKUTXN child) { + if (child->parent) { + child->snapshot_txnid64 = child->parent->snapshot_txnid64; + child->live_root_txn_list = child->parent->live_root_txn_list; + } +} +void toku_txn_manager_handle_snapshot_create_for_child_txn( + TOKUTXN txn, + TXN_MANAGER txn_manager, + TXN_SNAPSHOT_TYPE snapshot_type + ) +{ + // this is a function for child txns, so just doint a sanity check + invariant(txn->parent != NULL); + bool needs_snapshot = txn_needs_snapshot(snapshot_type, txn->parent); + if (needs_snapshot) { + invariant(txn->live_root_txn_list == nullptr); + XMALLOC(txn->live_root_txn_list); + txn_manager_lock(txn_manager); + txn_manager_create_snapshot_unlocked(txn_manager, txn); + txn_manager_unlock(txn_manager); + } + else { + inherit_snapshot_from_parent(txn); + } +} + +void toku_txn_manager_handle_snapshot_destroy_for_child_txn( + TOKUTXN txn, + TXN_MANAGER txn_manager, + TXN_SNAPSHOT_TYPE snapshot_type + ) +{ + // this is a function for child txns, so just doint a sanity check + invariant(txn->parent != NULL); + bool is_snapshot = txn_needs_snapshot(snapshot_type, txn->parent); + if (is_snapshot) { + txn_manager_lock(txn_manager); + txn_manager_remove_snapshot_unlocked(txn, txn_manager); + txn_manager_unlock(txn_manager); + invariant(txn->live_root_txn_list != 
nullptr); + txn->live_root_txn_list->destroy(); + toku_free(txn->live_root_txn_list); + } +} + +void toku_txn_manager_start_txn_for_recovery( + TOKUTXN txn, + TXN_MANAGER txn_manager, + TXNID xid + ) +{ + txn_manager_lock(txn_manager); + // using xid that is passed in + txn_manager->last_xid = max_xid(txn_manager->last_xid, xid); + toku_txn_update_xids_in_txn(txn, xid); + + uint32_t idx; + int r = txn_manager->live_root_txns.find_zero(txn, nullptr, &idx); + invariant(r == DB_NOTFOUND); + r = txn_manager->live_root_txns.insert_at(txn, idx); + invariant_zero(r); + r = txn_manager->live_root_ids.insert_at(txn->txnid.parent_id64, idx); + invariant_zero(r); + + txn_manager_unlock(txn_manager); +} + +void toku_txn_manager_start_txn( + TOKUTXN txn, + TXN_MANAGER txn_manager, + TXN_SNAPSHOT_TYPE snapshot_type, + bool read_only + ) +{ + int r; + TXNID xid = TXNID_NONE; + // if we are running in recovery, we don't need to make snapshots + bool needs_snapshot = txn_needs_snapshot(snapshot_type, NULL); + + // perform a malloc outside of the txn_manager lock + // will be used in txn_manager_create_snapshot_unlocked below + if (needs_snapshot) { + invariant(txn->live_root_txn_list == nullptr); + XMALLOC(txn->live_root_txn_list); + } + // the act of getting a transaction ID and adding the + // txn to the proper OMTs must be atomic. MVCC depends + // on this. + txn_manager_lock(txn_manager); + if (garbage_collection_debug) { + verify_snapshot_system(txn_manager); + } + + // + // maintain the data structures necessary for MVCC: + // 1. add txn to list of live_root_txns if this is a root transaction + // 2. if the transaction is creating a snapshot: + // - create a live list for the transaction + // - add the id to the list of snapshot ids + // + // The order of operations is important here, and must be taken + // into account when the transaction is closed. The txn is added + // to the live_root_txns first (if it is a root txn). 
This has the implication + // that a root level snapshot transaction is in its own live list. This fact + // is taken into account when the transaction is closed. + + // add ancestor information, and maintain global live root txn list + xid = ++txn_manager->last_xid; // we always need an ID, needed for lock tree + toku_txn_update_xids_in_txn(txn, xid); + if (!read_only) { + uint32_t idx = txn_manager->live_root_txns.size(); + r = txn_manager->live_root_txns.insert_at(txn, idx); + invariant_zero(r); + r = txn_manager->live_root_ids.insert_at(txn->txnid.parent_id64, idx); + invariant_zero(r); + } + set_oldest_referenced_xid(txn_manager); + + if (needs_snapshot) { + txn_manager_create_snapshot_unlocked( + txn_manager, + txn + ); + } + + if (garbage_collection_debug) { + verify_snapshot_system(txn_manager); + } + txn_manager_unlock(txn_manager); + return; +} + +TXNID +toku_get_youngest_live_list_txnid_for(TXNID xc, const xid_omt_t &snapshot_txnids, const rx_omt_t &referenced_xids) { + struct referenced_xid_tuple *tuple; + int r; + TXNID rval = TXNID_NONE; + + r = referenced_xids.find_zero(xc, &tuple, nullptr); + if (r == DB_NOTFOUND) { + goto done; + } + TXNID live; + + r = snapshot_txnids.find(tuple->end_id, -1, &live, nullptr); + if (r == DB_NOTFOUND) { + goto done; + } + invariant(live < tuple->end_id); + if (live > tuple->begin_id) { + rval = live; + } +done: + return rval; +} + +void toku_txn_manager_finish_txn(TXN_MANAGER txn_manager, TOKUTXN txn) { + int r; + invariant(txn->parent == NULL); + bool is_snapshot = txn_needs_snapshot(txn->snapshot_type, NULL); + txn_manager_lock(txn_manager); + + if (garbage_collection_debug) { + verify_snapshot_system(txn_manager); + } + + if (is_snapshot) { + txn_manager_remove_snapshot_unlocked( + txn, + txn_manager + ); + } + + if (!txn_declared_read_only(txn)) { + uint32_t idx; + //Remove txn from list of live root txns + TOKUTXN txnagain; + r = txn_manager->live_root_txns.find_zero(txn, &txnagain, &idx); + invariant_zero(r); + 
invariant(txn==txnagain); + + r = txn_manager->live_root_txns.delete_at(idx); + invariant_zero(r); + r = txn_manager->live_root_ids.delete_at(idx); + invariant_zero(r); + + if (!toku_txn_is_read_only(txn) || garbage_collection_debug) { + uint32_t num_references = 0; + TOKUTXN curr_txn = txn_manager->snapshot_tail; + while(curr_txn != NULL) { + if (curr_txn->snapshot_txnid64 > txn->txnid.parent_id64) { + num_references++; + } + else { + break; + } + curr_txn = curr_txn->snapshot_prev; + } + + if (num_references > 0) { + // This transaction exists in a live list of another transaction. + struct referenced_xid_tuple tuple = { + .begin_id = txn->txnid.parent_id64, + .end_id = ++txn_manager->last_xid, + .references = num_references + }; + r = txn_manager->referenced_xids.insert(tuple, txn->txnid.parent_id64, nullptr); + lazy_assert_zero(r); + } + } + } + + if (garbage_collection_debug) { + verify_snapshot_system(txn_manager); + } + txn_manager_unlock(txn_manager); + + //Cleanup that does not require the txn_manager lock + if (is_snapshot) { + invariant(txn->live_root_txn_list != nullptr); + txn->live_root_txn_list->destroy(); + toku_free(txn->live_root_txn_list); + } + return; +} + +static void toku_txn_manager_clone_state_for_gc_unlocked( + TXN_MANAGER txn_manager, + xid_omt_t* snapshot_xids, + rx_omt_t* referenced_xids, + xid_omt_t* live_root_txns + ) +{ + TXNID* snapshot_xids_array = NULL; + XMALLOC_N(txn_manager->num_snapshots, snapshot_xids_array); + TOKUTXN curr_txn = txn_manager->snapshot_head; + uint32_t curr_index = 0; + while (curr_txn != NULL) { + snapshot_xids_array[curr_index] = curr_txn->snapshot_txnid64; + curr_txn = curr_txn->snapshot_next; + curr_index++; + } + snapshot_xids->create_steal_sorted_array( + &snapshot_xids_array, + txn_manager->num_snapshots, + txn_manager->num_snapshots + ); + + referenced_xids->clone(txn_manager->referenced_xids); + setup_live_root_txn_list(&txn_manager->live_root_ids, live_root_txns); +} + +void 
toku_txn_manager_clone_state_for_gc( + TXN_MANAGER txn_manager, + xid_omt_t* snapshot_xids, + rx_omt_t* referenced_xids, + xid_omt_t* live_root_txns + ) +{ + txn_manager_lock(txn_manager); + toku_txn_manager_clone_state_for_gc_unlocked( + txn_manager, + snapshot_xids, + referenced_xids, + live_root_txns + ); + txn_manager_unlock(txn_manager); +} + +void txn_manager_state::init() { + invariant(!initialized); + invariant_notnull(txn_manager); + toku_txn_manager_clone_state_for_gc( + txn_manager, + &snapshot_xids, + &referenced_xids, + &live_root_txns + ); + initialized = true; +} + +void toku_txn_manager_id2txn_unlocked(TXN_MANAGER txn_manager, TXNID_PAIR txnid, TOKUTXN *result) { + TOKUTXN txn; + int r = txn_manager->live_root_txns.find_zero(txnid.parent_id64, &txn, nullptr); + if (r==0) { + assert(txn->txnid.parent_id64 == txnid.parent_id64); + *result = txn; + } + else { + assert(r==DB_NOTFOUND); + // If there is no txn, then we treat it as the null txn. + *result = NULL; + } +} + +int toku_txn_manager_get_root_txn_from_xid (TXN_MANAGER txn_manager, TOKU_XA_XID *xid, DB_TXN **txnp) { + txn_manager_lock(txn_manager); + int ret_val = 0; + int num_live_txns = txn_manager->live_root_txns.size(); + for (int i = 0; i < num_live_txns; i++) { + TOKUTXN txn; + { + int r = txn_manager->live_root_txns.fetch(i, &txn); + assert_zero(r); + } + if (txn->xa_xid.formatID == xid->formatID + && txn->xa_xid.gtrid_length == xid->gtrid_length + && txn->xa_xid.bqual_length == xid->bqual_length + && 0==memcmp(txn->xa_xid.data, xid->data, xid->gtrid_length + xid->bqual_length)) { + *txnp = txn->container_db_txn; + ret_val = 0; + goto exit; + } + } + ret_val = DB_NOTFOUND; +exit: + txn_manager_unlock(txn_manager); + return ret_val; +} + +uint32_t toku_txn_manager_num_live_root_txns(TXN_MANAGER txn_manager) { + int ret_val = 0; + txn_manager_lock(txn_manager); + ret_val = txn_manager->live_root_txns.size(); + txn_manager_unlock(txn_manager); + return ret_val; +} + +static int 
txn_manager_iter( + TXN_MANAGER txn_manager, + txn_mgr_iter_callback cb, + void* extra, + bool just_root_txns + ) +{ + int r = 0; + toku_mutex_lock(&txn_manager->txn_manager_lock); + uint32_t size = txn_manager->live_root_txns.size(); + for (uint32_t i = 0; i < size; i++) { + TOKUTXN curr_txn = NULL; + r = txn_manager->live_root_txns.fetch(i, &curr_txn); + assert_zero(r); + if (just_root_txns) { + r = cb(curr_txn, extra); + } + else { + r = curr_txn->child_manager->iterate(cb, extra); + } + if (r) { + break; + } + } + toku_mutex_unlock(&txn_manager->txn_manager_lock); + return r; +} + +int toku_txn_manager_iter_over_live_txns( + TXN_MANAGER txn_manager, + txn_mgr_iter_callback cb, + void* extra + ) +{ + return txn_manager_iter( + txn_manager, + cb, + extra, + false + ); +} + +int toku_txn_manager_iter_over_live_root_txns( + TXN_MANAGER txn_manager, + txn_mgr_iter_callback cb, + void* extra + ) +{ + return txn_manager_iter( + txn_manager, + cb, + extra, + true + ); +} + + +// +// This function is called only via env_txn_xa_recover and env_txn_recover. +// See comments for those functions to understand assumptions that +// can be made when calling this function. Namely, that the system is +// quiescant, in that we are right after recovery and before user operations +// commence. +// +// Another key assumption made here is that only root transactions +// may be prepared and that child transactions cannot be prepared. +// This assumption is made by the fact that we iterate over the live root txns +// to find prepared transactions. +// +// I (Zardosht), don't think we take advantage of this fact, as we are holding +// the txn_manager_lock in this function, but in the future we might want +// to take these assumptions into account. 
+// +int toku_txn_manager_recover_root_txn ( + TXN_MANAGER txn_manager, + struct tokulogger_preplist preplist[/*count*/], + long count, + long *retp, /*out*/ + uint32_t flags + ) +{ + int ret_val = 0; + txn_manager_lock(txn_manager); + uint32_t num_txns_returned = 0; + // scan through live root txns to find + // prepared transactions and return them + uint32_t size = txn_manager->live_root_txns.size(); + if (flags==DB_FIRST) { + txn_manager->last_xid_seen_for_recover = TXNID_NONE; + } + else if (flags!=DB_NEXT) { + ret_val = EINVAL; + goto exit; + } + for (uint32_t i = 0; i < size; i++) { + TOKUTXN curr_txn = NULL; + txn_manager->live_root_txns.fetch(i, &curr_txn); + // skip over TOKUTXNs whose txnid64 is too small, meaning + // we have already processed them. + if (curr_txn->txnid.parent_id64 <= txn_manager->last_xid_seen_for_recover) { + continue; + } + if (curr_txn->state == TOKUTXN_PREPARING) { + assert(curr_txn->container_db_txn); + preplist[num_txns_returned].txn = curr_txn->container_db_txn; + preplist[num_txns_returned].xid = curr_txn->xa_xid; + txn_manager->last_xid_seen_for_recover = curr_txn->txnid.parent_id64; + num_txns_returned++; + } + txn_manager->last_xid_seen_for_recover = curr_txn->txnid.parent_id64; + // if we found the maximum number of prepared transactions we are + // allowed to find, then break + if (num_txns_returned >= count) { + break; + } + } + invariant(num_txns_returned <= count); + *retp = num_txns_returned; + ret_val = 0; +exit: + txn_manager_unlock(txn_manager); + return ret_val; +} + +static void txn_manager_lock(TXN_MANAGER txn_manager) { + toku_mutex_lock(&txn_manager->txn_manager_lock); +} + +static void txn_manager_unlock(TXN_MANAGER txn_manager) { + toku_mutex_unlock(&txn_manager->txn_manager_lock); +} + +void toku_txn_manager_suspend(TXN_MANAGER txn_manager) { + txn_manager_lock(txn_manager); +} + +void toku_txn_manager_resume(TXN_MANAGER txn_manager) { + txn_manager_unlock(txn_manager); +} + +void 
+toku_txn_manager_set_last_xid_from_logger(TXN_MANAGER txn_manager, TXNID last_xid) { + invariant(txn_manager->last_xid == TXNID_NONE); + txn_manager->last_xid = last_xid; +} + +void +toku_txn_manager_set_last_xid_from_recovered_checkpoint(TXN_MANAGER txn_manager, TXNID last_xid) { + txn_manager->last_xid = last_xid; +} + +TXNID +toku_txn_manager_get_last_xid(TXN_MANAGER mgr) { + txn_manager_lock(mgr); + TXNID last_xid = mgr->last_xid; + txn_manager_unlock(mgr); + return last_xid; +} + +bool +toku_txn_manager_txns_exist(TXN_MANAGER mgr) { + txn_manager_lock(mgr); + bool retval = mgr->live_root_txns.size() > 0; + txn_manager_unlock(mgr); + return retval; +} + + +// Test-only function +void +toku_txn_manager_increase_last_xid(TXN_MANAGER mgr, uint64_t increment) { + txn_manager_lock(mgr); + mgr->last_xid += increment; + txn_manager_unlock(mgr); +} + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/txn_manager.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/txn_manager.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/txn_manager.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/txn_manager.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,268 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include "portability/toku_portability.h" +#include "portability/toku_pthread.h" + +#include "ft/txn/txn.h" + +typedef struct txn_manager *TXN_MANAGER; + +struct referenced_xid_tuple { + TXNID begin_id; + TXNID end_id; + uint32_t references; +}; + +struct txn_manager { + toku_mutex_t txn_manager_lock; // a lock protecting this object + txn_omt_t live_root_txns; // a sorted tree. 
+ xid_omt_t live_root_ids; //contains TXNID x | x is snapshot txn + TOKUTXN snapshot_head; + TOKUTXN snapshot_tail; + uint32_t num_snapshots; + // Contains 3-tuples: (TXNID begin_id, TXNID end_id, uint64_t num_live_list_references) + // for committed root transaction ids that are still referenced by a live list. + rx_omt_t referenced_xids; + + TXNID last_xid; + TXNID last_xid_seen_for_recover; + TXNID last_calculated_oldest_referenced_xid; +}; +typedef struct txn_manager *TXN_MANAGER; + +struct txn_manager_state { + txn_manager_state(TXN_MANAGER mgr) : + txn_manager(mgr), + initialized(false) { + snapshot_xids.create_no_array(); + referenced_xids.create_no_array(); + live_root_txns.create_no_array(); + } + + // should not copy construct + txn_manager_state &operator=(txn_manager_state &rhs) = delete; + txn_manager_state(txn_manager_state &rhs) = delete; + + ~txn_manager_state() { + snapshot_xids.destroy(); + referenced_xids.destroy(); + live_root_txns.destroy(); + } + + void init(); + + TXN_MANAGER txn_manager; + bool initialized; + + // a snapshot of the txn manager's mvcc state + // only valid if initialized = true + xid_omt_t snapshot_xids; + rx_omt_t referenced_xids; + xid_omt_t live_root_txns; +}; + +// represents all of the information needed to run garbage collection +struct txn_gc_info { + txn_gc_info(txn_manager_state *st, TXNID xid_sgc, TXNID xid_ip, bool mvcc) + : txn_state_for_gc(st), + oldest_referenced_xid_for_simple_gc(xid_sgc), + oldest_referenced_xid_for_implicit_promotion(xid_ip), + mvcc_needed(mvcc) { + } + + // a snapshot of the transcation system. may be null. + txn_manager_state *txn_state_for_gc; + + // the oldest xid in any live list + // + // suitible for simple garbage collection that cleans up multiple committed + // transaction records into one. 
not suitible for implicit promotions, which + // must be correct in the face of abort messages - see ftnode->oldest_referenced_xid + TXNID oldest_referenced_xid_for_simple_gc; + + // lower bound on the oldest xid in any live when the messages to be cleaned + // had no messages above them. suitable for implicitly promoting a provisonal uxr. + TXNID oldest_referenced_xid_for_implicit_promotion; + + // whether or not mvcc is actually needed - false during recovery and non-transactional systems + const bool mvcc_needed; +}; + +void toku_txn_manager_init(TXN_MANAGER* txn_manager); +void toku_txn_manager_destroy(TXN_MANAGER txn_manager); + +TXNID toku_txn_manager_get_oldest_living_xid(TXN_MANAGER txn_manager); + +TXNID toku_txn_manager_get_oldest_referenced_xid_estimate(TXN_MANAGER txn_manager); + +void toku_txn_manager_handle_snapshot_create_for_child_txn( + TOKUTXN txn, + TXN_MANAGER txn_manager, + TXN_SNAPSHOT_TYPE snapshot_type + ); +void toku_txn_manager_handle_snapshot_destroy_for_child_txn( + TOKUTXN txn, + TXN_MANAGER txn_manager, + TXN_SNAPSHOT_TYPE snapshot_type + ); + + +// Assign a txnid. Log the txn begin in the recovery log. Initialize the txn live lists. +void toku_txn_manager_start_txn( + TOKUTXN txn, + TXN_MANAGER txn_manager, + TXN_SNAPSHOT_TYPE snapshot_type, + bool read_only + ); + +void toku_txn_manager_start_txn_for_recovery( + TOKUTXN txn, + TXN_MANAGER txn_manager, + TXNID xid + ); + +void toku_txn_manager_finish_txn(TXN_MANAGER txn_manager, TOKUTXN txn); + +void toku_txn_manager_clone_state_for_gc( + TXN_MANAGER txn_manager, + xid_omt_t* snapshot_xids, + rx_omt_t* referenced_xids, + xid_omt_t* live_root_txns + ); + +void toku_txn_manager_id2txn_unlocked(TXN_MANAGER txn_manager, TXNID_PAIR txnid, TOKUTXN *result); + +// Returns a root txn associated with xid. The system as a whole +// assumes that only root txns get prepared, adn therefore only +// root txns will have XIDs associated with them. 
+int toku_txn_manager_get_root_txn_from_xid (TXN_MANAGER txn_manager, TOKU_XA_XID *xid, DB_TXN **txnp); + +uint32_t toku_txn_manager_num_live_root_txns(TXN_MANAGER txn_manager); + +typedef int (*txn_mgr_iter_callback)(TOKUTXN txn, void* extra); + +int toku_txn_manager_iter_over_live_txns( + TXN_MANAGER txn_manager, + txn_mgr_iter_callback cb, + void* extra + ); + +int toku_txn_manager_iter_over_live_root_txns( + TXN_MANAGER txn_manager, + txn_mgr_iter_callback cb, + void* extra + ); + +int toku_txn_manager_recover_root_txn( + TXN_MANAGER txn_manager, + struct tokulogger_preplist preplist[/*count*/], + long count, + long *retp, /*out*/ + uint32_t flags + ); + +void toku_txn_manager_suspend(TXN_MANAGER txn_manager); +void toku_txn_manager_resume(TXN_MANAGER txn_manager); + +void toku_txn_manager_set_last_xid_from_logger(TXN_MANAGER txn_manager, TXNID last_xid); +void toku_txn_manager_set_last_xid_from_recovered_checkpoint(TXN_MANAGER txn_manager, TXNID last_xid); +TXNID toku_txn_manager_get_last_xid(TXN_MANAGER mgr); + +bool toku_txn_manager_txns_exist(TXN_MANAGER mgr); + +// Test-only function +void toku_txn_manager_increase_last_xid(TXN_MANAGER mgr, uint64_t increment); + +TXNID toku_get_youngest_live_list_txnid_for(TXNID xc, const xid_omt_t &snapshot_txnids, const rx_omt_t &referenced_xids); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/txn_state.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/txn_state.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/txn_state.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/txn_state.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,103 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the 
Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. 
+ + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." + +// this is a separate file so that the hotindexing tests can see the txn states + +enum tokutxn_state { + TOKUTXN_LIVE, // initial txn state + TOKUTXN_PREPARING, // txn is preparing (or prepared) + TOKUTXN_COMMITTING, // txn in the process of committing + TOKUTXN_ABORTING, // txn in the process of aborting + TOKUTXN_RETIRED, // txn no longer exists +}; +typedef enum tokutxn_state TOKUTXN_STATE; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/xids.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/xids.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/xids.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/xids.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,301 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +/* Purpose of this file is to implement xids list of nested transactions + * ids. + * + * See design documentation for nested transactions at + * TokuWiki/Imp/TransactionsOverview. + * + * NOTE: xids are always stored in disk byte order. + * Accessors are responsible for transposing bytes to + * host order. 
+ */ + +#include +#include + +#include "portability/memory.h" +#include "portability/toku_assert.h" +#include "portability/toku_htod.h" +#include "portability/toku_portability.h" + +#include "ft/txn/xids.h" + +///////////////////////////////////////////////////////////////////////////////// +// This layer of abstraction (xids_xxx) understands xids<> and nothing else. +// It contains all the functions that understand xids<> +// +// xids<> do not store the implicit transaction id of 0 at index 0. +// The accessor functions make the id of 0 explicit at index 0. +// The number of xids physically stored in the xids array is in +// the variable num_xids. +// +// The xids struct is immutable. The caller gets an initial version of XIDS +// by calling toku_xids_get_root_xids(), which returns the constant struct +// representing the root transaction (id 0). When a transaction begins, +// a new XIDS is created with the id of the current transaction appended to +// the list. +// +// + +// This is the xids list for a transactionless environment. +// It is also the initial state of any xids list created for +// nested transactions. + +XIDS +toku_xids_get_root_xids(void) { + static const struct XIDS_S root_xids = { + .num_xids = 0 + }; + + XIDS rval = (XIDS)&root_xids; + return rval; +} + +bool +toku_xids_can_create_child(XIDS xids) { + invariant(xids->num_xids < MAX_TRANSACTION_RECORDS); + return (xids->num_xids + 1) != MAX_TRANSACTION_RECORDS; +} + +int +toku_xids_create_unknown_child(XIDS parent_xids, XIDS *xids_p) { + // Postcondition: + // xids_p points to an xids that is an exact copy of parent_xids, but with room for one more xid. 
+ int rval; + invariant(parent_xids); + uint32_t num_child_xids = parent_xids->num_xids + 1; + // assumes that caller has verified that num_child_xids will + // be less than MAX_TRANSACTIN_RECORDS + invariant(num_child_xids < MAX_TRANSACTION_RECORDS); + size_t new_size = sizeof(*parent_xids) + num_child_xids*sizeof(parent_xids->ids[0]); + XIDS CAST_FROM_VOIDP(xids, toku_xmalloc(new_size)); + // Clone everything (parent does not have the newest xid). + memcpy(xids, parent_xids, new_size - sizeof(xids->ids[0])); + *xids_p = xids; + rval = 0; + return rval; +} + +void +toku_xids_finalize_with_child(XIDS xids, TXNID this_xid) { + // Precondition: + // - xids was created by toku_xids_create_unknown_child + TXNID this_xid_disk = toku_htod64(this_xid); + uint32_t num_child_xids = ++xids->num_xids; + xids->ids[num_child_xids - 1] = this_xid_disk; +} + +// xids is immutable. This function creates a new xids by copying the +// parent's list and then appending the xid of the new transaction. +int +toku_xids_create_child(XIDS parent_xids, // xids list for parent transaction + XIDS *xids_p, // xids list created + TXNID this_xid) { // xid of this transaction (new innermost) + bool can_create_child = toku_xids_can_create_child(parent_xids); + if (!can_create_child) { + return EINVAL; + } + toku_xids_create_unknown_child(parent_xids, xids_p); + toku_xids_finalize_with_child(*xids_p, this_xid); + return 0; +} + +void +toku_xids_create_from_buffer(struct rbuf *rb, // xids list for parent transaction + XIDS *xids_p) { // xids list created + uint8_t num_xids = rbuf_char(rb); + invariant(num_xids < MAX_TRANSACTION_RECORDS); + XIDS CAST_FROM_VOIDP(xids, toku_xmalloc(sizeof(*xids) + num_xids*sizeof(xids->ids[0]))); + xids->num_xids = num_xids; + uint8_t index; + for (index = 0; index < xids->num_xids; index++) { + rbuf_TXNID(rb, &xids->ids[index]); + } + *xids_p = xids; +} + +void +toku_xids_destroy(XIDS *xids_p) { + if (*xids_p != toku_xids_get_root_xids()) toku_free(*xids_p); + *xids_p 
= NULL; +} + +// Return xid at requested position. +// If requesting an xid out of range (which will be the case if xids array is empty) +// then return 0, the xid of the root transaction. +TXNID +toku_xids_get_xid(XIDS xids, uint8_t index) { + invariant(index < toku_xids_get_num_xids(xids)); + TXNID rval = xids->ids[index]; + rval = toku_dtoh64(rval); + return rval; +} + +uint8_t +toku_xids_get_num_xids(XIDS xids) { + uint8_t rval = xids->num_xids; + return rval; +} + +// Return innermost xid +TXNID +toku_xids_get_innermost_xid(XIDS xids) { + TXNID rval = TXNID_NONE; + if (toku_xids_get_num_xids(xids)) { + // if clause above makes this cast ok + uint8_t innermost_xid = (uint8_t) (toku_xids_get_num_xids(xids) - 1); + rval = toku_xids_get_xid(xids, innermost_xid); + } + return rval; +} + +TXNID +toku_xids_get_outermost_xid(XIDS xids) { + TXNID rval = TXNID_NONE; + if (toku_xids_get_num_xids(xids)) { + rval = toku_xids_get_xid(xids, 0); + } + return rval; +} + +void +toku_xids_cpy(XIDS target, XIDS source) { + size_t size = toku_xids_get_size(source); + memcpy(target, source, size); +} + +// return size in bytes +uint32_t +toku_xids_get_size(XIDS xids) { + uint32_t rval; + uint8_t num_xids = xids->num_xids; + rval = sizeof(*xids) + num_xids * sizeof(xids->ids[0]); + return rval; +} + +uint32_t +toku_xids_get_serialize_size(XIDS xids) { + uint32_t rval; + uint8_t num_xids = xids->num_xids; + rval = 1 + //num xids + 8 * num_xids; + return rval; +} + +unsigned char * +toku_xids_get_end_of_array(XIDS xids) { + TXNID *r = xids->ids + xids->num_xids; + return (unsigned char*)r; +} + +void wbuf_nocrc_xids(struct wbuf *wb, XIDS xids) { + wbuf_nocrc_char(wb, (unsigned char)xids->num_xids); + uint8_t index; + for (index = 0; index < xids->num_xids; index++) { + wbuf_nocrc_TXNID(wb, xids->ids[index]); + } +} + +void +toku_xids_fprintf(FILE *fp, XIDS xids) { + uint8_t index; + unsigned num_xids = toku_xids_get_num_xids(xids); + fprintf(fp, "[|%u| ", num_xids); + for (index = 0; 
index < toku_xids_get_num_xids(xids); index++) { + if (index) fprintf(fp, ","); + fprintf(fp, "%" PRIx64, toku_xids_get_xid(xids, index)); + } + fprintf(fp, "]"); +} + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/xids.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/xids.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn/xids.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn/xids.h 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,170 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* Purpose of this file is to provide the world with everything necessary + * to use the xids and nothing else. + * Internal requirements of the xids logic do not belong here. + * + * xids is (abstractly) an immutable list of nested transaction ids, accessed only + * via the functions in this file. + * + * See design documentation for nested transactions at + * TokuWiki/Imp/TransactionsOverview. + */ + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include "ft/txn/txn.h" +#include "ft/serialize/rbuf.h" +#include "ft/serialize/wbuf.h" + +/* The number of transaction ids stored in the xids structure is + * represented by an 8-bit value. The value 255 is reserved. + * The constant MAX_NESTED_TRANSACTIONS is one less because + * one slot in the packed leaf entry is used for the implicit + * root transaction (id 0). 
+ */ +enum { + MAX_NESTED_TRANSACTIONS = 253, + MAX_TRANSACTION_RECORDS = MAX_NESTED_TRANSACTIONS + 1 +}; + +// Variable size list of transaction ids (known in design doc as xids<>). +// ids[0] is the outermost transaction. +// ids[num_xids - 1] is the innermost transaction. +// Should only be accessed by accessor functions toku_xids_xxx, not directly. + +// If the xids struct is unpacked, the compiler aligns the ids[] and we waste a lot of space +struct __attribute__((__packed__)) XIDS_S { + // maximum value of MAX_TRANSACTION_RECORDS - 1 because transaction 0 is implicit + uint8_t num_xids; + TXNID ids[]; +}; +typedef struct XIDS_S *XIDS; + +// Retrieve an XIDS representing the root transaction. +XIDS toku_xids_get_root_xids(void); + +bool toku_xids_can_create_child(XIDS xids); + +void toku_xids_cpy(XIDS target, XIDS source); + +//Creates an XIDS representing this transaction. +//You must pass in an XIDS representing the parent of this transaction. +int toku_xids_create_child(XIDS parent_xids, XIDS *xids_p, TXNID this_xid); + +// The following two functions (in order) are equivalent to toku_xids_create child, +// but allow you to do most of the work without knowing the new xid. 
+int toku_xids_create_unknown_child(XIDS parent_xids, XIDS *xids_p); +void toku_xids_finalize_with_child(XIDS xids, TXNID this_xid); + +void toku_xids_create_from_buffer(struct rbuf *rb, XIDS *xids_p); + +void toku_xids_destroy(XIDS *xids_p); + +TXNID toku_xids_get_xid(XIDS xids, uint8_t index); + +uint8_t toku_xids_get_num_xids(XIDS xids); + +TXNID toku_xids_get_innermost_xid(XIDS xids); +TXNID toku_xids_get_outermost_xid(XIDS xids); + +// return size in bytes +uint32_t toku_xids_get_size(XIDS xids); + +uint32_t toku_xids_get_serialize_size(XIDS xids); + +unsigned char *toku_xids_get_end_of_array(XIDS xids); + +void wbuf_nocrc_xids(struct wbuf *wb, XIDS xids); + +void toku_xids_fprintf(FILE* fp, XIDS xids); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,795 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - - -#include "ft.h" -#include "txn.h" -#include "log-internal.h" -#include "checkpoint.h" -#include "ule.h" -#include "rollback-apply.h" -#include "txn_manager.h" -#include "txn_child_manager.h" -#include - -/////////////////////////////////////////////////////////////////////////////////// -// Engine status -// -// Status is intended for display to humans to help understand system behavior. -// It does not need to be perfectly thread-safe. 
- -static TXN_STATUS_S txn_status; - -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(txn_status, k, c, t, "txn: " l, inc) - -void -txn_status_init(void) { - // Note, this function initializes the keyname, type, and legend fields. - // Value fields are initialized to zero by compiler. - STATUS_INIT(TXN_BEGIN, TXN_BEGIN, PARCOUNT, "begin", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(TXN_READ_BEGIN, TXN_BEGIN_READ_ONLY, PARCOUNT, "begin read only", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(TXN_COMMIT, TXN_COMMITS, PARCOUNT, "successful commits", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(TXN_ABORT, TXN_ABORTS, PARCOUNT, "aborts", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - txn_status.initialized = true; -} - -void txn_status_destroy(void) { - for (int i = 0; i < TXN_STATUS_NUM_ROWS; ++i) { - if (txn_status.status[i].type == PARCOUNT) { - destroy_partitioned_counter(txn_status.status[i].value.parcount); - } - } -} - -#undef STATUS_INIT - -#define STATUS_INC(x, d) increment_partitioned_counter(txn_status.status[x].value.parcount, d) - -void -toku_txn_get_status(TXN_STATUS s) { - *s = txn_status; -} - -void -toku_txn_lock(TOKUTXN txn) -{ - toku_mutex_lock(&txn->txn_lock); -} - -void -toku_txn_unlock(TOKUTXN txn) -{ - toku_mutex_unlock(&txn->txn_lock); -} - -uint64_t -toku_txn_get_root_id(TOKUTXN txn) -{ - return txn->txnid.parent_id64; -} - -bool txn_declared_read_only(TOKUTXN txn) { - return txn->declared_read_only; -} - -int -toku_txn_begin_txn ( - DB_TXN *container_db_txn, - TOKUTXN parent_tokutxn, - TOKUTXN *tokutxn, - TOKULOGGER logger, - TXN_SNAPSHOT_TYPE snapshot_type, - bool read_only - ) -{ - int r = toku_txn_begin_with_xid( - parent_tokutxn, - tokutxn, - logger, - TXNID_PAIR_NONE, - snapshot_type, - container_db_txn, - false, // for_recovery - read_only - ); - return r; -} - - -static void -txn_create_xids(TOKUTXN txn, TOKUTXN parent) { - XIDS xids; - XIDS parent_xids; - if (parent == NULL) { - parent_xids = 
xids_get_root_xids(); - } else { - parent_xids = parent->xids; - } - xids_create_unknown_child(parent_xids, &xids); - TXNID finalized_xid = (parent == NULL) ? txn->txnid.parent_id64 : txn->txnid.child_id64; - xids_finalize_with_child(xids, finalized_xid); - txn->xids = xids; -} - -// Allocate and initialize a txn -static void toku_txn_create_txn(TOKUTXN *txn_ptr, TOKUTXN parent, TOKULOGGER logger, TXN_SNAPSHOT_TYPE snapshot_type, DB_TXN *container_db_txn, bool for_checkpoint, bool read_only); - -int -toku_txn_begin_with_xid ( - TOKUTXN parent, - TOKUTXN *txnp, - TOKULOGGER logger, - TXNID_PAIR xid, - TXN_SNAPSHOT_TYPE snapshot_type, - DB_TXN *container_db_txn, - bool for_recovery, - bool read_only - ) -{ - int r = 0; - TOKUTXN txn; - // check for case where we are trying to - // create too many nested transactions - if (!read_only && parent && !xids_can_create_child(parent->xids)) { - r = EINVAL; - goto exit; - } - if (read_only && parent) { - invariant(txn_declared_read_only(parent)); - } - toku_txn_create_txn(&txn, parent, logger, snapshot_type, container_db_txn, for_recovery, read_only); - // txnid64, snapshot_txnid64 - // will be set in here. 
- if (for_recovery) { - if (parent == NULL) { - invariant(xid.child_id64 == TXNID_NONE); - toku_txn_manager_start_txn_for_recovery( - txn, - logger->txn_manager, - xid.parent_id64 - ); - } - else { - parent->child_manager->start_child_txn_for_recovery(txn, parent, xid); - } - } - else { - assert(xid.parent_id64 == TXNID_NONE); - assert(xid.child_id64 == TXNID_NONE); - if (parent == NULL) { - toku_txn_manager_start_txn( - txn, - logger->txn_manager, - snapshot_type, - read_only - ); - } - else { - parent->child_manager->start_child_txn(txn, parent); - toku_txn_manager_handle_snapshot_create_for_child_txn( - txn, - logger->txn_manager, - snapshot_type - ); - } - } - if (!read_only) { - // this call will set txn->xids - txn_create_xids(txn, parent); - } - *txnp = txn; -exit: - return r; -} - -DB_TXN * -toku_txn_get_container_db_txn (TOKUTXN tokutxn) { - DB_TXN * container = tokutxn->container_db_txn; - return container; -} - -void toku_txn_set_container_db_txn (TOKUTXN tokutxn, DB_TXN*container) { - tokutxn->container_db_txn = container; -} - -static void invalidate_xa_xid (TOKU_XA_XID *xid) { - TOKU_ANNOTATE_NEW_MEMORY(xid, sizeof(*xid)); // consider it to be all invalid for valgrind - xid->formatID = -1; // According to the XA spec, -1 means "invalid data" -} - -static void toku_txn_create_txn ( - TOKUTXN *tokutxn, - TOKUTXN parent_tokutxn, - TOKULOGGER logger, - TXN_SNAPSHOT_TYPE snapshot_type, - DB_TXN *container_db_txn, - bool for_recovery, - bool read_only - ) -{ - assert(logger->rollback_cachefile); - - omt open_fts; - open_fts.create_no_array(); - - struct txn_roll_info roll_info = { - .num_rollback_nodes = 0, - .num_rollentries = 0, - .num_rollentries_processed = 0, - .rollentry_raw_count = 0, - .spilled_rollback_head = ROLLBACK_NONE, - .spilled_rollback_tail = ROLLBACK_NONE, - .current_rollback = ROLLBACK_NONE, - }; - -static txn_child_manager tcm; - - struct tokutxn new_txn = { - .txnid = {.parent_id64 = TXNID_NONE, .child_id64 = TXNID_NONE }, - 
.snapshot_txnid64 = TXNID_NONE, - .snapshot_type = for_recovery ? TXN_SNAPSHOT_NONE : snapshot_type, - .for_recovery = for_recovery, - .logger = logger, - .parent = parent_tokutxn, - .child = NULL, - .child_manager_s = tcm, - .child_manager = NULL, - .container_db_txn = container_db_txn, - .live_root_txn_list = nullptr, - .xids = NULL, - .snapshot_next = NULL, - .snapshot_prev = NULL, - .begin_was_logged = false, - .declared_read_only = read_only, - .do_fsync = false, - .force_fsync_on_commit = false, - .do_fsync_lsn = ZERO_LSN, - .xa_xid = {0}, - .progress_poll_fun = NULL, - .progress_poll_fun_extra = NULL, - .txn_lock = ZERO_MUTEX_INITIALIZER, - .open_fts = open_fts, - .roll_info = roll_info, - .state_lock = ZERO_MUTEX_INITIALIZER, - .state_cond = ZERO_COND_INITIALIZER, - .state = TOKUTXN_LIVE, - .num_pin = 0, - .client_id = 0, - }; - - TOKUTXN result = NULL; - XMEMDUP(result, &new_txn); - invalidate_xa_xid(&result->xa_xid); - if (parent_tokutxn == NULL) { - result->child_manager = &result->child_manager_s; - result->child_manager->init(result); - } - else { - result->child_manager = parent_tokutxn->child_manager; - } - - toku_mutex_init(&result->txn_lock, nullptr); - - toku_pthread_mutexattr_t attr; - toku_mutexattr_init(&attr); - toku_mutexattr_settype(&attr, TOKU_MUTEX_ADAPTIVE); - toku_mutex_init(&result->state_lock, &attr); - toku_mutexattr_destroy(&attr); - - toku_cond_init(&result->state_cond, nullptr); - - *tokutxn = result; - - if (read_only) { - STATUS_INC(TXN_READ_BEGIN, 1); - } - else { - STATUS_INC(TXN_BEGIN, 1); - } -} - -void -toku_txn_update_xids_in_txn(TOKUTXN txn, TXNID xid) -{ - // these should not have been set yet - invariant(txn->txnid.parent_id64 == TXNID_NONE); - invariant(txn->txnid.child_id64 == TXNID_NONE); - txn->txnid.parent_id64 = xid; - txn->txnid.child_id64 = TXNID_NONE; -} - -//Used on recovery to recover a transaction. 
-int -toku_txn_load_txninfo (TOKUTXN txn, TXNINFO info) { - txn->roll_info.rollentry_raw_count = info->rollentry_raw_count; - uint32_t i; - for (i = 0; i < info->num_fts; i++) { - FT ft = info->open_fts[i]; - toku_txn_maybe_note_ft(txn, ft); - } - txn->force_fsync_on_commit = info->force_fsync_on_commit; - txn->roll_info.num_rollback_nodes = info->num_rollback_nodes; - txn->roll_info.num_rollentries = info->num_rollentries; - - txn->roll_info.spilled_rollback_head = info->spilled_rollback_head; - txn->roll_info.spilled_rollback_tail = info->spilled_rollback_tail; - txn->roll_info.current_rollback = info->current_rollback; - return 0; -} - -int toku_txn_commit_txn(TOKUTXN txn, int nosync, - TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra) -// Effect: Doesn't close the txn, just performs the commit operations. -// If release_multi_operation_client_lock is true, then unlock that lock (even if an error path is taken) -{ - return toku_txn_commit_with_lsn(txn, nosync, ZERO_LSN, - poll, poll_extra); -} - -struct xcommit_info { - int r; - TOKUTXN txn; -}; - -static void txn_note_commit(TOKUTXN txn) { - // Purpose: - // Delay until any indexer is done pinning this transaction. - // Update status of a transaction from live->committing (or prepared->committing) - // Do so in a thread-safe manner that does not conflict with hot indexing or - // begin checkpoint. - if (toku_txn_is_read_only(txn)) { - // Neither hot indexing nor checkpoint do any work with readonly txns, - // so we can skip taking the txn_manager lock here. 
- invariant(txn->state==TOKUTXN_LIVE); - txn->state = TOKUTXN_COMMITTING; - goto done; - } - if (txn->state==TOKUTXN_PREPARING) { - invalidate_xa_xid(&txn->xa_xid); - } - // for hot indexing, if hot index is processing - // this transaction in some leafentry, then we cannot change - // the state to commit or abort until - // hot index is done with that leafentry - toku_txn_lock_state(txn); - while (txn->num_pin > 0) { - toku_cond_wait( - &txn->state_cond, - &txn->state_lock - ); - } - txn->state = TOKUTXN_COMMITTING; - toku_txn_unlock_state(txn); -done: - return; -} - -int toku_txn_commit_with_lsn(TOKUTXN txn, int nosync, LSN oplsn, - TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra) -{ - // there should be no child when we commit or abort a TOKUTXN - invariant(txn->child == NULL); - txn_note_commit(txn); - - // Child transactions do not actually 'commit'. They promote their - // changes to parent, so no need to fsync if this txn has a parent. The - // do_sync state is captured in the txn for txn_maybe_fsync_log function - // Additionally, if the transaction was first prepared, we do not need to - // fsync because the prepare caused an fsync of the log. In this case, - // we do not need an additional of the log. We rely on the client running - // recovery to properly recommit this transaction if the commit - // does not make it to disk. In the case of MySQL, that would be the - // binary log. - txn->do_fsync = !txn->parent && (txn->force_fsync_on_commit || (!nosync && txn->roll_info.num_rollentries>0)); - - txn->progress_poll_fun = poll; - txn->progress_poll_fun_extra = poll_extra; - - if (!toku_txn_is_read_only(txn)) { - toku_log_xcommit(txn->logger, &txn->do_fsync_lsn, 0, txn, txn->txnid); - } - // If !txn->begin_was_logged, we could skip toku_rollback_commit - // but it's cheap (only a number of function calls that return immediately) - // since there were no writes. 
Skipping it would mean we would need to be careful - // in case we added any additional required cleanup into those functions in the future. - int r = toku_rollback_commit(txn, oplsn); - STATUS_INC(TXN_COMMIT, 1); - return r; -} - -int toku_txn_abort_txn(TOKUTXN txn, - TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra) -// Effect: Doesn't close the txn, just performs the abort operations. -// If release_multi_operation_client_lock is true, then unlock that lock (even if an error path is taken) -{ - return toku_txn_abort_with_lsn(txn, ZERO_LSN, poll, poll_extra); -} - -static void txn_note_abort(TOKUTXN txn) { - // Purpose: - // Delay until any indexer is done pinning this transaction. - // Update status of a transaction from live->aborting (or prepared->aborting) - // Do so in a thread-safe manner that does not conflict with hot indexing or - // begin checkpoint. - if (toku_txn_is_read_only(txn)) { - // Neither hot indexing nor checkpoint do any work with readonly txns, - // so we can skip taking the state lock here. 
- invariant(txn->state==TOKUTXN_LIVE); - txn->state = TOKUTXN_ABORTING; - goto done; - } - if (txn->state==TOKUTXN_PREPARING) { - invalidate_xa_xid(&txn->xa_xid); - } - // for hot indexing, if hot index is processing - // this transaction in some leafentry, then we cannot change - // the state to commit or abort until - // hot index is done with that leafentry - toku_txn_lock_state(txn); - while (txn->num_pin > 0) { - toku_cond_wait( - &txn->state_cond, - &txn->state_lock - ); - } - txn->state = TOKUTXN_ABORTING; - toku_txn_unlock_state(txn); -done: - return; -} - -int toku_txn_abort_with_lsn(TOKUTXN txn, LSN oplsn, - TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra) -{ - // there should be no child when we commit or abort a TOKUTXN - invariant(txn->child == NULL); - txn_note_abort(txn); - - txn->progress_poll_fun = poll; - txn->progress_poll_fun_extra = poll_extra; - txn->do_fsync = false; - - if (!toku_txn_is_read_only(txn)) { - toku_log_xabort(txn->logger, &txn->do_fsync_lsn, 0, txn, txn->txnid); - } - // If !txn->begin_was_logged, we could skip toku_rollback_abort - // but it's cheap (only a number of function calls that return immediately) - // since there were no writes. Skipping it would mean we would need to be careful - // in case we added any additional required cleanup into those functions in the future. - int r = toku_rollback_abort(txn, oplsn); - STATUS_INC(TXN_ABORT, 1); - return r; -} - -static void copy_xid (TOKU_XA_XID *dest, TOKU_XA_XID *source) { - TOKU_ANNOTATE_NEW_MEMORY(dest, sizeof(*dest)); - dest->formatID = source->formatID; - dest->gtrid_length = source->gtrid_length; - dest->bqual_length = source->bqual_length; - memcpy(dest->data, source->data, source->gtrid_length+source->bqual_length); -} - -void toku_txn_prepare_txn (TOKUTXN txn, TOKU_XA_XID *xa_xid) { - if (txn->parent || toku_txn_is_read_only(txn)) { - // We do not prepare children. - // - // Readonly transactions do the same if they commit or abort, so - // XA guarantees are free. 
No need to pay for overhead of prepare. - return; - } - assert(txn->state==TOKUTXN_LIVE); - // This state transition must be protected against begin_checkpoint - // Therefore, the caller must have the mo lock held - toku_txn_lock_state(txn); - txn->state = TOKUTXN_PREPARING; - toku_txn_unlock_state(txn); - // Do we need to do an fsync? - txn->do_fsync = (txn->force_fsync_on_commit || txn->roll_info.num_rollentries>0); - copy_xid(&txn->xa_xid, xa_xid); - // This list will go away with #4683, so we wn't need the ydb lock for this anymore. - toku_log_xprepare(txn->logger, &txn->do_fsync_lsn, 0, txn, txn->txnid, xa_xid); -} - -void toku_txn_get_prepared_xa_xid (TOKUTXN txn, TOKU_XA_XID *xid) { - copy_xid(xid, &txn->xa_xid); -} - -int toku_logger_recover_txn (TOKULOGGER logger, struct tokulogger_preplist preplist[/*count*/], long count, /*out*/ long *retp, uint32_t flags) { - return toku_txn_manager_recover_root_txn( - logger->txn_manager, - preplist, - count, - retp, - flags - ); -} - -void toku_txn_maybe_fsync_log(TOKULOGGER logger, LSN do_fsync_lsn, bool do_fsync) { - if (logger && do_fsync) { - toku_logger_fsync_if_lsn_not_fsynced(logger, do_fsync_lsn); - } -} - -void toku_txn_get_fsync_info(TOKUTXN ttxn, bool* do_fsync, LSN* do_fsync_lsn) { - *do_fsync = ttxn->do_fsync; - *do_fsync_lsn = ttxn->do_fsync_lsn; -} - -void toku_txn_close_txn(TOKUTXN txn) { - toku_txn_complete_txn(txn); - toku_txn_destroy_txn(txn); -} - -int remove_txn (const FT &h, const uint32_t UU(idx), TOKUTXN const txn); -int remove_txn (const FT &h, const uint32_t UU(idx), TOKUTXN const UU(txn)) -// Effect: This function is called on every open FT that a transaction used. -// This function removes the transaction from that FT. -{ - toku_ft_remove_txn_ref(h); - - return 0; -} - -// for every BRT in txn, remove it. 
-static void note_txn_closing (TOKUTXN txn) { - txn->open_fts.iterate(txn); -} - -void toku_txn_complete_txn(TOKUTXN txn) { - assert(txn->roll_info.spilled_rollback_head.b == ROLLBACK_NONE.b); - assert(txn->roll_info.spilled_rollback_tail.b == ROLLBACK_NONE.b); - assert(txn->roll_info.current_rollback.b == ROLLBACK_NONE.b); - assert(txn->num_pin == 0); - assert(txn->state == TOKUTXN_COMMITTING || txn->state == TOKUTXN_ABORTING); - if (txn->parent) { - toku_txn_manager_handle_snapshot_destroy_for_child_txn( - txn, - txn->logger->txn_manager, - txn->snapshot_type - ); - txn->parent->child_manager->finish_child_txn(txn); - } - else { - toku_txn_manager_finish_txn(txn->logger->txn_manager, txn); - txn->child_manager->destroy(); - } - // note that here is another place we depend on - // this function being called with the multi operation lock - note_txn_closing(txn); -} - -void toku_txn_destroy_txn(TOKUTXN txn) { - txn->open_fts.destroy(); - if (txn->xids) { - xids_destroy(&txn->xids); - } - toku_mutex_destroy(&txn->txn_lock); - toku_mutex_destroy(&txn->state_lock); - toku_cond_destroy(&txn->state_cond); - toku_free(txn); -} - -XIDS toku_txn_get_xids (TOKUTXN txn) { - if (txn==0) return xids_get_root_xids(); - else return txn->xids; -} - -void toku_txn_force_fsync_on_commit(TOKUTXN txn) { - txn->force_fsync_on_commit = true; -} - -TXNID toku_get_oldest_in_live_root_txn_list(TOKUTXN txn) { - TXNID xid; - if (txn->live_root_txn_list->size()>0) { - int r = txn->live_root_txn_list->fetch(0, &xid); - assert_zero(r); - } - else { - xid = TXNID_NONE; - } - return xid; -} - -bool toku_is_txn_in_live_root_txn_list(const xid_omt_t &live_root_txn_list, TXNID xid) { - TXNID txnid; - bool retval = false; - int r = live_root_txn_list.find_zero(xid, &txnid, nullptr); - if (r==0) { - invariant(txnid == xid); - retval = true; - } - else { - invariant(r==DB_NOTFOUND); - } - return retval; -} - -TOKUTXN_STATE -toku_txn_get_state(TOKUTXN txn) { - return txn->state; -} - -static void 
-maybe_log_begin_txn_for_write_operation_unlocked(TOKUTXN txn) { - // We now hold the lock. - if (txn->begin_was_logged) { - return; - } - TOKUTXN parent; - parent = txn->parent; - TXNID_PAIR xid; - xid = txn->txnid; - TXNID_PAIR pxid; - pxid = TXNID_PAIR_NONE; - if (parent) { - // Recursively log parent first if necessary. - // Transactions cannot do work if they have children, - // so the lowest level child's lock is sufficient for ancestors. - maybe_log_begin_txn_for_write_operation_unlocked(parent); - pxid = parent->txnid; - } - - toku_log_xbegin(txn->logger, NULL, 0, xid, pxid); - txn->begin_was_logged = true; -} - -void -toku_maybe_log_begin_txn_for_write_operation(TOKUTXN txn) { - toku_txn_lock(txn); - maybe_log_begin_txn_for_write_operation_unlocked(txn); - toku_txn_unlock(txn); -} - -bool -toku_txn_is_read_only(TOKUTXN txn) { - // No need to recursively check children because parents are - // recursively logged before children. - if (!txn->begin_was_logged) { - // Did no work. - invariant(txn->roll_info.num_rollentries == 0); - invariant(txn->do_fsync_lsn.lsn == ZERO_LSN.lsn); - invariant(txn->open_fts.size() == 0); - invariant(txn->num_pin==0); - return true; - } - return false; -} - -// needed for hot indexing -void toku_txn_lock_state(TOKUTXN txn) { - toku_mutex_lock(&txn->state_lock); -} -void toku_txn_unlock_state(TOKUTXN txn){ - toku_mutex_unlock(&txn->state_lock); -} - - -// prevents a client thread from transitioning txn from LIVE|PREPARING -> COMMITTING|ABORTING -// hot indexing may need a transactions to stay in the LIVE|PREPARING state while it processes -// a leafentry. 
-void toku_txn_pin_live_txn_unlocked(TOKUTXN txn) { - assert(txn->state == TOKUTXN_LIVE || txn->state == TOKUTXN_PREPARING); - assert(!toku_txn_is_read_only(txn)); - txn->num_pin++; -} - -// allows a client thread to go back to being able to transition txn -// from LIVE|PREPARING -> COMMITTING|ABORTING -void toku_txn_unpin_live_txn(TOKUTXN txn) { - assert(txn->state == TOKUTXN_LIVE || txn->state == TOKUTXN_PREPARING); - assert(txn->num_pin > 0); - toku_txn_lock_state(txn); - txn->num_pin--; - if (txn->num_pin == 0) { - toku_cond_broadcast(&txn->state_cond); - } - toku_txn_unlock_state(txn); -} - -bool toku_txn_has_spilled_rollback(TOKUTXN txn) { - return txn_has_spilled_rollback_logs(txn); -} - -uint64_t toku_txn_get_client_id(TOKUTXN txn) { - return txn->client_id; -} - -void toku_txn_set_client_id(TOKUTXN txn, uint64_t client_id) { - txn->client_id = client_id; -} - -#include -void __attribute__((__constructor__)) toku_txn_status_helgrind_ignore(void); -void toku_txn_status_helgrind_ignore(void) { - TOKU_VALGRIND_HG_DISABLE_CHECKING(&txn_status, sizeof txn_status); -} - -#undef STATUS_VALUE diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn_child_manager.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn_child_manager.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn_child_manager.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn_child_manager.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,194 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id: rollback.cc 49033 2012-10-17 18:48:30Z zardosht $" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this 
COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. 
- - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include "log-internal.h" -#include "txn_child_manager.h" - -// -// initialized a txn_child_manager, -// when called, root->txnid.parent_id64 may not yet be set -// -void txn_child_manager::init(TOKUTXN root) { - invariant(root->txnid.child_id64 == TXNID_NONE); - invariant(root->parent == NULL); - m_root = root; - m_last_xid = TXNID_NONE; - ZERO_STRUCT(m_mutex); - - toku_pthread_mutexattr_t attr; - toku_mutexattr_init(&attr); - toku_mutexattr_settype(&attr, TOKU_MUTEX_ADAPTIVE); - toku_mutex_init(&m_mutex, &attr); - toku_mutexattr_destroy(&attr); -} - -void txn_child_manager::destroy() { - toku_mutex_destroy(&m_mutex); -} - -void txn_child_manager::start_child_txn_for_recovery(TOKUTXN child, TOKUTXN parent, TXNID_PAIR txnid) { - invariant(parent->txnid.parent_id64 == m_root->txnid.parent_id64); - invariant(txnid.parent_id64 == m_root->txnid.parent_id64); - - child->txnid = txnid; - toku_mutex_lock(&m_mutex); - if (txnid.child_id64 > m_last_xid) { - m_last_xid = txnid.child_id64; - } - parent->child = child; - toku_mutex_unlock(&m_mutex); -} - -void txn_child_manager::start_child_txn(TOKUTXN child, TOKUTXN parent) { - invariant(parent->txnid.parent_id64 == m_root->txnid.parent_id64); - child->txnid.parent_id64 = m_root->txnid.parent_id64; - toku_mutex_lock(&m_mutex); - - ++m_last_xid; - // Here we ensure that the child_id64 is never equal to the parent_id64 - // We do this to make this feature work more easily with the XIDs - // struct and message application. The XIDs struct stores the parent id - // as the first TXNID, and subsequent TXNIDs store child ids. So, if we - // have a case where the parent id is the same as the child id, we will - // have to do some tricky maneuvering in the message application code - // in ule.cc. So, to lessen the probability of bugs, we ensure that the - // parent id is not the same as the child id. 
- if (m_last_xid == m_root->txnid.parent_id64) { - ++m_last_xid; - } - child->txnid.child_id64 = m_last_xid; - - parent->child = child; - toku_mutex_unlock(&m_mutex); -} - -void txn_child_manager::finish_child_txn(TOKUTXN child) { - invariant(child->txnid.parent_id64 == m_root->txnid.parent_id64); - toku_mutex_lock(&m_mutex); - child->parent->child = NULL; - toku_mutex_unlock(&m_mutex); -} - -void txn_child_manager::suspend() { - toku_mutex_lock(&m_mutex); -} - -void txn_child_manager::resume() { - toku_mutex_unlock(&m_mutex); -} - -void txn_child_manager::find_tokutxn_by_xid_unlocked(TXNID_PAIR xid, TOKUTXN* result) { - invariant(xid.parent_id64 == m_root->txnid.parent_id64); - TOKUTXN curr_txn = m_root; - while (curr_txn != NULL) { - if (xid.child_id64 == curr_txn->txnid.child_id64) { - *result = curr_txn; - break; - } - curr_txn = curr_txn->child; - } -} - -int txn_child_manager::iterate(txn_mgr_iter_callback cb, void* extra) { - TOKUTXN curr_txn = m_root; - int ret = 0; - toku_mutex_lock(&m_mutex); - while (curr_txn != NULL) { - ret = cb(curr_txn, extra); - if (ret != 0) { - break; - } - curr_txn = curr_txn->child; - } - toku_mutex_unlock(&m_mutex); - return ret; -} - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn_child_manager.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn_child_manager.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn_child_manager.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn_child_manager.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,120 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_TXN_CHILD_MANAGER_H -#define TOKU_TXN_CHILD_MANAGER_H - -#ident "$Id: rollback.h 49033 2012-10-17 18:48:30Z zardosht $" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - 
published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. 
- - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include "txn_manager.h" - -class txn_child_manager { -public: - void init (TOKUTXN root); - void destroy(); - void start_child_txn_for_recovery(TOKUTXN child, TOKUTXN parent, TXNID_PAIR txnid); - void start_child_txn(TOKUTXN child, TOKUTXN parent); - void finish_child_txn(TOKUTXN child); - void suspend(); - void resume(); - void find_tokutxn_by_xid_unlocked(TXNID_PAIR xid, TOKUTXN* result); - int iterate(txn_mgr_iter_callback cb, void* extra); - -private: - TXNID m_last_xid; - TOKUTXN m_root; - toku_mutex_t m_mutex; - -friend class txn_child_manager_unit_test; -}; - - -ENSURE_POD(txn_child_manager); - -#endif // TOKU_TXN_CHILD_MANAGER_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,225 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKUTXN_H -#define TOKUTXN_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "txn_manager.h" - -void txn_status_init(void); -void txn_status_destroy(void); - - -inline bool txn_pair_is_none(TXNID_PAIR txnid) { - return txnid.parent_id64 == TXNID_NONE && txnid.child_id64 == TXNID_NONE; -} - -inline bool txn_needs_snapshot(TXN_SNAPSHOT_TYPE snapshot_type, TOKUTXN parent) { - // we need a snapshot if the snapshot type is a child or - // if the snapshot type is root and we have no parent. 
- // Cases that we don't need a snapshot: when snapshot type is NONE - // or when it is ROOT and we have a parent - return (snapshot_type != TXN_SNAPSHOT_NONE && (parent==NULL || snapshot_type == TXN_SNAPSHOT_CHILD)); -} - -void toku_txn_lock(TOKUTXN txn); -void toku_txn_unlock(TOKUTXN txn); - -uint64_t toku_txn_get_root_id(TOKUTXN txn); -bool txn_declared_read_only(TOKUTXN txn); - -int toku_txn_begin_txn ( - DB_TXN *container_db_txn, - TOKUTXN parent_tokutxn, - TOKUTXN *tokutxn, - TOKULOGGER logger, - TXN_SNAPSHOT_TYPE snapshot_type, - bool read_only - ); - -DB_TXN * toku_txn_get_container_db_txn (TOKUTXN tokutxn); -void toku_txn_set_container_db_txn (TOKUTXN, DB_TXN*); - -// toku_txn_begin_with_xid is called from recovery and has no containing DB_TXN -int toku_txn_begin_with_xid ( - TOKUTXN parent_tokutxn, - TOKUTXN *tokutxn, - TOKULOGGER logger, - TXNID_PAIR xid, - TXN_SNAPSHOT_TYPE snapshot_type, - DB_TXN *container_db_txn, - bool for_recovery, - bool read_only - ); - -void toku_txn_update_xids_in_txn(TOKUTXN txn, TXNID xid); - -int toku_txn_load_txninfo (TOKUTXN txn, TXNINFO info); - -int toku_txn_commit_txn (TOKUTXN txn, int nosync, - TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); -int toku_txn_commit_with_lsn(TOKUTXN txn, int nosync, LSN oplsn, - TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); - -int toku_txn_abort_txn(TOKUTXN txn, - TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); -int toku_txn_abort_with_lsn(TOKUTXN txn, LSN oplsn, - TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); - -void toku_txn_prepare_txn (TOKUTXN txn, TOKU_XA_XID *xid); -// Effect: Do the internal work of preparing a transaction (does not log the prepare record). - -void toku_txn_get_prepared_xa_xid (TOKUTXN, TOKU_XA_XID *); -// Effect: Fill in the XID information for a transaction. The caller allocates the XID and the function fills in values. 
- -void toku_txn_maybe_fsync_log(TOKULOGGER logger, LSN do_fsync_lsn, bool do_fsync); - -void toku_txn_get_fsync_info(TOKUTXN ttxn, bool* do_fsync, LSN* do_fsync_lsn); - -// Complete and destroy a txn -void toku_txn_close_txn(TOKUTXN txn); - -// Remove a txn from any live txn lists -void toku_txn_complete_txn(TOKUTXN txn); - -// Free the memory of a txn -void toku_txn_destroy_txn(TOKUTXN txn); - -XIDS toku_txn_get_xids (TOKUTXN); - -// Force fsync on commit -void toku_txn_force_fsync_on_commit(TOKUTXN txn); - -typedef enum { - TXN_BEGIN, // total number of transactions begun (does not include recovered txns) - TXN_READ_BEGIN, // total number of read only transactions begun (does not include recovered txns) - TXN_COMMIT, // successful commits - TXN_ABORT, - TXN_STATUS_NUM_ROWS -} txn_status_entry; - -typedef struct { - bool initialized; - TOKU_ENGINE_STATUS_ROW_S status[TXN_STATUS_NUM_ROWS]; -} TXN_STATUS_S, *TXN_STATUS; - -void toku_txn_get_status(TXN_STATUS s); - -bool toku_is_txn_in_live_root_txn_list(const xid_omt_t &live_root_txn_list, TXNID xid); - -TXNID toku_get_oldest_in_live_root_txn_list(TOKUTXN txn); - -#include "txn_state.h" - -TOKUTXN_STATE toku_txn_get_state(TOKUTXN txn); - -struct tokulogger_preplist { - TOKU_XA_XID xid; - DB_TXN *txn; -}; -int toku_logger_recover_txn (TOKULOGGER logger, struct tokulogger_preplist preplist[/*count*/], long count, /*out*/ long *retp, uint32_t flags); - -void toku_maybe_log_begin_txn_for_write_operation(TOKUTXN txn); - -// Return whether txn (or it's descendents) have done no work. 
-bool toku_txn_is_read_only(TOKUTXN txn); - -void toku_txn_lock_state(TOKUTXN txn); -void toku_txn_unlock_state(TOKUTXN txn); -void toku_txn_pin_live_txn_unlocked(TOKUTXN txn); -void toku_txn_unpin_live_txn(TOKUTXN txn); - -bool toku_txn_has_spilled_rollback(TOKUTXN txn); - -uint64_t toku_txn_get_client_id(TOKUTXN txn); -void toku_txn_set_client_id(TOKUTXN txn, uint64_t client_id); - -#endif //TOKUTXN_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn_manager.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn_manager.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn_manager.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn_manager.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,1045 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. 
- -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include - -#include - -#include "log-internal.h" -#include "txn.h" -#include "checkpoint.h" -#include "ule.h" -#include "txn_manager.h" -#include "rollback.h" - -bool garbage_collection_debug = false; - -// internal locking functions, should use this instead of accessing lock directly -static void txn_manager_lock(TXN_MANAGER txn_manager); -static void txn_manager_unlock(TXN_MANAGER txn_manager); - -#if 0 -static bool is_txnid_live(TXN_MANAGER txn_manager, TXNID txnid) { - TOKUTXN result = NULL; - toku_txn_manager_id2txn_unlocked(txn_manager, txnid, &result); - return (result != NULL); -} -#endif - -//Heaviside function to search through an OMT by a TXNID -int find_by_xid (const TOKUTXN &txn, const TXNID &txnidfind); - -static bool is_txnid_live(TXN_MANAGER txn_manager, TXNID txnid) { - TOKUTXN result = NULL; - TXNID_PAIR id = { .parent_id64 = txnid, .child_id64 = TXNID_NONE }; - toku_txn_manager_id2txn_unlocked(txn_manager, id, &result); - return (result != NULL); -} - -static void toku_txn_manager_clone_state_for_gc_unlocked( - TXN_MANAGER txn_manager, - xid_omt_t* snapshot_xids, - rx_omt_t* referenced_xids, - xid_omt_t* live_root_txns - ); - -static void -verify_snapshot_system(TXN_MANAGER txn_manager UU()) { - uint32_t num_snapshot_txnids = txn_manager->num_snapshots; - TXNID snapshot_txnids[num_snapshot_txnids]; - TOKUTXN snapshot_txns[num_snapshot_txnids]; - uint32_t num_live_txns = txn_manager->live_root_txns.size(); - TOKUTXN live_txns[num_live_txns]; - uint32_t num_referenced_xid_tuples = txn_manager->referenced_xids.size(); - struct referenced_xid_tuple *referenced_xid_tuples[num_referenced_xid_tuples]; - - // do this to get an omt of snapshot_txnids - xid_omt_t snapshot_txnids_omt; - rx_omt_t referenced_xids_omt; - xid_omt_t live_root_txns_omt; - toku_txn_manager_clone_state_for_gc_unlocked( - txn_manager, - &snapshot_txnids_omt, - &referenced_xids_omt, - &live_root_txns_omt - ); - - int r; - uint32_t i; - uint32_t j; - //set up arrays for easier 
access - { - TOKUTXN curr_txn = txn_manager->snapshot_head; - uint32_t curr_index = 0; - while (curr_txn != NULL) { - snapshot_txns[curr_index] = curr_txn; - snapshot_txnids[curr_index] = curr_txn->snapshot_txnid64; - curr_txn = curr_txn->snapshot_next; - curr_index++; - } - } - - for (i = 0; i < num_live_txns; i++) { - r = txn_manager->live_root_txns.fetch(i, &live_txns[i]); - assert_zero(r); - } - for (i = 0; i < num_referenced_xid_tuples; i++) { - r = txn_manager->referenced_xids.fetch(i, &referenced_xid_tuples[i]); - assert_zero(r); - } - - { - //Verify snapshot_txnids - for (i = 0; i < num_snapshot_txnids; i++) { - TXNID snapshot_xid = snapshot_txnids[i]; - TOKUTXN snapshot_txn = snapshot_txns[i]; - uint32_t num_live_root_txn_list = snapshot_txn->live_root_txn_list->size(); - TXNID live_root_txn_list[num_live_root_txn_list]; - { - for (j = 0; j < num_live_root_txn_list; j++) { - r = snapshot_txn->live_root_txn_list->fetch(j, &live_root_txn_list[j]); - assert_zero(r); - } - } - { - // Only committed entries have return a youngest. - TXNID youngest = toku_get_youngest_live_list_txnid_for( - snapshot_xid, - snapshot_txnids_omt, - txn_manager->referenced_xids - ); - invariant(youngest == TXNID_NONE); - } - for (j = 0; j < num_live_root_txn_list; j++) { - TXNID live_xid = live_root_txn_list[j]; - invariant(live_xid <= snapshot_xid); - TXNID youngest = toku_get_youngest_live_list_txnid_for( - live_xid, - snapshot_txnids_omt, - txn_manager->referenced_xids - ); - if (is_txnid_live(txn_manager, live_xid)) { - // Only committed entries have return a youngest. - invariant(youngest == TXNID_NONE); - } - else { - invariant(youngest != TXNID_NONE); - // A committed entry might have been read-only, in which case it won't return anything. - // This snapshot reads 'live_xid' so it's youngest cannot be older than snapshot_xid. - invariant(youngest >= snapshot_xid); - } - } - } - } - { - // Verify referenced_xids. 
- for (i = 0; i < num_referenced_xid_tuples; i++) { - struct referenced_xid_tuple *tuple = referenced_xid_tuples[i]; - invariant(tuple->begin_id < tuple->end_id); - invariant(tuple->references > 0); - - { - //verify neither pair->begin_id nor end_id is in live_list - r = txn_manager->live_root_txns.find_zero(tuple->begin_id, nullptr, nullptr); - invariant(r == DB_NOTFOUND); - r = txn_manager->live_root_txns.find_zero(tuple->end_id, nullptr, nullptr); - invariant(r == DB_NOTFOUND); - } - { - //verify neither pair->begin_id nor end_id is in snapshot_xids - TOKUTXN curr_txn = txn_manager->snapshot_head; - uint32_t curr_index = 0; - while (curr_txn != NULL) { - invariant(tuple->begin_id != curr_txn->txnid.parent_id64); - invariant(tuple->end_id != curr_txn->txnid.parent_id64); - curr_txn = curr_txn->snapshot_next; - curr_index++; - } - } - { - // Verify number of references is correct - uint32_t refs_found = 0; - for (j = 0; j < num_snapshot_txnids; j++) { - TOKUTXN snapshot_txn = snapshot_txns[j]; - if (toku_is_txn_in_live_root_txn_list(*snapshot_txn->live_root_txn_list, tuple->begin_id)) { - refs_found++; - } - invariant(!toku_is_txn_in_live_root_txn_list( - *snapshot_txn->live_root_txn_list, - tuple->end_id)); - } - invariant(refs_found == tuple->references); - } - { - // Verify youngest makes sense. 
- TXNID youngest = toku_get_youngest_live_list_txnid_for( - tuple->begin_id, - snapshot_txnids_omt, - txn_manager->referenced_xids - ); - invariant(youngest != TXNID_NONE); - invariant(youngest > tuple->begin_id); - invariant(youngest < tuple->end_id); - // Youngest must be found, and must be a snapshot txn - r = snapshot_txnids_omt.find_zero(youngest, nullptr, nullptr); - invariant_zero(r); - } - } - } - snapshot_txnids_omt.destroy(); - referenced_xids_omt.destroy(); - live_root_txns_omt.destroy(); -} - -void toku_txn_manager_init(TXN_MANAGER* txn_managerp) { - TXN_MANAGER XCALLOC(txn_manager); - toku_mutex_init(&txn_manager->txn_manager_lock, NULL); - txn_manager->live_root_txns.create(); - txn_manager->live_root_ids.create(); - txn_manager->snapshot_head = NULL; - txn_manager->snapshot_tail = NULL; - txn_manager->num_snapshots = 0; - txn_manager->referenced_xids.create(); - txn_manager->last_xid = 0; - - txn_manager->last_xid_seen_for_recover = TXNID_NONE; - txn_manager->last_calculated_oldest_referenced_xid = TXNID_NONE; - - *txn_managerp = txn_manager; -} - -void toku_txn_manager_destroy(TXN_MANAGER txn_manager) { - toku_mutex_destroy(&txn_manager->txn_manager_lock); - invariant(txn_manager->live_root_txns.size() == 0); - txn_manager->live_root_txns.destroy(); - invariant(txn_manager->live_root_ids.size() == 0); - txn_manager->live_root_ids.destroy(); - invariant(txn_manager->snapshot_head == NULL); - invariant(txn_manager->referenced_xids.size() == 0); - txn_manager->referenced_xids.destroy(); - toku_free(txn_manager); -} - -TXNID -toku_txn_manager_get_oldest_living_xid(TXN_MANAGER txn_manager) { - TOKUTXN rtxn = NULL; - TXNID rval = TXNID_NONE_LIVING; - txn_manager_lock(txn_manager); - - if (txn_manager->live_root_txns.size() > 0) { - int r = txn_manager->live_root_txns.fetch(0, &rtxn); - invariant_zero(r); - } - if (rtxn) { - rval = rtxn->txnid.parent_id64; - } - txn_manager_unlock(txn_manager); - return rval; -} - -TXNID 
toku_txn_manager_get_oldest_referenced_xid_estimate(TXN_MANAGER txn_manager) { - return txn_manager->last_calculated_oldest_referenced_xid; -} - -int live_root_txn_list_iter(const TOKUTXN &live_xid, const uint32_t UU(index), TXNID **const referenced_xids); -int live_root_txn_list_iter(const TOKUTXN &live_xid, const uint32_t UU(index), TXNID **const referenced_xids){ - (*referenced_xids)[index] = live_xid->txnid.parent_id64; - return 0; -} - - -// Create list of root transactions that were live when this txn began. -static inline void -setup_live_root_txn_list(xid_omt_t* live_root_txnid, xid_omt_t* live_root_txn_list) { - live_root_txn_list->clone(*live_root_txnid); -} - -//Heaviside function to search through an OMT by a TXNID -int -find_by_xid (const TOKUTXN &txn, const TXNID &txnidfind) { - if (txn->txnid.parent_id64 < txnidfind) return -1; - if (txn->txnid.parent_id64 > txnidfind) return +1; - return 0; -} - -static TXNID -max_xid(TXNID a, TXNID b) { - return a < b ? b : a; -} - -static void set_oldest_referenced_xid(TXN_MANAGER txn_manager) { - TXNID oldest_referenced_xid = TXNID_MAX; - int r; - if (txn_manager->live_root_ids.size() > 0) { - r = txn_manager->live_root_ids.fetch(0, &oldest_referenced_xid); - // this function should only be called when we know there is at least - // one live transaction - invariant_zero(r); - } - - if (txn_manager->referenced_xids.size() > 0) { - struct referenced_xid_tuple* tuple; - r = txn_manager->referenced_xids.fetch(0, &tuple); - if (r == 0 && tuple->begin_id < oldest_referenced_xid) { - oldest_referenced_xid = tuple->begin_id; - } - } - if (txn_manager->snapshot_head != NULL) { - TXNID id = txn_manager->snapshot_head->snapshot_txnid64; - if (id < oldest_referenced_xid) { - oldest_referenced_xid = id; - } - } - if (txn_manager->last_xid < oldest_referenced_xid) { - oldest_referenced_xid = txn_manager->last_xid; - } - invariant(oldest_referenced_xid != TXNID_MAX); - txn_manager->last_calculated_oldest_referenced_xid = 
oldest_referenced_xid; -} - -//Heaviside function to find a TOKUTXN by TOKUTXN (used to find the index) -// template-only function, but must be extern -int find_xid (const TOKUTXN &txn, const TOKUTXN &txnfind); -int -find_xid (const TOKUTXN &txn, const TOKUTXN &txnfind) -{ - if (txn->txnid.parent_id64 < txnfind->txnid.parent_id64) return -1; - if (txn->txnid.parent_id64 > txnfind->txnid.parent_id64) return +1; - return 0; -} - -static inline void txn_manager_create_snapshot_unlocked( - TXN_MANAGER txn_manager, - TOKUTXN txn - ) -{ - txn->snapshot_txnid64 = ++txn_manager->last_xid; - setup_live_root_txn_list(&txn_manager->live_root_ids, txn->live_root_txn_list); - // Add this txn to the global list of txns that have their own snapshots. - // (Note, if a txn is a child that creates its own snapshot, then that child xid - // is the xid stored in the global list.) - if (txn_manager->snapshot_head == NULL) { - invariant(txn_manager->snapshot_tail == NULL); - txn_manager->snapshot_head = txn; - txn_manager->snapshot_tail = txn; - } - else { - txn_manager->snapshot_tail->snapshot_next = txn; - txn->snapshot_prev = txn_manager->snapshot_tail; - txn_manager->snapshot_tail = txn; - } - txn_manager->num_snapshots++; -} - -// template-only function, but must be extern -int find_tuple_by_xid (const struct referenced_xid_tuple &tuple, const TXNID &xidfind); -int -find_tuple_by_xid (const struct referenced_xid_tuple &tuple, const TXNID &xidfind) -{ - if (tuple.begin_id < xidfind) return -1; - if (tuple.begin_id > xidfind) return +1; - return 0; -} - -// template-only function, but must be extern -int referenced_xids_note_snapshot_txn_end_iter(const TXNID &live_xid, const uint32_t UU(index), rx_omt_t *const referenced_xids) - __attribute__((nonnull(3))); -int referenced_xids_note_snapshot_txn_end_iter(const TXNID &live_xid, const uint32_t UU(index), rx_omt_t *const referenced_xids) -{ - int r; - uint32_t idx; - struct referenced_xid_tuple *tuple; - - r = 
referenced_xids->find_zero(live_xid, &tuple, &idx); - if (r == DB_NOTFOUND) { - goto done; - } - invariant_zero(r); - invariant(tuple->references > 0); - if (--tuple->references == 0) { - r = referenced_xids->delete_at(idx); - lazy_assert_zero(r); - } -done: - return 0; -} - -// When txn ends, update reverse live list. To do that, examine each txn in this (closing) txn's live list. -static inline int -note_snapshot_txn_end_by_ref_xids(TXN_MANAGER mgr, const xid_omt_t &live_root_txn_list) { - int r; - r = live_root_txn_list.iterate(&mgr->referenced_xids); - invariant_zero(r); - return r; -} - -typedef struct snapshot_iter_extra { - uint32_t* indexes_to_delete; - uint32_t num_indexes; - xid_omt_t* live_root_txn_list; -} SNAPSHOT_ITER_EXTRA; - -// template-only function, but must be extern -int note_snapshot_txn_end_by_txn_live_list_iter(referenced_xid_tuple* tuple, const uint32_t index, SNAPSHOT_ITER_EXTRA *const sie) - __attribute__((nonnull(3))); -int note_snapshot_txn_end_by_txn_live_list_iter( - referenced_xid_tuple* tuple, - const uint32_t index, - SNAPSHOT_ITER_EXTRA *const sie - ) -{ - int r; - uint32_t idx; - TXNID txnid; - r = sie->live_root_txn_list->find_zero(tuple->begin_id, &txnid, &idx); - if (r == DB_NOTFOUND) { - goto done; - } - invariant_zero(r); - invariant(txnid == tuple->begin_id); - invariant(tuple->references > 0); - if (--tuple->references == 0) { - sie->indexes_to_delete[sie->num_indexes] = index; - sie->num_indexes++; - } -done: - return 0; -} - -static inline int -note_snapshot_txn_end_by_txn_live_list(TXN_MANAGER mgr, xid_omt_t* live_root_txn_list) { - uint32_t size = mgr->referenced_xids.size(); - uint32_t indexes_to_delete[size]; - SNAPSHOT_ITER_EXTRA sie = { .indexes_to_delete = indexes_to_delete, .num_indexes = 0, .live_root_txn_list = live_root_txn_list}; - mgr->referenced_xids.iterate_ptr(&sie); - for (uint32_t i = 0; i < sie.num_indexes; i++) { - uint32_t curr_index = sie.indexes_to_delete[sie.num_indexes-i-1]; - 
mgr->referenced_xids.delete_at(curr_index); - } - return 0; -} - -static inline void txn_manager_remove_snapshot_unlocked( - TOKUTXN txn, - TXN_MANAGER txn_manager - ) -{ - // Remove from linked list of snapshot txns - if (txn_manager->snapshot_head == txn) { - txn_manager->snapshot_head = txn->snapshot_next; - } - if (txn_manager->snapshot_tail == txn) { - txn_manager->snapshot_tail = txn->snapshot_prev; - } - if (txn->snapshot_next) { - txn->snapshot_next->snapshot_prev = txn->snapshot_prev; - } - if (txn->snapshot_prev) { - txn->snapshot_prev->snapshot_next = txn->snapshot_next; - } - txn_manager->num_snapshots--; - uint32_t ref_xids_size = txn_manager->referenced_xids.size(); - uint32_t live_list_size = txn->live_root_txn_list->size(); - if (ref_xids_size > 0 && live_list_size > 0) { - if (live_list_size > ref_xids_size && ref_xids_size < 2000) { - note_snapshot_txn_end_by_txn_live_list(txn_manager, txn->live_root_txn_list); - } - else { - note_snapshot_txn_end_by_ref_xids(txn_manager, *txn->live_root_txn_list); - } - } -} - -static inline void inherit_snapshot_from_parent(TOKUTXN child) { - if (child->parent) { - child->snapshot_txnid64 = child->parent->snapshot_txnid64; - child->live_root_txn_list = child->parent->live_root_txn_list; - } -} -void toku_txn_manager_handle_snapshot_create_for_child_txn( - TOKUTXN txn, - TXN_MANAGER txn_manager, - TXN_SNAPSHOT_TYPE snapshot_type - ) -{ - // this is a function for child txns, so just doint a sanity check - invariant(txn->parent != NULL); - bool needs_snapshot = txn_needs_snapshot(snapshot_type, txn->parent); - if (needs_snapshot) { - invariant(txn->live_root_txn_list == nullptr); - XMALLOC(txn->live_root_txn_list); - txn_manager_lock(txn_manager); - txn_manager_create_snapshot_unlocked(txn_manager, txn); - txn_manager_unlock(txn_manager); - } - else { - inherit_snapshot_from_parent(txn); - } -} - -void toku_txn_manager_handle_snapshot_destroy_for_child_txn( - TOKUTXN txn, - TXN_MANAGER txn_manager, - 
TXN_SNAPSHOT_TYPE snapshot_type - ) -{ - // this is a function for child txns, so just doint a sanity check - invariant(txn->parent != NULL); - bool is_snapshot = txn_needs_snapshot(snapshot_type, txn->parent); - if (is_snapshot) { - txn_manager_lock(txn_manager); - txn_manager_remove_snapshot_unlocked(txn, txn_manager); - txn_manager_unlock(txn_manager); - invariant(txn->live_root_txn_list != nullptr); - txn->live_root_txn_list->destroy(); - toku_free(txn->live_root_txn_list); - } -} - -void toku_txn_manager_start_txn_for_recovery( - TOKUTXN txn, - TXN_MANAGER txn_manager, - TXNID xid - ) -{ - txn_manager_lock(txn_manager); - // using xid that is passed in - txn_manager->last_xid = max_xid(txn_manager->last_xid, xid); - toku_txn_update_xids_in_txn(txn, xid); - - uint32_t idx; - int r = txn_manager->live_root_txns.find_zero(txn, nullptr, &idx); - invariant(r == DB_NOTFOUND); - r = txn_manager->live_root_txns.insert_at(txn, idx); - invariant_zero(r); - r = txn_manager->live_root_ids.insert_at(txn->txnid.parent_id64, idx); - invariant_zero(r); - - txn_manager_unlock(txn_manager); -} - -void toku_txn_manager_start_txn( - TOKUTXN txn, - TXN_MANAGER txn_manager, - TXN_SNAPSHOT_TYPE snapshot_type, - bool read_only - ) -{ - int r; - TXNID xid = TXNID_NONE; - // if we are running in recovery, we don't need to make snapshots - bool needs_snapshot = txn_needs_snapshot(snapshot_type, NULL); - - // perform a malloc outside of the txn_manager lock - // will be used in txn_manager_create_snapshot_unlocked below - if (needs_snapshot) { - invariant(txn->live_root_txn_list == nullptr); - XMALLOC(txn->live_root_txn_list); - } - // the act of getting a transaction ID and adding the - // txn to the proper OMTs must be atomic. MVCC depends - // on this. - txn_manager_lock(txn_manager); - if (garbage_collection_debug) { - verify_snapshot_system(txn_manager); - } - - // - // maintain the data structures necessary for MVCC: - // 1. 
add txn to list of live_root_txns if this is a root transaction - // 2. if the transaction is creating a snapshot: - // - create a live list for the transaction - // - add the id to the list of snapshot ids - // - // The order of operations is important here, and must be taken - // into account when the transaction is closed. The txn is added - // to the live_root_txns first (if it is a root txn). This has the implication - // that a root level snapshot transaction is in its own live list. This fact - // is taken into account when the transaction is closed. - - // add ancestor information, and maintain global live root txn list - xid = ++txn_manager->last_xid; // we always need an ID, needed for lock tree - toku_txn_update_xids_in_txn(txn, xid); - if (!read_only) { - uint32_t idx = txn_manager->live_root_txns.size(); - r = txn_manager->live_root_txns.insert_at(txn, idx); - invariant_zero(r); - r = txn_manager->live_root_ids.insert_at(txn->txnid.parent_id64, idx); - invariant_zero(r); - } - set_oldest_referenced_xid(txn_manager); - - if (needs_snapshot) { - txn_manager_create_snapshot_unlocked( - txn_manager, - txn - ); - } - - if (garbage_collection_debug) { - verify_snapshot_system(txn_manager); - } - txn_manager_unlock(txn_manager); - return; -} - -TXNID -toku_get_youngest_live_list_txnid_for(TXNID xc, const xid_omt_t &snapshot_txnids, const rx_omt_t &referenced_xids) { - struct referenced_xid_tuple *tuple; - int r; - TXNID rval = TXNID_NONE; - - r = referenced_xids.find_zero(xc, &tuple, nullptr); - if (r == DB_NOTFOUND) { - goto done; - } - TXNID live; - - r = snapshot_txnids.find(tuple->end_id, -1, &live, nullptr); - if (r == DB_NOTFOUND) { - goto done; - } - invariant(live < tuple->end_id); - if (live > tuple->begin_id) { - rval = live; - } -done: - return rval; -} - -void toku_txn_manager_finish_txn(TXN_MANAGER txn_manager, TOKUTXN txn) { - int r; - invariant(txn->parent == NULL); - bool is_snapshot = txn_needs_snapshot(txn->snapshot_type, NULL); - 
txn_manager_lock(txn_manager); - - if (garbage_collection_debug) { - verify_snapshot_system(txn_manager); - } - - if (is_snapshot) { - txn_manager_remove_snapshot_unlocked( - txn, - txn_manager - ); - } - - if (!txn_declared_read_only(txn)) { - uint32_t idx; - //Remove txn from list of live root txns - TOKUTXN txnagain; - r = txn_manager->live_root_txns.find_zero(txn, &txnagain, &idx); - invariant_zero(r); - invariant(txn==txnagain); - - r = txn_manager->live_root_txns.delete_at(idx); - invariant_zero(r); - r = txn_manager->live_root_ids.delete_at(idx); - invariant_zero(r); - - if (!toku_txn_is_read_only(txn) || garbage_collection_debug) { - uint32_t num_references = 0; - TOKUTXN curr_txn = txn_manager->snapshot_tail; - while(curr_txn != NULL) { - if (curr_txn->snapshot_txnid64 > txn->txnid.parent_id64) { - num_references++; - } - else { - break; - } - curr_txn = curr_txn->snapshot_prev; - } - - if (num_references > 0) { - // This transaction exists in a live list of another transaction. 
- struct referenced_xid_tuple tuple = { - .begin_id = txn->txnid.parent_id64, - .end_id = ++txn_manager->last_xid, - .references = num_references - }; - r = txn_manager->referenced_xids.insert(tuple, txn->txnid.parent_id64, nullptr); - lazy_assert_zero(r); - } - } - } - - if (garbage_collection_debug) { - verify_snapshot_system(txn_manager); - } - txn_manager_unlock(txn_manager); - - //Cleanup that does not require the txn_manager lock - if (is_snapshot) { - invariant(txn->live_root_txn_list != nullptr); - txn->live_root_txn_list->destroy(); - toku_free(txn->live_root_txn_list); - } - return; -} - -static void toku_txn_manager_clone_state_for_gc_unlocked( - TXN_MANAGER txn_manager, - xid_omt_t* snapshot_xids, - rx_omt_t* referenced_xids, - xid_omt_t* live_root_txns - ) -{ - TXNID* snapshot_xids_array = NULL; - XMALLOC_N(txn_manager->num_snapshots, snapshot_xids_array); - TOKUTXN curr_txn = txn_manager->snapshot_head; - uint32_t curr_index = 0; - while (curr_txn != NULL) { - snapshot_xids_array[curr_index] = curr_txn->snapshot_txnid64; - curr_txn = curr_txn->snapshot_next; - curr_index++; - } - snapshot_xids->create_steal_sorted_array( - &snapshot_xids_array, - txn_manager->num_snapshots, - txn_manager->num_snapshots - ); - - referenced_xids->clone(txn_manager->referenced_xids); - setup_live_root_txn_list(&txn_manager->live_root_ids, live_root_txns); -} - -void toku_txn_manager_clone_state_for_gc( - TXN_MANAGER txn_manager, - xid_omt_t* snapshot_xids, - rx_omt_t* referenced_xids, - xid_omt_t* live_root_txns - ) -{ - txn_manager_lock(txn_manager); - toku_txn_manager_clone_state_for_gc_unlocked( - txn_manager, - snapshot_xids, - referenced_xids, - live_root_txns - ); - txn_manager_unlock(txn_manager); -} - -void txn_manager_state::init() { - invariant(!initialized); - invariant_notnull(txn_manager); - toku_txn_manager_clone_state_for_gc( - txn_manager, - &snapshot_xids, - &referenced_xids, - &live_root_txns - ); - initialized = true; -} - -void 
toku_txn_manager_id2txn_unlocked(TXN_MANAGER txn_manager, TXNID_PAIR txnid, TOKUTXN *result) { - TOKUTXN txn; - int r = txn_manager->live_root_txns.find_zero(txnid.parent_id64, &txn, nullptr); - if (r==0) { - assert(txn->txnid.parent_id64 == txnid.parent_id64); - *result = txn; - } - else { - assert(r==DB_NOTFOUND); - // If there is no txn, then we treat it as the null txn. - *result = NULL; - } -} - -int toku_txn_manager_get_root_txn_from_xid (TXN_MANAGER txn_manager, TOKU_XA_XID *xid, DB_TXN **txnp) { - txn_manager_lock(txn_manager); - int ret_val = 0; - int num_live_txns = txn_manager->live_root_txns.size(); - for (int i = 0; i < num_live_txns; i++) { - TOKUTXN txn; - { - int r = txn_manager->live_root_txns.fetch(i, &txn); - assert_zero(r); - } - if (txn->xa_xid.formatID == xid->formatID - && txn->xa_xid.gtrid_length == xid->gtrid_length - && txn->xa_xid.bqual_length == xid->bqual_length - && 0==memcmp(txn->xa_xid.data, xid->data, xid->gtrid_length + xid->bqual_length)) { - *txnp = txn->container_db_txn; - ret_val = 0; - goto exit; - } - } - ret_val = DB_NOTFOUND; -exit: - txn_manager_unlock(txn_manager); - return ret_val; -} - -uint32_t toku_txn_manager_num_live_root_txns(TXN_MANAGER txn_manager) { - int ret_val = 0; - txn_manager_lock(txn_manager); - ret_val = txn_manager->live_root_txns.size(); - txn_manager_unlock(txn_manager); - return ret_val; -} - -static int txn_manager_iter( - TXN_MANAGER txn_manager, - txn_mgr_iter_callback cb, - void* extra, - bool just_root_txns - ) -{ - int r = 0; - toku_mutex_lock(&txn_manager->txn_manager_lock); - uint32_t size = txn_manager->live_root_txns.size(); - for (uint32_t i = 0; i < size; i++) { - TOKUTXN curr_txn = NULL; - r = txn_manager->live_root_txns.fetch(i, &curr_txn); - assert_zero(r); - if (just_root_txns) { - r = cb(curr_txn, extra); - } - else { - r = curr_txn->child_manager->iterate(cb, extra); - } - if (r) { - break; - } - } - toku_mutex_unlock(&txn_manager->txn_manager_lock); - return r; -} - -int 
toku_txn_manager_iter_over_live_txns( - TXN_MANAGER txn_manager, - txn_mgr_iter_callback cb, - void* extra - ) -{ - return txn_manager_iter( - txn_manager, - cb, - extra, - false - ); -} - -int toku_txn_manager_iter_over_live_root_txns( - TXN_MANAGER txn_manager, - txn_mgr_iter_callback cb, - void* extra - ) -{ - return txn_manager_iter( - txn_manager, - cb, - extra, - true - ); -} - - -// -// This function is called only via env_txn_xa_recover and env_txn_recover. -// See comments for those functions to understand assumptions that -// can be made when calling this function. Namely, that the system is -// quiescant, in that we are right after recovery and before user operations -// commence. -// -// Another key assumption made here is that only root transactions -// may be prepared and that child transactions cannot be prepared. -// This assumption is made by the fact that we iterate over the live root txns -// to find prepared transactions. -// -// I (Zardosht), don't think we take advantage of this fact, as we are holding -// the txn_manager_lock in this function, but in the future we might want -// to take these assumptions into account. -// -int toku_txn_manager_recover_root_txn ( - TXN_MANAGER txn_manager, - struct tokulogger_preplist preplist[/*count*/], - long count, - long *retp, /*out*/ - uint32_t flags - ) -{ - int ret_val = 0; - txn_manager_lock(txn_manager); - uint32_t num_txns_returned = 0; - // scan through live root txns to find - // prepared transactions and return them - uint32_t size = txn_manager->live_root_txns.size(); - if (flags==DB_FIRST) { - txn_manager->last_xid_seen_for_recover = TXNID_NONE; - } - else if (flags!=DB_NEXT) { - ret_val = EINVAL; - goto exit; - } - for (uint32_t i = 0; i < size; i++) { - TOKUTXN curr_txn = NULL; - txn_manager->live_root_txns.fetch(i, &curr_txn); - // skip over TOKUTXNs whose txnid64 is too small, meaning - // we have already processed them. 
- if (curr_txn->txnid.parent_id64 <= txn_manager->last_xid_seen_for_recover) { - continue; - } - if (curr_txn->state == TOKUTXN_PREPARING) { - assert(curr_txn->container_db_txn); - preplist[num_txns_returned].txn = curr_txn->container_db_txn; - preplist[num_txns_returned].xid = curr_txn->xa_xid; - txn_manager->last_xid_seen_for_recover = curr_txn->txnid.parent_id64; - num_txns_returned++; - } - txn_manager->last_xid_seen_for_recover = curr_txn->txnid.parent_id64; - // if we found the maximum number of prepared transactions we are - // allowed to find, then break - if (num_txns_returned >= count) { - break; - } - } - invariant(num_txns_returned <= count); - *retp = num_txns_returned; - ret_val = 0; -exit: - txn_manager_unlock(txn_manager); - return ret_val; -} - -static void txn_manager_lock(TXN_MANAGER txn_manager) { - toku_mutex_lock(&txn_manager->txn_manager_lock); -} - -static void txn_manager_unlock(TXN_MANAGER txn_manager) { - toku_mutex_unlock(&txn_manager->txn_manager_lock); -} - -void toku_txn_manager_suspend(TXN_MANAGER txn_manager) { - txn_manager_lock(txn_manager); -} - -void toku_txn_manager_resume(TXN_MANAGER txn_manager) { - txn_manager_unlock(txn_manager); -} - -void -toku_txn_manager_set_last_xid_from_logger(TXN_MANAGER txn_manager, TXNID last_xid) { - invariant(txn_manager->last_xid == TXNID_NONE); - txn_manager->last_xid = last_xid; -} - -void -toku_txn_manager_set_last_xid_from_recovered_checkpoint(TXN_MANAGER txn_manager, TXNID last_xid) { - txn_manager->last_xid = last_xid; -} - -TXNID -toku_txn_manager_get_last_xid(TXN_MANAGER mgr) { - txn_manager_lock(mgr); - TXNID last_xid = mgr->last_xid; - txn_manager_unlock(mgr); - return last_xid; -} - -bool -toku_txn_manager_txns_exist(TXN_MANAGER mgr) { - txn_manager_lock(mgr); - bool retval = mgr->live_root_txns.size() > 0; - txn_manager_unlock(mgr); - return retval; -} - - -// Test-only function -void -toku_txn_manager_increase_last_xid(TXN_MANAGER mgr, uint64_t increment) { - txn_manager_lock(mgr); 
- mgr->last_xid += increment; - txn_manager_unlock(mgr); -} - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn_manager.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn_manager.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn_manager.h 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn_manager.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,272 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKUTXN_MANAGER_H -#define TOKUTXN_MANAGER_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include -#include -#include "fttypes.h" -#include -#include - -struct referenced_xid_tuple { - TXNID begin_id; - TXNID end_id; - uint32_t references; -}; - -typedef toku::omt txn_omt_t; -typedef toku::omt xid_omt_t; -typedef toku::omt rx_omt_t; - -struct txn_manager { - toku_mutex_t txn_manager_lock; // a lock protecting this object - txn_omt_t live_root_txns; // a sorted tree. - xid_omt_t live_root_ids; //contains TXNID x | x is snapshot txn - TOKUTXN snapshot_head; - TOKUTXN snapshot_tail; - uint32_t num_snapshots; - // Contains 3-tuples: (TXNID begin_id, TXNID end_id, uint64_t num_live_list_references) - // for committed root transaction ids that are still referenced by a live list. 
- rx_omt_t referenced_xids; - - TXNID last_xid; - TXNID last_xid_seen_for_recover; - TXNID last_calculated_oldest_referenced_xid; -}; - -struct txn_manager_state { - txn_manager_state(TXN_MANAGER mgr) : - txn_manager(mgr), - initialized(false) { - snapshot_xids.create_no_array(); - referenced_xids.create_no_array(); - live_root_txns.create_no_array(); - } - - // should not copy construct - txn_manager_state &operator=(txn_manager_state &rhs) = delete; - txn_manager_state(txn_manager_state &rhs) = delete; - - ~txn_manager_state() { - snapshot_xids.destroy(); - referenced_xids.destroy(); - live_root_txns.destroy(); - } - - void init(); - - TXN_MANAGER txn_manager; - bool initialized; - - // a snapshot of the txn manager's mvcc state - // only valid if initialized = true - xid_omt_t snapshot_xids; - rx_omt_t referenced_xids; - xid_omt_t live_root_txns; -}; - -// represents all of the information needed to run garbage collection -struct txn_gc_info { - txn_gc_info(txn_manager_state *st, TXNID xid_sgc, TXNID xid_ip, bool mvcc) - : txn_state_for_gc(st), - oldest_referenced_xid_for_simple_gc(xid_sgc), - oldest_referenced_xid_for_implicit_promotion(xid_ip), - mvcc_needed(mvcc) { - } - - // a snapshot of the transcation system. may be null. - txn_manager_state *txn_state_for_gc; - - // the oldest xid in any live list - // - // suitible for simple garbage collection that cleans up multiple committed - // transaction records into one. not suitible for implicit promotions, which - // must be correct in the face of abort messages - see ftnode->oldest_referenced_xid - TXNID oldest_referenced_xid_for_simple_gc; - - // lower bound on the oldest xid in any live when the messages to be cleaned - // had no messages above them. suitable for implicitly promoting a provisonal uxr. 
- TXNID oldest_referenced_xid_for_implicit_promotion; - - // whether or not mvcc is actually needed - false during recovery and non-transactional systems - const bool mvcc_needed; -}; - -void toku_txn_manager_init(TXN_MANAGER* txn_manager); -void toku_txn_manager_destroy(TXN_MANAGER txn_manager); - -TXNID toku_txn_manager_get_oldest_living_xid(TXN_MANAGER txn_manager); - -TXNID toku_txn_manager_get_oldest_referenced_xid_estimate(TXN_MANAGER txn_manager); - -void toku_txn_manager_handle_snapshot_create_for_child_txn( - TOKUTXN txn, - TXN_MANAGER txn_manager, - TXN_SNAPSHOT_TYPE snapshot_type - ); -void toku_txn_manager_handle_snapshot_destroy_for_child_txn( - TOKUTXN txn, - TXN_MANAGER txn_manager, - TXN_SNAPSHOT_TYPE snapshot_type - ); - - -// Assign a txnid. Log the txn begin in the recovery log. Initialize the txn live lists. -void toku_txn_manager_start_txn( - TOKUTXN txn, - TXN_MANAGER txn_manager, - TXN_SNAPSHOT_TYPE snapshot_type, - bool read_only - ); - -void toku_txn_manager_start_txn_for_recovery( - TOKUTXN txn, - TXN_MANAGER txn_manager, - TXNID xid - ); - -void toku_txn_manager_finish_txn(TXN_MANAGER txn_manager, TOKUTXN txn); - -void toku_txn_manager_clone_state_for_gc( - TXN_MANAGER txn_manager, - xid_omt_t* snapshot_xids, - rx_omt_t* referenced_xids, - xid_omt_t* live_root_txns - ); - -void toku_txn_manager_id2txn_unlocked(TXN_MANAGER txn_manager, TXNID_PAIR txnid, TOKUTXN *result); - -// Returns a root txn associated with xid. The system as a whole -// assumes that only root txns get prepared, adn therefore only -// root txns will have XIDs associated with them. 
-int toku_txn_manager_get_root_txn_from_xid (TXN_MANAGER txn_manager, TOKU_XA_XID *xid, DB_TXN **txnp); - -uint32_t toku_txn_manager_num_live_root_txns(TXN_MANAGER txn_manager); - -typedef int (*txn_mgr_iter_callback)(TOKUTXN txn, void* extra); - -int toku_txn_manager_iter_over_live_txns( - TXN_MANAGER txn_manager, - txn_mgr_iter_callback cb, - void* extra - ); - -int toku_txn_manager_iter_over_live_root_txns( - TXN_MANAGER txn_manager, - txn_mgr_iter_callback cb, - void* extra - ); - -int toku_txn_manager_recover_root_txn( - TXN_MANAGER txn_manager, - struct tokulogger_preplist preplist[/*count*/], - long count, - long *retp, /*out*/ - uint32_t flags - ); - -void toku_txn_manager_suspend(TXN_MANAGER txn_manager); -void toku_txn_manager_resume(TXN_MANAGER txn_manager); - -void toku_txn_manager_set_last_xid_from_logger(TXN_MANAGER txn_manager, TXNID last_xid); -void toku_txn_manager_set_last_xid_from_recovered_checkpoint(TXN_MANAGER txn_manager, TXNID last_xid); -TXNID toku_txn_manager_get_last_xid(TXN_MANAGER mgr); - -bool toku_txn_manager_txns_exist(TXN_MANAGER mgr); - -// Test-only function -void toku_txn_manager_increase_last_xid(TXN_MANAGER mgr, uint64_t increment); - -TXNID toku_get_youngest_live_list_txnid_for(TXNID xc, const xid_omt_t &snapshot_txnids, const rx_omt_t &referenced_xids); - -#endif // TOKUTXN_MANAGER_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn_state.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn_state.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/txn_state.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/txn_state.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,105 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - 
published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. 
- - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#if !defined(TOKUTXN_STATE_H) -#define TOKUTXN_STATE_H - -// this is a separate file so that the hotindexing tests can see the txn states - -enum tokutxn_state { - TOKUTXN_LIVE, // initial txn state - TOKUTXN_PREPARING, // txn is preparing (or prepared) - TOKUTXN_COMMITTING, // txn in the process of committing - TOKUTXN_ABORTING, // txn in the process of aborting - TOKUTXN_RETIRED, // txn no longer exists -}; -typedef enum tokutxn_state TOKUTXN_STATE; - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ule.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ule.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ule.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ule.cc 2014-10-08 13:19:51.000000000 +0000 @@ -31,7 +31,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -102,27 +102,29 @@ // See design documentation for nested transactions at // TokuWiki/Imp/TransactionsOverview. 
-#include -#include "fttypes.h" -#include "ft-internal.h" - -#include - -#include "leafentry.h" -#include "xids.h" -#include "ft_msg.h" -#include "ule.h" -#include "txn_manager.h" -#include "ule-internal.h" -#include -#include -#include +#include + +#include "portability/toku_portability.h" + +#include "ft/ft-internal.h" +#include "ft/leafentry.h" +#include "ft/logger/logger.h" +#include "ft/msg.h" +#include "ft/txn/txn.h" +#include "ft/txn/txn_manager.h" +#include "ft/ule.h" +#include "ft/ule-internal.h" +#include "ft/txn/xids.h" +#include "util/bytestring.h" +#include "util/omt.h" +#include "util/partitioned_counter.h" +#include "util/scoped_malloc.h" +#include "util/status.h" #define ULE_DEBUG 0 static uint32_t ule_get_innermost_numbytes(ULE ule, uint32_t keylen); - /////////////////////////////////////////////////////////////////////////////////// // Engine status // @@ -131,7 +133,7 @@ static LE_STATUS_S le_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(le_status, k, c, t, "le: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(le_status, k, c, t, "le: " l, inc) void toku_ule_status_init(void) { // Note, this function initializes the keyname, type, and legend fields. 
@@ -216,7 +218,7 @@ // Local functions: static void msg_init_empty_ule(ULE ule); -static void msg_modify_ule(ULE ule, FT_MSG msg); +static void msg_modify_ule(ULE ule, const ft_msg &msg); static void ule_init_empty_ule(ULE ule); static void ule_do_implicit_promotions(ULE ule, XIDS xids); static void ule_try_promote_provisional_outermost(ULE ule, TXNID oldest_possible_live_xid); @@ -256,6 +258,7 @@ uint32_t idx, void* keyp, uint32_t keylen, + uint32_t old_keylen, uint32_t old_le_size, size_t size, LEAFENTRY* new_le_space, @@ -268,7 +271,7 @@ else { // this means we are overwriting something if (old_le_size > 0) { - data_buffer->get_space_for_overwrite(idx, keyp, keylen, old_le_size, size, new_le_space, maybe_free); + data_buffer->get_space_for_overwrite(idx, keyp, keylen, old_keylen, old_le_size, size, new_le_space, maybe_free); } // this means we are inserting something new else { @@ -327,11 +330,11 @@ // static void ule_simple_garbage_collection(ULE ule, txn_gc_info *gc_info) { - uint32_t curr_index = 0; - uint32_t num_entries; if (ule->num_cuxrs == 1) { - goto done; + return; } + + uint32_t curr_index = 0; if (gc_info->mvcc_needed) { // starting at the top of the committed stack, find the first // uxr with a txnid that is less than oldest_referenced_xid @@ -341,37 +344,34 @@ break; } } - } - else { + } else { // if mvcc is not needed, we can need the top committed // value and nothing else curr_index = ule->num_cuxrs - 1; } + // curr_index is now set to the youngest uxr older than oldest_referenced_xid - if (curr_index == 0) { - goto done; + // so if it's not the bottom of the stack.. 
+ if (curr_index != 0) { + // ..then we need to get rid of the entries below curr_index + uint32_t num_entries = ule->num_cuxrs + ule->num_puxrs - curr_index; + memmove(&ule->uxrs[0], &ule->uxrs[curr_index], num_entries * sizeof(ule->uxrs[0])); + ule->uxrs[0].xid = TXNID_NONE; // New 'bottom of stack' loses its TXNID + ule->num_cuxrs -= curr_index; } - - // now get rid of the entries below curr_index - num_entries = ule->num_cuxrs + ule->num_puxrs - curr_index; - memmove(&ule->uxrs[0], &ule->uxrs[curr_index], num_entries * sizeof(ule->uxrs[0])); - ule->uxrs[0].xid = TXNID_NONE; //New 'bottom of stack' loses its TXNID - ule->num_cuxrs -= curr_index; - -done:; } +// TODO: Clean this up +extern bool garbage_collection_debug; + static void ule_garbage_collect(ULE ule, const xid_omt_t &snapshot_xids, const rx_omt_t &referenced_xids, const xid_omt_t &live_root_txns) { - if (ule->num_cuxrs == 1) goto done; - // will fail if too many num_cuxrs - bool necessary_static[MAX_TRANSACTION_RECORDS]; - bool *necessary; - necessary = necessary_static; - if (ule->num_cuxrs >= MAX_TRANSACTION_RECORDS) { - XMALLOC_N(ule->num_cuxrs, necessary); + if (ule->num_cuxrs == 1) { + return; } - memset(necessary, 0, sizeof(necessary[0])*ule->num_cuxrs); + + toku::scoped_calloc necessary_buf(ule->num_cuxrs * sizeof(bool)); + bool *necessary = reinterpret_cast(necessary_buf.get()); uint32_t curr_committed_entry; curr_committed_entry = ule->num_cuxrs - 1; @@ -401,24 +401,21 @@ } tl1 = toku_get_youngest_live_list_txnid_for(xc, snapshot_xids, referenced_xids); - if (tl1 == xc) { - // if tl1 == xc, that means xc should be live and show up in - // live_root_txns, which we check above. So, if we get - // here, something is wrong. - assert(false); - } + + // if tl1 == xc, that means xc should be live and show up in live_root_txns, which we check above. 
+ invariant(tl1 != xc); + if (tl1 == TXNID_NONE) { // set tl1 to youngest live transaction older than ule->uxrs[curr_committed_entry]->xid tl1 = get_next_older_txnid(xc, snapshot_xids); if (tl1 == TXNID_NONE) { - //Remainder is garbage, we're done + // remainder is garbage, we're done break; } } - if (garbage_collection_debug) - { + if (garbage_collection_debug) { int r = snapshot_xids.find_zero(tl1, nullptr, nullptr); - invariant(r==0); //make sure that the txn you are claiming is live is actually live + invariant_zero(r); // make sure that the txn you are claiming is live is actually live } // // tl1 should now be set @@ -432,30 +429,23 @@ curr_committed_entry--; } } - uint32_t first_free; - first_free = 0; - uint32_t i; - for (i = 0; i < ule->num_cuxrs; i++) { - //Shift values to 'delete' garbage values. + uint32_t first_free = 0; + for (uint32_t i = 0; i < ule->num_cuxrs; i++) { + // Shift values to 'delete' garbage values. if (necessary[i]) { ule->uxrs[first_free] = ule->uxrs[i]; first_free++; } } - uint32_t saved; - saved = first_free; + uint32_t saved = first_free; invariant(saved <= ule->num_cuxrs); invariant(saved >= 1); ule->uxrs[0].xid = TXNID_NONE; //New 'bottom of stack' loses its TXNID if (first_free != ule->num_cuxrs) { - //Shift provisional values + // Shift provisional values memmove(&ule->uxrs[first_free], &ule->uxrs[ule->num_cuxrs], ule->num_puxrs * sizeof(ule->uxrs[0])); } ule->num_cuxrs = saved; - if (necessary != necessary_static) { - toku_free(necessary); - } -done:; } static size_t ule_packed_memsize(ULE ule) { @@ -492,10 +482,11 @@ // Otehrwise the new_leafentry_p points at the new leaf entry. // As of October 2011, this function always returns 0. void -toku_le_apply_msg(FT_MSG msg, +toku_le_apply_msg(const ft_msg &msg, LEAFENTRY old_leafentry, // NULL if there was no stored data. 
bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced + uint32_t old_keylen, // length of the any key in data_buffer txn_gc_info *gc_info, LEAFENTRY *new_leafentry_p, int64_t * numbytes_delta_p) { // change in total size of key and val, not including any overhead @@ -505,7 +496,7 @@ int64_t oldnumbytes = 0; int64_t newnumbytes = 0; uint64_t oldmemsize = 0; - uint32_t keylen = ft_msg_get_keylen(msg); + uint32_t keylen = msg.kdbt()->size; if (old_leafentry == NULL) { msg_init_empty_ule(&ule); @@ -550,8 +541,9 @@ &ule, // create packed leafentry data_buffer, idx, - ft_msg_get_key(msg), // contract of this function is caller has this set, always + msg.kdbt()->data, // contract of this function is caller has this set, always keylen, // contract of this function is caller has this set, always + old_keylen, oldmemsize, new_leafentry_p, &maybe_free @@ -655,6 +647,7 @@ idx, keyp, keylen, + keylen, // old_keylen, same because the key isn't going to change for gc old_mem_size, new_leaf_entry, &maybe_free @@ -686,10 +679,10 @@ // Purpose is to modify the unpacked leafentry in our private workspace. // static void -msg_modify_ule(ULE ule, FT_MSG msg) { - XIDS xids = ft_msg_get_xids(msg); - invariant(xids_get_num_xids(xids) < MAX_TRANSACTION_RECORDS); - enum ft_msg_type type = ft_msg_get_type(msg); +msg_modify_ule(ULE ule, const ft_msg &msg) { + XIDS xids = msg.xids(); + invariant(toku_xids_get_num_xids(xids) < MAX_TRANSACTION_RECORDS); + enum ft_msg_type type = msg.type(); if (type != FT_OPTIMIZE && type != FT_OPTIMIZE_FOR_UPGRADE) { ule_do_implicit_promotions(ule, xids); } @@ -702,9 +695,9 @@ //fall through to FT_INSERT on purpose. 
} case FT_INSERT: { - uint32_t vallen = ft_msg_get_vallen(msg); + uint32_t vallen = msg.vdbt()->size; invariant(IS_VALID_LEN(vallen)); - void * valp = ft_msg_get_val(msg); + void * valp = msg.vdbt()->data; ule_apply_insert(ule, xids, vallen, valp); break; } @@ -731,25 +724,23 @@ assert(false); // These messages don't get this far. Instead they get translated (in setval_fun in do_update) into FT_INSERT messages. break; default: - assert(false /* illegal FT_MSG.type */); + assert(false); /* illegal ft msg type */ break; } } -void -test_msg_modify_ule(ULE ule, FT_MSG msg){ +void test_msg_modify_ule(ULE ule, const ft_msg &msg){ msg_modify_ule(ule,msg); } - static void ule_optimize(ULE ule, XIDS xids) { if (ule->num_puxrs) { TXNID uncommitted = ule->uxrs[ule->num_cuxrs].xid; // outermost uncommitted TXNID oldest_living_xid = TXNID_NONE; - uint32_t num_xids = xids_get_num_xids(xids); + uint32_t num_xids = toku_xids_get_num_xids(xids); if (num_xids > 0) { invariant(num_xids==1); - oldest_living_xid = xids_get_xid(xids, 0); + oldest_living_xid = toku_xids_get_xid(xids, 0); } if (oldest_living_xid == TXNID_NONE || uncommitted < oldest_living_xid) { ule_promote_provisional_innermost_to_committed(ule); @@ -974,6 +965,7 @@ uint32_t idx, void* keyp, uint32_t keylen, + uint32_t old_keylen, uint32_t old_le_size, LEAFENTRY * const new_leafentry_p, // this is what this function creates void **const maybe_free @@ -996,7 +988,8 @@ } } if (data_buffer && old_le_size > 0) { - data_buffer->delete_leafentry(idx, keylen, old_le_size); + // must pass old_keylen and old_le_size, since that's what is actually stored in data_buffer + data_buffer->delete_leafentry(idx, old_keylen, old_le_size); } *new_leafentry_p = NULL; rval = 0; @@ -1005,7 +998,7 @@ found_insert: memsize = le_memsize_from_ule(ule); LEAFENTRY new_leafentry; - get_space_for_le(data_buffer, idx, keyp, keylen, old_le_size, memsize, &new_leafentry, maybe_free); + get_space_for_le(data_buffer, idx, keyp, keylen, old_keylen, 
old_le_size, memsize, &new_leafentry, maybe_free); //p always points to first unused byte after leafentry we are packing uint8_t *p; @@ -1343,9 +1336,9 @@ bool le_has_xids(LEAFENTRY le, XIDS xids) { //Read num_uxrs - uint32_t num_xids = xids_get_num_xids(xids); + uint32_t num_xids = toku_xids_get_num_xids(xids); invariant(num_xids > 0); //Disallow checking for having TXNID_NONE - TXNID xid = xids_get_xid(xids, 0); + TXNID xid = toku_xids_get_xid(xids, 0); invariant(xid!=TXNID_NONE); bool rval = (le_outermost_uncommitted_xid(le) == xid); @@ -1595,13 +1588,13 @@ //Optimization for (most) common case. //No commits necessary if everything is already committed. if (ule->num_puxrs > 0) { - int num_xids = xids_get_num_xids(xids); + int num_xids = toku_xids_get_num_xids(xids); invariant(num_xids>0); uint32_t max_index = ule->num_cuxrs + min_i32(ule->num_puxrs, num_xids) - 1; uint32_t ica_index = max_index; uint32_t index; for (index = ule->num_cuxrs; index <= max_index; index++) { - TXNID current_msg_xid = xids_get_xid(xids, index - ule->num_cuxrs); + TXNID current_msg_xid = toku_xids_get_xid(xids, index - ule->num_cuxrs); TXNID current_ule_xid = ule_get_xid(ule, index); if (current_msg_xid != current_ule_xid) { //ica is innermost transaction with matching xids. 
@@ -1691,7 +1684,7 @@ static void ule_apply_insert(ULE ule, XIDS xids, uint32_t vallen, void * valp) { ule_prepare_for_new_uxr(ule, xids); - TXNID this_xid = xids_get_innermost_xid(xids); // xid of transaction doing this insert + TXNID this_xid = toku_xids_get_innermost_xid(xids); // xid of transaction doing this insert ule_push_insert_uxr(ule, this_xid == TXNID_NONE, this_xid, vallen, valp); } @@ -1699,7 +1692,7 @@ static void ule_apply_delete(ULE ule, XIDS xids) { ule_prepare_for_new_uxr(ule, xids); - TXNID this_xid = xids_get_innermost_xid(xids); // xid of transaction doing this delete + TXNID this_xid = toku_xids_get_innermost_xid(xids); // xid of transaction doing this delete ule_push_delete_uxr(ule, this_xid == TXNID_NONE, this_xid); } @@ -1710,7 +1703,7 @@ // with placeholders. static void ule_prepare_for_new_uxr(ULE ule, XIDS xids) { - TXNID this_xid = xids_get_innermost_xid(xids); + TXNID this_xid = toku_xids_get_innermost_xid(xids); //This is for LOADER_USE_PUTS or transactionless environment //where messages use XIDS of 0 if (this_xid == TXNID_NONE && ule_get_innermost_xid(ule) == TXNID_NONE) { @@ -1735,7 +1728,7 @@ // Remember, the innermost uxr can only be an insert or a delete, not a placeholder. static void ule_apply_abort(ULE ule, XIDS xids) { - TXNID this_xid = xids_get_innermost_xid(xids); // xid of transaction doing this abort + TXNID this_xid = toku_xids_get_innermost_xid(xids); // xid of transaction doing this abort invariant(this_xid!=TXNID_NONE); UXR innermost = ule_get_innermost_uxr(ule); // need to check for provisional entries in ule, otherwise @@ -1766,7 +1759,7 @@ // If this transaction did modify the leafentry, then promote whatever it did. // Remember, the innermost uxr can only be an insert or a delete, not a placeholder. 
void ule_apply_commit(ULE ule, XIDS xids) { - TXNID this_xid = xids_get_innermost_xid(xids); // xid of transaction committing + TXNID this_xid = toku_xids_get_innermost_xid(xids); // xid of transaction committing invariant(this_xid!=TXNID_NONE); // need to check for provisional entries in ule, otherwise // there is nothing to abort, not checking this may result @@ -1908,7 +1901,7 @@ //Placeholders can be placed on top of the committed uxr. invariant(ule->num_cuxrs > 0); - uint32_t num_xids = xids_get_num_xids(xids); + uint32_t num_xids = toku_xids_get_num_xids(xids); // we assume that implicit promotion has happened // when we get this call, so the number of xids MUST // be greater than the number of provisional entries @@ -1916,12 +1909,12 @@ // make sure that the xids stack matches up to a certain amount // this first for loop is just debug code for (uint32_t i = 0; i < ule->num_puxrs; i++) { - TXNID current_msg_xid = xids_get_xid(xids, i); + TXNID current_msg_xid = toku_xids_get_xid(xids, i); TXNID current_ule_xid = ule_get_xid(ule, i + ule->num_cuxrs); invariant(current_msg_xid == current_ule_xid); } for (uint32_t i = ule->num_puxrs; i < num_xids-1; i++) { - TXNID current_msg_xid = xids_get_xid(xids, i); + TXNID current_msg_xid = toku_xids_get_xid(xids, i); ule_push_placeholder_uxr(ule, current_msg_xid); } } @@ -2073,7 +2066,7 @@ // is_delp - output parameter that returns answer // context - parameter for f // -int +static int le_iterate_is_del(LEAFENTRY le, LE_ITERATE_CALLBACK f, bool *is_delp, TOKUTXN context) { #if ULE_DEBUG ULE_S ule; @@ -2142,6 +2135,27 @@ } // +// Returns true if the value that is to be read is empty. 
+// +int le_val_is_del(LEAFENTRY le, bool is_snapshot_read, TOKUTXN txn) { + int rval; + if (is_snapshot_read) { + bool is_del = false; + le_iterate_is_del( + le, + toku_txn_reads_txnid, + &is_del, + txn + ); + rval = is_del; + } + else { + rval = le_latest_is_del(le); + } + return rval; +} + +// // Iterates over "possible" TXNIDs in a leafentry's stack, until one is accepted by 'f'. Set // valpp and vallenp to value and length associated with accepted TXNID // The "possible" TXNIDs are: @@ -2261,6 +2275,27 @@ return r; } +void le_extract_val(LEAFENTRY le, + // should we return the entire leafentry as the val? + bool is_leaf_mode, bool is_snapshot_read, + TOKUTXN ttxn, uint32_t *vallen, void **val) { + if (is_leaf_mode) { + *val = le; + *vallen = leafentry_memsize(le); + } else if (is_snapshot_read) { + int r = le_iterate_val( + le, + toku_txn_reads_txnid, + val, + vallen, + ttxn + ); + lazy_assert_zero(r); + } else { + *val = le_latest_val_and_len(le, vallen); + } +} + // This is an on-disk format. static_asserts verify everything is packed and aligned correctly. struct __attribute__ ((__packed__)) leafentry_13 { struct leafentry_committed_13 { @@ -2467,6 +2502,7 @@ nullptr, //only matters if we are passing in a bn_data 0, //only matters if we are passing in a bn_data 0, //only matters if we are passing in a bn_data + 0, //only matters if we are passing in a bn_data new_leafentry_p, nullptr //only matters if we are passing in a bn_data ); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ule.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ule.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ule.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ule.h 2014-10-08 13:19:51.000000000 +0000 @@ -6,9 +6,6 @@ * requirements of the nested transaction logic belongs here. 
*/ -#ifndef TOKU_ULE_H -#define TOKU_ULE_H - #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -38,7 +35,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,11 +92,13 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include "leafentry.h" -#include "txn_manager.h" +#include "txn/txn_manager.h" #include void toku_ule_status_init(void); @@ -130,5 +129,3 @@ //1 does much slower debugging #define GARBAGE_COLLECTION_DEBUG 0 - -#endif // TOKU_ULE_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ule-internal.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ule-internal.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ule-internal.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ule-internal.h 2014-10-08 13:19:51.000000000 +0000 @@ -5,9 +5,6 @@ * ule mechanisms that do not belong in the public interface. */ -#ifndef TOKU_ULE_INTERNAL_H -#define TOKU_ULE_INTERNAL_H - #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -37,7 +34,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,6 +91,8 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -136,7 +135,7 @@ -void test_msg_modify_ule(ULE ule, FT_MSG msg); +void test_msg_modify_ule(ULE ule, const ft_msg &msg); ////////////////////////////////////////////////////////////////////////////////////// @@ -148,6 +147,7 @@ uint32_t idx, void* keyp, uint32_t keylen, + uint32_t old_keylen, uint32_t old_le_size, LEAFENTRY * const new_leafentry_p, // this is what this function creates void **const maybe_free @@ -156,7 +156,3 @@ size_t le_memsize_from_ule (ULE ule); void ule_cleanup(ULE ule); - - -#endif // TOKU_ULE_H - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/valgrind.suppressions mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/valgrind.suppressions --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/valgrind.suppressions 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/valgrind.suppressions 2014-10-08 13:19:51.000000000 +0000 @@ -281,3 +281,16 @@ fun:_dl_start obj:/lib/x86_64-linux-gnu/ld-2.17.so } +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + obj:/usr/lib/libdl-2.19.so + fun:dlsym + fun:_Z19toku_memory_startupv + fun:call_init.part.0 + fun:_dl_init + obj:/usr/lib/ld-2.19.so +} + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/wbuf.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/wbuf.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/wbuf.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/wbuf.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,337 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef WBUF_H -#define WBUF_H -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: 
- - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. 
- -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
-#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include -#include - -#include -#include - -#include "fttypes.h" - -#define CRC_INCR - -/* When serializing a value, write it into a buffer. */ -/* This code requires that the buffer be big enough to hold whatever you put into it. */ -/* This abstraction doesn't do a good job of hiding its internals. - * Why? The performance of this code is important, and we want to inline stuff */ -//Why is size here an int instead of DISKOFF like in the initializer? -struct wbuf { - unsigned char *buf; - unsigned int size; - unsigned int ndone; - struct x1764 checksum; // The checksum state -}; - -static inline void wbuf_nocrc_init (struct wbuf *w, void *buf, DISKOFF size) { - w->buf = (unsigned char *) buf; - w->size = size; - w->ndone = 0; -} - -static inline void wbuf_init (struct wbuf *w, void *buf, DISKOFF size) { - wbuf_nocrc_init(w, buf, size); - toku_x1764_init(&w->checksum); -} - -static inline size_t wbuf_get_woffset(struct wbuf *w) { - return w->ndone; -} - -/* Write a character. */ -static inline void wbuf_nocrc_char (struct wbuf *w, unsigned char ch) { - assert(w->ndonesize); - w->buf[w->ndone++]=ch; -} - -/* Write a character. 
*/ -static inline void wbuf_nocrc_uint8_t (struct wbuf *w, uint8_t ch) { - assert(w->ndonesize); - w->buf[w->ndone++]=ch; -} - -static inline void wbuf_char (struct wbuf *w, unsigned char ch) { - wbuf_nocrc_char (w, ch); - toku_x1764_add(&w->checksum, &w->buf[w->ndone-1], 1); -} - -//Write an int that MUST be in network order regardless of disk order -static void wbuf_network_int (struct wbuf *w, int32_t i) __attribute__((__unused__)); -static void wbuf_network_int (struct wbuf *w, int32_t i) { - assert(w->ndone + 4 <= w->size); - *(uint32_t*)(&w->buf[w->ndone]) = toku_htonl(i); - toku_x1764_add(&w->checksum, &w->buf[w->ndone], 4); - w->ndone += 4; -} - -static inline void wbuf_nocrc_int (struct wbuf *w, int32_t i) { -#if 0 - wbuf_nocrc_char(w, i>>24); - wbuf_nocrc_char(w, i>>16); - wbuf_nocrc_char(w, i>>8); - wbuf_nocrc_char(w, i>>0); -#else - assert(w->ndone + 4 <= w->size); - #if 0 - w->buf[w->ndone+0] = i>>24; - w->buf[w->ndone+1] = i>>16; - w->buf[w->ndone+2] = i>>8; - w->buf[w->ndone+3] = i>>0; - #else - *(uint32_t*)(&w->buf[w->ndone]) = toku_htod32(i); - #endif - w->ndone += 4; -#endif -} - -static inline void wbuf_int (struct wbuf *w, int32_t i) { - wbuf_nocrc_int(w, i); - toku_x1764_add(&w->checksum, &w->buf[w->ndone-4], 4); -} - -static inline void wbuf_nocrc_uint (struct wbuf *w, uint32_t i) { - wbuf_nocrc_int(w, (int32_t)i); -} - -static inline void wbuf_uint (struct wbuf *w, uint32_t i) { - wbuf_int(w, (int32_t)i); -} - -static inline uint8_t* wbuf_nocrc_reserve_literal_bytes(struct wbuf *w, uint32_t nbytes) { - assert(w->ndone + nbytes <= w->size); - uint8_t * dest = w->buf + w->ndone; - w->ndone += nbytes; - return dest; -} - -static inline void wbuf_nocrc_literal_bytes(struct wbuf *w, bytevec bytes_bv, uint32_t nbytes) { - const unsigned char *bytes = (const unsigned char *) bytes_bv; -#if 0 - { int i; for (i=0; indone + nbytes <= w->size); - memcpy(w->buf + w->ndone, bytes, (size_t)nbytes); - w->ndone += nbytes; -#endif -} - -static inline void 
wbuf_literal_bytes(struct wbuf *w, bytevec bytes_bv, uint32_t nbytes) { - wbuf_nocrc_literal_bytes(w, bytes_bv, nbytes); - toku_x1764_add(&w->checksum, &w->buf[w->ndone-nbytes], nbytes); -} - -static void wbuf_nocrc_bytes (struct wbuf *w, bytevec bytes_bv, uint32_t nbytes) { - wbuf_nocrc_uint(w, nbytes); - wbuf_nocrc_literal_bytes(w, bytes_bv, nbytes); -} - -static void wbuf_bytes (struct wbuf *w, bytevec bytes_bv, uint32_t nbytes) { - wbuf_uint(w, nbytes); - wbuf_literal_bytes(w, bytes_bv, nbytes); -} - -static void wbuf_nocrc_ulonglong (struct wbuf *w, uint64_t ull) { - wbuf_nocrc_uint(w, (uint32_t)(ull>>32)); - wbuf_nocrc_uint(w, (uint32_t)(ull&0xFFFFFFFF)); -} - -static void wbuf_ulonglong (struct wbuf *w, uint64_t ull) { - wbuf_uint(w, (uint32_t)(ull>>32)); - wbuf_uint(w, (uint32_t)(ull&0xFFFFFFFF)); -} - -static inline void wbuf_nocrc_uint64_t(struct wbuf *w, uint64_t ull) { - wbuf_nocrc_ulonglong(w, ull); -} - - -static inline void wbuf_uint64_t(struct wbuf *w, uint64_t ull) { - wbuf_ulonglong(w, ull); -} - -static inline void wbuf_nocrc_bool (struct wbuf *w, bool b) { - wbuf_nocrc_uint8_t(w, (uint8_t)(b ? 
1 : 0)); -} - -static inline void wbuf_nocrc_BYTESTRING (struct wbuf *w, BYTESTRING v) { - wbuf_nocrc_bytes(w, v.data, v.len); -} - -static inline void wbuf_BYTESTRING (struct wbuf *w, BYTESTRING v) { - wbuf_bytes(w, v.data, v.len); -} - -static inline void wbuf_uint8_t (struct wbuf *w, uint8_t v) { - wbuf_char(w, v); -} - -static inline void wbuf_nocrc_uint32_t (struct wbuf *w, uint32_t v) { - wbuf_nocrc_uint(w, v); -} - -static inline void wbuf_uint32_t (struct wbuf *w, uint32_t v) { - wbuf_uint(w, v); -} - -static inline void wbuf_DISKOFF (struct wbuf *w, DISKOFF off) { - wbuf_ulonglong(w, (uint64_t)off); -} - -static inline void wbuf_BLOCKNUM (struct wbuf *w, BLOCKNUM b) { - wbuf_ulonglong(w, b.b); -} -static inline void wbuf_nocrc_BLOCKNUM (struct wbuf *w, BLOCKNUM b) { - wbuf_nocrc_ulonglong(w, b.b); -} - -static inline void wbuf_nocrc_TXNID (struct wbuf *w, TXNID tid) { - wbuf_nocrc_ulonglong(w, tid); -} - -static inline void wbuf_nocrc_TXNID_PAIR (struct wbuf *w, TXNID_PAIR tid) { - wbuf_nocrc_ulonglong(w, tid.parent_id64); - wbuf_nocrc_ulonglong(w, tid.child_id64); -} - - -static inline void wbuf_TXNID (struct wbuf *w, TXNID tid) { - wbuf_ulonglong(w, tid); -} - -static inline void wbuf_nocrc_XIDP (struct wbuf *w, XIDP xid) { - wbuf_nocrc_uint32_t(w, xid->formatID); - wbuf_nocrc_uint8_t(w, xid->gtrid_length); - wbuf_nocrc_uint8_t(w, xid->bqual_length); - wbuf_nocrc_literal_bytes(w, xid->data, xid->gtrid_length+xid->bqual_length); -} - -static inline void wbuf_nocrc_LSN (struct wbuf *w, LSN lsn) { - wbuf_nocrc_ulonglong(w, lsn.lsn); -} - -static inline void wbuf_LSN (struct wbuf *w, LSN lsn) { - wbuf_ulonglong(w, lsn.lsn); -} - -static inline void wbuf_MSN (struct wbuf *w, MSN msn) { - wbuf_ulonglong(w, msn.msn); -} - -static inline void wbuf_nocrc_FILENUM (struct wbuf *w, FILENUM fileid) { - wbuf_nocrc_uint(w, fileid.fileid); -} - -static inline void wbuf_FILENUM (struct wbuf *w, FILENUM fileid) { - wbuf_uint(w, fileid.fileid); -} - -// 2954 -static inline 
void wbuf_nocrc_FILENUMS (struct wbuf *w, FILENUMS v) { - wbuf_nocrc_uint(w, v.num); - uint32_t i; - for (i = 0; i < v.num; i++) { - wbuf_nocrc_FILENUM(w, v.filenums[i]); - } -} - -// 2954 -static inline void wbuf_FILENUMS (struct wbuf *w, FILENUMS v) { - wbuf_uint(w, v.num); - uint32_t i; - for (i = 0; i < v.num; i++) { - wbuf_FILENUM(w, v.filenums[i]); - } -} - - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/workset.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/workset.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/workset.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/workset.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,191 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef _TOKU_WORKSET_H -#define _TOKU_WORKSET_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. 
- -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include -#include - -// The work struct is the base class for work to be done by some threads -struct work { - struct toku_list next; -}; - -// The workset struct contains the set of work to be done by some threads -struct workset { - toku_mutex_t lock; - struct toku_list worklist; // a list of work - int refs; // number of workers that have a reference on the workset - toku_cond_t worker_wait; // a condition variable used to wait for all of the worker to release their reference on the workset -}; - -static inline void -workset_init(struct workset *ws) { - toku_mutex_init(&ws->lock, NULL); - toku_list_init(&ws->worklist); - ws->refs = 1; // the calling thread gets a reference - toku_cond_init(&ws->worker_wait, NULL); -} - -static inline void -workset_destroy(struct workset *ws) { - invariant(toku_list_empty(&ws->worklist)); - toku_cond_destroy(&ws->worker_wait); - toku_mutex_destroy(&ws->lock); -} - -static inline void -workset_lock(struct workset *ws) { - toku_mutex_lock(&ws->lock); -} - -static inline void -workset_unlock(struct workset *ws) { - toku_mutex_unlock(&ws->lock); -} - -// Put work in the workset. Assume the workset is already locked. 
-static inline void -workset_put_locked(struct workset *ws, struct work *w) { - toku_list_push(&ws->worklist, &w->next); -} - -// Put work in the workset -static inline void -workset_put(struct workset *ws, struct work *w) { - workset_lock(ws); - workset_put_locked(ws, w); - workset_unlock(ws); -} - -// Get work from the workset -static inline struct work * -workset_get(struct workset *ws) { - workset_lock(ws); - struct work *w = NULL; - if (!toku_list_empty(&ws->worklist)) { - struct toku_list *l = toku_list_pop_head(&ws->worklist); - w = toku_list_struct(l, struct work, next); - } - workset_unlock(ws); - return w; -} - -// Add references to the workset -static inline void -workset_add_ref(struct workset *ws, int refs) { - workset_lock(ws); - ws->refs += refs; - workset_unlock(ws); -} - -// Release a reference on the workset -static inline void -workset_release_ref(struct workset *ws) { - workset_lock(ws); - if (--ws->refs == 0) { - toku_cond_broadcast(&ws->worker_wait); - } - workset_unlock(ws); -} - -// Wait until all of the worker threads have released their reference on the workset -static inline void -workset_join(struct workset *ws) { - workset_lock(ws); - while (ws->refs != 0) { - toku_cond_wait(&ws->worker_wait, &ws->lock); - } - workset_unlock(ws); -} - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/xids.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/xids.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/xids.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/xids.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,310 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - 
following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. 
- - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -/* Purpose of this file is to implement xids list of nested transactions - * ids. - * - * See design documentation for nested transactions at - * TokuWiki/Imp/TransactionsOverview. - * - * NOTE: xids are always stored in disk byte order. - * Accessors are responsible for transposing bytes to - * host order. - */ - - -#include -#include - -#include -#include "fttypes.h" -#include "xids.h" -#include "xids-internal.h" -#include "toku_assert.h" -#include "memory.h" -#include - - -///////////////////////////////////////////////////////////////////////////////// -// This layer of abstraction (xids_xxx) understands xids<> and nothing else. -// It contains all the functions that understand xids<> -// -// xids<> do not store the implicit transaction id of 0 at index 0. -// The accessor functions make the id of 0 explicit at index 0. -// The number of xids physically stored in the xids array is in -// the variable num_xids. -// -// The xids struct is immutable. The caller gets an initial version of XIDS -// by calling xids_get_root_xids(), which returns the constant struct -// representing the root transaction (id 0). When a transaction begins, -// a new XIDS is created with the id of the current transaction appended to -// the list. -// -// - - -// This is the xids list for a transactionless environment. -// It is also the initial state of any xids list created for -// nested transactions. - - -XIDS -xids_get_root_xids(void) { - static const struct xids_t root_xids = { - .num_xids = 0 - }; - - XIDS rval = (XIDS)&root_xids; - return rval; -} - -bool -xids_can_create_child(XIDS xids) { - invariant(xids->num_xids < MAX_TRANSACTION_RECORDS); - return (xids->num_xids + 1) != MAX_TRANSACTION_RECORDS; -} - - -int -xids_create_unknown_child(XIDS parent_xids, XIDS *xids_p) { - // Postcondition: - // xids_p points to an xids that is an exact copy of parent_xids, but with room for one more xid. 
- int rval; - invariant(parent_xids); - uint32_t num_child_xids = parent_xids->num_xids + 1; - // assumes that caller has verified that num_child_xids will - // be less than MAX_TRANSACTIN_RECORDS - invariant(num_child_xids < MAX_TRANSACTION_RECORDS); - size_t new_size = sizeof(*parent_xids) + num_child_xids*sizeof(parent_xids->ids[0]); - XIDS CAST_FROM_VOIDP(xids, toku_xmalloc(new_size)); - // Clone everything (parent does not have the newest xid). - memcpy(xids, parent_xids, new_size - sizeof(xids->ids[0])); - *xids_p = xids; - rval = 0; - return rval; -} - -void -xids_finalize_with_child(XIDS xids, TXNID this_xid) { - // Precondition: - // - xids was created by xids_create_unknown_child - TXNID this_xid_disk = toku_htod64(this_xid); - uint32_t num_child_xids = ++xids->num_xids; - xids->ids[num_child_xids - 1] = this_xid_disk; -} - -// xids is immutable. This function creates a new xids by copying the -// parent's list and then appending the xid of the new transaction. -int -xids_create_child(XIDS parent_xids, // xids list for parent transaction - XIDS * xids_p, // xids list created - TXNID this_xid) { // xid of this transaction (new innermost) - bool can_create_child = xids_can_create_child(parent_xids); - if (!can_create_child) { - return EINVAL; - } - xids_create_unknown_child(parent_xids, xids_p); - xids_finalize_with_child(*xids_p, this_xid); - return 0; -} - -void -xids_create_from_buffer(struct rbuf *rb, // xids list for parent transaction - XIDS * xids_p) { // xids list created - uint8_t num_xids = rbuf_char(rb); - invariant(num_xids < MAX_TRANSACTION_RECORDS); - XIDS CAST_FROM_VOIDP(xids, toku_xmalloc(sizeof(*xids) + num_xids*sizeof(xids->ids[0]))); - xids->num_xids = num_xids; - uint8_t index; - for (index = 0; index < xids->num_xids; index++) { - rbuf_TXNID(rb, &xids->ids[index]); - } - *xids_p = xids; -} - - -void -xids_destroy(XIDS *xids_p) { - if (*xids_p != xids_get_root_xids()) toku_free(*xids_p); - *xids_p = NULL; -} - - -// Return xid at 
requested position. -// If requesting an xid out of range (which will be the case if xids array is empty) -// then return 0, the xid of the root transaction. -TXNID -xids_get_xid(XIDS xids, uint8_t index) { - invariant(index < xids_get_num_xids(xids)); - TXNID rval = xids->ids[index]; - rval = toku_dtoh64(rval); - return rval; -} - -uint8_t -xids_get_num_xids(XIDS xids) { - uint8_t rval = xids->num_xids; - return rval; -} - - -// Return innermost xid -TXNID -xids_get_innermost_xid(XIDS xids) { - TXNID rval = TXNID_NONE; - if (xids_get_num_xids(xids)) { - // if clause above makes this cast ok - uint8_t innermost_xid = (uint8_t)(xids_get_num_xids(xids)-1); - rval = xids_get_xid(xids, innermost_xid); - } - return rval; -} - -TXNID -xids_get_outermost_xid(XIDS xids) { - TXNID rval = TXNID_NONE; - if (xids_get_num_xids(xids)) - rval = xids_get_xid(xids, 0); - return rval; -} - -void -xids_cpy(XIDS target, XIDS source) { - size_t size = xids_get_size(source); - memcpy(target, source, size); -} - -// return size in bytes -uint32_t -xids_get_size(XIDS xids){ - uint32_t rval; - uint8_t num_xids = xids->num_xids; - rval = sizeof(*xids) + num_xids * sizeof(xids->ids[0]); - return rval; -} - -uint32_t -xids_get_serialize_size(XIDS xids){ - uint32_t rval; - uint8_t num_xids = xids->num_xids; - rval = 1 + //num xids - 8 * num_xids; - return rval; -} - - -unsigned char * -xids_get_end_of_array(XIDS xids) { - TXNID *r = xids->ids + xids->num_xids; - return (unsigned char*)r; -} - -void wbuf_nocrc_xids(struct wbuf *wb, XIDS xids) { - wbuf_nocrc_char(wb, (unsigned char)xids->num_xids); - uint8_t index; - for (index = 0; index < xids->num_xids; index++) { - wbuf_nocrc_TXNID(wb, xids->ids[index]); - } -} - -void -xids_fprintf(FILE* fp, XIDS xids) { - uint8_t index; - unsigned num_xids = xids_get_num_xids(xids); - fprintf(fp, "[|%u| ", num_xids); - for (index = 0; index < xids_get_num_xids(xids); index++) { - if (index) fprintf(fp, ","); - fprintf(fp, "%" PRIx64, xids_get_xid(xids, 
index)); - } - fprintf(fp, "]"); -} - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/xids.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/xids.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/xids.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/xids.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,151 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: - -/* Purpose of this file is to provide the world with everything necessary - * to use the xids and nothing else. - * Internal requirements of the xids logic do not belong here. - * - * xids is (abstractly) an immutable list of nested transaction ids, accessed only - * via the functions in this file. - * - * See design documentation for nested transactions at - * TokuWiki/Imp/TransactionsOverview. - */ - -#ifndef XIDS_H -#define XIDS_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "rbuf.h" -#include "wbuf.h" -#include "tokuconst.h" - -//Retrieve an XIDS representing the root transaction. -XIDS xids_get_root_xids(void); - -bool xids_can_create_child(XIDS xids); - -void xids_cpy(XIDS target, XIDS source); - -//Creates an XIDS representing this transaction. -//You must pass in an XIDS representing the parent of this transaction. 
-int xids_create_child(XIDS parent_xids, XIDS *xids_p, TXNID this_xid); - -// The following two functions (in order) are equivalent to xids_create child, -// but allow you to do most of the work without knowing the new xid. -int xids_create_unknown_child(XIDS parent_xids, XIDS *xids_p); -void xids_finalize_with_child(XIDS xids, TXNID this_xid); - -void xids_create_from_buffer(struct rbuf *rb, XIDS * xids_p); - -void xids_destroy(XIDS *xids_p); - -TXNID xids_get_xid(XIDS xids, uint8_t index); - -uint8_t xids_get_num_xids(XIDS xids); - -TXNID xids_get_innermost_xid(XIDS xids); -TXNID xids_get_outermost_xid(XIDS xids); - -// return size in bytes -uint32_t xids_get_size(XIDS xids); - -uint32_t xids_get_serialize_size(XIDS xids); - -unsigned char *xids_get_end_of_array(XIDS xids); - -void wbuf_nocrc_xids(struct wbuf *wb, XIDS xids); - -void xids_fprintf(FILE* fp, XIDS xids); - - - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/xids-internal.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/xids-internal.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/xids-internal.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/xids-internal.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,108 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: - -#ifndef XIDS_INTERNAL_H -#define XIDS_INTERNAL_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -// Variable size list of transaction ids (known in design doc as xids<>). -// ids[0] is the outermost transaction. -// ids[num_xids - 1] is the innermost transaction. -// Should only be accessed by accessor functions xids_xxx, not directly. 
- -// If the xids struct is unpacked, the compiler aligns the ids[] and we waste a lot of space -typedef struct __attribute__((__packed__)) xids_t { - uint8_t num_xids; // maximum value of MAX_TRANSACTION_RECORDS - 1 ... - // ... because transaction 0 is implicit - TXNID ids[]; -} XIDS_S; - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ybt.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ybt.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ybt.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ybt.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,350 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include -#include -#include -#include - -#include "ybt.h" - -DBT * -toku_init_dbt(DBT *ybt) { - memset(ybt, 0, sizeof(*ybt)); - return ybt; -} - -DBT * -toku_init_dbt_flags(DBT *ybt, uint32_t flags) { - toku_init_dbt(ybt); - ybt->flags = flags; - return ybt; -} - -DBT_ARRAY * -toku_dbt_array_init(DBT_ARRAY *dbts, uint32_t size) { - uint32_t capacity = 1; - while (capacity < size) { capacity *= 2; } - - XMALLOC_N(capacity, dbts->dbts); - for (uint32_t i = 0; i < capacity; i++) { - toku_init_dbt_flags(&dbts->dbts[i], DB_DBT_REALLOC); - } - dbts->size = size; - dbts->capacity = capacity; - return dbts; -} - -void -toku_dbt_array_resize(DBT_ARRAY *dbts, uint32_t size) { - if (size != dbts->size) { - if (size > dbts->capacity) { - const uint32_t old_capacity = dbts->capacity; - uint32_t new_capacity = dbts->capacity; - while (new_capacity < size) { - new_capacity *= 2; - } - dbts->capacity = new_capacity; - XREALLOC_N(new_capacity, dbts->dbts); - for (uint32_t i = old_capacity; i < new_capacity; i++) { - toku_init_dbt_flags(&dbts->dbts[i], DB_DBT_REALLOC); - } - } else if (size < dbts->size) { - if (dbts->capacity >= 8 && size < dbts->capacity / 4) { - const int old_capacity = dbts->capacity; - const int new_capacity = dbts->capacity / 2; - for (int i = new_capacity; i < old_capacity; i++) { - toku_destroy_dbt(&dbts->dbts[i]); - } - XREALLOC_N(new_capacity, dbts->dbts); - dbts->capacity = new_capacity; - } - } - dbts->size = size; - } -} - -void -toku_dbt_array_destroy_shallow(DBT_ARRAY *dbts) { - toku_free(dbts->dbts); - ZERO_STRUCT(*dbts); -} - -void -toku_dbt_array_destroy(DBT_ARRAY *dbts) { - for (uint32_t i = 0; i < dbts->capacity; i++) { - toku_destroy_dbt(&dbts->dbts[i]); - } - toku_dbt_array_destroy_shallow(dbts); -} - - - -void -toku_destroy_dbt(DBT *dbt) { - switch (dbt->flags) { - case DB_DBT_MALLOC: - case DB_DBT_REALLOC: - toku_free(dbt->data); - toku_init_dbt(dbt); - break; - } -} - -DBT * -toku_fill_dbt(DBT *dbt, bytevec k, ITEMLEN len) { - 
toku_init_dbt(dbt); - dbt->size=len; - dbt->data=(char*)k; - return dbt; -} - -DBT *toku_memdup_dbt(DBT *dbt, const void *k, size_t len) { - toku_init_dbt_flags(dbt, DB_DBT_MALLOC); - dbt->size = len; - dbt->data = toku_xmemdup(k, len); - return dbt; -} - -DBT *toku_copyref_dbt(DBT *dst, const DBT src) { - dst->flags = 0; - dst->ulen = 0; - dst->size = src.size; - dst->data = src.data; - return dst; -} - -DBT *toku_copy_dbt(DBT *dst, const DBT &src) { - dst->flags = src.flags; - dst->ulen = src.ulen; - dst->size = src.size; - dst->data = src.data; - return dst; -} - -DBT *toku_clone_dbt(DBT *dst, const DBT &src) { - return toku_memdup_dbt(dst, src.data, src.size); -} - -void -toku_sdbt_cleanup(struct simple_dbt *sdbt) { - if (sdbt->data) toku_free(sdbt->data); - memset(sdbt, 0, sizeof(*sdbt)); -} - -static inline int -sdbt_realloc(struct simple_dbt *sdbt) { - void *new_data = toku_realloc(sdbt->data, sdbt->len); - int r; - if (new_data == NULL) { - r = get_error_errno(); - } else { - sdbt->data = new_data; - r = 0; - } - return r; -} - -static inline int -dbt_realloc(DBT *dbt) { - void *new_data = toku_realloc(dbt->data, dbt->ulen); - int r; - if (new_data == NULL) { - r = get_error_errno(); - } else { - dbt->data = new_data; - r = 0; - } - return r; -} - -int -toku_dbt_set (ITEMLEN len, bytevec val, DBT *d, struct simple_dbt *sdbt) { -// sdbt is the static value used when flags==0 -// Otherwise malloc or use the user-supplied memory, as according to the flags in d->flags. - int r; - if (!d) r = 0; - else { - switch (d->flags) { - case (DB_DBT_USERMEM): - d->size = len; - if (d->ulendata, val, len); - r = 0; - } - break; - case (DB_DBT_MALLOC): - d->data = NULL; - d->ulen = 0; - //Fall through to DB_DBT_REALLOC - case (DB_DBT_REALLOC): - if (d->ulen < len) { - d->ulen = len*2; - r = dbt_realloc(d); - } - else if (d->ulen > 16 && d->ulen > len*4) { - d->ulen = len*2 < 16 ? 
16 : len*2; - r = dbt_realloc(d); - } - else if (d->data==NULL) { - d->ulen = len; - r = dbt_realloc(d); - } - else r=0; - - if (r==0) { - memcpy(d->data, val, len); - d->size = len; - } - break; - case (0): - if (sdbt->len < len) { - sdbt->len = len*2; - r = sdbt_realloc(sdbt); - } - else if (sdbt->len > 16 && sdbt->len > len*4) { - sdbt->len = len*2 < 16 ? 16 : len*2; - r = sdbt_realloc(sdbt); - } - else r=0; - - if (r==0) { - memcpy(sdbt->data, val, len); - d->data = sdbt->data; - d->size = len; - } - break; - default: - r = EINVAL; - break; - } - } - return r; -} - -const DBT *toku_dbt_positive_infinity(void) { - static DBT positive_infinity_dbt = {}; - return &positive_infinity_dbt; -} - -const DBT *toku_dbt_negative_infinity(void) { - static DBT negative_infinity_dbt = {}; - return &negative_infinity_dbt; -} - -bool toku_dbt_is_infinite(const DBT *dbt) { - return dbt == toku_dbt_positive_infinity() || dbt == toku_dbt_negative_infinity(); -} - -int toku_dbt_infinite_compare(const DBT *a, const DBT *b) { - if (a == b) { - return 0; - } else if (a == toku_dbt_positive_infinity()) { - return 1; - } else if (b == toku_dbt_positive_infinity()) { - return -1; - } else if (a == toku_dbt_negative_infinity()) { - return -1; - } else { - invariant(b == toku_dbt_negative_infinity()); - return 1; - } -} - -bool toku_dbt_equals(const DBT *a, const DBT *b) { - if (!toku_dbt_is_infinite(a) && !toku_dbt_is_infinite(b)) { - return a->data == b->data && a->size == b->size; - } else { - // a or b is infinite, so they're equal if they are the same infinite - return a == b ? 
true : false; - } -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ybt.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ybt.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/ft/ybt.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/ft/ybt.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,141 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_YBT_H -#define TOKU_YBT_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -// fttypes.h must be first to make 64-bit file mode work right in linux. -#include "fttypes.h" -#include - -// TODO: John -// Document this API a little better so that DBT -// memory management can be morm widely understood. 
- -DBT *toku_init_dbt(DBT *); - -DBT *toku_init_dbt_flags(DBT *, uint32_t flags); - -void toku_destroy_dbt(DBT *); - -DBT *toku_fill_dbt(DBT *dbt, bytevec k, ITEMLEN len); - -DBT *toku_memdup_dbt(DBT *dbt, const void *k, size_t len); - -DBT *toku_copyref_dbt(DBT *dst, const DBT src); - -DBT *toku_copy_dbt(DBT *dst, const DBT &src); - -DBT *toku_clone_dbt(DBT *dst, const DBT &src); - -int toku_dbt_set(ITEMLEN len, bytevec val, DBT *d, struct simple_dbt *sdbt); - -int toku_dbt_set_value(DBT *, bytevec *val, ITEMLEN vallen, void **staticptrp, bool ybt1_disposable); - -void toku_sdbt_cleanup(struct simple_dbt *sdbt); - -// returns: special DBT pointer representing positive infinity -const DBT *toku_dbt_positive_infinity(void); - -// returns: special DBT pointer representing negative infinity -const DBT *toku_dbt_negative_infinity(void); - -// returns: true if the given dbt is either positive or negative infinity -bool toku_dbt_is_infinite(const DBT *dbt); - -// effect: compares two potentially infinity-valued dbts -// requires: at least one is infinite (assert otherwise) -int toku_dbt_infinite_compare(const DBT *a, const DBT *b); - -// returns: true if the given dbts have the same data pointer and size -bool toku_dbt_equals(const DBT *a, const DBT *b); - -#endif /* TOKU_YBT_H */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/concurrent_tree.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/concurrent_tree.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/concurrent_tree.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/concurrent_tree.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -91,7 +91,7 @@ #include -void concurrent_tree::create(comparator *cmp) { +void concurrent_tree::create(const comparator *cmp) { // start with an empty root node. we do this instead of // setting m_root to null so there's always a root to lock m_root.create_root(cmp); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/concurrent_tree.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/concurrent_tree.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/concurrent_tree.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/concurrent_tree.h 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef CONCURRENT_TREE_H -#define CONCURRENT_TREE_H - #include #include "treenode.h" @@ -174,7 +173,7 @@ }; // effect: initialize the tree to an empty state - void create(comparator *cmp); + void create(const comparator *cmp); // effect: destroy the tree. 
// requires: tree is empty @@ -203,5 +202,3 @@ #include "concurrent_tree.cc" } /* namespace toku */ - -#endif /* CONCURRENT_TREE_H */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/keyrange.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/keyrange.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/keyrange.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/keyrange.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,165 +91,165 @@ #include "keyrange.h" -#include +#include namespace toku { -// create a keyrange by borrowing the left and right dbt -// pointers. no memory is copied. no checks for infinity needed. -void keyrange::create(const DBT *left, const DBT *right) { - init_empty(); - m_left_key = left; - m_right_key = right; -} - -// destroy the key copies. if they were never set, then destroy does nothing. -void keyrange::destroy(void) { - toku_destroy_dbt(&m_left_key_copy); - toku_destroy_dbt(&m_right_key_copy); -} - -// create a keyrange by copying the keys from the given range. -void keyrange::create_copy(const keyrange &range) { - // start with an initialized, empty range - init_empty(); - - // optimize the case where the left and right keys are the same. - // we'd like to only have one copy of the data. - if (toku_dbt_equals(range.get_left_key(), range.get_right_key())) { - set_both_keys(range.get_left_key()); - } else { - // replace our empty left and right keys with - // copies of the range's left and right keys - replace_left_key(range.get_left_key()); - replace_right_key(range.get_right_key()); - } -} - -// extend this keyrange by choosing the leftmost and rightmost -// endpoints between this range and the given. replaced keys -// in this range are freed and inherited keys are copied. 
-void keyrange::extend(comparator *cmp, const keyrange &range) { - const DBT *range_left = range.get_left_key(); - const DBT *range_right = range.get_right_key(); - if (cmp->compare(range_left, get_left_key()) < 0) { - replace_left_key(range_left); - } - if (cmp->compare(range_right, get_right_key()) > 0) { - replace_right_key(range_right); - } -} - -// how much memory does this keyrange take? -// - the size of the left and right keys -// --- ignore the fact that we may have optimized the point case. -// it complicates things for little gain. -// - the size of the keyrange class itself -uint64_t keyrange::get_memory_size(void) const { - const DBT *left_key = get_left_key(); - const DBT *right_key = get_right_key(); - return left_key->size + right_key->size + sizeof(keyrange); -} - -// compare ranges. -keyrange::comparison keyrange::compare(comparator *cmp, const keyrange &range) const { - if (cmp->compare(get_right_key(), range.get_left_key()) < 0) { - return comparison::LESS_THAN; - } else if (cmp->compare(get_left_key(), range.get_right_key()) > 0) { - return comparison::GREATER_THAN; - } else if (cmp->compare(get_left_key(), range.get_left_key()) == 0 && - cmp->compare(get_right_key(), range.get_right_key()) == 0) { - return comparison::EQUALS; - } else { - return comparison::OVERLAPS; - } -} - -bool keyrange::overlaps(comparator *cmp, const keyrange &range) const { - // equality is a stronger form of overlapping. - // so two ranges "overlap" if they're either equal or just overlapping. 
- comparison c = compare(cmp, range); - return c == comparison::EQUALS || c == comparison::OVERLAPS; -} - -keyrange keyrange::get_infinite_range(void) { - keyrange range; - range.create(toku_dbt_negative_infinity(), toku_dbt_positive_infinity()); - return range; -} - -void keyrange::init_empty(void) { - m_left_key = nullptr; - m_right_key = nullptr; - toku_init_dbt(&m_left_key_copy); - toku_init_dbt(&m_right_key_copy); - m_point_range = false; -} - -const DBT *keyrange::get_left_key(void) const { - if (m_left_key) { - return m_left_key; - } else { - return &m_left_key_copy; - } -} - -const DBT *keyrange::get_right_key(void) const { - if (m_right_key) { - return m_right_key; - } else { - return &m_right_key_copy; - } -} - -// copy the given once and set both the left and right pointers. -// optimization for point ranges, so the left and right ranges -// are not copied twice. -void keyrange::set_both_keys(const DBT *key) { - if (toku_dbt_is_infinite(key)) { - m_left_key = key; - m_right_key = key; - } else { - toku_clone_dbt(&m_left_key_copy, *key); - toku_copyref_dbt(&m_right_key_copy, m_left_key_copy); - } - m_point_range = true; -} - -// destroy the current left key. set and possibly copy the new one -void keyrange::replace_left_key(const DBT *key) { - // a little magic: - // - // if this is a point range, then the left and right keys share - // one copy of the data, and it lives in the left key copy. so - // if we're replacing the left key, move the real data to the - // right key copy instead of destroying it. now, the memory is - // owned by the right key and the left key may be replaced. - if (m_point_range) { - m_right_key_copy = m_left_key_copy; - } else { + // create a keyrange by borrowing the left and right dbt + // pointers. no memory is copied. no checks for infinity needed. + void keyrange::create(const DBT *left, const DBT *right) { + init_empty(); + m_left_key = left; + m_right_key = right; + } + + // destroy the key copies. 
if they were never set, then destroy does nothing. + void keyrange::destroy(void) { toku_destroy_dbt(&m_left_key_copy); + toku_destroy_dbt(&m_right_key_copy); } - if (toku_dbt_is_infinite(key)) { - m_left_key = key; - } else { - toku_clone_dbt(&m_left_key_copy, *key); - m_left_key = nullptr; + // create a keyrange by copying the keys from the given range. + void keyrange::create_copy(const keyrange &range) { + // start with an initialized, empty range + init_empty(); + + // optimize the case where the left and right keys are the same. + // we'd like to only have one copy of the data. + if (toku_dbt_equals(range.get_left_key(), range.get_right_key())) { + set_both_keys(range.get_left_key()); + } else { + // replace our empty left and right keys with + // copies of the range's left and right keys + replace_left_key(range.get_left_key()); + replace_right_key(range.get_right_key()); + } + } + + // extend this keyrange by choosing the leftmost and rightmost + // endpoints between this range and the given. replaced keys + // in this range are freed and inherited keys are copied. + void keyrange::extend(const comparator &cmp, const keyrange &range) { + const DBT *range_left = range.get_left_key(); + const DBT *range_right = range.get_right_key(); + if (cmp(range_left, get_left_key()) < 0) { + replace_left_key(range_left); + } + if (cmp(range_right, get_right_key()) > 0) { + replace_right_key(range_right); + } + } + + // how much memory does this keyrange take? + // - the size of the left and right keys + // --- ignore the fact that we may have optimized the point case. + // it complicates things for little gain. + // - the size of the keyrange class itself + uint64_t keyrange::get_memory_size(void) const { + const DBT *left_key = get_left_key(); + const DBT *right_key = get_right_key(); + return left_key->size + right_key->size + sizeof(keyrange); + } + + // compare ranges. 
+ keyrange::comparison keyrange::compare(const comparator &cmp, const keyrange &range) const { + if (cmp(get_right_key(), range.get_left_key()) < 0) { + return comparison::LESS_THAN; + } else if (cmp(get_left_key(), range.get_right_key()) > 0) { + return comparison::GREATER_THAN; + } else if (cmp(get_left_key(), range.get_left_key()) == 0 && + cmp(get_right_key(), range.get_right_key()) == 0) { + return comparison::EQUALS; + } else { + return comparison::OVERLAPS; + } + } + + bool keyrange::overlaps(const comparator &cmp, const keyrange &range) const { + // equality is a stronger form of overlapping. + // so two ranges "overlap" if they're either equal or just overlapping. + comparison c = compare(cmp, range); + return c == comparison::EQUALS || c == comparison::OVERLAPS; + } + + keyrange keyrange::get_infinite_range(void) { + keyrange range; + range.create(toku_dbt_negative_infinity(), toku_dbt_positive_infinity()); + return range; } - m_point_range = false; -} -// destroy the current right key. set and possibly copy the new one -void keyrange::replace_right_key(const DBT *key) { - toku_destroy_dbt(&m_right_key_copy); - if (toku_dbt_is_infinite(key)) { - m_right_key = key; - } else { - toku_clone_dbt(&m_right_key_copy, *key); + void keyrange::init_empty(void) { + m_left_key = nullptr; m_right_key = nullptr; + toku_init_dbt(&m_left_key_copy); + toku_init_dbt(&m_right_key_copy); + m_point_range = false; + } + + const DBT *keyrange::get_left_key(void) const { + if (m_left_key) { + return m_left_key; + } else { + return &m_left_key_copy; + } + } + + const DBT *keyrange::get_right_key(void) const { + if (m_right_key) { + return m_right_key; + } else { + return &m_right_key_copy; + } + } + + // copy the given once and set both the left and right pointers. + // optimization for point ranges, so the left and right ranges + // are not copied twice. 
+ void keyrange::set_both_keys(const DBT *key) { + if (toku_dbt_is_infinite(key)) { + m_left_key = key; + m_right_key = key; + } else { + toku_clone_dbt(&m_left_key_copy, *key); + toku_copyref_dbt(&m_right_key_copy, m_left_key_copy); + } + m_point_range = true; + } + + // destroy the current left key. set and possibly copy the new one + void keyrange::replace_left_key(const DBT *key) { + // a little magic: + // + // if this is a point range, then the left and right keys share + // one copy of the data, and it lives in the left key copy. so + // if we're replacing the left key, move the real data to the + // right key copy instead of destroying it. now, the memory is + // owned by the right key and the left key may be replaced. + if (m_point_range) { + m_right_key_copy = m_left_key_copy; + } else { + toku_destroy_dbt(&m_left_key_copy); + } + + if (toku_dbt_is_infinite(key)) { + m_left_key = key; + } else { + toku_clone_dbt(&m_left_key_copy, *key); + m_left_key = nullptr; + } + m_point_range = false; + } + + // destroy the current right key. set and possibly copy the new one + void keyrange::replace_right_key(const DBT *key) { + toku_destroy_dbt(&m_right_key_copy); + if (toku_dbt_is_infinite(key)) { + m_right_key = key; + } else { + toku_clone_dbt(&m_right_key_copy, *key); + m_right_key = nullptr; + } + m_point_range = false; } - m_point_range = false; -} } /* namespace toku */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/keyrange.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/keyrange.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/keyrange.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/keyrange.h 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ under this License. 
*/ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef KEYRANGE_H -#define KEYRANGE_H - #include namespace toku { @@ -118,7 +117,7 @@ // effect: extends the keyrange by choosing the leftmost and rightmost // endpoints from this range and the given range. // replaced keys in this range are freed, new keys are copied. - void extend(comparator *cmp, const keyrange &range); + void extend(const comparator &cmp, const keyrange &range); // returns: the amount of memory this keyrange takes. does not account // for point optimizations or malloc overhead. @@ -144,10 +143,10 @@ // EQUALS if given range has the same left and right endpoints // OVERLAPS if at least one of the given range's endpoints falls // between this range's endpoints - comparison compare(comparator *cmp, const keyrange &range) const; + comparison compare(const comparator &cmp, const keyrange &range) const; // returns: true if the range and the given range are equal or overlapping - bool overlaps(comparator *cmp, const keyrange &range) const; + bool overlaps(const comparator &cmp, const keyrange &range) const; // returns: a keyrange representing -inf, +inf static keyrange get_infinite_range(void); @@ -184,5 +183,3 @@ }; } /* namespace toku */ - -#endif /* KEYRANGE_H */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/lock_request.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/lock_request.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/lock_request.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/lock_request.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: 
- TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,12 +89,12 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include +#include "portability/toku_race_tools.h" -#include - -#include "locktree.h" -#include "lock_request.h" +#include "ft/txn/txn.h" +#include "locktree/locktree.h" +#include "locktree/lock_request.h" +#include "util/dbt.h" namespace toku { diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/lock_request.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/lock_request.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/lock_request.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/lock_request.h 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,21 +86,19 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#ifndef TOKU_LOCK_REQUEST_H -#define TOKU_LOCK_REQUEST_H - #include -#include -#include -#include +#include "portability/toku_pthread.h" -#include "locktree.h" -#include "txnid_set.h" -#include "wfg.h" +#include "locktree/locktree.h" +#include "locktree/txnid_set.h" +#include "locktree/wfg.h" +#include "ft/comparator.h" namespace toku { @@ -243,5 +241,3 @@ ENSURE_POD(lock_request); } /* namespace toku */ - -#endif /* TOKU_LOCK_REQUEST_H */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/locktree.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/locktree.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/locktree.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/locktree.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -116,19 +116,16 @@ // but does nothing based on the value of the reference count - it is // up to the user of the locktree to destroy it when it sees fit. -void locktree::create(locktree_manager *mgr, DICTIONARY_ID dict_id, - DESCRIPTOR desc, ft_compare_func cmp) { +void locktree::create(locktree_manager *mgr, DICTIONARY_ID dict_id, const comparator &cmp) { m_mgr = mgr; m_dict_id = dict_id; - // the only reason m_cmp is malloc'd here is to prevent gdb from printing - // out an entire DB struct every time you inspect a locktree. 
- XCALLOC(m_cmp); - m_cmp->create(cmp, desc); + m_cmp.create_from(cmp); m_reference_count = 1; m_userdata = nullptr; + XCALLOC(m_rangetree); - m_rangetree->create(m_cmp); + m_rangetree->create(&m_cmp); m_sto_txnid = TXNID_NONE; m_sto_buffer.create(); @@ -155,11 +152,10 @@ void locktree::destroy(void) { invariant(m_reference_count == 0); + m_cmp.destroy(); m_rangetree->destroy(); - toku_free(m_cmp); toku_free(m_rangetree); m_sto_buffer.destroy(); - m_lock_request_info.pending_lock_requests.destroy(); } @@ -258,18 +254,18 @@ keyrange range; range.create(left_key, right_key); - buffer_mem = m_sto_buffer.get_num_bytes(); + buffer_mem = m_sto_buffer.total_memory_size(); m_sto_buffer.append(left_key, right_key); - delta = m_sto_buffer.get_num_bytes() - buffer_mem; + delta = m_sto_buffer.total_memory_size() - buffer_mem; if (m_mgr != nullptr) { m_mgr->note_mem_used(delta); } } void locktree::sto_end(void) { - uint64_t num_bytes = m_sto_buffer.get_num_bytes(); + uint64_t mem_size = m_sto_buffer.total_memory_size(); if (m_mgr != nullptr) { - m_mgr->note_mem_released(num_bytes); + m_mgr->note_mem_released(mem_size); } m_sto_buffer.destroy(); m_sto_buffer.create(); @@ -299,12 +295,11 @@ concurrent_tree sto_rangetree; concurrent_tree::locked_keyrange sto_lkr; - sto_rangetree.create(m_cmp); + sto_rangetree.create(&m_cmp); // insert all of the ranges from the single txnid buffer into a new rangtree - range_buffer::iterator iter; + range_buffer::iterator iter(&m_sto_buffer); range_buffer::iterator::record rec; - iter.create(&m_sto_buffer); while (iter.current(&rec)) { sto_lkr.prepare(&sto_rangetree); int r = acquire_lock_consolidated(&sto_lkr, @@ -439,7 +434,7 @@ txnid_set *conflicts, bool big_txn) { // All ranges in the locktree must have left endpoints <= right endpoints. // Range comparisons rely on this fact, so we make a paranoid invariant here. 
- paranoid_invariant(m_cmp->compare(left_key, right_key) <= 0); + paranoid_invariant(m_cmp(left_key, right_key) <= 0); int r = m_mgr == nullptr ? 0 : m_mgr->check_current_lock_constraints(big_txn); if (r == 0) { @@ -575,15 +570,14 @@ // locks are already released, otherwise we need to do it here. bool released = sto_try_release(txnid); if (!released) { - range_buffer::iterator iter; + range_buffer::iterator iter(ranges); range_buffer::iterator::record rec; - iter.create(ranges); while (iter.current(&rec)) { const DBT *left_key = rec.get_left_key(); const DBT *right_key = rec.get_right_key(); // All ranges in the locktree must have left endpoints <= right endpoints. // Range comparisons rely on this fact, so we make a paranoid invariant here. - paranoid_invariant(m_cmp->compare(left_key, right_key) <= 0); + paranoid_invariant(m_cmp(left_key, right_key) <= 0); remove_overlapping_locks_for_txnid(txnid, left_key, right_key); iter.next(); } @@ -647,10 +641,10 @@ TXNID txnid; range_buffer buffer; - static int find_by_txnid(const struct txnid_range_buffer &other_buffer, const TXNID &txnid) { - if (txnid < other_buffer.txnid) { + static int find_by_txnid(struct txnid_range_buffer *const &other_buffer, const TXNID &txnid) { + if (txnid < other_buffer->txnid) { return -1; - } else if (other_buffer.txnid == txnid) { + } else if (other_buffer->txnid == txnid) { return 0; } else { return 1; @@ -666,7 +660,7 @@ // has locks in a random/alternating order, then this does // not work so well. void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_escalate_callback_extra) { - omt range_buffers; + omt range_buffers; range_buffers.create(); // prepare and acquire a locked keyrange on the entire locktree @@ -716,7 +710,6 @@ // Try to find a range buffer for the current txnid. Create one if it doesn't exist. // Then, append the new escalated range to the buffer. 
uint32_t idx; - struct txnid_range_buffer new_range_buffer; struct txnid_range_buffer *existing_range_buffer; int r = range_buffers.find_zero( current_txnid, @@ -724,9 +717,10 @@ &idx ); if (r == DB_NOTFOUND) { - new_range_buffer.txnid = current_txnid; - new_range_buffer.buffer.create(); - new_range_buffer.buffer.append(escalated_left_key, escalated_right_key); + struct txnid_range_buffer *XMALLOC(new_range_buffer); + new_range_buffer->txnid = current_txnid; + new_range_buffer->buffer.create(); + new_range_buffer->buffer.append(escalated_left_key, escalated_right_key); range_buffers.insert_at(new_range_buffer, idx); } else { invariant_zero(r); @@ -754,9 +748,8 @@ invariant_zero(r); const TXNID current_txnid = current_range_buffer->txnid; - range_buffer::iterator iter; + range_buffer::iterator iter(¤t_range_buffer->buffer); range_buffer::iterator::record rec; - iter.create(¤t_range_buffer->buffer); while (iter.current(&rec)) { keyrange range; range.create(rec.get_left_key(), rec.get_right_key()); @@ -771,6 +764,15 @@ } current_range_buffer->buffer.destroy(); } + + while (range_buffers.size() > 0) { + struct txnid_range_buffer *buffer; + int r = range_buffers.fetch(0, &buffer); + invariant_zero(r); + r = range_buffers.delete_at(0); + invariant_zero(r); + toku_free(buffer); + } range_buffers.destroy(); lkr.release(); @@ -788,8 +790,8 @@ return &m_lock_request_info; } -void locktree::set_descriptor(DESCRIPTOR desc) { - m_cmp->set_descriptor(desc); +void locktree::set_comparator(const comparator &cmp) { + m_cmp.inherit(cmp); } locktree_manager *locktree::get_manager(void) const { diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/locktree.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/locktree.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/locktree.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/locktree.h 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek 
Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,7 +95,7 @@ #include #include -#include +#include // just for DICTIONARY_ID.. #include #include @@ -137,7 +137,6 @@ class locktree; class locktree_manager; class lock_request; - class memory_tracker; class concurrent_tree; typedef int (*lt_create_cb)(locktree *lt, void *extra); @@ -184,10 +183,10 @@ // effect: Get a locktree from the manager. If a locktree exists with the given // dict_id, it is referenced and then returned. If one did not exist, it - // is created. It will use the given descriptor and comparison function - // for comparing keys, and the on_create callback passed to locktree_manager::create() - // will be called with the given extra parameter. - locktree *get_lt(DICTIONARY_ID dict_id, DESCRIPTOR desc, ft_compare_func cmp, void *on_create_extra); + // is created. It will use the comparator for comparing keys. The on_create + // callback (passed to locktree_manager::create()) will be called with the + // given extra parameter. + locktree *get_lt(DICTIONARY_ID dict_id, const comparator &cmp, void *on_create_extra); void reference_lt(locktree *lt); @@ -246,7 +245,6 @@ // tracks the current number of locks and lock memory uint64_t m_max_lock_memory; uint64_t m_current_lock_memory; - memory_tracker *m_mem_tracker; struct lt_counters m_lt_counters; @@ -309,8 +307,7 @@ // A locktree represents the set of row locks owned by all transactions // over an open dictionary. Read and write ranges are represented as - // a left and right key which are compared with the given descriptor - // and comparison fn. + // a left and right key which are compared with the given comparator // // Locktrees are not created and destroyed by the user. Instead, they are // referenced and released using the locktree manager. @@ -325,10 +322,8 @@ // - Destroy the manager. 
class locktree { public: - // effect: Creates a locktree that uses the given memory tracker - // to report memory usage and honor memory constraints. - void create(locktree_manager *mgr, DICTIONARY_ID dict_id, - DESCRIPTOR desc, ft_compare_func cmp); + // effect: Creates a locktree + void create(locktree_manager *mgr, DICTIONARY_ID dict_id, const comparator &cmp); void destroy(void); @@ -374,7 +369,7 @@ locktree_manager *get_manager(void) const; - void set_descriptor(DESCRIPTOR desc); + void set_comparator(const comparator &cmp); int compare(const locktree *lt) const; @@ -392,16 +387,14 @@ DICTIONARY_ID m_dict_id; uint32_t m_reference_count; - // use a comparator object that encapsulates an ft compare - // function and a descriptor in a fake db. this way we can - // pass it around for easy key comparisons. + // Since the memory referenced by this comparator is not owned by the + // locktree, the user must guarantee it will outlive the locktree. // - // since this comparator will store a pointer to a descriptor, - // the user of the locktree needs to make sure that the descriptor - // is valid for as long as the locktree. this is currently - // implemented by opening an ft_handle for this locktree and - // storing it as userdata below. - comparator *m_cmp; + // The ydb API accomplishes this by opening an ft_handle in the on_create + // callback, which will keep the underlying FT (and its descriptor) in memory + // for as long as the handle is open. The ft_handle is stored opaquely in the + // userdata pointer below. 
see locktree_manager::get_lt w/ on_create_extra + comparator m_cmp; concurrent_tree *m_rangetree; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/manager.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/manager.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/manager.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/manager.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -183,8 +183,8 @@ invariant_zero(r); } -locktree *locktree_manager::get_lt(DICTIONARY_ID dict_id, DESCRIPTOR desc, - ft_compare_func cmp, void *on_create_extra) { +locktree *locktree_manager::get_lt(DICTIONARY_ID dict_id, + const comparator &cmp, void *on_create_extra) { // hold the mutex around searching and maybe // inserting into the locktree map @@ -193,7 +193,7 @@ locktree *lt = locktree_map_find(dict_id); if (lt == nullptr) { XCALLOC(lt); - lt->create(this, dict_id, desc, cmp); + lt->create(this, dict_id, cmp); // new locktree created - call the on_create callback // and put it in the locktree map @@ -483,7 +483,7 @@ mgr->add_escalator_wait_time(t1 - t0); } -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(status, k, c, t, "locktree: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(status, k, c, t, "locktree: " l, inc) void locktree_manager::status_init(void) { STATUS_INIT(LTM_SIZE_CURRENT, LOCKTREE_MEMORY_SIZE, UINT64, "memory size", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); @@ -530,33 +530,32 @@ STATUS_VALUE(LTM_LONG_WAIT_ESCALATION_COUNT) = m_long_wait_escalation_count; STATUS_VALUE(LTM_LONG_WAIT_ESCALATION_TIME) = m_long_wait_escalation_time; - mutex_lock(); - uint64_t lock_requests_pending = 0; uint64_t sto_num_eligible = 0; uint64_t sto_end_early_count = 0; tokutime_t sto_end_early_time = 0; + size_t num_locktrees = 0; + 
struct lt_counters lt_counters = {}; - struct lt_counters lt_counters = m_lt_counters; - - size_t num_locktrees = m_locktree_map.size(); - for (size_t i = 0; i < num_locktrees; i++) { - locktree *lt; - int r = m_locktree_map.fetch(i, <); - invariant_zero(r); - - toku_mutex_lock(<->m_lock_request_info.mutex); - lock_requests_pending += lt->m_lock_request_info.pending_lock_requests.size(); - lt_counters.add(lt->get_lock_request_info()->counters); - toku_mutex_unlock(<->m_lock_request_info.mutex); - - sto_num_eligible += lt->sto_txnid_is_valid_unsafe() ? 1 : 0; - sto_end_early_count += lt->m_sto_end_early_count; - sto_end_early_time += lt->m_sto_end_early_time; + if (toku_mutex_trylock(&m_mutex) == 0) { + lt_counters = m_lt_counters; + num_locktrees = m_locktree_map.size(); + for (size_t i = 0; i < num_locktrees; i++) { + locktree *lt; + int r = m_locktree_map.fetch(i, <); + invariant_zero(r); + if (toku_mutex_trylock(<->m_lock_request_info.mutex) == 0) { + lock_requests_pending += lt->m_lock_request_info.pending_lock_requests.size(); + lt_counters.add(lt->get_lock_request_info()->counters); + toku_mutex_unlock(<->m_lock_request_info.mutex); + } + sto_num_eligible += lt->sto_txnid_is_valid_unsafe() ? 1 : 0; + sto_end_early_count += lt->m_sto_end_early_count; + sto_end_early_time += lt->m_sto_end_early_time; + } + mutex_unlock(); } - mutex_unlock(); - STATUS_VALUE(LTM_NUM_LOCKTREES) = num_locktrees; STATUS_VALUE(LTM_LOCK_REQUESTS_PENDING) = lock_requests_pending; STATUS_VALUE(LTM_STO_NUM_ELIGIBLE) = sto_num_eligible; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/range_buffer.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/range_buffer.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/range_buffer.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/range_buffer.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,215 +89,210 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include #include -#include -#include "range_buffer.h" +#include "portability/memory.h" + +#include "locktree/range_buffer.h" +#include "util/dbt.h" namespace toku { -bool range_buffer::record_header::left_is_infinite(void) const { - return left_neg_inf || left_pos_inf; -} - -bool range_buffer::record_header::right_is_infinite(void) const { - return right_neg_inf || right_pos_inf; -} - -void range_buffer::record_header::init(const DBT *left_key, const DBT *right_key) { - left_neg_inf = left_key == toku_dbt_negative_infinity(); - left_pos_inf = left_key == toku_dbt_positive_infinity(); - left_key_size = toku_dbt_is_infinite(left_key) ? 0 : left_key->size; - if (right_key) { - right_neg_inf = right_key == toku_dbt_negative_infinity(); - right_pos_inf = right_key == toku_dbt_positive_infinity(); - right_key_size = toku_dbt_is_infinite(right_key) ? 
0 : right_key->size; - } else { - right_neg_inf = left_neg_inf; - right_pos_inf = left_pos_inf; - right_key_size = 0; - } -} - -const DBT *range_buffer::iterator::record::get_left_key(void) const { - if (m_header.left_neg_inf) { - return toku_dbt_negative_infinity(); - } else if (m_header.left_pos_inf) { - return toku_dbt_positive_infinity(); - } else { - return &m_left_key; - } -} - -const DBT *range_buffer::iterator::record::get_right_key(void) const { - if (m_header.right_neg_inf) { - return toku_dbt_negative_infinity(); - } else if (m_header.right_pos_inf) { - return toku_dbt_positive_infinity(); - } else { - return &m_right_key; - } -} - -size_t range_buffer::iterator::record::size(void) const { - return sizeof(record_header) + m_header.left_key_size + m_header.right_key_size; -} - -void range_buffer::iterator::record::deserialize(const char *buf) { - size_t current = 0; - - // deserialize the header - memcpy(&m_header, buf, sizeof(record_header)); - current += sizeof(record_header); - - // deserialize the left key if necessary - if (!m_header.left_is_infinite()) { - // point the left DBT's buffer into ours - toku_fill_dbt(&m_left_key, buf + current, m_header.left_key_size); - current += m_header.left_key_size; - } - - // deserialize the right key if necessary - if (!m_header.right_is_infinite()) { - if (m_header.right_key_size == 0) { - toku_copyref_dbt(&m_right_key, m_left_key); + bool range_buffer::record_header::left_is_infinite(void) const { + return left_neg_inf || left_pos_inf; + } + + bool range_buffer::record_header::right_is_infinite(void) const { + return right_neg_inf || right_pos_inf; + } + + void range_buffer::record_header::init(const DBT *left_key, const DBT *right_key) { + left_neg_inf = left_key == toku_dbt_negative_infinity(); + left_pos_inf = left_key == toku_dbt_positive_infinity(); + left_key_size = toku_dbt_is_infinite(left_key) ? 
0 : left_key->size; + if (right_key) { + right_neg_inf = right_key == toku_dbt_negative_infinity(); + right_pos_inf = right_key == toku_dbt_positive_infinity(); + right_key_size = toku_dbt_is_infinite(right_key) ? 0 : right_key->size; + } else { + right_neg_inf = left_neg_inf; + right_pos_inf = left_pos_inf; + right_key_size = 0; + } + } + + const DBT *range_buffer::iterator::record::get_left_key(void) const { + if (_header.left_neg_inf) { + return toku_dbt_negative_infinity(); + } else if (_header.left_pos_inf) { + return toku_dbt_positive_infinity(); + } else { + return &_left_key; + } + } + + const DBT *range_buffer::iterator::record::get_right_key(void) const { + if (_header.right_neg_inf) { + return toku_dbt_negative_infinity(); + } else if (_header.right_pos_inf) { + return toku_dbt_positive_infinity(); + } else { + return &_right_key; + } + } + + size_t range_buffer::iterator::record::size(void) const { + return sizeof(record_header) + _header.left_key_size + _header.right_key_size; + } + + void range_buffer::iterator::record::deserialize(const char *buf) { + size_t current = 0; + + // deserialize the header + memcpy(&_header, buf, sizeof(record_header)); + current += sizeof(record_header); + + // deserialize the left key if necessary + if (!_header.left_is_infinite()) { + // point the left DBT's buffer into ours + toku_fill_dbt(&_left_key, buf + current, _header.left_key_size); + current += _header.left_key_size; + } + + // deserialize the right key if necessary + if (!_header.right_is_infinite()) { + if (_header.right_key_size == 0) { + toku_copyref_dbt(&_right_key, _left_key); + } else { + toku_fill_dbt(&_right_key, buf + current, _header.right_key_size); + } + } + } + + toku::range_buffer::iterator::iterator() : + _ma_chunk_iterator(nullptr), + _current_chunk_base(nullptr), + _current_chunk_offset(0), _current_chunk_max(0), + _current_rec_size(0) { + } + + toku::range_buffer::iterator::iterator(const range_buffer *buffer) : + 
_ma_chunk_iterator(&buffer->_arena), + _current_chunk_base(nullptr), + _current_chunk_offset(0), _current_chunk_max(0), + _current_rec_size(0) { + reset_current_chunk(); + } + + void range_buffer::iterator::reset_current_chunk() { + _current_chunk_base = _ma_chunk_iterator.current(&_current_chunk_max); + _current_chunk_offset = 0; + } + + bool range_buffer::iterator::current(record *rec) { + if (_current_chunk_offset < _current_chunk_max) { + const char *buf = reinterpret_cast(_current_chunk_base); + rec->deserialize(buf + _current_chunk_offset); + _current_rec_size = rec->size(); + return true; + } else { + return false; + } + } + + // move the iterator to the next record in the buffer + void range_buffer::iterator::next(void) { + invariant(_current_chunk_offset < _current_chunk_max); + invariant(_current_rec_size > 0); + + // the next record is _current_rec_size bytes forward + _current_chunk_offset += _current_rec_size; + // now, we don't know how big the current is, set it to 0. + _current_rec_size = 0; + + if (_current_chunk_offset >= _current_chunk_max) { + // current chunk is exhausted, try moving to the next one + if (_ma_chunk_iterator.more()) { + _ma_chunk_iterator.next(); + reset_current_chunk(); + } + } + } + + void range_buffer::create(void) { + // allocate buffer space lazily instead of on creation. this way, + // no malloc/free is done if the transaction ends up taking no locks. + _arena.create(0); + _num_ranges = 0; + } + + void range_buffer::append(const DBT *left_key, const DBT *right_key) { + // if the keys are equal, then only one copy is stored. 
+ if (toku_dbt_equals(left_key, right_key)) { + invariant(left_key->size <= MAX_KEY_SIZE); + append_point(left_key); } else { - toku_fill_dbt(&m_right_key, buf + current, m_header.right_key_size); + invariant(left_key->size <= MAX_KEY_SIZE); + invariant(right_key->size <= MAX_KEY_SIZE); + append_range(left_key, right_key); } + _num_ranges++; + } + + bool range_buffer::is_empty(void) const { + return total_memory_size() == 0; } -} -void range_buffer::iterator::create(const range_buffer *buffer) { - m_buffer = buffer; - m_current_offset = 0; - m_current_size = 0; -} - -bool range_buffer::iterator::current(record *rec) { - if (m_current_offset < m_buffer->m_buf_current) { - rec->deserialize(m_buffer->m_buf + m_current_offset); - m_current_size = rec->size(); - return true; - } else { - return false; - } -} - -// move the iterator to the next record in the buffer -void range_buffer::iterator::next(void) { - invariant(m_current_offset < m_buffer->m_buf_current); - invariant(m_current_size > 0); - - // the next record is m_current_size bytes forward - // now, we don't know how big the current is, set it to 0. - m_current_offset += m_current_size; - m_current_size = 0; -} - -void range_buffer::create(void) { - // allocate buffer space lazily instead of on creation. this way, - // no malloc/free is done if the transaction ends up taking no locks. - m_buf = nullptr; - m_buf_size = 0; - m_buf_current = 0; - m_num_ranges = 0; -} - -void range_buffer::append(const DBT *left_key, const DBT *right_key) { - // if the keys are equal, then only one copy is stored. 
- if (toku_dbt_equals(left_key, right_key)) { - append_point(left_key); - } else { - append_range(left_key, right_key); - } - m_num_ranges++; -} - -bool range_buffer::is_empty(void) const { - return m_buf == nullptr; -} - -uint64_t range_buffer::get_num_bytes(void) const { - return m_buf_current; -} - -int range_buffer::get_num_ranges(void) const { - return m_num_ranges; -} - -void range_buffer::destroy(void) { - if (m_buf) { - toku_free(m_buf); - } -} - -void range_buffer::append_range(const DBT *left_key, const DBT *right_key) { - maybe_grow(sizeof(record_header) + left_key->size + right_key->size); - - record_header h; - h.init(left_key, right_key); - - // serialize the header - memcpy(m_buf + m_buf_current, &h, sizeof(record_header)); - m_buf_current += sizeof(record_header); - - // serialize the left key if necessary - if (!h.left_is_infinite()) { - memcpy(m_buf + m_buf_current, left_key->data, left_key->size); - m_buf_current += left_key->size; - } - - // serialize the right key if necessary - if (!h.right_is_infinite()) { - memcpy(m_buf + m_buf_current, right_key->data, right_key->size); - m_buf_current += right_key->size; - } -} - -void range_buffer::append_point(const DBT *key) { - maybe_grow(sizeof(record_header) + key->size); - - record_header h; - h.init(key, nullptr); - - // serialize the header - memcpy(m_buf + m_buf_current, &h, sizeof(record_header)); - m_buf_current += sizeof(record_header); - - // serialize the key if necessary - if (!h.left_is_infinite()) { - memcpy(m_buf + m_buf_current, key->data, key->size); - m_buf_current += key->size; - } -} - -void range_buffer::maybe_grow(size_t size) { - static const size_t initial_size = 4096; - static const size_t aggressive_growth_threshold = 128 * 1024; - const size_t needed = m_buf_current + size; - if (m_buf_size < needed) { - if (m_buf_size == 0) { - m_buf_size = initial_size; - } - // aggressively grow the range buffer to the threshold, - // but only additivately increase the size after that. 
- while (m_buf_size < needed && m_buf_size < aggressive_growth_threshold) { - m_buf_size <<= 1; - } - while (m_buf_size < needed) { - m_buf_size += aggressive_growth_threshold; - } - XREALLOC(m_buf, m_buf_size); - } -} - -size_t range_buffer::get_initial_size(size_t n) const { - size_t r = 4096; - while (r < n) { - r *= 2; + uint64_t range_buffer::total_memory_size(void) const { + return _arena.total_size_in_use(); + } + + int range_buffer::get_num_ranges(void) const { + return _num_ranges; + } + + void range_buffer::destroy(void) { + _arena.destroy(); + } + + void range_buffer::append_range(const DBT *left_key, const DBT *right_key) { + size_t record_length = sizeof(record_header) + left_key->size + right_key->size; + char *buf = reinterpret_cast(_arena.malloc_from_arena(record_length)); + + record_header h; + h.init(left_key, right_key); + + // serialize the header + memcpy(buf, &h, sizeof(record_header)); + buf += sizeof(record_header); + + // serialize the left key if necessary + if (!h.left_is_infinite()) { + memcpy(buf, left_key->data, left_key->size); + buf += left_key->size; + } + + // serialize the right key if necessary + if (!h.right_is_infinite()) { + memcpy(buf, right_key->data, right_key->size); + } + } + + void range_buffer::append_point(const DBT *key) { + size_t record_length = sizeof(record_header) + key->size; + char *buf = reinterpret_cast(_arena.malloc_from_arena(record_length)); + + record_header h; + h.init(key, nullptr); + + // serialize the header + memcpy(buf, &h, sizeof(record_header)); + buf += sizeof(record_header); + + // serialize the key if necessary + if (!h.left_is_infinite()) { + memcpy(buf, key->data, key->size); + } } - return r; -} } /* namespace toku */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/range_buffer.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/range_buffer.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/range_buffer.h 2014-08-03 12:00:34.000000000 +0000 +++ 
mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/range_buffer.h 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,136 +86,126 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef RANGE_BUFFER_H -#define RANGE_BUFFER_H - -#include +#include "portability/toku_stdint.h" -#include +#include "util/dbt.h" +#include "util/memarena.h" namespace toku { -// a key range buffer represents a set of key ranges that can -// be stored, iterated over, and then destroyed all at once. - -class range_buffer { -// Private in spirit: We fail POD asserts when we try to store range_buffers in an omt. -// So make it all public, but don't touch. -public: -//private: - - // the key range buffer is a bunch of records in a row. - // each record has the following header, followed by the - // left key and right key data payload, if applicable. - - struct record_header { - bool left_neg_inf; - bool left_pos_inf; - bool right_pos_inf; - bool right_neg_inf; - uint32_t left_key_size; - uint32_t right_key_size; - - bool left_is_infinite(void) const; + // a key range buffer represents a set of key ranges that can + // be stored, iterated over, and then destroyed all at once. + class range_buffer { + private: - bool right_is_infinite(void) const; + // the key range buffer is a bunch of records in a row. + // each record has the following header, followed by the + // left key and right key data payload, if applicable. 
+ // we limit keys to be 2^16, since we store lengths as 2 bytes. + static const size_t MAX_KEY_SIZE = 1 << 16; + + struct record_header { + bool left_neg_inf; + bool left_pos_inf; + bool right_pos_inf; + bool right_neg_inf; + uint16_t left_key_size; + uint16_t right_key_size; - void init(const DBT *left_key, const DBT *right_key); - }; - static_assert(sizeof(record_header) == 12, "record header format is off"); - -public: + bool left_is_infinite(void) const; - // the iterator abstracts reading over a buffer of variable length - // records one by one until there are no more left. + bool right_is_infinite(void) const; - class iterator { + void init(const DBT *left_key, const DBT *right_key); + }; + static_assert(sizeof(record_header) == 8, "record header format is off"); + public: - // a record represents the user-view of a serialized key range. - // it handles positive and negative infinity and the optimized - // point range case, where left and right points share memory. - - class record { + // the iterator abstracts reading over a buffer of variable length + // records one by one until there are no more left. + class iterator { public: - // get a read-only pointer to the left key of this record's range - const DBT *get_left_key(void) const; - - // get a read-only pointer to the right key of this record's range - const DBT *get_right_key(void) const; + iterator(); + iterator(const range_buffer *buffer); - // how big is this record? this tells us where the next record is - size_t size(void) const; + // a record represents the user-view of a serialized key range. + // it handles positive and negative infinity and the optimized + // point range case, where left and right points share memory. + class record { + public: + // get a read-only pointer to the left key of this record's range + const DBT *get_left_key(void) const; + + // get a read-only pointer to the right key of this record's range + const DBT *get_right_key(void) const; + + // how big is this record? 
this tells us where the next record is + size_t size(void) const; + + // populate a record header and point our DBT's + // buffers into ours if they are not infinite. + void deserialize(const char *buf); + + private: + record_header _header; + DBT _left_key; + DBT _right_key; + }; + + // populate the given record object with the current + // the memory referred to by record is valid for only + // as long as the record exists. + bool current(record *rec); - // populate a record header and point our DBT's - // buffers into ours if they are not infinite. - void deserialize(const char *buf); + // move the iterator to the next record in the buffer + void next(void); private: - record_header m_header; - DBT m_left_key; - DBT m_right_key; - }; - - void create(const range_buffer *buffer); + void reset_current_chunk(); - // populate the given record object with the current - // the memory referred to by record is valid for only - // as long as the record exists. - bool current(record *rec); - - // move the iterator to the next record in the buffer - void next(void); - - private: - // the key range buffer we are iterating over, the current - // offset in that buffer, and the size of the current record. - const range_buffer *m_buffer; - size_t m_current_offset; - size_t m_current_size; - }; - - // allocate buffer space lazily instead of on creation. this way, - // no malloc/free is done if the transaction ends up taking no locks. - void create(void); - - // append a left/right key range to the buffer. - // if the keys are equal, then only one copy is stored. - void append(const DBT *left_key, const DBT *right_key); + // the key range buffer we are iterating over, the current + // offset in that buffer, and the size of the current record. + memarena::chunk_iterator _ma_chunk_iterator; + const void *_current_chunk_base; + size_t _current_chunk_offset; + size_t _current_chunk_max; + size_t _current_rec_size; + }; - // is this range buffer empty? 
- bool is_empty(void) const; + // allocate buffer space lazily instead of on creation. this way, + // no malloc/free is done if the transaction ends up taking no locks. + void create(void); - // how many bytes are stored in this range buffer? - uint64_t get_num_bytes(void) const; + // append a left/right key range to the buffer. + // if the keys are equal, then only one copy is stored. + void append(const DBT *left_key, const DBT *right_key); - // how many ranges are stored in this range buffer? - int get_num_ranges(void) const; + // is this range buffer empty? + bool is_empty(void) const; - void destroy(void); + // how much memory is being used by this range buffer? + uint64_t total_memory_size(void) const; -//private: - char *m_buf; - size_t m_buf_size; - size_t m_buf_current; - int m_num_ranges; + // how many ranges are stored in this range buffer? + int get_num_ranges(void) const; - void append_range(const DBT *left_key, const DBT *right_key); + void destroy(void); - // append a point to the buffer. this is the space/time saving - // optimization for key ranges where left == right. - void append_point(const DBT *key); + private: + memarena _arena; + int _num_ranges; - void maybe_grow(size_t size); + void append_range(const DBT *left_key, const DBT *right_key); - // the initial size of the buffer is the next power of 2 - // greater than the first entry we insert into the buffer. - size_t get_initial_size(size_t n) const; -}; + // append a point to the buffer. this is the space/time saving + // optimization for key ranges where left == right. 
+ void append_point(const DBT *key); + }; } /* namespace toku */ - -#endif /* RANGE_BUFFER_H */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/concurrent_tree_create_destroy.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/concurrent_tree_create_destroy.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/concurrent_tree_create_destroy.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/concurrent_tree_create_destroy.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_acquire_release.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_acquire_release.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_acquire_release.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_acquire_release.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -126,19 +126,19 @@ // if the subtree root does not overlap then one of its children // must exist and have an overlapping range. 
- if (!lkr.m_subtree->m_range.overlaps(&cmp, range)) { + if (!lkr.m_subtree->m_range.overlaps(cmp, range)) { treenode *left = lkr.m_subtree->m_left_child.ptr; treenode *right = lkr.m_subtree->m_right_child.ptr; if (left != nullptr) { // left exists, so if it does not overlap then the right must - if (!left->m_range.overlaps(&cmp, range)) { + if (!left->m_range.overlaps(cmp, range)) { invariant_notnull(right); - invariant(right->m_range.overlaps(&cmp, range)); + invariant(right->m_range.overlaps(cmp, range)); } } else { // no left child, so the right must exist and be overlapping invariant_notnull(right); - invariant(right->m_range.overlaps(&cmp, range)); + invariant(right->m_range.overlaps(cmp, range)); } } @@ -160,6 +160,8 @@ lkr.release(); tree.destroy(); } + + cmp.destroy(); } } /* namespace toku */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_insert_remove.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_insert_remove.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_insert_remove.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_insert_remove.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -117,17 +117,17 @@ } static uint64_t check_for_range_and_count(concurrent_tree::locked_keyrange *lkr, - comparator *cmp, const keyrange &range, bool range_should_exist) { + const comparator &cmp, const keyrange &range, bool range_should_exist) { struct check_fn_obj { - comparator *cmp; + const comparator *cmp; uint64_t count; keyrange target_range; bool target_range_found; bool fn(const keyrange &query_range, TXNID txnid) { (void) txnid; - if (query_range.compare(cmp, target_range) == keyrange::comparison::EQUALS) { + if (query_range.compare(*cmp, target_range) == keyrange::comparison::EQUALS) { invariant(!target_range_found); target_range_found = true; } @@ -135,7 +135,7 @@ return true; } } check_fn; - check_fn.cmp = cmp; + check_fn.cmp = &cmp; check_fn.count = 0; check_fn.target_range = range; check_fn.target_range_found = false; @@ -174,14 +174,14 @@ // insert an element. it should exist and the // count should be correct. lkr.insert(range, i); - n = check_for_range_and_count(&lkr, &cmp, range, true); + n = check_for_range_and_count(&lkr, cmp, range, true); if (i >= cap) { invariant(n == cap + 1); // remove an element previously inserted. it should // no longer exist and the count should be correct. 
range.create(get_ith_key_from_set(i - cap), get_ith_key_from_set(i - cap)); lkr.remove(range); - n = check_for_range_and_count(&lkr, &cmp, range, false); + n = check_for_range_and_count(&lkr, cmp, range, false); invariant(n == cap); } else { invariant(n == i + 1); @@ -193,12 +193,13 @@ keyrange range; range.create(get_ith_key_from_set(num_keys - i - 1), get_ith_key_from_set(num_keys - i - 1)); lkr.remove(range); - n = check_for_range_and_count(&lkr, &cmp, range, false); + n = check_for_range_and_count(&lkr, cmp, range, false); invariant(n == (cap - i - 1)); } lkr.release(); tree.destroy(); + cmp.destroy(); } } /* namespace toku */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_insert_serial_large.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_insert_serial_large.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_insert_serial_large.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_insert_serial_large.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -136,6 +136,7 @@ lkr.release(); tree.destroy(); + cmp.destroy(); } } /* namespace toku */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_remove_all.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_remove_all.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_remove_all.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_remove_all.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -132,6 +132,8 @@ lkr.release(); tree.destroy(); } + + cmp.destroy(); } } /* namespace toku */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/concurrent_tree_unit_test.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/concurrent_tree_unit_test.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/concurrent_tree_unit_test.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/concurrent_tree_unit_test.h 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,6 +86,8 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/lock_request_create_set.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/lock_request_create_set.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/lock_request_create_set.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/lock_request_create_set.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/lock_request_get_set_keys.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/lock_request_get_set_keys.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/lock_request_get_set_keys.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/lock_request_get_set_keys.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/lock_request_killed.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/lock_request_killed.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/lock_request_killed.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/lock_request_killed.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -120,7 +120,7 @@ locktree lt; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, compare_dbts); + lt.create(nullptr, dict_id, dbt_comparator); TXNID txnid_a = 1001; lock_request request_a; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/lock_request_not_killed.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/lock_request_not_killed.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/lock_request_not_killed.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/lock_request_not_killed.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -117,7 +117,7 @@ locktree lt; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, compare_dbts); + lt.create(nullptr, dict_id, dbt_comparator); TXNID txnid_a = 1001; lock_request request_a; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/lock_request_start_deadlock.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/lock_request_start_deadlock.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/lock_request_start_deadlock.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/lock_request_start_deadlock.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -102,7 +102,7 @@ const uint64_t lock_wait_time = 10; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, compare_dbts); + lt.create(nullptr, dict_id, dbt_comparator); TXNID txnid_a = 1001; TXNID txnid_b = 2001; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/lock_request_start_pending.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/lock_request_start_pending.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/lock_request_start_pending.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/lock_request_start_pending.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -101,7 +101,7 @@ lock_request request; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, compare_dbts); + lt.create(nullptr, dict_id, dbt_comparator); TXNID txnid_a = 1001; TXNID txnid_b = 2001; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/lock_request_unit_test.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/lock_request_unit_test.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/lock_request_unit_test.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/lock_request_unit_test.h 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_LOCK_REQUEST_UNIT_TEST_H -#define TOKU_LOCK_REQUEST_UNIT_TEST_H - #include "test.h" #include "locktree_unit_test.h" @@ -132,5 +131,3 @@ }; } - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/lock_request_wait_time_callback.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/lock_request_wait_time_callback.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/lock_request_wait_time_callback.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/lock_request_wait_time_callback.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -101,7 +101,7 @@ locktree lt; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, compare_dbts); + lt.create(nullptr, dict_id, dbt_comparator); TXNID txnid_a = 1001; lock_request request_a; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_conflicts.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_conflicts.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_conflicts.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_conflicts.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -108,7 +108,7 @@ locktree lt; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, compare_dbts); + lt.create(nullptr, dict_id, dbt_comparator); int r; TXNID txnid_a = 1001; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_create_destroy.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_create_destroy.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_create_destroy.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_create_destroy.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -98,7 +98,7 @@ locktree lt; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, compare_dbts); + lt.create(nullptr, dict_id, dbt_comparator); lt_lock_request_info *info = lt.get_lock_request_info(); invariant_notnull(info); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_escalation_1big7lt_1small.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_escalation_1big7lt_1small.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_escalation_1big7lt_1small.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_escalation_1big7lt_1small.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -247,11 +247,11 @@ locktree *big_lt[n_big]; for (int i = 0; i < n_big; i++) { dict_id = { next_dict_id }; next_dict_id++; - big_lt[i] = mgr.get_lt(dict_id, nullptr, compare_dbts, nullptr); + big_lt[i] = mgr.get_lt(dict_id, dbt_comparator, nullptr); } dict_id = { next_dict_id }; next_dict_id++; - locktree *small_lt = mgr.get_lt(dict_id, nullptr, compare_dbts, nullptr); + locktree *small_lt = mgr.get_lt(dict_id, dbt_comparator, nullptr); // create the worker threads struct big_arg big_arg = { &mgr, big_lt, n_big, 1000 }; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_1lt.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_1lt.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_1lt.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_1lt.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -210,13 +210,10 @@ mgr.set_max_lock_memory(max_lock_memory); // create lock trees - DESCRIPTOR desc[n_lt]; - DICTIONARY_ID dict_id[n_lt]; locktree *lt[n_big]; for (int i = 0; i < n_lt; i++) { - desc[i] = nullptr; - dict_id[i] = { (uint64_t)i }; - lt[i] = mgr.get_lt(dict_id[i], desc[i], compare_dbts, nullptr); + DICTIONARY_ID dict_id = { .dictid = (uint64_t) i }; + lt[i] = mgr.get_lt(dict_id, dbt_comparator, nullptr); assert(lt[i]); } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_2lt.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_2lt.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_2lt.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_2lt.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -210,13 +210,10 @@ mgr.set_max_lock_memory(max_lock_memory); // create lock trees - DESCRIPTOR desc[n_lt]; - DICTIONARY_ID dict_id[n_lt]; locktree *lt[n_big]; for (int i = 0; i < n_lt; i++) { - desc[i] = nullptr; - dict_id[i] = { (uint64_t)i }; - lt[i] = mgr.get_lt(dict_id[i], desc[i], compare_dbts, nullptr); + DICTIONARY_ID dict_id = { .dictid = (uint64_t)i }; + lt[i] = mgr.get_lt(dict_id, dbt_comparator, nullptr); assert(lt[i]); } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_escalation_impossible.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_escalation_impossible.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_escalation_impossible.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_escalation_impossible.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -167,9 +167,8 @@ const TXNID txn_b = 100; // create lock trees - DESCRIPTOR desc = nullptr; - DICTIONARY_ID dict_id = { 1 }; - locktree *lt = mgr.get_lt(dict_id, desc, compare_dbts, nullptr); + DICTIONARY_ID dict_id = { .dictid = 1 }; + locktree *lt = mgr.get_lt(dict_id, dbt_comparator, nullptr); int64_t last_i = -1; for (int64_t i = 0; ; i++) { diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_escalation_stalls.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_escalation_stalls.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_escalation_stalls.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_escalation_stalls.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -228,13 +228,11 @@ mgr.set_max_lock_memory(max_lock_memory); // create lock trees - DESCRIPTOR desc_0 = nullptr; - DICTIONARY_ID dict_id_0 = { 1 }; - locktree *lt_0 = mgr.get_lt(dict_id_0, desc_0, compare_dbts, nullptr); - - DESCRIPTOR desc_1 = nullptr; - DICTIONARY_ID dict_id_1 = { 2 }; - locktree *lt_1 = mgr.get_lt(dict_id_1, desc_1, compare_dbts, nullptr); + DICTIONARY_ID dict_id_0 = { .dictid = 1 }; + locktree *lt_0 = mgr.get_lt(dict_id_0, dbt_comparator, nullptr); + + DICTIONARY_ID dict_id_1 = { .dictid = 2 }; + locktree *lt_1 = mgr.get_lt(dict_id_1, dbt_comparator, nullptr); // create the worker threads struct arg big_arg = { &mgr, lt_0, 1000 }; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_infinity.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_infinity.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_infinity.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_infinity.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -98,7 +98,7 @@ locktree lt; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, compare_dbts); + lt.create(nullptr, dict_id, dbt_comparator); int r; TXNID txnid_a = 1001; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_misc.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_misc.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_misc.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_misc.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -109,7 +109,9 @@ void locktree_unit_test::test_misc(void) { locktree lt; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, my_compare_dbts); + toku::comparator my_dbt_comparator; + my_dbt_comparator.create(my_compare_dbts, nullptr); + lt.create(nullptr, dict_id, my_dbt_comparator); invariant(lt.get_userdata() == nullptr); int userdata; @@ -124,19 +126,27 @@ expected_a = &dbt_a; expected_b = &dbt_b; + toku::comparator cmp_d1, cmp_d2; + cmp_d1.create(my_compare_dbts, &d1); + cmp_d2.create(my_compare_dbts, &d2); + // make sure the comparator object has the correct // descriptor when we set the locktree's descriptor - lt.set_descriptor(&d1); + lt.set_comparator(cmp_d1); expected_descriptor = &d1; - r = lt.m_cmp->compare(&dbt_a, &dbt_b); + r = lt.m_cmp(&dbt_a, &dbt_b); invariant(r == expected_comparison_magic); - lt.set_descriptor(&d2); + lt.set_comparator(cmp_d2); expected_descriptor = &d2; - r = lt.m_cmp->compare(&dbt_a, &dbt_b); + r = lt.m_cmp(&dbt_a, &dbt_b); invariant(r == expected_comparison_magic); lt.release_reference(); lt.destroy(); + + cmp_d1.destroy(); + cmp_d2.destroy(); + my_dbt_comparator.destroy(); } } /* namespace toku */ diff -Nru 
mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_overlapping_relock.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_overlapping_relock.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_overlapping_relock.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_overlapping_relock.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -101,7 +101,7 @@ locktree lt; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, compare_dbts); + lt.create(nullptr, dict_id, dbt_comparator); const DBT *zero = get_dbt(0); const DBT *one = get_dbt(1); @@ -143,7 +143,7 @@ bool saw_the_other; TXNID expected_txnid; keyrange *expected_range; - comparator *cmp; + const comparator *cmp; bool fn(const keyrange &range, TXNID txnid) { if (txnid == the_other_txnid) { invariant(!saw_the_other); @@ -151,12 +151,12 @@ return true; } invariant(txnid == expected_txnid); - keyrange::comparison c = range.compare(cmp, *expected_range); + keyrange::comparison c = range.compare(*cmp, *expected_range); invariant(c == keyrange::comparison::EQUALS); return true; } } verify_fn; - verify_fn.cmp = lt.m_cmp; + verify_fn.cmp = <.m_cmp; #define do_verify() \ do { verify_fn.saw_the_other = false; locktree_iterate(<, &verify_fn); } while (0) diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_simple_lock.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_simple_lock.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_simple_lock.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_simple_lock.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal 
Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -98,8 +98,8 @@ locktree_manager mgr; mgr.create(nullptr, nullptr, nullptr, nullptr); - DICTIONARY_ID dict_id = { 1 }; - locktree *lt = mgr.get_lt(dict_id, nullptr, compare_dbts, nullptr); + DICTIONARY_ID dict_id = { .dictid = 1 }; + locktree *lt = mgr.get_lt(dict_id, dbt_comparator, nullptr); int r; TXNID txnid_a = 1001; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_single_txnid_optimization.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_single_txnid_optimization.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_single_txnid_optimization.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_single_txnid_optimization.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -101,7 +101,7 @@ locktree lt; DICTIONARY_ID dict_id = { 1 }; - lt.create(nullptr, dict_id, nullptr, compare_dbts); + lt.create(nullptr, dict_id, dbt_comparator); const DBT *zero = get_dbt(0); const DBT *one = get_dbt(1); @@ -149,15 +149,15 @@ struct verify_fn_obj { TXNID expected_txnid; keyrange *expected_range; - comparator *cmp; + const comparator *cmp; bool fn(const keyrange &range, TXNID txnid) { invariant(txnid == expected_txnid); - keyrange::comparison c = range.compare(cmp, *expected_range); + keyrange::comparison c = range.compare(*cmp, *expected_range); invariant(c == keyrange::comparison::EQUALS); return true; } } verify_fn; - verify_fn.cmp = lt.m_cmp; + verify_fn.cmp = <.m_cmp; keyrange range; range.create(one, one); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_unit_test.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_unit_test.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/locktree_unit_test.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/locktree_unit_test.h 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#ifndef TOKU_LOCKTREE_UNIT_TEST_H -#define TOKU_LOCKTREE_UNIT_TEST_H - #include "test.h" #include "locktree.h" @@ -157,5 +156,3 @@ }; } /* namespace toku */ - -#endif /* TOKU_LOCKTREE_UNIT_TEST_H */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/manager_create_destroy.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/manager_create_destroy.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/manager_create_destroy.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/manager_create_destroy.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/manager_locktree_map.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/manager_locktree_map.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/manager_locktree_map.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/manager_locktree_map.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/manager_params.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/manager_params.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/manager_params.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/manager_params.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/manager_reference_release_lt.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/manager_reference_release_lt.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/manager_reference_release_lt.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/manager_reference_release_lt.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -107,9 +107,15 @@ (*k) = false; } +static int my_cmp(DB *UU(db), const DBT *UU(a), const DBT *UU(b)) { + return 0; +} + void manager_unit_test::test_reference_release_lt(void) { locktree_manager mgr; mgr.create(create_cb, destroy_cb, nullptr, nullptr); + toku::comparator my_comparator; + my_comparator.create(my_cmp, nullptr); DICTIONARY_ID a = { 0 }; DICTIONARY_ID b = { 1 }; @@ -117,18 +123,12 @@ bool aok = false; bool bok = false; bool cok = false; - - int d = 5; - DESCRIPTOR_S desc_s; - desc_s.dbt.data = &d; - desc_s.dbt.size = desc_s.dbt.ulen = sizeof(d); - desc_s.dbt.flags = DB_DBT_USERMEM; - locktree *alt = mgr.get_lt(a, &desc_s, nullptr, &aok); + locktree *alt = mgr.get_lt(a, my_comparator, &aok); invariant_notnull(alt); - locktree *blt = mgr.get_lt(b, &desc_s, nullptr, &bok); + locktree *blt = mgr.get_lt(b, my_comparator, &bok); invariant_notnull(alt); - locktree *clt = mgr.get_lt(c, &desc_s, nullptr, &cok); + locktree *clt = mgr.get_lt(c, my_comparator, &cok); invariant_notnull(alt); // three distinct locktrees should have been returned @@ -152,9 +152,9 @@ // get another handle on a and b, they shoudl be the same // as the original alt and blt - locktree *blt2 = mgr.get_lt(b, &desc_s, nullptr, &bok); + locktree *blt2 = mgr.get_lt(b, my_comparator, &bok); invariant(blt2 == blt); - locktree *alt2 = mgr.get_lt(a, &desc_s, 
nullptr, &aok); + locktree *alt2 = mgr.get_lt(a, my_comparator, &aok); invariant(alt2 == alt); // remove one ref from everything. c should die. a and b are ok. @@ -171,6 +171,7 @@ invariant(!aok); invariant(!bok); + my_comparator.destroy(); mgr.destroy(); } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/manager_status.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/manager_status.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/manager_status.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/manager_status.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -120,9 +120,8 @@ assert_status(&status, "LTM_WAIT_COUNT", 0); assert_status(&status, "LTM_TIMEOUT_COUNT", 0); - DESCRIPTOR desc = nullptr; - DICTIONARY_ID dict_id = { 1 }; - locktree *lt = mgr.get_lt(dict_id, desc, compare_dbts, nullptr); + DICTIONARY_ID dict_id = { .dictid = 1 }; + locktree *lt = mgr.get_lt(dict_id, dbt_comparator, nullptr); int r; TXNID txnid_a = 1001; TXNID txnid_b = 2001; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/manager_unit_test.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/manager_unit_test.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/manager_unit_test.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/manager_unit_test.h 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_MANAGER_TEST_H -#define TOKU_MANAGER_TEST_H - #include #include @@ -111,5 +110,3 @@ }; } /* namespace toku */ - -#endif /* TOKU_MANAGER_TEST_H */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/range_buffer_test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/range_buffer_test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/range_buffer_test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/range_buffer_test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -121,9 +121,8 @@ } size_t i = 0; - range_buffer::iterator iter; + range_buffer::iterator iter(&buffer); range_buffer::iterator::record rec; - iter.create(&buffer); while (iter.current(&rec)) { const DBT *expected_point = get_dbt_by_iteration(i); invariant(compare_dbts(nullptr, expected_point, rec.get_left_key()) == 0); @@ -151,9 +150,8 @@ } size_t i = 0; - range_buffer::iterator iter; + range_buffer::iterator iter(&buffer); range_buffer::iterator::record rec; - iter.create(&buffer); while (iter.current(&rec)) { const DBT *expected_left = get_dbt_by_iteration(i); const DBT *expected_right = get_dbt_by_iteration(i + 1); @@ -187,9 +185,8 @@ } size_t i = 0; - range_buffer::iterator iter; + range_buffer::iterator iter(&buffer); range_buffer::iterator::record rec; - iter.create(&buffer); while (iter.current(&rec)) { const DBT *expected_left = get_dbt_by_iteration(i); const DBT *expected_right = get_dbt_by_iteration(i + 1); @@ -232,10 +229,10 @@ // Append a small dbt, the buf should be able to fit it. buffer.append(&small_dbt, &small_dbt); - invariant(buffer.m_buf_size >= small_dbt.size); + invariant(buffer.total_memory_size() >= small_dbt.size); // Append a large dbt, the buf should be able to fit it. buffer.append(&large_dbt, &large_dbt); - invariant(buffer.m_buf_size >= (small_dbt.size + large_dbt.size)); + invariant(buffer.total_memory_size() >= (small_dbt.size + large_dbt.size)); toku_free(small_buf); toku_free(large_buf); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/test.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/test.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/test.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/test.h 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -86,79 +86,90 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_TEST_H -#define TOKU_TEST_H - -#include #include +#include "ft/comparator.h" +#include "util/dbt.h" + namespace toku { -__attribute__((__unused__)) -static DBT min_dbt(void) { - static int64_t min = INT_MIN; - DBT dbt; - toku_fill_dbt(&dbt, &min, sizeof(int64_t)); - dbt.flags = DB_DBT_USERMEM; - return dbt; -} - -__attribute__((__unused__)) -static DBT max_dbt(void) { - static int64_t max = INT_MAX; - DBT dbt; - toku_fill_dbt(&dbt, &max, sizeof(int64_t)); - dbt.flags = DB_DBT_USERMEM; - return dbt; -} - -__attribute__((__unused__)) -static const DBT *get_dbt(int64_t key) { - static const int NUM_DBTS = 1000; - static bool initialized; - static int64_t static_ints[NUM_DBTS]; - static DBT static_dbts[NUM_DBTS]; - invariant(key < NUM_DBTS); - if (!initialized) { - for (int i = 0; i < NUM_DBTS; i++) { - static_ints[i] = i; - toku_fill_dbt(&static_dbts[i], - &static_ints[i], - sizeof(int64_t)); - static_dbts[i].flags = DB_DBT_USERMEM; + __attribute__((__unused__)) + static DBT min_dbt(void) { + static int64_t min = INT_MIN; + DBT dbt; + toku_fill_dbt(&dbt, &min, sizeof(int64_t)); + dbt.flags = DB_DBT_USERMEM; + return dbt; + } + + __attribute__((__unused__)) + static DBT max_dbt(void) { + static int64_t max = INT_MAX; + DBT dbt; + toku_fill_dbt(&dbt, &max, sizeof(int64_t)); + dbt.flags = DB_DBT_USERMEM; + return dbt; + } + + __attribute__((__unused__)) + static const DBT *get_dbt(int64_t key) { + static const int NUM_DBTS = 1000; + static bool initialized; + static int64_t static_ints[NUM_DBTS]; + static DBT 
static_dbts[NUM_DBTS]; + invariant(key < NUM_DBTS); + if (!initialized) { + for (int i = 0; i < NUM_DBTS; i++) { + static_ints[i] = i; + toku_fill_dbt(&static_dbts[i], + &static_ints[i], + sizeof(int64_t)); + static_dbts[i].flags = DB_DBT_USERMEM; + } + initialized = true; } - initialized = true; + + invariant(key < NUM_DBTS); + return &static_dbts[key]; } - invariant(key < NUM_DBTS); - return &static_dbts[key]; -} - -__attribute__((__unused__)) -static int compare_dbts(DB *db, const DBT *key1, const DBT *key2) { - (void) db; - - // this emulates what a "infinity-aware" comparator object does - if (toku_dbt_is_infinite(key1) || toku_dbt_is_infinite(key2)) { - return toku_dbt_infinite_compare(key1, key2); - } else { - invariant(key1->size == sizeof(int64_t)); - invariant(key2->size == sizeof(int64_t)); - int64_t a = *(int64_t*) key1->data; - int64_t b = *(int64_t*) key2->data; - if (a < b) { - return -1; - } else if (a == b) { - return 0; + __attribute__((__unused__)) + static int compare_dbts(DB *db, const DBT *key1, const DBT *key2) { + (void) db; + + // this emulates what a "infinity-aware" comparator object does + if (toku_dbt_is_infinite(key1) || toku_dbt_is_infinite(key2)) { + return toku_dbt_infinite_compare(key1, key2); } else { - return 1; + invariant(key1->size == sizeof(int64_t)); + invariant(key2->size == sizeof(int64_t)); + int64_t a = *(int64_t*) key1->data; + int64_t b = *(int64_t*) key2->data; + if (a < b) { + return -1; + } else if (a == b) { + return 0; + } else { + return 1; + } } } -} -} /* namespace toku */ + __attribute__((__unused__)) comparator dbt_comparator; + + __attribute__((__constructor__)) + static void construct_dbt_comparator(void) { + dbt_comparator.create(compare_dbts, nullptr); + } -#endif + __attribute__((__destructor__)) + static void destruct_dbt_comparator(void) { + dbt_comparator.destroy(); + } + +} /* namespace toku */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/txnid_set_test.cc 
mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/txnid_set_test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/txnid_set_test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/txnid_set_test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/wfg_test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/wfg_test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/tests/wfg_test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/tests/wfg_test.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/treenode.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/treenode.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/treenode.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/treenode.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -99,7 +99,7 @@ toku_mutex_unlock(&m_mutex); } -void treenode::init(comparator *cmp) { +void treenode::init(const comparator *cmp) { m_txnid = TXNID_NONE; m_is_root = false; m_is_empty = true; @@ -117,7 +117,7 @@ m_right_child.set(nullptr); } -void treenode::create_root(comparator *cmp) { +void treenode::create_root(const comparator *cmp) { init(cmp); m_is_root = true; } @@ -145,10 +145,10 @@ } bool treenode::range_overlaps(const keyrange &range) { - return m_range.overlaps(m_cmp, range); + return m_range.overlaps(*m_cmp, range); } -treenode *treenode::alloc(comparator *cmp, const keyrange &range, TXNID txnid) { +treenode *treenode::alloc(const comparator *cmp, const keyrange &range, TXNID txnid) { treenode *XCALLOC(node); node->init(cmp); node->set_range_and_txnid(range, txnid); @@ -190,7 +190,7 @@ // determine which child to look at based on a comparison. if we were // given a comparison hint, use that. otherwise, compare them now. - keyrange::comparison c = cmp_hint ? *cmp_hint : range.compare(m_cmp, m_range); + keyrange::comparison c = cmp_hint ? *cmp_hint : range.compare(*m_cmp, m_range); treenode *child; if (c == keyrange::comparison::LESS_THAN) { @@ -209,7 +209,7 @@ if (child == nullptr) { return this; } else { - c = range.compare(m_cmp, child->m_range); + c = range.compare(*m_cmp, child->m_range); if (c == keyrange::comparison::EQUALS || c == keyrange::comparison::OVERLAPS) { child->mutex_unlock(); return this; @@ -225,7 +225,7 @@ template void treenode::traverse_overlaps(const keyrange &range, F *function) { - keyrange::comparison c = range.compare(m_cmp, m_range); + keyrange::comparison c = range.compare(*m_cmp, m_range); if (c == keyrange::comparison::EQUALS) { // Doesn't matter if fn wants to keep going, there // is nothing left, so return. @@ -264,7 +264,7 @@ void treenode::insert(const keyrange &range, TXNID txnid) { // choose a child to check. if that child is null, then insert the new node there. 
// otherwise recur down that child's subtree - keyrange::comparison c = range.compare(m_cmp, m_range); + keyrange::comparison c = range.compare(*m_cmp, m_range); if (c == keyrange::comparison::LESS_THAN) { treenode *left_child = lock_and_rebalance_left(); if (left_child == nullptr) { @@ -382,7 +382,7 @@ // if the range is equal to this node's range, then just remove // the root of this subtree. otherwise search down the tree // in either the left or right children. - keyrange::comparison c = range.compare(m_cmp, m_range); + keyrange::comparison c = range.compare(*m_cmp, m_range); switch (c) { case keyrange::comparison::EQUALS: return remove_root_of_subtree(); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/treenode.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/treenode.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/treenode.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/treenode.h 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,20 +86,19 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#ifndef TREENODE_H -#define TREENODE_H - -#include #include -#include +#include "portability/memory.h" +#include "portability/toku_pthread.h" -#include - -#include "keyrange.h" +#include "ft/comparator.h" +#include "ft/txn/txn.h" +#include "locktree/keyrange.h" namespace toku { @@ -124,7 +123,7 @@ // - node may be unlocked if no other thread has visibility // effect: create the root node - void create_root(comparator *cmp); + void create_root(const comparator *cmp); // effect: destroys the root node void destroy_root(void); @@ -211,7 +210,7 @@ child_ptr m_right_child; // comparator for ranges - comparator *m_cmp; + const comparator *m_cmp; // marked for the root node. the root node is never free()'d // when removed, but instead marked as empty. @@ -221,7 +220,7 @@ bool m_is_empty; // effect: initializes an empty node with the given comparator - void init(comparator *cmp); + void init(const comparator *cmp); // requires: *parent is initialized to something meaningful. // requires: subtree is non-empty @@ -268,7 +267,7 @@ treenode *maybe_rebalance(void); // returns: allocated treenode populated with a copy of the range and txnid - static treenode *alloc(comparator *cmp, const keyrange &range, TXNID txnid); + static treenode *alloc(const comparator *cmp, const keyrange &range, TXNID txnid); // requires: node is a locked root node, or an unlocked non-root node static void free(treenode *node); @@ -283,5 +282,3 @@ #include "treenode.cc" } /* namespace toku */ - -#endif /* TREENODE_H */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/txnid_set.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/txnid_set.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/txnid_set.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/txnid_set.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/txnid_set.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/txnid_set.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/txnid_set.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/txnid_set.h 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,15 +86,14 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_TXNID_SET_H -#define TOKU_TXNID_SET_H - -#include +#include "ft/txn/txn.h" -#include +#include "util/omt.h" namespace toku { @@ -130,5 +129,3 @@ ENSURE_POD(txnid_set); } /* namespace toku */ - -#endif /* TOKU_TXNID_SET_H */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/wfg.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/wfg.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/wfg.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/wfg.cc 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/wfg.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/wfg.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/locktree/wfg.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/locktree/wfg.h 2014-10-08 13:19:51.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,17 +86,13 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_WFG_H -#define TOKU_WFG_H - -#include - -#include - -#include "txnid_set.h" +#include "locktree/txnid_set.h" +#include "util/omt.h" namespace toku { @@ -159,5 +155,3 @@ ENSURE_POD(wfg); } /* namespace toku */ - -#endif /* TOKU_WFG_H */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/CMakeLists.txt mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/CMakeLists.txt --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/CMakeLists.txt 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/CMakeLists.txt 2014-10-08 13:19:52.000000000 +0000 @@ -19,7 +19,6 @@ add_library(tokuportability_static_conv STATIC ${tokuportability_srcs}) set_target_properties(tokuportability_static_conv PROPERTIES POSITION_INDEPENDENT_CODE ON) -add_dependencies(tokuportability_static_conv build_jemalloc) set(tokuportability_source_libs tokuportability_static_conv ${LIBJEMALLOC} ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) toku_merge_static_libs(${LIBTOKUPORTABILITY}_static 
${LIBTOKUPORTABILITY}_static "${tokuportability_source_libs}") diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/file.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/file.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/file.cc 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/file.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -166,10 +166,10 @@ ssize_t n = readlink(fname, symname, MY_MAX_PATH); if ((int)n == -1) - fprintf(stderr, "%.24s Tokudb No space when writing %" PRIu64 " bytes to fd=%d ", tstr, (uint64_t) len, fd); + fprintf(stderr, "%.24s TokuFT No space when writing %" PRIu64 " bytes to fd=%d ", tstr, (uint64_t) len, fd); else { tstr[n] = 0; // readlink doesn't append a NUL to the end of the buffer. - fprintf(stderr, "%.24s Tokudb No space when writing %" PRIu64 " bytes to %*s ", tstr, (uint64_t) len, (int) n, symname); + fprintf(stderr, "%.24s TokuFT No space when writing %" PRIu64 " bytes to %*s ", tstr, (uint64_t) len, (int) n, symname); } fprintf(stderr, "retry in %d second%s\n", toku_write_enospc_sleep, toku_write_enospc_sleep > 1 ? "s" : ""); fflush(stderr); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/huge_page_detection.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/huge_page_detection.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/huge_page_detection.cc 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/huge_page_detection.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -107,7 +107,7 @@ char *r = fgets(buf, sizeof(buf), f); assert(r != NULL); if (strstr(buf, "[always]")) { - fprintf(stderr,"TokuDB: Transparent huge pages are enabled, according to %s. TokuDB will be disabled. To use TokuDB disable huge pages in your kernel or, for testing, set the environment variable TOKU_HUGE_PAGES_OK to 1\n", fname); + fprintf(stderr, "Transparent huge pages are enabled, according to %s\n", fname); huge_pages_enabled = true; } else { huge_pages_enabled =false; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/memory.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/memory.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/memory.cc 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/memory.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/memory.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/memory.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/memory.h 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/memory.h 2014-10-08 13:19:52.000000000 +0000 @@ -1,8 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: #ident "$Id$" -#ifndef MEMORY_H -#define MEMORY_H /* COPYING CONDITIONS NOTICE: @@ -32,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,12 +87,13 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include #include - /* Tokutek memory allocation functions and macros. 
* These are functions for malloc and free */ @@ -225,5 +224,3 @@ void toku_memory_get_status(LOCAL_MEMORY_STATUS s); size_t toku_memory_footprint(void * p, size_t touched); - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/os_malloc.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/os_malloc.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/os_malloc.cc 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/os_malloc.cc 2014-10-08 13:19:52.000000000 +0000 @@ -30,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/portability.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/portability.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/portability.cc 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/portability.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/rdtsc.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/rdtsc.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/rdtsc.h 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/rdtsc.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,127 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-// read the processor time stamp register - -#if defined __ICC - -#define USE_RDTSC 1 -#define rdtsc _rdtsc - -#elif defined __i386__ - -#define USE_RDTSC 1 - -static inline unsigned long long rdtsc(void) { - unsigned long hi, lo; - __asm__ __volatile__ ("rdtsc\n" - "movl %%edx,%0\n" - "movl %%eax,%1" : "=r"(hi), "=r"(lo) : : "edx", "eax"); - return ((unsigned long long) hi << 32ULL) + (unsigned long long) lo; -} - -#elif defined __x86_64__ - -#define USE_RDTSC 1 - -static inline unsigned long long rdtsc(void) { - unsigned long long r; - __asm__ __volatile__ ("rdtsc\n" - "shl $32,%%rdx\n" - "or %%rdx,%%rax\n" - "movq %%rax,%0" : "=r"(r) : : "edx", "eax", "rdx", "rax"); - return r; -} - -#else - -#define USE_RDTSC 0 - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/rwlock_condvar.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/rwlock_condvar.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/rwlock_condvar.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/rwlock_condvar.h 2014-10-08 13:19:52.000000000 +0000 @@ -33,7 +33,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-active-cpus.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-active-cpus.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-active-cpus.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-active-cpus.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-cache-line-boundary-fails.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-cache-line-boundary-fails.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-cache-line-boundary-fails.cc 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-cache-line-boundary-fails.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-cpu-freq.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-cpu-freq.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-cpu-freq.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-cpu-freq.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-cpu-freq-openlimit17.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-cpu-freq-openlimit17.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-cpu-freq-openlimit17.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-cpu-freq-openlimit17.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-filesystem-sizes.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-filesystem-sizes.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-filesystem-sizes.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-filesystem-sizes.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-flock.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-flock.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-flock.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-flock.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-fsync.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-fsync.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-fsync.cc 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-fsync.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -221,7 +221,6 @@ } } -//sync() does not appear to have an analogue on windows. 
static void time_sync_fsyncs_many_files(int N, int bytes, int fds[/*N*/]) { if (verbose>1) { diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-fsync-directory.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-fsync-directory.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-fsync-directory.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-fsync-directory.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-gettime.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-gettime.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-gettime.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-gettime.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-gettimeofday.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-gettimeofday.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-gettimeofday.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-gettimeofday.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-hugepage.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-hugepage.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-hugepage.cc 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-hugepage.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-max-data.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-max-data.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-max-data.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-max-data.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-memory-status.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-memory-status.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-memory-status.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-memory-status.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-pagesize.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-pagesize.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-pagesize.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-pagesize.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-pthread-rwlock-rdlock.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-pthread-rwlock-rdlock.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-pthread-rwlock-rdlock.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-pthread-rwlock-rdlock.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-pthread-rwlock-rwr.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-pthread-rwlock-rwr.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-pthread-rwlock-rwr.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-pthread-rwlock-rwr.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-pwrite4g.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-pwrite4g.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-pwrite4g.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-pwrite4g.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-snprintf.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-snprintf.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-snprintf.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-snprintf.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-stat.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-stat.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-stat.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-stat.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-toku-malloc.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-toku-malloc.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-toku-malloc.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-toku-malloc.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-xid.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-xid.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/test-xid.cc 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/test-xid.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/try-assert0.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/try-assert0.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/try-assert0.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/try-assert0.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/try-assert-zero.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/try-assert-zero.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/try-assert-zero.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/try-assert-zero.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/try-leak-lost.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/try-leak-lost.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/try-leak-lost.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/try-leak-lost.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/try-leak-reachable.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/try-leak-reachable.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/try-leak-reachable.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/try-leak-reachable.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/try-uninit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/try-uninit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/tests/try-uninit.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/tests/try-uninit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_assert.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_assert.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_assert.cc 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_assert.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_assert.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_assert.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_assert.h 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_assert.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,11 +86,12 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_ASSERT_H -#define TOKU_ASSERT_H /* The problem with assert.h: If NDEBUG is set then it doesn't execute the function, if NDEBUG isn't set then we get a branch that isn't taken. */ + /* This version will complain if NDEBUG is set. */ /* It evaluates the argument and then calls a function toku_do_assert() which takes all the hits for the branches not taken. */ @@ -201,5 +202,3 @@ } extern bool toku_gdb_dump_on_assert; - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_atomic.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_atomic.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_atomic.h 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_atomic.h 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -85,12 +85,11 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2012-2013 Tokutek Inc. All rights reserved." #ident "$Id$" -#ifndef TOKU_ATOMIC_H -#define TOKU_ATOMIC_H - #include #include @@ -159,5 +158,3 @@ #pragma GCC poison __sync_synchronize #pragma GCC poison __sync_lock_test_and_set #pragma GCC poison __sync_release - -#endif // TOKU_ATOMIC_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_byteswap.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_byteswap.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_byteswap.h 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_byteswap.h 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -85,12 +85,11 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2012-2013 Tokutek Inc. All rights reserved." #ident "$Id$" -#ifndef TOKU_BYTESWAP_H -#define TOKU_BYTESWAP_H - #include #if defined(HAVE_BYTESWAP_H) @@ -102,5 +101,3 @@ # include # define bswap_64 OSSwapInt64 #endif - -#endif /* TOKU_BYTESWAP_H */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_crash.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_crash.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_crash.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_crash.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_crash.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_crash.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_crash.h 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_crash.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,10 +86,9 @@ under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once -#ifndef PORTABILITY_TOKU_CRASH_H -#define PORTABILITY_TOKU_CRASH_H +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include #include @@ -192,5 +191,3 @@ } void toku_try_gdb_stack_trace(const char *gdb_path); - -#endif // PORTABILITY_TOKU_CRASH_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_htod.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_htod.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_htod.h 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_htod.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,8 +86,9 @@ under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." /* Purpose of this file is to provide definitions of * Host to Disk byte transposition functions, an abstraction of @@ -106,9 +107,6 @@ * HOST AND A LITTLE-ENDIAN DISK. 
*/ -#ifndef _TOKU_HTOD_H -#define _TOKU_HTOD_H - #include #if defined(HAVE_ENDIAN_H) @@ -166,8 +164,3 @@ #else #error Not supported #endif - - - -#endif - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_htonl.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_htonl.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_htonl.h 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_htonl.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,14 +86,9 @@ under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." - -#ifndef _TOKU_HTONL_H -#define _TOKU_HTONL_H +#pragma once -#if !__linux__ && !__FreeBSD__ && !__sun__ -//#error -#endif +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include #include @@ -105,5 +100,3 @@ static inline uint32_t toku_ntohl(uint32_t i) { return ntohl(i); } - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_list.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_list.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_list.h 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_list.h 2014-10-08 13:19:52.000000000 +0000 @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef _TOKUDB_LIST_H -#define _TOKUDB_LIST_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,13 +87,11 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007, 2008, 2009 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -//TODO: #1378 This is not threadsafe. Make sure when splitting locks -//that we protect these calls. - - // This toku_list is intended to be embedded in other data structures. struct toku_list { struct toku_list *next, *prev; @@ -177,7 +173,3 @@ #else #define toku_list_struct(p, t, f) ((t*)((char*)(p) - ((char*)&((t*)0)->f))) #endif - - - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_os.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_os.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_os.h 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_os.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#ifndef TOKU_OS_H -#define TOKU_OS_H - #include #include @@ -179,5 +178,3 @@ // Portable linux 'dup2' int toku_dup2(int fd, int fd2) __attribute__((__visibility__("default"))); - -#endif /* TOKU_OS_H */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_os_types.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_os_types.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_os_types.h 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_os_types.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,10 +86,9 @@ under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once -#if !defined(TOKU_OS_TYPES_H) -#define TOKU_OS_TYPES_H +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include #include @@ -128,5 +127,3 @@ #if !defined(O_BINARY) #define O_BINARY 0 #endif - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_path.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_path.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_path.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_path.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_path.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_path.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_path.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_path.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef PORTABILITY_TOKU_PATH_H -#define PORTABILITY_TOKU_PATH_H - #include #include #include @@ -124,5 +123,3 @@ // There are n path components, including base. // Returns: // dest (useful for chaining function calls) - -#endif // PORTABILITY_TOKU_PATH_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_portability.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_portability.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_portability.h 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_portability.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,10 +86,10 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_PORTABILITY_H -#define TOKU_PORTABILITY_H #include "toku_config.h" @@ -107,11 +107,6 @@ #define DEV_NULL_FILE "/dev/null" -// HACK Poison these mcaros so no one uses them -#define TOKU_WINDOWS , -#define TOKU_WINDOWS_32 , -#define TOKU_WINDOWS_64 , - // include here, before they get deprecated #include @@ -188,26 +183,6 @@ // Deprecated functions. #if !defined(TOKU_ALLOW_DEPRECATED) -# if defined(__ICL) || defined(__ICC) // Intel Compiler -# pragma deprecated (creat, fstat, stat, getpid, syscall, sysconf, mkdir, strdup) -//# pragma poison off_t -//# pragma poison pthread_attr_t pthread_t -//# pragma poison pthread_mutexattr_t pthread_mutex_t -//# pragma poison pthread_condattr_t pthread_cond_t -//# pragma poison pthread_rwlockattr_t pthread_rwlock_t -//# pragma poison timespec -# ifndef DONT_DEPRECATE_WRITES -# pragma poison write pwrite -# endif -# ifndef DONT_DEPRECATE_MALLOC -# pragma deprecated (malloc, free, realloc) -# endif -# ifndef DONT_DEPRECATE_ERRNO -# pragma deprecated (errno) -# endif -# pragma poison dup2 -# pragma poison _dup2 -# else int creat(const char *pathname, mode_t mode) __attribute__((__deprecated__)); int fstat(int fd, struct stat *buf) __attribute__((__deprecated__)); int stat(const char *path, struct stat *buf) __attribute__((__deprecated__)); @@ -281,7 +256,6 @@ #pragma GCC poison __sync_synchronize #pragma GCC poison __sync_lock_test_and_set #pragma GCC poison __sync_release -# endif #endif #if defined(__cplusplus) @@ -352,17 +326,8 @@ int toku_portability_init(void); void toku_portability_destroy(void); -static inline uint64_t roundup_to_multiple(uint64_t alignment, uint64_t v) // Effect: Return X, where X the 
smallest multiple of ALIGNMENT such that X>=V. // Requires: ALIGNMENT is a power of two -{ - assert(0==(alignment&(alignment-1))); // alignment must be a power of two - uint64_t result = (v+alignment-1)&~(alignment-1); - assert(result>=v); // The result is >=V. - assert(result%alignment==0); // The result is a multiple of alignment. - assert(result #include @@ -213,6 +212,21 @@ #endif } +static inline int +toku_mutex_trylock(toku_mutex_t *mutex) { + int r = pthread_mutex_trylock(&mutex->pmutex); +#if TOKU_PTHREAD_DEBUG + if (r == 0) { + invariant(mutex->valid); + invariant(!mutex->locked); + invariant(mutex->owner == 0); + mutex->locked = true; + mutex->owner = pthread_self(); + } +#endif + return r; +} + static inline void toku_mutex_unlock(toku_mutex_t *mutex) { #if TOKU_PTHREAD_DEBUG @@ -407,5 +421,3 @@ toku_pthread_setspecific(toku_pthread_key_t key, void *data) { return pthread_setspecific(key, data); } - -#endif /* TOKU_PTHREAD_H */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_race_tools.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_race_tools.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_race_tools.h 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_race_tools.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,10 +86,10 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#ifndef TOKU_RACE_TOOLS_H -#define TOKU_RACE_TOOLS_H #include @@ -138,5 +138,3 @@ # define RUNNING_ON_VALGRIND (0U) #endif - -#endif // TOKU_RACE_TOOLS_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_random.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_random.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_random.h 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_random.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,10 +86,9 @@ under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once -#ifndef TOKU_RANDOM_H -#define TOKU_RANDOM_H +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include #include @@ -169,5 +168,3 @@ return result; } - -#endif // TOKU_RANDOM_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_stdint.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_stdint.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_stdint.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_stdint.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,13 +86,9 @@ under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once -#ifndef TOKU_STDINT_H -#define TOKU_STDINT_H +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#include #include - -#endif - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_stdlib.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_stdlib.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_stdlib.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_stdlib.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,5 +86,8 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." + #include diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_time.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_time.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_time.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_time.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_time.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_time.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/portability/toku_time.h 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/portability/toku_time.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,10 +86,9 @@ under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
+#pragma once -#ifndef TOKU_TIME_H -#define TOKU_TIME_H +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "toku_config.h" @@ -161,5 +160,3 @@ gettimeofday(&t, NULL); return t.tv_sec * (1UL * 1000 * 1000) + t.tv_usec; } - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/README.md mariadb-5.5-5.5.40/storage/tokudb/ft-index/README.md --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/README.md 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/README.md 2014-10-08 13:19:51.000000000 +0000 @@ -1,16 +1,16 @@ -TokuKV +TokuFT ====== -TokuKV is a high-performance, transactional key-value store, used in the +TokuFT is a high-performance, transactional key-value store, used in the TokuDB storage engine for MySQL and MariaDB and in TokuMX, the high-performance MongoDB distribution. -TokuKV is provided as a shared library with an interface similar to +TokuFT is provided as a shared library with an interface similar to Berkeley DB. To build the full MySQL product, see the instructions for [Tokutek/ft-engine][ft-engine]. To build TokuMX, see the instructions -for [Tokutek/mongo][mongo]. This document covers TokuKV only. +for [Tokutek/mongo][mongo]. This document covers TokuFT only. [ft-engine]: https://github.com/Tokutek/ft-engine [mongo]: https://github.com/Tokutek/mongo @@ -19,7 +19,7 @@ Building -------- -TokuKV is built using CMake >= 2.8.9. Out-of-source builds are +TokuFT is built using CMake >= 2.8.9. Out-of-source builds are recommended. You need a C++11 compiler, though only GCC >= 4.7 and Apple's Clang are tested. You also need zlib development packages (`yum install zlib-devel` or `apt-get install zlib1g-dev`). 
@@ -35,7 +35,6 @@ cd build CC=gcc47 CXX=g++47 cmake \ -D CMAKE_BUILD_TYPE=Debug \ - -D USE_BDB=OFF \ -D BUILD_TESTING=OFF \ -D USE_VALGRIND=OFF \ -D CMAKE_INSTALL_PREFIX=../prefix/ \ @@ -50,14 +49,14 @@ ### Platforms -TokuKV is supported on 64-bit Centos, should work on other 64-bit linux -distributions, and may work on OSX 10.8 and FreeBSD. TokuKV is not +TokuFT is supported on 64-bit Centos, should work on other 64-bit linux +distributions, and may work on OSX 10.8 and FreeBSD. TokuFT is not supported on 32-bit systems. [Transparent hugepages][transparent-hugepages] is a feature in newer linux kernel versions that causes problems for the memory usage tracking -calculations in TokuKV and can lead to memory overcommit. If you have -this feature enabled, TokuKV will not start, and you should turn it off. +calculations in TokuFT and can lead to memory overcommit. If you have +this feature enabled, TokuFT will not start, and you should turn it off. If you want to run with transparent hugepages on, you can set an environment variable `TOKU_HUGE_PAGES_OK=1`, but only do this for testing, and only with a small cache size. @@ -68,31 +67,26 @@ Examples -------- -There are some sample programs that can use either TokuKV or Berkeley DB +There are some sample programs that can use either TokuFT or Berkeley DB in the `examples/` directory. Follow the above instructions to build and -install TokuKV, and then look in the installed `examples/` directory for +install TokuFT, and then look in the installed `examples/` directory for instructions on building and running them. Testing ------- -TokuKV uses CTest for testing. The CDash testing dashboard is not +TokuFT uses CTest for testing. The CDash testing dashboard is not currently public, but you can run the tests without submitting them. There are some large data files not stored in the git repository, that will be made available soon. For now, the tests that use these files will not run. 
-Many of the tests are linked with both TokuKV and Berkeley DB, as a sanity -check on the tests themselves. To build these tests, you will need -Berkeley DB and its header files installed. If you do not have Berkeley -DB installed, just don't pass `USE_BDB=ON`. - In the build directory from above: ```sh -cmake -D BUILD_TESTING=ON [-D USE_BDB=ON] .. +cmake -D BUILD_TESTING=ON .. ctest -D ExperimentalStart \ -D ExperimentalConfigure \ -D ExperimentalBuild \ @@ -103,7 +97,7 @@ Contributing ------------ -Please report bugs in TokuKV here on github. +Please report bugs in TokuFT to the [issue tracker][jira]. We have two publicly accessible mailing lists for TokuDB: @@ -121,11 +115,13 @@ We are also available on IRC on freenode.net, in the #tokutek channel. +[jira]: https://tokutek.atlassian.net/browse/FT/ + License ------- -TokuKV is available under the GPL version 2, with slight modifications. +TokuFT is available under the GPL version 2, with slight modifications. See [README-TOKUDB][license]. [license]: http://github.com/Tokutek/ft-index/blob/master/README-TOKUDB diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/README-TOKUDB mariadb-5.5-5.5.40/storage/tokudb/ft-index/README-TOKUDB --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/README-TOKUDB 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/README-TOKUDB 2014-10-08 13:19:51.000000000 +0000 @@ -25,7 +25,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run.db-benchmark-test.bash mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run.db-benchmark-test.bash --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run.db-benchmark-test.bash 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run.db-benchmark-test.bash 1970-01-01 00:00:00.000000000 +0000 @@ -1,201 +0,0 @@ -#!/usr/bin/env bash - -function usage() { - echo "run db-benchmark-test" - echo "[--tokudb=$tokudb" - echo "[--revision=$revision]" - echo "[--branch=$branch]" - echo "[--suffix=$suffix]" - echo "[--commit=$commit]" - echo "[--cc=$cc]" - echo "[--n=$n]" -} - -function retry() { - local cmd - local retries - local exitcode - cmd=$* - let retries=0 - while [ $retries -le 10 ] ; do - echo `date` $cmd - bash -c "$cmd" - exitcode=$? - echo `date` $cmd $exitcode $retries - let retries=retries+1 - if [ $exitcode -eq 0 ] ; then break; fi - sleep 10 - done - test $exitcode = 0 -} - -n=100 -cc=gcc44 -ft_loader=cilk -branch=toku -revision=0 -tokudb=tokudb -suffix=. -commit=0 -svnserver=https://svn.tokutek.com/tokudb -basedir=$HOME/svn.build -builddir=$basedir/tokudb.build -system=`uname -s | tr [:upper:] [:lower:]` -arch=`uname -m | tr [:upper:] [:lower:]` -hostname=`hostname` -instancetype="" - -# parse the command line -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1 - fi -done - -if [ $cc = icc ] ; then - d=/opt/intel/bin - if [ -d $d ] ; then - export PATH=$d:$PATH - . $d/compilervars.sh intel64 - fi - d=/opt/intel/cilkutil/bin - if [ -d $d ] ; then - export PATH=$d:$PATH - fi -fi - -# require a revision -if [ $revision -eq 0 ] ; then exit 1; fi -if [ $branch = "." 
] ; then branch="toku"; fi - -function append() { - local s=""; local x - for x in $*; do - if [ "$s" != "" ] ; then s=$s-$x; else s=$x; fi - done - echo $s -} - -# setup the branchrevision string -branchrevision="" -if [ $branch != "toku" ] ; then branchrevision=$(append $branchrevision $(basename $branch)); fi -if [ $tokudb != "tokudb" ] ; then branchrevision=$(append $branchrevision $tokudb); fi -branchrevision=$(append $branchrevision $revision) -if [ $suffix != "." ] ; then branchrevision=$(append $branchrevision $suffix); fi - -# goto the base directory -if [ ! -d $basedir ] ; then mkdir $basedir; fi - -pushd $basedir - -# update the build directory -if [ ! -d $builddir ] ; then mkdir $builddir; fi - -date=`date +%Y%m%d` -pushd $builddir - while [ ! -d $date ] ; do - svn mkdir $svnserver/mysql.build/$date -m "" - svn co -q $svnserver/mysql.build/$date - if [ $? -ne 0 ] ; then rm -rf $date; fi - done -popd -testresultsdir=$builddir/$date - -gccversion=`$cc --version|head -1|cut -f3 -d" "` - -runfile=$testresultsdir/db-benchmark-test-$branchrevision-$cc-$gccversion-$system-$arch-$hostname -if [ "$instancetype" != "" ] ; then runfile=$runfile-$instancetype; fi -rm -rf $runfile - -testresult="PASS" -testdir=db-benchmark-test-$branchrevision -rm -rf $testdir - -# checkout the tokudb branch -if [ $testresult = "PASS" ] ; then - retry svn export -q https://svn.tokutek.com/tokudb/$branch/$tokudb $testdir - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi -fi - -# build it -if [ $testresult = "PASS" ] ; then - pushd $testdir - make release -s CC=$cc GCCVERSION=$gccversion FTLOADER=$ft_loader >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd - pushd $testdir/db-benchmark-test - make build.tdb CC=$cc GCCVERSION=$gccversion -s >>$runfile 2>&1 - exitcode=$? 
- if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi - -# run tests -if [ $testresult = "PASS" ] ; then - let i=$n - pushd $testdir/db-benchmark-test - echo ./db-benchmark-test-tokudb -x $i >>$runfile 2>&1 - ./db-benchmark-test-tokudb -x $i >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - echo ./scanscan-tokudb --prelock --prelockflag >>$runfile 2>&1 - ./scanscan-tokudb --prelock --prelockflag >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - echo ./scanscan-tokudb --lwc --prelock --prelockflag >>$runfile 2>&1 - ./scanscan-tokudb --lwc --prelock --prelockflag >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi - -if [ $testresult = "PASS" ] ; then - let i=2*$n - pushd $testdir/db-benchmark-test - echo ./db-benchmark-test-tokudb -x --norandom $i >>$runfile 2>&1 - ./db-benchmark-test-tokudb -x --norandom $i >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - echo ./scanscan-tokudb --prelock --prelockflag >>$runfile 2>&1 - ./scanscan-tokudb --prelock --prelockflag >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - echo ./scanscan-tokudb --lwc --prelock --prelockflag >>$runfile 2>&1 - ./scanscan-tokudb --lwc --prelock --prelockflag >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi - -if [ $testresult = "PASS" ] ; then - let i=2*$n - pushd $testdir/db-benchmark-test - echo ./db-benchmark-test-tokudb -x --noserial $i >>$runfile 2>&1 - ./db-benchmark-test-tokudb -x --noserial $i >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - echo ./scanscan-tokudb --prelock --prelockflag >>$runfile 2>&1 - ./scanscan-tokudb --prelock --prelockflag >>$runfile 2>&1 - exitcode=$? 
- if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - echo ./scanscan-tokudb --lwc --prelock --prelockflag >>$runfile 2>&1 - ./scanscan-tokudb --lwc --prelock --prelockflag >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi - -# commit results -if [ $commit != 0 ] ; then - svn add $runfile - retry svn commit -m \"$testresult db-benchmark-test $branchrevision $system $arch\" $runfile -fi - -popd - -exit 0 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.bash mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.bash --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.bash 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.bash 1970-01-01 00:00:00.000000000 +0000 @@ -1,460 +0,0 @@ -#!/bin/bash - -function usage() { - echo "run.fractal.tree.tests.bash - run the nightly fractal tree test suite" - echo "[--ftcc=$ftcc] [--ftcxx=$ftcxx] [--BDBVERSION=$BDBVERSION] [--ctest_model=$ctest_model]" - echo "[--commit=$commit] [--generator=$generator] [--toku_svnroot=$toku_svnroot]" - return 1 -} - -[ -f /etc/profile.d/gcc47.sh ] && . /etc/profile.d/gcc47.sh -[ -f /etc/profile.d/binutils222.sh ] && . /etc/profile.d/binutils222.sh - -set -e - -pushd $(dirname $0) &>/dev/null -SCRIPTDIR=$PWD -popd &>/dev/null -FULLTOKUDBDIR=$(dirname $SCRIPTDIR) -TOKUDBDIR=$(basename $FULLTOKUDBDIR) -BRANCHDIR=$(basename $(dirname $FULLTOKUDBDIR)) - -function make_tokudb_name() { - local tokudb_dir=$1 - local tokudb=$2 - if [ $tokudb_dir = "toku" ] ; then - echo $tokudb - else - echo $(echo $tokudb_dir-$tokudb | tr / -) - fi -} -tokudb_name=$(make_tokudb_name $BRANCHDIR $TOKUDBDIR) -export TOKUDB_NAME=$tokudb_name - -productname=$tokudb_name - -ftcc=gcc47 -ftcxx=g++47 -BDBVERSION=5.3 -ctest_model=Nightly -generator="Unix Makefiles" -toku_svnroot=$FULLTOKUDBDIR/../.. 
-commit=1 -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1; - fi -done - -if [[ ! ( ( $ctest_model = Nightly ) || ( $ctest_model = Experimental ) || ( $ctest_model = Continuous ) ) ]]; then - echo "--ctest_model must be Nightly, Experimental, or Continuous" - usage -fi - -BDBDIR=/usr/local/BerkeleyDB.$BDBVERSION -if [ -d $BDBDIR ] ; then - CMAKE_PREFIX_PATH=$BDBDIR:$CMAKE_PREFIX_PATH - export CMAKE_PREFIX_PATH -fi - -# delete some characters that cygwin and osx have trouble with -function sanitize() { - tr -d '[/:\\\\()]' -} - -# gather some info -svnserver=https://svn.tokutek.com/tokudb -nodename=$(uname -n) -system=$(uname -s | tr '[:upper:]' '[:lower:]' | sanitize) -release=$(uname -r | sanitize) -arch=$(uname -m | sanitize) -date=$(date +%Y%m%d) -ncpus=$([ -f /proc/cpuinfo ] && (grep bogomips /proc/cpuinfo | wc -l) || sysctl -n hw.ncpu) -njobs=$(if [ $ncpus -gt 8 ] ; then echo "$ncpus / 3" | bc ; else echo "$ncpus" ; fi) - -GCCVERSION=$($ftcc --version|head -1|cut -f3 -d" ") -export GCCVERSION -CC=$ftcc -export CC -CXX=$ftcxx -export CXX - -function retry() { - local cmd - local retries - local exitcode - cmd=$* - let retries=0 - while [ $retries -le 10 ] ; do - echo `date` $cmd - bash -c "$cmd" - exitcode=$? - echo `date` $cmd $exitcode $retries - let retries=retries+1 - if [ $exitcode -eq 0 ] ; then break; fi - sleep 10 - done - test $exitcode = 0 -} - -if [[ $commit -eq 1 ]]; then - svnbase=~/svn.build - if [ ! -d $svnbase ] ; then mkdir $svnbase ; fi - - # checkout the build dir - buildbase=$svnbase/tokudb.build - if [ ! -d $buildbase ] ; then - mkdir $buildbase - fi - - # make the build directory, possibly on multiple machines simultaneously, there can be only one - builddir=$buildbase/$date - pushd $buildbase - set +e - svn mkdir $svnserver/tokudb.build/$date -m "" || true - retry svn co -q $svnserver/tokudb.build/$date - if [ ! 
-d $date ] ; then - exit 1 - fi - set -e - popd - - tracefilepfx=$builddir/$productname+$ftcc-$GCCVERSION+bdb-$BDBVERSION+$nodename+$system+$release+$arch -else - tracefilepfx=$FULLTOKUDBDIR/test-trace -fi - -function getsysinfo() { - tracefile=$1; shift - set +e - uname -a >$tracefile 2>&1 - ulimit -a >>$tracefile 2>&1 - cmake --version >>$tracefile 2>&1 - $ftcc -v >>$tracefile 2>&1 - $ftcxx -v >>$tracefile 2>&1 - valgrind --version >>$tracefile 2>&1 - cat /etc/issue >>$tracefile 2>&1 - cat /proc/version >>$tracefile 2>&1 - cat /proc/cpuinfo >>$tracefile 2>&1 - env >>$tracefile 2>&1 - set -e -} - -function get_latest_svn_revision() { - svn info $1 | awk -v ORS="" '/Last Changed Rev:/ { print $4 }' -} - -function my_mktemp() { - mktemp /tmp/$(whoami).$1.XXXXXXXXXX -} - -yesterday="$(date -u -d yesterday +%F) 03:59:00 +0000" - -if [[ $commit -eq 1 ]]; then - # hack to make long tests run nightly but not when run in experimental mode - longtests=ON -else - longtests=OFF -fi -################################################################################ -## run normal and valgrind on optimized build -resultsdir=$tracefilepfx-Release -mkdir $resultsdir -tracefile=$tracefilepfx-Release/trace - -getsysinfo $tracefile - -mkdir -p $FULLTOKUDBDIR/opt >/dev/null 2>&1 -cd $FULLTOKUDBDIR/opt -cmake \ - -D CMAKE_BUILD_TYPE=Release \ - -D USE_VALGRIND=ON \ - -D USE_BDB=ON \ - -D RUN_LONG_TESTS=$longtests \ - -D USE_CTAGS=OFF \ - -D USE_GTAGS=OFF \ - -D USE_ETAGS=OFF \ - -D USE_CSCOPE=OFF \ - -D TOKU_SVNROOT="$toku_svnroot" \ - -G "$generator" \ - .. 2>&1 | tee -a $tracefile -cmake --system-information $resultsdir/sysinfo -make clean -# update to yesterday exactly just before ctest does nightly update -svn up -q -r "{$yesterday}" .. 
-set +e -ctest -j$njobs \ - -D ${ctest_model}Start \ - -D ${ctest_model}Update \ - -D ${ctest_model}Configure \ - -D ${ctest_model}Build \ - -D ${ctest_model}Test \ - -E '/drd|/helgrind' \ - 2>&1 | tee -a $tracefile -ctest -j$njobs \ - -D ${ctest_model}MemCheck \ - -E '^ydb/.*\.bdb$|test1426.tdb|/drd|/helgrind' \ - 2>&1 | tee -a $tracefile -set -e - -cp $tracefile notes.txt -set +e -ctest -D ${ctest_model}Submit -A notes.txt \ - 2>&1 | tee -a $tracefile -set -e -rm notes.txt - -tag=$(head -n1 Testing/TAG) -cp -r Testing/$tag $resultsdir -if [[ $commit -eq 1 ]]; then - cf=$(my_mktemp ftresult) - cat "$resultsdir/trace" | awk ' -BEGIN { - errs=0; - look=0; - ORS=" "; -} -/[0-9]+% tests passed, [0-9]+ tests failed out of [0-9]+/ { - fail=$4; - total=$9; - pass=total-fail; -} -/^Memory checking results:/ { - look=1; - FS=" - "; -} -/Errors while running CTest/ { - look=0; - FS=" "; -} -{ - if (look) { - errs+=$2; - } -} -END { - print "ERRORS=" errs; - if (fail>0) { - print "FAIL=" fail - } - print "PASS=" pass -}' >"$cf" - get_latest_svn_revision $FULLTOKUDBDIR >>"$cf" - echo -n " " >>"$cf" - cat "$resultsdir/trace" | awk ' -BEGIN { - FS=": "; -} -/Build name/ { - print $2; - exit -}' >>"$cf" - (echo; echo) >>"$cf" - cat "$resultsdir/trace" | awk ' -BEGIN { - printit=0 -} -/[0-9]*\% tests passed, [0-9]* tests failed out of [0-9]*/ { printit=1 } -/Memory check project/ { printit=0 } -/^ Site:/ { printit=0 } -{ - if (printit) { - print $0 - } -}' >>"$cf" - svn add $resultsdir - svn commit -F "$cf" $resultsdir - rm $cf -fi - -################################################################################ -## run drd tests on debug build -resultsdir=$tracefilepfx-Debug -mkdir $resultsdir -tracefile=$tracefilepfx-Debug/trace - -getsysinfo $tracefile - -mkdir -p $FULLTOKUDBDIR/dbg >/dev/null 2>&1 -cd $FULLTOKUDBDIR/dbg -cmake \ - -D CMAKE_BUILD_TYPE=Debug \ - -D USE_VALGRIND=ON \ - -D USE_BDB=OFF \ - -D RUN_LONG_TESTS=$longtests \ - -D USE_CTAGS=OFF \ - -D USE_GTAGS=OFF \ 
- -D USE_ETAGS=OFF \ - -D USE_CSCOPE=OFF \ - -D CMAKE_C_FLAGS_DEBUG="-O1" \ - -D CMAKE_CXX_FLAGS_DEBUG="-O1" \ - -D TOKU_SVNROOT="$toku_svnroot" \ - -G "$generator" \ - .. 2>&1 | tee -a $tracefile -cmake --system-information $resultsdir/sysinfo -make clean -# update to yesterday exactly just before ctest does nightly update -svn up -q -r "{$yesterday}" .. -set +e -ctest -j$njobs \ - -D ${ctest_model}Start \ - -D ${ctest_model}Update \ - -D ${ctest_model}Configure \ - -D ${ctest_model}Build \ - -D ${ctest_model}Test \ - -R '/drd|/helgrind' \ - 2>&1 | tee -a $tracefile -set -e - -cp $tracefile notes.txt -set +e -ctest -D ${ctest_model}Submit -A notes.txt \ - 2>&1 | tee -a $tracefile -set -e -rm notes.txt - -tag=$(head -n1 Testing/TAG) -cp -r Testing/$tag $resultsdir -if [[ $commit -eq 1 ]]; then - cf=$(my_mktemp ftresult) - cat "$resultsdir/trace" | awk ' -BEGIN { - ORS=" "; -} -/[0-9]+% tests passed, [0-9]+ tests failed out of [0-9]+/ { - fail=$4; - total=$9; - pass=total-fail; -} -END { - if (fail>0) { - print "FAIL=" fail - } - print "PASS=" pass -}' >"$cf" - get_latest_svn_revision $FULLTOKUDBDIR >>"$cf" - echo -n " " >>"$cf" - cat "$resultsdir/trace" | awk ' -BEGIN { - FS=": "; -} -/Build name/ { - print $2; - exit -}' >>"$cf" - (echo; echo) >>"$cf" - cat "$resultsdir/trace" | awk ' -BEGIN { - printit=0 -} -/[0-9]*\% tests passed, [0-9]* tests failed out of [0-9]*/ { printit=1 } -/^ Site:/ { printit=0 } -{ - if (printit) { - print $0 - } -}' >>"$cf" - svn add $resultsdir - svn commit -F "$cf" $resultsdir - rm $cf -fi - -################################################################################ -## run gcov on debug build -resultsdir=$tracefilepfx-Coverage -mkdir $resultsdir -tracefile=$tracefilepfx-Coverage/trace - -getsysinfo $tracefile - -mkdir -p $FULLTOKUDBDIR/cov >/dev/null 2>&1 -cd $FULLTOKUDBDIR/cov -cmake \ - -D CMAKE_BUILD_TYPE=Debug \ - -D BUILD_TESTING=ON \ - -D USE_GCOV=ON \ - -D USE_BDB=OFF \ - -D RUN_LONG_TESTS=$longtests \ - -D USE_CTAGS=OFF 
\ - -D USE_GTAGS=OFF \ - -D USE_ETAGS=OFF \ - -D USE_CSCOPE=OFF \ - -D TOKU_SVNROOT="$toku_svnroot" \ - -G "$generator" \ - .. 2>&1 | tee -a $tracefile -cmake --system-information $resultsdir/sysinfo -make clean -# update to yesterday exactly just before ctest does nightly update -svn up -q -r "{$yesterday}" .. -set +e -ctest -j$njobs \ - -D ${ctest_model}Start \ - -D ${ctest_model}Update \ - -D ${ctest_model}Configure \ - -D ${ctest_model}Build \ - -D ${ctest_model}Test \ - -D ${ctest_model}Coverage \ - 2>&1 | tee -a $tracefile -set -e - -cp $tracefile notes.txt -set +e -ctest -D ${ctest_model}Submit -A notes.txt \ - 2>&1 | tee -a $tracefile -set -e -rm notes.txt - -tag=$(head -n1 Testing/TAG) -cp -r Testing/$tag $resultsdir -if [[ $commit -eq 1 ]]; then - cf=$(my_mktemp ftresult) - cat "$resultsdir/trace" | awk ' -BEGIN { - ORS=" "; -} -/Percentage Coverage:/ { - covpct=$3; -} -/[0-9]+% tests passed, [0-9]+ tests failed out of [0-9]+/ { - fail=$4; - total=$9; - pass=total-fail; -} -END { - print "COVERAGE=" covpct - if (fail>0) { - print "FAIL=" fail - } - print "PASS=" pass -}' >"$cf" - get_latest_svn_revision $FULLTOKUDBDIR >>"$cf" - echo -n " " >>"$cf" - cat "$resultsdir/trace" | awk ' -BEGIN { - FS=": "; -} -/Build name/ { - print $2; - exit -}' >>"$cf" - (echo; echo) >>"$cf" - cat "$resultsdir/trace" | awk ' -BEGIN { - printit=0 -} -/[0-9]*\% tests passed, [0-9]* tests failed out of [0-9]*/ { printit=1 } -/^ Site:/ { printit=0 } -{ - if (printit) { - print $0 - } -}' >>"$cf" - svn add $resultsdir - svn commit -F "$cf" $resultsdir - rm $cf -fi - -exit 0 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.cmake mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.cmake --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.cmake 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.cmake 2014-10-08 13:19:52.000000000 +0000 @@ 
-78,26 +78,20 @@ ) set(all_opts - -DBDBDIR=/usr/local/BerkeleyDB.5.3 -DBUILD_TESTING=ON -DUSE_CILK=OFF ) set(rel_opts ${all_opts} -DCMAKE_BUILD_TYPE=Release - -DINTEL_CC=ON - -DUSE_BDB=ON ) set(dbg_opts ${all_opts} -DCMAKE_BUILD_TYPE=Debug - -DINTEL_CC=ON - -DUSE_BDB=ON ) set(cov_opts ${all_opts} -DCMAKE_BUILD_TYPE=Debug - -DINTEL_CC=OFF -DUSE_GCOV=ON ) diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.icc.bash mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.icc.bash --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.icc.bash 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.icc.bash 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -#!/usr/bin/env bash -run.fractal.tree.tests.bash --ftcc=icc $* diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.now.bash mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.now.bash --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.now.bash 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.now.bash 1970-01-01 00:00:00.000000000 +0000 @@ -1,7 +0,0 @@ -#!/bin/bash - -pushd $(dirname $0) &>/dev/null -SCRIPTDIR=$PWD -popd &>/dev/null - -exec $SCRIPTDIR/run.fractal.tree.tests.bash --ctest_model=Experimental --commit=0 "$@" diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run.loader.stress.bash mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run.loader.stress.bash --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run.loader.stress.bash 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run.loader.stress.bash 1970-01-01 00:00:00.000000000 +0000 @@ -1,164 +0,0 @@ -#!/usr/bin/env bash - -function usage() { - echo "run the loader verify test" - echo "[--rows=$rows]" - echo 
"[--dictionaries=$dictionaries]" - echo "[--ft_loader=$ft_loader]" - echo "[--tokudb=$tokudb]" - echo "[--branch=$branch]" - echo "[--revision=$revision]" - echo "[--suffix=$suffix]" - echo "[--commit=$commit]" -} - -function retry() { - local cmd - local retries - local exitcode - cmd=$* - let retries=0 - while [ $retries -le 10 ] ; do - echo `date` $cmd - bash -c "$cmd" - exitcode=$? - echo `date` $cmd $exitcode $retries - let retries=retries+1 - if [ $exitcode -eq 0 ] ; then break; fi - sleep 10 - done - test $exitcode = 0 -} - -rows=100000000 -dictionaries=3 -ft_loader=cilk -tokudb=tokudb -branch=. -revision=0 -suffix=. -commit=0 -svnserver=https://svn.tokutek.com/tokudb -basedir=~/svn.build -builddir=$basedir/mysql.build -system=`uname -s | tr [:upper:] [:lower:]` -arch=`uname -m | tr [:upper:] [:lower:]` -myhost=`hostname` -instancetype="" -ftcc=gcc -have_cilk=0 - -# parse the command line -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1 - fi -done - -# require a revision -if [ $revision -eq 0 ] ; then - exit 1 -fi - -# build -if [ $ftcc = icc ] ; then - d=/opt/intel/bin - if [ -d $d ] ; then - export PATH=$d:$PATH - . $d/compilervars.sh intel64 - fi - d=/opt/intel/cilkutil/bin - if [ -d $d ] ; then - export PATH=$d:$PATH - fi -fi - -# setup the branchrevision string -if [ $branch = "." ] ; then - branchrevision=$revision -else - branchrevision=`basename $branch`-$revision -fi -if [ $suffix != "." ] ; then - branchrevision=$branchrevision-$suffix -fi - -ftccversion=$($ftcc --version|head -1|cut -f3 -d" ") - -# goto the base directory -if [ ! -d $basedir ] ; then mkdir $basedir; fi - -pushd $basedir - -# update the build directory -if [ ! -d $builddir ] ; then mkdir $builddir; fi - -date=`date +%Y%m%d` -testresultsdir=$builddir/$date -pushd $builddir - while [ ! 
-d $date ] ; do - svn mkdir $svnserver/mysql.build/$date -m "" - svn checkout $svnserver/mysql.build/$date - if [ $? -ne 0 ] ; then rm -rf $date; fi - done -popd - -testresult="PASS" -runfile=$testresultsdir/loader-stress-$rows-$dictionaries-$tokudb-$branchrevision-$ftcc-$ftccversion-$system-$arch-$myhost -if [ "$instancetype" != "" ] ; then runfilefile=$runfile-$instancetype; fi -rm -f $runfile - -# checkout the code -if [ -d loader-stress-$branchrevision ] ; then rm -rf loader-stress-$branchrevision; fi -mkdir loader-stress-$branchrevision - -if [ $branch = "." ] ; then branch=toku; fi - -retry svn export -r $revision -q $svnserver/$branch/$tokudb loader-stress-$branchrevision/$tokudb -exitcode=$? -if [ $exitcode != 0 ] ; then - testresult="FAIL" -fi - -if [ $testresult = "PASS" ] ; then - pushd loader-stress-$branchrevision/$tokudb - echo `date` make release -s CC=$ftcc HAVE_CILK=$have_cilk FTLOADER=$ft_loader >>$runfile - make -s release CC=$ftcc HAVE_CILK=$have_cilk FTLOADER=$ft_loader >>$runfile 2>&1 - exitcode=$? - echo `date` complete $exitcode >>$runfile - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi -if [ $testresult = "PASS" ] ; then - pushd loader-stress-$branchrevision/$tokudb/src/tests - echo `date` make loader-stress-test.tdb CC=$ftcc HAVE_CILK=$have_cilk >>$runfile - make loader-stress-test.tdb -s CC=$ftcc HAVE_CILK=$have_cilk >>$runfile 2>&1 - exitcode=$? - echo `date` complete $exitcode >>$runfile - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi - -# run -if [ $testresult = "PASS" ] ; then - pushd loader-stress-$branchrevision/$tokudb/src/tests - echo `date` ./loader-stress-test.tdb -v -r $rows -d $dictionaries -c >>$runfile - ./loader-stress-test.tdb -v -r $rows -d $dictionaries -c >>$runfile 2>&1 - exitcode=$? 
- echo `date` complete $exitcode >>$runfile - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi - -if [ $commit != 0 ] ; then - svn add $runfile - retry svn commit -m \"$testresult loader stress $rows $dictionaries $tokudb $branchrevision $ftcc $ftccversion $system $arch $myhost\" $runfile -fi - -popd - -if [ $testresult = "PASS" ] ; then exitcode=0; else exitcode=1; fi -exit $exitcode diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run-nightly-coverage-tests.bash mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run-nightly-coverage-tests.bash --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run-nightly-coverage-tests.bash 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run-nightly-coverage-tests.bash 2014-10-08 13:19:52.000000000 +0000 @@ -12,7 +12,7 @@ if [ ! -d build ] ; then mkdir build pushd build - CC=gcc47 CXX=g++47 cmake \ + cmake \ -D CMAKE_BUILD_TYPE=Debug \ -D USE_VALGRIND=ON \ -D TOKU_DEBUG_PARANOID=ON \ @@ -20,7 +20,6 @@ -D USE_GTAGS=OFF \ -D USE_CSCOPE=OFF \ -D USE_ETAGS=OFF \ - -D USE_BDB=OFF \ -D USE_GCOV=ON \ -D CMAKE_LINK_DEPENDS_NO_SHARED=ON \ -G Ninja \ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run-nightly-drd-tests.bash mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run-nightly-drd-tests.bash --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run-nightly-drd-tests.bash 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run-nightly-drd-tests.bash 2014-10-08 13:19:52.000000000 +0000 @@ -12,7 +12,7 @@ if [ ! 
-d build ] ; then mkdir build pushd build - CC=gcc47 CXX=g++47 cmake \ + cmake \ -D CMAKE_BUILD_TYPE=drd \ -D USE_VALGRIND=ON \ -D TOKU_DEBUG_PARANOID=ON \ @@ -20,7 +20,6 @@ -D USE_GTAGS=OFF \ -D USE_CSCOPE=OFF \ -D USE_ETAGS=OFF \ - -D USE_BDB=OFF \ -D CMAKE_LINK_DEPENDS_NO_SHARED=ON \ -G Ninja \ -D RUN_LONG_TESTS=ON \ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run-nightly-release-tests.bash mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run-nightly-release-tests.bash --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run-nightly-release-tests.bash 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run-nightly-release-tests.bash 2014-10-08 13:19:52.000000000 +0000 @@ -12,7 +12,7 @@ if [ ! -d build ] ; then mkdir build pushd build - CC=gcc47 CXX=g++47 cmake \ + cmake \ -D CMAKE_BUILD_TYPE=Release \ -D USE_VALGRIND=ON \ -D TOKU_DEBUG_PARANOID=OFF \ @@ -20,7 +20,6 @@ -D USE_GTAGS=OFF \ -D USE_CSCOPE=OFF \ -D USE_ETAGS=OFF \ - -D USE_BDB=ON \ -D CMAKE_LINK_DEPENDS_NO_SHARED=ON \ -G Ninja \ -D RUN_LONG_TESTS=ON \ @@ -41,6 +40,6 @@ -E '/drd|/helgrind' ctest -j16 \ -D NightlyMemCheck \ - -E '^ydb/.*\.bdb|test1426\.tdb|/drd|/helgrind' + -E 'test1426\.tdb|/drd|/helgrind' set -e ctest -D NightlySubmit diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run.stress-tests.bash mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run.stress-tests.bash --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run.stress-tests.bash 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run.stress-tests.bash 1970-01-01 00:00:00.000000000 +0000 @@ -1,332 +0,0 @@ -#!/bin/bash -# $Id$ - -DOC=<&2 - echo " [--toku_toplevel=]" 1>&2 - echo " [--log=]" 1>&2 - echo " [--savedir=]" 1>&2 -} - -# parse the command line -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - ok=no - for opt in toku_toplevel log savedir - do - if [[ ${BASH_REMATCH[1]} = $opt ]] - 
then - ok=yes - fi - done - if [[ $ok = no ]] - then - usage; exit 1 - fi - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1 - fi -done - -src_tests="${toku_toplevel}/src/tests" -testnames=(test_stress1.tdb \ - test_stress5.tdb \ - test_stress6.tdb) -recover_testnames=(recover-test_stress1.tdb \ - recover-test_stress2.tdb \ - recover-test_stress3.tdb) - -save_failure() { - dir="$1"; shift - out="$1"; shift - envdir="$1"; shift - rev=$1; shift - exec="$1"; shift - table_size=$1; shift - cachetable_size=$1; shift - num_ptquery=$1; shift - num_update=$1; shift - phase=$1; shift - dest="${dir}/${exec}-${table_size}-${cachetable_size}-${num_ptquery}-${num_update}-${phase}-${rev}-$$" - mkdir -p "$dest" - mv $out "${dest}/output.txt" - mv core* "${dest}/" - mv $envdir "${dest}/" -} - -running=no - -run_test() { - rev=$1; shift - exec="$1"; shift - table_size="$1"; shift - cachetable_size="$1"; shift - num_ptquery="$1"; shift - num_update="$1"; shift - mylog="$1"; shift - mysavedir="$1"; shift - - rundir=$(mktemp -d ./rundir.XXXXXXXX) - tmplog=$(mktemp) - - ulimit -c unlimited - t0="$(date)" - t1="" - t2="" - envdir="../${exec}-${table_size}-${cachetable_size}-${num_ptquery}-${num_update}-$$.dir" - cd $rundir - if LD_LIBRARY_PATH=../../../lib:$LD_LIBRARY_PATH \ - ../$exec -v --only_create --num_seconds 600 --envdir "$envdir" \ - --num_elements $table_size \ - --cachetable_size $cachetable_size &> $tmplog - then - rm -f $tmplog - t1="$(date)" - if LD_LIBRARY_PATH=../../../lib:$LD_LIBRARY_PATH \ - ../$exec -v --only_stress --num_seconds 600 --no-crash_on_update_failure --envdir "$envdir" \ - --num_elements $table_size \ - --cachetable_size $cachetable_size \ - --num_ptquery_threads $num_ptquery \ - --num_update_threads $num_update &> $tmplog - then - rm -f $tmplog - t2="$(date)" - echo "\"$exec\",$rev,$table_size,$cachetable_size,$num_ptquery,$num_update,$t0,$t1,$t2,PASS" | tee -a "$mylog" - else - save_failure "$mysavedir" $tmplog $envdir $rev $exec 
$table_size $cachetable_size $num_ptquery $num_update stress - echo "\"$exec\",$rev,$table_size,$cachetable_size,$num_ptquery,$num_update,$t0,$t1,$t2,FAIL" | tee -a "$mylog" - fi - else - save_failure "$mysavedir" $tmplog $envdir $rev $exec $table_size $cachetable_size $num_ptquery $num_update create - echo "\"$exec\",$rev,$table_size,$cachetable_size,$num_ptquery,$num_update,$t0,$t1,$t2,FAIL" | tee -a "$mylog" - fi - cd .. - rm -rf $rundir "$envdir" -} - -loop_test() { - rev=$1; shift - exec="$1"; shift - table_size="$1"; shift - cachetable_size="$1"; shift - mylog="$1"; shift - mysavedir="$1"; shift - - ptquery_rand=0 - update_rand=0 - while [[ $running = "yes" ]] - do - num_ptquery=1 - num_update=1 - if [[ $ptquery_rand -gt 1 ]] - then - (( num_ptquery = $RANDOM % 16 )) - fi - if [[ $update_rand -gt 0 ]] - then - (( num_update = $RANDOM % 16 )) - fi - (( ptquery_rand = (ptquery_rand + 1) % 4 )) - (( update_rand = (update_rand + 1) % 2 )) - run_test $rev $exec $table_size $cachetable_size $num_ptquery $num_update $mylog $mysavedir - done -} - -run_recover_test() { - rev=$1; shift - exec="$1"; shift - table_size="$1"; shift - cachetable_size="$1"; shift - num_ptquery="$1"; shift - num_update="$1"; shift - mylog="$1"; shift - mysavedir="$1"; shift - - rundir=$(mktemp -d ./rundir.XXXXXXXX) - tmplog=$(mktemp) - - ulimit -c unlimited - t0="$(date)" - t1="" - t2="" - envdir="../${exec}-${table_size}-${cachetable_size}-${num_ptquery}-${num_update}-$$.dir" - cd $rundir - if ! 
LD_LIBRARY_PATH=../../../lib:$LD_LIBRARY_PATH \ - ../$exec -v --test --num_seconds 600 --no-crash_on_update_failure --envdir "$envdir" \ - --num_elements $table_size \ - --cachetable_size $cachetable_size \ - --num_ptquery_threads $num_ptquery \ - --num_update_threads $num_update &> $tmplog - then - rm -f $tmplog - t1="$(date)" - if LD_LIBRARY_PATH=../../../lib:$LD_LIBRARY_PATH \ - ../$exec -v --recover --envdir "$envdir" \ - --num_elements $table_size \ - --cachetable_size $cachetable_size &> $tmplog - then - rm -f $tmplog - t2="$(date)" - echo "\"$exec\",$rev,$table_size,$cachetable_size,$num_ptquery,$num_update,$t0,$t1,$t2,PASS" | tee -a "$mylog" - else - save_failure "$mysavedir" $tmplog $envdir $rev $exec $table_size $cachetable_size $num_ptquery $num_update recover - echo "\"$exec\",$rev,$table_size,$cachetable_size,$num_ptquery,$num_update,$t0,$t1,$t2,FAIL" | tee -a "$mylog" - fi - else - save_failure "$mysavedir" $tmplog $envdir $rev $exec $table_size $cachetable_size $num_ptquery $num_update test - echo "\"$exec\",$rev,$table_size,$cachetable_size,$num_ptquery,$num_update,$t0,$t1,$t2,FAIL" | tee -a "$mylog" - fi - cd .. 
- rm -rf $rundir "$envdir" -} - -loop_recover_test() { - rev=$1; shift - exec="$1"; shift - table_size="$1"; shift - cachetable_size="$1"; shift - mylog="$1"; shift - mysavedir="$1"; shift - - ptquery_rand=0 - update_rand=0 - while [[ $running = "yes" ]] - do - num_ptquery=1 - num_update=1 - if [[ $ptquery_rand -gt 1 ]] - then - (( num_ptquery = $RANDOM % 16 )) - fi - if [[ $update_rand -gt 0 ]] - then - (( num_update = $RANDOM % 16 )) - fi - (( ptquery_rand = (ptquery_rand + 1) % 4 )) - (( update_rand = (update_rand + 1) % 2 )) - run_recover_test $rev $exec $table_size $cachetable_size $num_ptquery $num_update $mylog $mysavedir - done -} - -declare -a pids=(0) -i=0 - -savepid() { - pids[$i]=$1 - (( i = i + 1 )) -} - -killchildren() { - kill ${pids[@]} || true - for exec in ${testnames[@]} ${recover_testnames[@]} - do - pkill -f $exec || true - done -} - -trap killchildren INT TERM EXIT - -mkdir -p $log -mkdir -p $savedir - -while true -do - (cd $toku_toplevel; \ - svn update; \ - make CC=icc DEBUG=0 HAVE_CILK=0 clean fastbuild; \ - make CC=icc DEBUG=0 HAVE_CILK=0 -C src/tests ${testnames[@]} ${recover_testnames[@]}) - - cd $src_tests - - rev=$(svn info ../.. | awk '/Revision/ { print $2 }') - - running=yes - - for exec in ${testnames[@]} - do - for table_size in 2000 200000 50000000 - do - (( small_cachetable = table_size * 50 )) - suffix="${exec}-${table_size}-${small_cachetable}-$$" - touch "${log}/${suffix}" - loop_test $rev $exec $table_size $small_cachetable "${log}/${suffix}" "${savedir}/${suffix}" & savepid $! - - suffix="${exec}-${table_size}-1000000000-$$" - touch "${log}/${suffix}" - loop_test $rev $exec $table_size 1000000000 "${log}/${suffix}" "${savedir}/${suffix}" & savepid $! 
- done - done - - for exec in ${recover_testnames[@]} - do - for table_size in 2000 200000 50000000 - do - (( small_cachetable = table_size * 50 )) - suffix="${exec}-${table_size}-${small_cachetable}-$$" - touch "${log}/${suffix}" - loop_recover_test $rev $exec $table_size $small_cachetable "${log}/${suffix}" "${savedir}/${suffix}" & savepid $! - - suffix="${exec}-${table_size}-1000000000-$$" - touch "${log}/${suffix}" - loop_recover_test $rev $exec $table_size 1000000000 "${log}/${suffix}" "${savedir}/${suffix}" & savepid $! - done - done - - sleep 1d - - running=no - - killchildren - - wait ${pids[@]} || true - - idx=0 - for pid in ${pids[@]} - do - pids[$idx]=0 - (( idx = idx + 1 )) - done -done diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run.stress-tests.py mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run.stress-tests.py --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/run.stress-tests.py 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/run.stress-tests.py 2014-10-08 13:19:52.000000000 +0000 @@ -552,7 +552,6 @@ newenv['CXX'] = cxx r = call(['cmake', '-DCMAKE_BUILD_TYPE=Debug', - '-DUSE_BDB=OFF', '-DUSE_GTAGS=OFF', '-DUSE_CTAGS=OFF', '-DUSE_ETAGS=OFF', diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/tokucilkscreen mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/tokucilkscreen --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/tokucilkscreen 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/tokucilkscreen 1970-01-01 00:00:00.000000000 +0000 @@ -1,19 +0,0 @@ -#!/usr/bin/env bash - -# exit 1 if cilkscreen finds errors - -function cleanup() { - if [ "$logfile" != "" ] ; then rm $logfile; logfile=; fi -} - -trap cleanup SIGINT -logfile=$(mktemp /tmp/toku_cilkscreen.XXXXXXXX) -cilkscreen $* 2>$logfile -exitcode=$? 
-if [ $exitcode = 0 ] ; then - cat $logfile >>/dev/fd/2 - grep "No errors found by Cilkscreen" $logfile >/dev/null 2>&1 - exitcode=$? -fi -rm $logfile -exit $exitcode \ No newline at end of file diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/tokuvalgrind mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/tokuvalgrind --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/scripts/tokuvalgrind 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/scripts/tokuvalgrind 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +function usage() { + echo "check for valgrind error and set the exit code" +} + +function cleanup() { + if [ "$logfile" != "" ] ; then rm $logfile; fi + exit 1 +} + +args=$* + +logfile= +createlogfile=0 +errorexitcode=1 + +while [ $# -gt 0 ] ; do + arg=$1; shift + if [[ $arg =~ "--" ]] ; then + if [[ $arg =~ --log-file=(.*) ]] ; then + logfile=${BASH_REMATCH[1]} + elif [[ $arg =~ --error-exitcode=(.*) ]] ; then + errorexitcode=${BASH_REMATCH[1]} + fi + else + break + fi +done + +if [ "$logfile" = "" ] ; then + createlogfile=1 + trap cleanup SIGINT + logfile=`mktemp /tmp/$(whoami).tokugrind.XXXXXXXX` + args="--log-file=$logfile $args" +fi + +valgrind $args +exitcode=$? +if [ $exitcode = 0 ] ; then + lines=$(wc -l <$logfile) + if [ $lines -ne 0 ] ; then + exitcode=$errorexitcode + fi +fi + +if [ $createlogfile != 0 ] ; then + cat $logfile >>/dev/stderr + rm $logfile +fi + +exit $exitcode diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/errors.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/errors.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/errors.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/errors.cc 2014-10-08 13:19:51.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -96,6 +96,8 @@ The error handling routines for ydb */ +#include + #include #include #include diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/indexer.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/indexer.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/indexer.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/indexer.cc 2014-10-08 13:19:51.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -92,6 +92,8 @@ /* * The indexer */ +#include + #include #include #include @@ -99,13 +101,12 @@ #include "ydb-internal.h" #include #include "indexer.h" -#include #include #include #include -#include -#include -#include +#include +#include +#include #include #include "loader.h" #include @@ -118,7 +119,7 @@ static INDEXER_STATUS_S indexer_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(indexer_status, k, c, t, "indexer: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(indexer_status, k, c, t, "indexer: " l, inc) static void status_init(void) { @@ -233,32 +234,25 @@ // after grabbing the indexer lock bool toku_indexer_may_insert(DB_INDEXER* indexer, const DBT* key) { - bool retval = false; + bool may_insert = false; toku_mutex_lock(&indexer->i->indexer_estimate_lock); + // if we have no position estimate, we can't tell, so return false - if (indexer->i->position_estimate.data == NULL) { - retval = false; - } - else { - FT_HANDLE ft_handle = indexer->i->src_db->i->ft_handle; - ft_compare_func keycompare = toku_ft_get_bt_compare(ft_handle); - int r = keycompare( - indexer->i->src_db, - &indexer->i->position_estimate, - key - ); + if (indexer->i->position_estimate.data == nullptr) { + may_insert = false; + } else { + DB *db = indexer->i->src_db; + const toku::comparator &cmp = toku_ft_get_comparator(db->i->ft_handle); + int c = 
cmp(&indexer->i->position_estimate, key); + // if key > position_estimate, then we know the indexer cursor // is past key, and we can safely say that associated values of // key must be inserted into the indexer's db - if (r < 0) { - retval = true; - } - else { - retval = false; - } + may_insert = c < 0; } + toku_mutex_unlock(&indexer->i->indexer_estimate_lock); - return retval; + return may_insert; } void @@ -546,7 +540,7 @@ // cachetable pair locks. because no txn can commit on this db, read // the provisional info for the newly read ule. static int -le_cursor_callback(ITEMLEN keylen, bytevec key, ITEMLEN UU(vallen), bytevec val, void *extra, bool lock_only) { +le_cursor_callback(uint32_t keylen, const void *key, uint32_t UU(vallen), const void *val, void *extra, bool lock_only) { if (lock_only || val == NULL) { ; // do nothing if only locking. do nothing if val==NULL, means DB_NOTFOUND } else { @@ -696,7 +690,7 @@ } -// derived from ha_tokudb::estimate_num_rows +// derived from the handlerton's estimate_num_rows() static int update_estimated_rows(DB_INDEXER *indexer) { int error; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/indexer.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/indexer.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/indexer.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/indexer.h 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,9 +89,7 @@ #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
#ident "$Id$" -#ifndef TOKU_INDEXER_H -#define TOKU_INDEXER_H - +#pragma once // locking and unlocking functions to synchronize cursor position with // XXX_multiple APIs @@ -178,6 +176,3 @@ } INDEXER_STATUS_S, *INDEXER_STATUS; void toku_indexer_get_status(INDEXER_STATUS s); - - -#endif // TOKU_INDEXER_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/indexer-internal.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/indexer-internal.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/indexer-internal.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/indexer-internal.h 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,10 +89,9 @@ #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #ident "$Id$" -#ifndef TOKU_INDEXER_INTERNAL_H -#define TOKU_INDEXER_INTERNAL_H +#pragma once -#include +#include #include // the indexer_commit_keys is an ordered set of keys described by a DBT in the keys array. 
@@ -168,5 +167,3 @@ void indexer_undo_do_destroy(DB_INDEXER *indexer); int indexer_undo_do(DB_INDEXER *indexer, DB *hotdb, struct ule_prov_info *prov_info, DBT_ARRAY *hot_keys, DBT_ARRAY *hot_vals); - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/indexer-undo-do.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/indexer-undo-do.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/indexer-undo-do.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/indexer-undo-do.cc 2014-10-08 13:19:51.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,6 +89,8 @@ #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
#ident "$Id$" +#include + #include #include @@ -96,13 +98,12 @@ #include #include -#include #include #include #include -#include -#include -#include +#include +#include +#include #include "ydb-internal.h" #include "ydb_row_lock.h" @@ -199,7 +200,7 @@ ULEHANDLE ule = prov_info->ule; // init the xids to the root xid - XIDS xids = xids_get_root_xids(); + XIDS xids = toku_xids_get_root_xids(); // scan the committed stack from bottom to top uint32_t num_committed = ule_get_num_committed(ule); @@ -280,7 +281,7 @@ break; } - xids_destroy(&xids); + toku_xids_destroy(&xids); return result; } @@ -312,7 +313,7 @@ ULEHANDLE ule = prov_info->ule; // init the xids to the root xid - XIDS xids = xids_get_root_xids(); + XIDS xids = toku_xids_get_root_xids(); uint32_t num_provisional = prov_info->num_provisional; uint32_t num_committed = prov_info->num_committed; @@ -472,7 +473,7 @@ // then this will need to be handled below exit release_txns(ule, prov_states, prov_txns, indexer); exit: - xids_destroy(&xids); + toku_xids_destroy(&xids); return result; } @@ -496,16 +497,16 @@ indexer_set_xid(DB_INDEXER *UU(indexer), TXNID this_xid, XIDS *xids_result) { int result = 0; XIDS old_xids = *xids_result; - XIDS new_xids = xids_get_root_xids(); + XIDS new_xids = toku_xids_get_root_xids(); if (this_xid != TXNID_NONE) { XIDS child_xids; - result = xids_create_child(new_xids, &child_xids, this_xid); - xids_destroy(&new_xids); + result = toku_xids_create_child(new_xids, &child_xids, this_xid); + toku_xids_destroy(&new_xids); if (result == 0) new_xids = child_xids; } if (result == 0) { - xids_destroy(&old_xids); + toku_xids_destroy(&old_xids); *xids_result = new_xids; } @@ -517,9 +518,9 @@ indexer_append_xid(DB_INDEXER *UU(indexer), TXNID xid, XIDS *xids_result) { XIDS old_xids = *xids_result; XIDS new_xids; - int result = xids_create_child(old_xids, &new_xids, xid); + int result = toku_xids_create_child(old_xids, &new_xids, xid); if (result == 0) { - xids_destroy(&old_xids); + 
toku_xids_destroy(&old_xids); *xids_result = new_xids; } return result; @@ -682,7 +683,7 @@ static int indexer_ft_commit(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids) { int result = 0; - if (xids_get_num_xids(xids) > 0) {// send commit only when not the root xid + if (toku_xids_get_num_xids(xids) > 0) {// send commit only when not the root xid // TEST if (indexer->i->test_commit_any) { result = indexer->i->test_commit_any(indexer, hotdb, hotkey, xids); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/loader.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/loader.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/loader.cc 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/loader.cc 2014-10-08 13:19:51.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -93,14 +93,16 @@ * The loader */ +#include + #include #include #include #include #include -#include -#include +#include +#include #include "ydb-internal.h" #include "ydb_db.h" @@ -119,7 +121,7 @@ static LOADER_STATUS_S loader_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(loader_status, k, c, t, "loader: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(loader_status, k, c, t, "loader: " l, inc) static void status_init(void) { diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/loader.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/loader.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/loader.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/loader.h 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -89,8 +89,7 @@ #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #ident "$Id$" -#ifndef TOKU_LOADER_H -#define TOKU_LOADER_H +#pragma once /* Create and set up a loader. @@ -208,6 +207,3 @@ void toku_loader_get_status(LOADER_STATUS s); - - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/big-nested-abort-abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/big-nested-abort-abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/big-nested-abort-abort.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/big-nested-abort-abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/big-nested-abort-commit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/big-nested-abort-commit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/big-nested-abort-commit.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/big-nested-abort-commit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/big-nested-commit-abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/big-nested-commit-abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/big-nested-commit-abort.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/big-nested-commit-abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/big-nested-commit-commit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/big-nested-commit-commit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/big-nested-commit-commit.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/big-nested-commit-commit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/big-shutdown.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/big-shutdown.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/big-shutdown.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/big-shutdown.cc 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,189 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +// Create a lot of dirty nodes, kick off a checkpoint, and close the environment. +// Measure the time it takes to close the environment since we are speeding up that +// function. + +#include "test.h" +#include + +// Insert max_rows key/val pairs into the db +static void do_inserts(DB_ENV *env, DB *db, uint64_t max_rows, size_t val_size) { + char val_data[val_size]; memset(val_data, 0, val_size); + int r; + DB_TXN *txn = nullptr; + r = env->txn_begin(env, nullptr, &txn, 0); + CKERR(r); + + for (uint64_t i = 1; i <= max_rows; i++) { + // pick a sequential key but it does not matter for this test. 
+ uint64_t k[2] = { + htonl(i), random64(), + }; + DBT key = { .data = k, .size = sizeof k }; + DBT val = { .data = val_data, .size = (uint32_t) val_size }; + r = db->put(db, txn, &key, &val, 0); + CKERR(r); + + if ((i % 1000) == 0) { + if (verbose) + fprintf(stderr, "put %" PRIu64 "\n", i); + r = txn->commit(txn, 0); + CKERR(r); + r = env->txn_begin(env, nullptr, &txn, 0); + CKERR(r); + } + } + + r = txn->commit(txn, 0); + CKERR(r); +} + +// Create a cache with a lot of dirty nodes, kick off a checkpoint, and measure the time to +// close the environment. +static void big_shutdown(void) { + int r; + + DB_ENV *env = nullptr; + r = db_env_create(&env, 0); + CKERR(r); + r = env->set_cachesize(env, 8, 0, 1); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + DB *db = nullptr; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + do_inserts(env, db, 1000000, 1024); + + // kick the checkpoint thread + if (verbose) + fprintf(stderr, "env->checkpointing_set_period\n"); + r = env->checkpointing_set_period(env, 2); + CKERR(r); + sleep(3); + + if (verbose) + fprintf(stderr, "db->close\n"); + r = db->close(db, 0); + CKERR(r); + + // measure the shutdown time + uint64_t tstart = toku_current_time_microsec(); + if (verbose) + fprintf(stderr, "env->close\n"); + r = env->close(env, 0); + CKERR(r); + uint64_t tend = toku_current_time_microsec(); + if (verbose) + fprintf(stderr, "env->close complete %" PRIu64 " sec\n", (tend - tstart)/1000000); +} + +int test_main (int argc, char *const argv[]) { + default_parse_args(argc, argv); + + // init the env directory + toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + // run the test + big_shutdown(); + + return 0; +} diff -Nru 
mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/bigtxn27.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/bigtxn27.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/bigtxn27.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/bigtxn27.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blackhole.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blackhole.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blackhole.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blackhole.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -92,7 +92,7 @@ // Test that a db ignores insert messages in blackhole mode #include "test.h" -#include +#include static DB *db; static DB *blackhole_db; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-first.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-first.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-first.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-first.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-first-empty.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-first-empty.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-first-empty.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-first-empty.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-last.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-last.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-last.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-last.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-next-prev.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-next-prev.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-next-prev.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-next-prev.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-next-prev-deadlock.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-next-prev-deadlock.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-next-prev-deadlock.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-next-prev-deadlock.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-prelock-range.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-prelock-range.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-prelock-range.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-prelock-range.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-put.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-put.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-put.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-put.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-put-timeout.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-put-timeout.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-put-timeout.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-put-timeout.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-put-wakeup.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-put-wakeup.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-put-wakeup.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-put-wakeup.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-set.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-set.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-set.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-set.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-set-range-0.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-set-range-0.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-set-range-0.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-set-range-0.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-set-range-n.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-set-range-n.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-set-range-n.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-set-range-n.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-set-range-reverse-0.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-set-range-reverse-0.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-set-range-reverse-0.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-set-range-reverse-0.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-table-lock.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-table-lock.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/blocking-table-lock.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/blocking-table-lock.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/bug1381.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/bug1381.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/bug1381.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/bug1381.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/cachetable-race.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/cachetable-race.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/cachetable-race.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/cachetable-race.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/checkpoint1.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/checkpoint1.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/checkpoint1.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/checkpoint1.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/checkpoint_fairness.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/checkpoint_fairness.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/checkpoint_fairness.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/checkpoint_fairness.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/checkpoint_stress.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/checkpoint_stress.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/checkpoint_stress.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/checkpoint_stress.cc 2014-10-08 13:19:52.000000000 +0000 @@ -27,7 +27,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/checkpoint_test.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/checkpoint_test.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/checkpoint_test.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/checkpoint_test.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,13 +86,11 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." #ident "$Id$" -#ifndef CHECKPOINT_TEST_H -#define CHECKPOINT_TEST_H - - DB_ENV *env; enum {MAX_NAME=128}; @@ -537,6 +535,3 @@ db_startup(d, NULL); } } - - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/CMakeLists.txt mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/CMakeLists.txt --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/CMakeLists.txt 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/CMakeLists.txt 2014-10-08 13:19:52.000000000 +0000 @@ -50,7 +50,7 @@ ## #5138 only reproduces when using the static library. 
list(REMOVE_ITEM tdb_bins test-5138.tdb) add_executable(test-5138.tdb test-5138.cc) - target_link_libraries(test-5138.tdb ${LIBTOKUDB}_static ${ZLIB_LIBRARY} ${LIBTOKUPORTABILITY}_static ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) + target_link_libraries(test-5138.tdb ${LIBTOKUDB}_static z ${LIBTOKUPORTABILITY}_static ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) add_space_separated_property(TARGET test-5138.tdb COMPILE_FLAGS -fvisibility=hidden) add_ydb_test(test-5138.tdb) @@ -160,10 +160,7 @@ endforeach(av) endforeach(ov) - if (NOT (CMAKE_SYSTEM_NAME MATCHES Darwin OR - (CMAKE_CXX_COMPILER_ID STREQUAL Intel AND - CMAKE_BUILD_TYPE STREQUAL Release) - OR USE_GCOV)) + if (NOT (CMAKE_SYSTEM_NAME MATCHES Darwin OR USE_GCOV)) declare_custom_tests(helgrind1.tdb) add_test(NAME ydb/helgrind_helgrind1.tdb COMMAND valgrind --quiet --tool=helgrind --error-exitcode=1 --log-file=helgrind1.tdb.deleteme $) diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/create-datadir.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/create-datadir.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/create-datadir.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/create-datadir.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/cursor-isolation.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/cursor-isolation.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/cursor-isolation.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/cursor-isolation.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/cursor-more-than-a-leaf-provdel.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/cursor-more-than-a-leaf-provdel.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/cursor-more-than-a-leaf-provdel.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/cursor-more-than-a-leaf-provdel.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/cursor-set-del-rmw.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/cursor-set-del-rmw.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/cursor-set-del-rmw.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/cursor-set-del-rmw.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/cursor-set-range-rmw.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/cursor-set-range-rmw.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/cursor-set-range-rmw.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/cursor-set-range-rmw.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/cursor-step-over-delete.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/cursor-step-over-delete.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/cursor-step-over-delete.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/cursor-step-over-delete.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/db-put-simple-deadlock.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/db-put-simple-deadlock.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/db-put-simple-deadlock.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/db-put-simple-deadlock.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/db-put-simple-deadlock-threads.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/db-put-simple-deadlock-threads.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/db-put-simple-deadlock-threads.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/db-put-simple-deadlock-threads.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/db-put-simple-lockwait.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/db-put-simple-lockwait.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/db-put-simple-lockwait.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/db-put-simple-lockwait.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/db-put-update-deadlock.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/db-put-update-deadlock.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/db-put-update-deadlock.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/db-put-update-deadlock.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/dbremove-nofile-limit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/dbremove-nofile-limit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/dbremove-nofile-limit.cc 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/dbremove-nofile-limit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/del-multiple.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/del-multiple.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/del-multiple.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/del-multiple.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/del-multiple-huge-primary-row.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/del-multiple-huge-primary-row.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/del-multiple-huge-primary-row.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/del-multiple-huge-primary-row.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/del-multiple-srcdb.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/del-multiple-srcdb.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/del-multiple-srcdb.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/del-multiple-srcdb.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/del-simple.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/del-simple.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/del-simple.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/del-simple.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/directory_lock.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/directory_lock.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/directory_lock.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/directory_lock.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/diskfull.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/diskfull.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/diskfull.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/diskfull.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/dump-env.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/dump-env.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/dump-env.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/dump-env.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,7 +95,6 @@ static DB *db; DB_TXN *txn; -const int num_insert = 25000; static void setup (void) { diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/env_loader_memory.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/env_loader_memory.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/env_loader_memory.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/env_loader_memory.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/env_nproc.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/env_nproc.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/env_nproc.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/env_nproc.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/env-put-multiple.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/env-put-multiple.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/env-put-multiple.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/env-put-multiple.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/env_startup.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/env_startup.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/env_startup.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/env_startup.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/filesize.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/filesize.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/filesize.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/filesize.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/get_key_after_bytes_unit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/get_key_after_bytes_unit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/get_key_after_bytes_unit.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/get_key_after_bytes_unit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/get_last_key.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/get_last_key.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/get_last_key.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/get_last_key.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/helgrind1.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/helgrind1.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/helgrind1.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/helgrind1.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/helgrind2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/helgrind2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/helgrind2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/helgrind2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/helgrind3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/helgrind3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/helgrind3.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/helgrind3.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-bw.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-bw.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-bw.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-bw.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -103,7 +103,6 @@ static const int FORWARD = 0; static const int BACKWARD = 1; typedef int Direction; -static const int TXN_NONE = 0; static const int TXN_CREATE = 1; static const int TXN_END = 2; typedef int TxnWork; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-error-callback.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-error-callback.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-error-callback.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-error-callback.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-insert-committed.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-insert-committed.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-insert-committed.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-insert-committed.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-insert-committed-optimized.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-insert-committed-optimized.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-insert-committed-optimized.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-insert-committed-optimized.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-insert-provisional.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-insert-provisional.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-insert-provisional.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-insert-provisional.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-lock-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-lock-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-lock-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-lock-test.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-multiclient.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-multiclient.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-multiclient.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-multiclient.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-nested-insert-committed.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-nested-insert-committed.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-nested-insert-committed.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-nested-insert-committed.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-put-abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-put-abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-put-abort.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-put-abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-put-commit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-put-commit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-put-commit.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-put-commit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-put-multiple.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-put-multiple.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-put-multiple.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-put-multiple.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-simple-abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-simple-abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-simple-abort.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-simple-abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-simple-abort-put.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-simple-abort-put.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-simple-abort-put.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-simple-abort-put.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-undo-do-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-undo-do-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-undo-do-test.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-undo-do-test.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,13 +95,10 @@ #include "test.h" -#include -#include -#include #include #include #include -#include +#include #include "indexer-internal.h" diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-with-queries.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-with-queries.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hotindexer-with-queries.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hotindexer-with-queries.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hot-optimize-table-tests.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hot-optimize-table-tests.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/hot-optimize-table-tests.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/hot-optimize-table-tests.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -103,7 +103,7 @@ DB_ENV* env; unsigned int leaf_hits; -// Custom Update Function for our test BRT. +// Custom Update Function for our test FT. static int update_func(DB* UU(db), const DBT* key, @@ -148,7 +148,7 @@ // Remove any previous environment. toku_os_recursive_delete(TOKU_TEST_FILENAME); - // Set up a new TokuDB. + // Set up a new environment. { int chk_r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(chk_r); } { int chk_r = db_env_create(&env, 0); CKERR(chk_r); } env->set_errfile(env, stderr); @@ -266,7 +266,7 @@ default_parse_args(argc, argv); hot_test_setup(); - // Create and Open the Database/BRT + // Create and Open the Database/FT DB *db = NULL; const unsigned int BIG = 4000000; const unsigned int SMALL = 10; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/inflate2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/inflate2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/inflate2.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/inflate2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/inflate.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/inflate.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/inflate.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/inflate.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/insert-dup-prelock.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/insert-dup-prelock.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/insert-dup-prelock.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/insert-dup-prelock.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/isolation.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/isolation.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/isolation.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/isolation.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/isolation-read-committed.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/isolation-read-committed.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/isolation-read-committed.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/isolation-read-committed.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/keyrange.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/keyrange.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/keyrange.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/keyrange.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/keyrange-merge.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/keyrange-merge.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/keyrange-merge.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/keyrange-merge.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/key-val.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/key-val.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/key-val.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/key-val.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,10 +86,10 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." -#ifndef KEY_VAL_H -#define KEY_VAL_H // // Functions to create unique key/value pairs, row generators, checkers, ... 
for each of NUM_DBS // @@ -295,8 +295,3 @@ return r; } - - - - -#endif // KEY_VAL_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/last-verify-time.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/last-verify-time.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/last-verify-time.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/last-verify-time.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-cleanup-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-cleanup-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-cleanup-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-cleanup-test.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-close-nproc-limit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-close-nproc-limit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-close-nproc-limit.cc 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-close-nproc-limit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-create-abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-create-abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-create-abort.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-create-abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-create-close.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-create-close.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-create-close.cc 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-create-close.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-create-commit-nproc-limit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-create-commit-nproc-limit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-create-commit-nproc-limit.cc 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-create-commit-nproc-limit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-create-nproc-limit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-create-nproc-limit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-create-nproc-limit.cc 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-create-nproc-limit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-dup-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-dup-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-dup-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-dup-test.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-no-puts.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-no-puts.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-no-puts.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-no-puts.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-reference-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-reference-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-reference-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-reference-test.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-stress-del.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-stress-del.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-stress-del.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-stress-del.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-stress-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-stress-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-stress-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-stress-test.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-tpch-load.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-tpch-load.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/loader-tpch-load.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/loader-tpch-load.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/locktree_escalation_stalls.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/locktree_escalation_stalls.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/locktree_escalation_stalls.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/locktree_escalation_stalls.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/manyfiles.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/manyfiles.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/manyfiles.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/manyfiles.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/maxsize-for-loader.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/maxsize-for-loader.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/maxsize-for-loader.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/maxsize-for-loader.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/medium-nested-commit-commit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/medium-nested-commit-commit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/medium-nested-commit-commit.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/medium-nested-commit-commit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/multiprocess.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/multiprocess.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/multiprocess.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/multiprocess.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/mvcc-create-table.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/mvcc-create-table.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/mvcc-create-table.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/mvcc-create-table.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/mvcc-many-committed.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/mvcc-many-committed.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/mvcc-many-committed.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/mvcc-many-committed.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/mvcc-read-committed.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/mvcc-read-committed.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/mvcc-read-committed.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/mvcc-read-committed.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/openlimit17.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/openlimit17.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/openlimit17.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/openlimit17.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/openlimit17-locktree.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/openlimit17-locktree.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/openlimit17-locktree.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/openlimit17-locktree.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/openlimit17-metafiles.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/openlimit17-metafiles.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/openlimit17-metafiles.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/openlimit17-metafiles.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_checkpoint_var.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_checkpoint_var.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_checkpoint_var.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_checkpoint_var.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_child_txn.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_child_txn.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_child_txn.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_child_txn.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_cursor_nop.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_cursor_nop.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_cursor_nop.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_cursor_nop.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_iibench.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_iibench.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_iibench.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_iibench.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_insert.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_insert.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_insert.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_insert.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_malloc_free.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_malloc_free.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_malloc_free.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_malloc_free.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_nop.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_nop.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_nop.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_nop.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_ptquery2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_ptquery2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_ptquery2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_ptquery2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_ptquery.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_ptquery.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_ptquery.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_ptquery.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_rangequery.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_rangequery.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_rangequery.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_rangequery.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_read_txn.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_read_txn.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_read_txn.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_read_txn.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_read_txn_single_thread.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_read_txn_single_thread.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_read_txn_single_thread.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_read_txn_single_thread.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_read_write.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_read_write.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_read_write.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_read_write.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_txn_single_thread.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_txn_single_thread.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/perf_txn_single_thread.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/perf_txn_single_thread.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/powerfail.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/powerfail.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/powerfail.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/powerfail.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/preload-db.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/preload-db.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/preload-db.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/preload-db.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/preload-db-nested.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/preload-db-nested.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/preload-db-nested.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/preload-db-nested.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/prelock-read-read.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/prelock-read-read.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/prelock-read-read.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/prelock-read-read.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/prelock-read-write.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/prelock-read-write.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/prelock-read-write.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/prelock-read-write.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/prelock-write-read.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/prelock-write-read.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/prelock-write-read.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/prelock-write-read.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/prelock-write-write.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/prelock-write-write.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/prelock-write-write.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/prelock-write-write.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/print_engine_status.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/print_engine_status.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/print_engine_status.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/print_engine_status.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/progress.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/progress.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/progress.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/progress.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/put-del-multiple-array-indexing.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/put-del-multiple-array-indexing.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/put-del-multiple-array-indexing.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/put-del-multiple-array-indexing.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/queries_with_deletes.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/queries_with_deletes.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/queries_with_deletes.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/queries_with_deletes.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-2483.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-2483.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-2483.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-2483.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,8 +94,6 @@ #include "test.h" -const int envflags = DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE; - DB_ENV *env; DB_TXN *tid; DB *db; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-3113.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-3113.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-3113.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-3113.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-5146.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-5146.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-5146.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-5146.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-checkpoint-fcreate-fdelete-fcreate.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-checkpoint-fcreate-fdelete-fcreate.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-checkpoint-fcreate-fdelete-fcreate.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-checkpoint-fcreate-fdelete-fcreate.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-checkpoint-fopen-abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-checkpoint-fopen-abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-checkpoint-fopen-abort.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-checkpoint-fopen-abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-checkpoint-fopen-commit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-checkpoint-fopen-commit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-checkpoint-fopen-commit.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-checkpoint-fopen-commit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-child-rollback.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-child-rollback.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-child-rollback.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-child-rollback.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-compare-db.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-compare-db.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-compare-db.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-compare-db.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-compare-db-descriptor.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-compare-db-descriptor.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-compare-db-descriptor.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-compare-db-descriptor.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-delboth-after-checkpoint.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-delboth-after-checkpoint.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-delboth-after-checkpoint.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-delboth-after-checkpoint.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-delboth-checkpoint.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-delboth-checkpoint.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-delboth-checkpoint.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-delboth-checkpoint.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-del-multiple-abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-del-multiple-abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-del-multiple-abort.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-del-multiple-abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-del-multiple.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-del-multiple.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-del-multiple.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-del-multiple.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-del-multiple-srcdb-fdelete-all.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-del-multiple-srcdb-fdelete-all.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-del-multiple-srcdb-fdelete-all.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-del-multiple-srcdb-fdelete-all.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor10.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor10.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor10.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor10.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor11.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor11.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor11.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor11.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor12.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor12.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor12.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor12.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor3.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor3.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor4.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor4.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor4.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor4.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor5.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor5.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor5.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor5.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor6.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor6.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor6.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor6.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor7.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor7.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor7.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor7.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor8.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor8.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor8.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor8.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor9.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor9.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor9.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor9.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-descriptor.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-descriptor.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-fassociate.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-fassociate.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-fassociate.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-fassociate.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-fclose-in-checkpoint.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-fclose-in-checkpoint.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-fclose-in-checkpoint.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-fclose-in-checkpoint.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-fcreate-basementnodesize.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-fcreate-basementnodesize.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-fcreate-basementnodesize.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-fcreate-basementnodesize.cc 2014-10-08 13:19:52.000000000 +0000 @@ -30,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-fcreate-fclose.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-fcreate-fclose.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-fcreate-fclose.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-fcreate-fclose.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-fcreate-fdelete.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-fcreate-fdelete.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-fcreate-fdelete.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-fcreate-fdelete.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-fcreate-nodesize.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-fcreate-nodesize.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-fcreate-nodesize.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-fcreate-nodesize.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-fcreate-xabort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-fcreate-xabort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-fcreate-xabort.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-fcreate-xabort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-flt10.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-flt10.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-flt10.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-flt10.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-flt1.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-flt1.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-flt1.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-flt1.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-flt2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-flt2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-flt2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-flt2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-flt3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-flt3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-flt3.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-flt3.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-flt4.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-flt4.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-flt4.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-flt4.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-flt5.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-flt5.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-flt5.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-flt5.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-flt6.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-flt6.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-flt6.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-flt6.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-flt7.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-flt7.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-flt7.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-flt7.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-flt8.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-flt8.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-flt8.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-flt8.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-flt9.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-flt9.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-flt9.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-flt9.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-fopen-checkpoint-fclose.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-fopen-checkpoint-fclose.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-fopen-checkpoint-fclose.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-fopen-checkpoint-fclose.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-fopen-fclose-checkpoint.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-fopen-fclose-checkpoint.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-fopen-fclose-checkpoint.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-fopen-fclose-checkpoint.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-fopen-fdelete-checkpoint-fcreate.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-fopen-fdelete-checkpoint-fcreate.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-fopen-fdelete-checkpoint-fcreate.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-fopen-fdelete-checkpoint-fcreate.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-hotindexer-simple-abort-put.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-hotindexer-simple-abort-put.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-hotindexer-simple-abort-put.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-hotindexer-simple-abort-put.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-loader-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-loader-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-loader-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-loader-test.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-lsn-filter.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-lsn-filter.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-lsn-filter.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-lsn-filter.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-lsn-filter-multiple.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-lsn-filter-multiple.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-lsn-filter-multiple.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-lsn-filter-multiple.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-missing-dbfile-2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-missing-dbfile-2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-missing-dbfile-2.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-missing-dbfile-2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-missing-dbfile.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-missing-dbfile.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-missing-dbfile.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-missing-dbfile.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-missing-logfile.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-missing-logfile.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-missing-logfile.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-missing-logfile.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-put-multiple-abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-put-multiple-abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-put-multiple-abort.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-put-multiple-abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-put-multiple.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-put-multiple.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-put-multiple.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-put-multiple.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-put-multiple-fdelete-all.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-put-multiple-fdelete-all.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-put-multiple-fdelete-all.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-put-multiple-fdelete-all.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-put-multiple-fdelete-some.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-put-multiple-fdelete-some.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-put-multiple-fdelete-some.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-put-multiple-fdelete-some.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-put-multiple-srcdb-fdelete-all.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-put-multiple-srcdb-fdelete-all.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-put-multiple-srcdb-fdelete-all.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-put-multiple-srcdb-fdelete-all.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-rollback.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-rollback.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-rollback.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-rollback.cc 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,262 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +// Test dirty upgrade. +// Generate a rollback log that requires recovery. + +#include "test.h" + +// Insert max_rows key/val pairs into the db +static void do_inserts(DB_TXN *txn, DB *db, uint64_t max_rows, size_t val_size) { + char val_data[val_size]; memset(val_data, 0, val_size); + int r; + + for (uint64_t i = 0; i < max_rows; i++) { + // pick a sequential key but it does not matter for this test. 
+ uint64_t k[2] = { + htonl(i), random64(), + }; + + DBT key = { .data = k, .size = sizeof k }; + DBT val = { .data = val_data, .size = (uint32_t) val_size }; + r = db->put(db, txn, &key, &val, 0); + CKERR(r); + } +} + +static void run_test(uint64_t num_rows, size_t val_size, bool do_crash) { + int r; + + DB_ENV *env = nullptr; + r = db_env_create(&env, 0); + CKERR(r); + r = env->set_cachesize(env, 8, 0, 1); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + DB *db = nullptr; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + r = env->txn_checkpoint(env, 0, 0, 0); + CKERR(r); + + DB_TXN *txn = nullptr; + r = env->txn_begin(env, nullptr, &txn, 0); + CKERR(r); + + do_inserts(txn, db, num_rows, val_size); + + r = env->txn_checkpoint(env, 0, 0, 0); + CKERR(r); + + r = txn->commit(txn, 0); + CKERR(r); + + if (do_crash) + assert(0); // crash on purpose + + r = db->close(db, 0); + CKERR(r); + + r = env->close(env, 0); + CKERR(r); +} + +static void do_verify(DB_ENV *env, DB *db, uint64_t num_rows, size_t val_size UU()) { + int r; + DB_TXN *txn = nullptr; + r = env->txn_begin(env, nullptr, &txn, 0); + CKERR(r); + + DBC *c = nullptr; + r = db->cursor(db, txn, &c, 0); + CKERR(r); + + uint64_t i = 0; + while (1) { + DBT key = {}; + DBT val = {}; + r = c->c_get(c, &key, &val, DB_NEXT); + if (r == DB_NOTFOUND) + break; + CKERR(r); + assert(key.size == 16); + uint64_t k[2]; + memcpy(k, key.data, key.size); + assert(htonl(k[0]) == i); + assert(val.size == val_size); + i++; + } + assert(i == num_rows); + + r = c->c_close(c); + CKERR(r); + + r = txn->commit(txn, 0); + CKERR(r); +} + +static void run_recover(uint64_t num_rows, size_t val_size) { + int r; + + DB_ENV *env = nullptr; + r = db_env_create(&env, 0); + CKERR(r); + r = env->set_cachesize(env, 8, 0, 1); 
+ CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE | DB_RECOVER, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + DB *db = nullptr; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, 0, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + do_verify(env, db, num_rows, val_size); + + r = db->close(db, 0); + CKERR(r); + + r = env->close(env, 0); + CKERR(r); +} + +int test_main (int argc, char *const argv[]) { + bool do_test = false; + bool do_recover = false; + bool do_crash = true; + uint64_t num_rows = 1; + size_t val_size = 1; + + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-v") == 0) { + verbose++; + continue; + } + if (strcmp(argv[i], "-q") == 0) { + if (verbose > 0) verbose--; + continue; + } + if (strcmp(argv[i], "--test") == 0) { + do_test = true; + continue; + } + if (strcmp(argv[i], "--recover") == 0) { + do_recover = true; + continue; + } + if (strcmp(argv[i], "--crash") == 0 && i+1 < argc) { + do_crash = atoi(argv[++i]); + continue; + } + } + if (do_test) { + // init the env directory + toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + run_test(num_rows, val_size, do_crash); + } + if (do_recover) { + run_recover(num_rows, val_size); + } + + return 0; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-rollinclude.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-rollinclude.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-rollinclude.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-rollinclude.cc 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,274 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; 
you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. 
+ +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
+#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +// Create a rollback log with a rollinclude log entry, crash after the txn commits and before the last checkpoint. +// Recovery crashes 7.1.0, should succeed. + +#include "test.h" + +// Insert max_rows key/val pairs into the db + +// We want to force a rollinclude so we use a child transaction and insert enough rows so that it spills. +// It spills at about 144K and 289K rows. +static void do_inserts(DB_ENV *env, DB *db, uint64_t max_rows, size_t val_size) { + char val_data[val_size]; memset(val_data, 0, val_size); + int r; + DB_TXN *parent = nullptr; + r = env->txn_begin(env, nullptr, &parent, 0); + CKERR(r); + + DB_TXN *child = nullptr; + r = env->txn_begin(env, parent, &child, 0); + CKERR(r); + + for (uint64_t i = 0; i < max_rows; i++) { + // pick a sequential key but it does not matter for this test. 
+ uint64_t k[2] = { + htonl(i), random64(), + }; + + DBT key = { .data = k, .size = sizeof k }; + DBT val = { .data = val_data, .size = (uint32_t) val_size }; + r = db->put(db, child, &key, &val, 0); + CKERR(r); + + if (i == max_rows-1) { + r = child->commit(child, 0); + CKERR(r); + + r = env->txn_checkpoint(env, 0, 0, 0); + CKERR(r); + } + } + + r = parent->commit(parent, 0); + CKERR(r); +} + +static void run_test(uint64_t num_rows, size_t val_size, bool do_crash) { + int r; + + DB_ENV *env = nullptr; + r = db_env_create(&env, 0); + CKERR(r); + r = env->set_cachesize(env, 8, 0, 1); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + DB *db = nullptr; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + r = env->txn_checkpoint(env, 0, 0, 0); + CKERR(r); + + do_inserts(env, db, num_rows, val_size); + + if (do_crash) + assert(0); // crash on purpose + + r = db->close(db, 0); + CKERR(r); + + r = env->close(env, 0); + CKERR(r); +} + +static void do_verify(DB_ENV *env, DB *db, uint64_t num_rows, size_t val_size UU()) { + int r; + DB_TXN *txn = nullptr; + r = env->txn_begin(env, nullptr, &txn, 0); + CKERR(r); + + DBC *c = nullptr; + r = db->cursor(db, txn, &c, 0); + CKERR(r); + + uint64_t i = 0; + while (1) { + DBT key = {}; + DBT val = {}; + r = c->c_get(c, &key, &val, DB_NEXT); + if (r == DB_NOTFOUND) + break; + CKERR(r); + assert(key.size == 16); + uint64_t k[2]; + memcpy(k, key.data, key.size); + assert(htonl(k[0]) == i); + assert(val.size == val_size); + i++; + } + assert(i == num_rows); + + r = c->c_close(c); + CKERR(r); + + r = txn->commit(txn, 0); + CKERR(r); +} + +static void run_recover(uint64_t num_rows, size_t val_size) { + int r; + + DB_ENV *env = nullptr; + r = db_env_create(&env, 0); + CKERR(r); + r = env->set_cachesize(env, 8, 0, 1); + 
CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE | DB_RECOVER, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + DB *db = nullptr; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, 0, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + do_verify(env, db, num_rows, val_size); + + r = db->close(db, 0); + CKERR(r); + + r = env->close(env, 0); + CKERR(r); +} + +int test_main (int argc, char *const argv[]) { + bool do_test = false; + bool do_recover = false; + bool do_crash = true; + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-v") == 0) { + verbose++; + continue; + } + if (strcmp(argv[i], "-q") == 0) { + if (verbose > 0) verbose--; + continue; + } + if (strcmp(argv[i], "--test") == 0) { + do_test = true; + continue; + } + if (strcmp(argv[i], "--recover") == 0) { + do_recover = true; + continue; + } + if (strcmp(argv[i], "--crash") == 0 && i+1 < argc) { + do_crash = atoi(argv[++i]); + continue; + } + } + + uint64_t num_rows = 300000; + size_t val_size = 1; + + if (do_test) { + // init the env directory + toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + run_test(num_rows, val_size, do_crash); + } + if (do_recover) { + run_recover(num_rows, val_size); + } + + return 0; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-split-checkpoint.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-split-checkpoint.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-split-checkpoint.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-split-checkpoint.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-straddle-txn.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-straddle-txn.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-straddle-txn.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-straddle-txn.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-straddle-txn-nested.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-straddle-txn-nested.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-straddle-txn-nested.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-straddle-txn-nested.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-tablelock.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-tablelock.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-tablelock.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-tablelock.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-test1.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-test1.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-test1.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-test1.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-test2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-test2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-test2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-test2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,7 +94,6 @@ #include "test.h" -const int envflags = DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE; const char *namea="a.db"; DB_ENV *env; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-test3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-test3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-test3.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-test3.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -94,7 +94,6 @@ #include "test.h" -const int envflags = DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE; const char *namea="a.db"; DB_ENV *env; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-test_crash_in_flusher_thread.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-test_crash_in_flusher_thread.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-test_crash_in_flusher_thread.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-test_crash_in_flusher_thread.h 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -85,8 +85,11 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "$Id$" + #include "test.h" #include diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-test-logsuppress.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-test-logsuppress.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-test-logsuppress.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-test-logsuppress.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-test-logsuppress-put.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-test-logsuppress-put.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-test-logsuppress-put.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-test-logsuppress-put.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-test_stress1.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-test_stress1.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-test_stress1.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-test_stress1.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-test_stress2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-test_stress2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-test_stress2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-test_stress2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-test_stress3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-test_stress3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-test_stress3.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-test_stress3.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-test_stress_openclose.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-test_stress_openclose.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-test_stress_openclose.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-test_stress_openclose.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_aborts_before_checkpoint.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_aborts_before_checkpoint.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_aborts_before_checkpoint.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_aborts_before_checkpoint.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_aborts_before_close.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_aborts_before_close.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_aborts_before_close.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_aborts_before_close.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_aborts.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_aborts.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_aborts.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_aborts.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts3.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts3.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts_before_checkpoint.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts_before_checkpoint.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts_before_checkpoint.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts_before_checkpoint.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts_before_close.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts_before_close.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts_before_close.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts_before_close.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values3.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values3.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values_before_checkpoint.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values_before_checkpoint.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values_before_checkpoint.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values_before_checkpoint.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values_before_close.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values_before_close.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values_before_close.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values_before_close.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_changes_values_before_checkpoint.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_changes_values_before_checkpoint.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_changes_values_before_checkpoint.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_changes_values_before_checkpoint.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_changes_values_before_close.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_changes_values_before_close.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_changes_values_before_close.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_changes_values_before_close.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_changes_values.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_changes_values.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update_changes_values.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update_changes_values.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update-multiple-abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update-multiple-abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update-multiple-abort.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update-multiple-abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update-multiple.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update-multiple.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-update-multiple.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-update-multiple.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-upgrade-db-descriptor.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-upgrade-db-descriptor.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-upgrade-db-descriptor.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-upgrade-db-descriptor.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-upgrade-db-descriptor-multihandle.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-upgrade-db-descriptor-multihandle.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-upgrade-db-descriptor-multihandle.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-upgrade-db-descriptor-multihandle.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-x1-abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-x1-abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-x1-abort.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-x1-abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-x1-commit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-x1-commit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-x1-commit.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-x1-commit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-x1-nested-abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-x1-nested-abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-x1-nested-abort.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-x1-nested-abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-x1-nested-commit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-x1-nested-commit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-x1-nested-commit.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-x1-nested-commit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-x2-abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-x2-abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-x2-abort.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-x2-abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-x2-commit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-x2-commit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recover-x2-commit.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recover-x2-commit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recovery_fileops_stress.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recovery_fileops_stress.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recovery_fileops_stress.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recovery_fileops_stress.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -104,7 +104,6 @@ DB* states; static const int percent_do_op = 20; static const int percent_do_abort = 25; -static const int commit_abort_ratio = 3; static const int start_crashing_iter = 10; // iterations_per_crash_in_recovery should be an odd number; static const int iterations_per_crash_in_recovery = 7; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recovery_fileops_unit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recovery_fileops_unit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recovery_fileops_unit.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recovery_fileops_unit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recovery_stress.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recovery_stress.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/recovery_stress.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/recovery_stress.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/redirect.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/redirect.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/redirect.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/redirect.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/replace-into-write-lock.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/replace-into-write-lock.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/replace-into-write-lock.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/replace-into-write-lock.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/root_fifo_1.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/root_fifo_1.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/root_fifo_1.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/root_fifo_1.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/root_fifo_2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/root_fifo_2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/root_fifo_2.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/root_fifo_2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/root_fifo_31.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/root_fifo_31.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/root_fifo_31.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/root_fifo_31.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/root_fifo_32.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/root_fifo_32.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/root_fifo_32.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/root_fifo_32.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/root_fifo_41.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/root_fifo_41.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/root_fifo_41.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/root_fifo_41.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/rowsize.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/rowsize.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/rowsize.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/rowsize.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/run_test1426.sh mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/run_test1426.sh --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/run_test1426.sh 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/run_test1426.sh 1970-01-01 00:00:00.000000000 +0000 @@ -1,19 +0,0 @@ -#!/usr/bin/env bash - -set -e - -test $# -ge 4 - -tdbbin=$1; shift -bdbbin=$1; shift -tdbenv=$1; shift -bdbenv=$1; shift -tdbdump=$1; shift -bdbdump=$1; shift - -TOKU_TEST_FILENAME=$bdbenv $bdbbin -$bdbdump -p -h $bdbenv main > dump.bdb.1426 - -TOKU_TEST_FILENAME=$tdbenv $tdbbin -$tdbdump -x -p -h $tdbenv main > dump.tdb.1426 -diff -I db_pagesize=4096 dump.bdb.1426 dump.tdb.1426 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/seqinsert.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/seqinsert.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/seqinsert.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/seqinsert.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/shutdown-3344.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/shutdown-3344.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/shutdown-3344.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/shutdown-3344.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/simple.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/simple.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/simple.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/simple.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/stat64.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/stat64.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/stat64.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/stat64.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/stat64-create-modify-times.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/stat64-create-modify-times.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/stat64-create-modify-times.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/stat64-create-modify-times.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/stat64-null-txn.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/stat64-null-txn.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/stat64-null-txn.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/stat64-null-txn.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/stat64-root-changes.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/stat64-root-changes.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/stat64-root-changes.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/stat64-root-changes.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/stress-gc2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/stress-gc2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/stress-gc2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/stress-gc2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/stress-gc.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/stress-gc.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/stress-gc.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/stress-gc.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/stress_openclose.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/stress_openclose.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/stress_openclose.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/stress_openclose.h 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -85,6 +85,8 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "$Id$" diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/stress-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/stress-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/stress-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/stress-test.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test1572.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test1572.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test1572.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test1572.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -92,7 +92,7 @@ /* Is it feasible to run 4 billion transactions in one test in the regression tests? */ #include #include -#include +#include #include static void diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test1753.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test1753.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test1753.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test1753.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test1842.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test1842.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test1842.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test1842.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test3039.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test3039.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test3039.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test3039.cc 2014-10-08 13:19:52.000000000 +0000 @@ -36,7 +36,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test3219.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test3219.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test3219.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test3219.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test3522b.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test3522b.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test3522b.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test3522b.cc 2014-10-08 13:19:52.000000000 +0000 @@ -36,7 +36,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test3522.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test3522.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test3522.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test3522.cc 2014-10-08 13:19:52.000000000 +0000 @@ -35,7 +35,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test3529.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test3529.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test3529.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test3529.cc 2014-10-08 13:19:52.000000000 +0000 @@ -40,7 +40,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_3529_insert_2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_3529_insert_2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_3529_insert_2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_3529_insert_2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_3529_table_lock.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_3529_table_lock.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_3529_table_lock.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_3529_table_lock.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_3645.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_3645.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_3645.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_3645.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_3755.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_3755.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_3755.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_3755.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_4015.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_4015.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_4015.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_4015.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_4368.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_4368.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_4368.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_4368.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test4573-logtrim.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test4573-logtrim.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test4573-logtrim.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test4573-logtrim.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_4657.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_4657.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_4657.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_4657.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_5015.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_5015.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_5015.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_5015.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test5092.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test5092.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test5092.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test5092.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test-5138.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test-5138.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test-5138.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test-5138.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_5469.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_5469.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_5469.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_5469.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_789.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_789.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_789.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_789.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_935.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_935.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_935.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_935.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test938b.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test938b.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test938b.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test938b.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test938.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test938.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test938.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test938.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test938c.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test938c.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test938c.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test938c.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -106,7 +106,7 @@ // add (1,101) to the tree // In another concurrent txn // look up (1,102) and do DB_NEXT - // That should be fine in TokuDB. + // That should be fine in TokuFT. // It fails before #938 is fixed. // It also fails for BDB for other reasons (page-level locking vs. row-level locking) { diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_abort1.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_abort1.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_abort1.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_abort1.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +89,7 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. 
All rights reserved." #include "test.h" -/* Simple test of logging. Can I start a TokuDB with logging enabled? */ +/* Simple test of logging. Can I start TokuFT with logging enabled? */ #include #include diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_abort2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_abort2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_abort2.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_abort2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_abort3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_abort3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_abort3.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_abort3.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_abort4.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_abort4.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_abort4.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_abort4.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_abort5.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_abort5.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_abort5.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_abort5.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_abort_delete_first.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_abort_delete_first.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_abort_delete_first.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_abort_delete_first.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_archive0.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_archive0.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_archive0.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_archive0.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_archive1.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_archive1.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_archive1.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_archive1.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_archive2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_archive2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_archive2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_archive2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_bad_implicit_promotion.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_bad_implicit_promotion.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_bad_implicit_promotion.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_bad_implicit_promotion.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2014 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_blobs_leaf_split.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_blobs_leaf_split.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_blobs_leaf_split.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_blobs_leaf_split.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_bulk_fetch.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_bulk_fetch.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_bulk_fetch.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_bulk_fetch.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cachesize.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cachesize.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cachesize.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cachesize.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cmp_descriptor.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cmp_descriptor.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cmp_descriptor.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cmp_descriptor.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_compression_methods.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_compression_methods.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_compression_methods.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_compression_methods.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_3.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_3.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_db_current.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_db_current.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_db_current.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_db_current.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_DB_NEXT_no_dup.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_DB_NEXT_no_dup.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_DB_NEXT_no_dup.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_DB_NEXT_no_dup.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_delete2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_delete2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_delete2.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_delete2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_flags.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_flags.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_flags.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_flags.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_interrupt.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_interrupt.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_interrupt.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_interrupt.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_nonleaf_expand.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_nonleaf_expand.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_nonleaf_expand.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_nonleaf_expand.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_null.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_null.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_null.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_null.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_stickyness.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_stickyness.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_stickyness.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_stickyness.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_with_read_txn.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_with_read_txn.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_cursor_with_read_txn.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_cursor_with_read_txn.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_already_exists.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_already_exists.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_already_exists.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_already_exists.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_change_pagesize.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_change_pagesize.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_change_pagesize.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_change_pagesize.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_change_xxx.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_change_xxx.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_change_xxx.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_change_xxx.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_close_no_open.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_close_no_open.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_close_no_open.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_close_no_open.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_current_clobbers_db.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_current_clobbers_db.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_current_clobbers_db.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_current_clobbers_db.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_dbt_mem_behavior.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_dbt_mem_behavior.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_dbt_mem_behavior.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_dbt_mem_behavior.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_delete.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_delete.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_delete.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_delete.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_descriptor.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_descriptor.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_descriptor.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_descriptor.cc 2014-10-08 13:19:52.000000000 +0000 @@ -30,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_env_open_close.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_env_open_close.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_env_open_close.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_env_open_close.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_env_open_nocreate.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_env_open_nocreate.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_env_open_nocreate.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_env_open_nocreate.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -132,7 +132,7 @@ r = db_env_create(&dbenv, 0); CKERR(r); r = dbenv->open(dbenv, TOKU_TEST_FILENAME, private_flags|DB_INIT_MPOOL, 0); - // TokuDB has no trouble opening an environment if the directory exists. + // TokuFT has no trouble opening an environment if the directory exists. 
CKERR(r); assert(r==0); dbenv->close(dbenv,0); // free memory diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_env_open_open_close.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_env_open_open_close.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_env_open_open_close.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_env_open_open_close.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_env_set_errpfx.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_env_set_errpfx.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_env_set_errpfx.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_env_set_errpfx.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_env_set_lg_dir.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_env_set_lg_dir.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_env_set_lg_dir.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_env_set_lg_dir.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_env_set_tmp_dir.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_env_set_tmp_dir.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_env_set_tmp_dir.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_env_set_tmp_dir.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_env_strdup_null.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_env_strdup_null.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_env_strdup_null.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_env_strdup_null.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_get_put_flags.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_get_put_flags.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_get_put_flags.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_get_put_flags.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_named_delete_last.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_named_delete_last.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_named_delete_last.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_named_delete_last.cc 2014-10-08 13:19:52.000000000 +0000 @@ -30,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_no_env.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_no_env.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_no_env.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_no_env.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_open_notexist_reopen.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_open_notexist_reopen.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_open_notexist_reopen.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_open_notexist_reopen.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +89,7 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "test.h" -/* Simple test of logging. Can I start a TokuDB with logging enabled? */ +/* Simple test of logging. 
Can I start TokuFT with logging enabled? */ #include #include diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_remove.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_remove.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_remove.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_remove.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_remove_subdb.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_remove_subdb.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_remove_subdb.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_remove_subdb.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_set_flags.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_set_flags.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_set_flags.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_set_flags.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_subdb.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_subdb.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_subdb.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_subdb.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_subdb_different_flags.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_subdb_different_flags.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_subdb_different_flags.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_subdb_different_flags.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_txn_locks_nonheaviside.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_txn_locks_nonheaviside.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_txn_locks_nonheaviside.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_txn_locks_nonheaviside.cc 2014-10-08 13:19:52.000000000 +0000 @@ -30,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_txn_locks_read_uncommitted.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_txn_locks_read_uncommitted.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_txn_locks_read_uncommitted.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_txn_locks_read_uncommitted.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_version.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_version.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_db_version.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_db_version.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_env_close_flags.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_env_close_flags.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_env_close_flags.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_env_close_flags.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_env_create_db_create.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_env_create_db_create.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_env_create_db_create.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_env_create_db_create.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_env_open_flags.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_env_open_flags.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_env_open_flags.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_env_open_flags.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_equal_keys_with_different_bytes.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_equal_keys_with_different_bytes.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_equal_keys_with_different_bytes.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_equal_keys_with_different_bytes.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2014 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_error.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_error.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_error.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_error.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_forkjoin.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_forkjoin.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_forkjoin.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_forkjoin.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_get_max_row_size.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_get_max_row_size.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_get_max_row_size.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_get_max_row_size.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_get_zeroed_dbt.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_get_zeroed_dbt.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_get_zeroed_dbt.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_get_zeroed_dbt.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_groupcommit_count.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_groupcommit_count.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_groupcommit_count.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_groupcommit_count.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_groupcommit_perf.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_groupcommit_perf.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_groupcommit_perf.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_groupcommit_perf.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test.h 2014-10-08 13:19:52.000000000 +0000 @@ -2,10 +2,6 @@ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: #ident "$Id$" -#ifndef __TEST_H -#define __TEST_H - - /* COPYING CONDITIONS NOTICE: @@ -34,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +87,10 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." + #include #include @@ -495,15 +494,8 @@ { int chk_r = (txn)->abort(txn); CKERR(chk_r); } \ }) - -int test_main (int argc, char * const argv[]); -int -#if defined(__cilkplusplus) -cilk_main(int argc, char *argv[]) -#else -main(int argc, char * const argv[]) -#endif -{ +int test_main(int argc, char *const argv[]); +int main(int argc, char *const argv[]) { int r; toku_os_initialize_settings(1); r = test_main(argc, argv); @@ -513,5 +505,3 @@ #ifndef DB_GID_SIZE #define DB_GID_SIZE DB_XIDDATASIZE #endif - -#endif // __TEST_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_hsoc.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_hsoc.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_hsoc.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_hsoc.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_insert_cursor_delete_insert.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_insert_cursor_delete_insert.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_insert_cursor_delete_insert.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_insert_cursor_delete_insert.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_insert_many_gc.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_insert_many_gc.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_insert_many_gc.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_insert_many_gc.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2014 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_insert_memleak.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_insert_memleak.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_insert_memleak.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_insert_memleak.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_insert_unique.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_insert_unique.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_insert_unique.cc 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_insert_unique.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_iterate_live_transactions.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_iterate_live_transactions.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_iterate_live_transactions.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_iterate_live_transactions.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_iterate_pending_lock_requests.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_iterate_pending_lock_requests.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_iterate_pending_lock_requests.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_iterate_pending_lock_requests.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_keylen_diff.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_keylen_diff.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_keylen_diff.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_keylen_diff.cc 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,284 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#include "test.h" + +// test a comparison function that treats certain different-lengthed keys as equal + +struct packed_key { + char type; + char k[8]; + static packed_key as_int(int v) { + packed_key k; + k.type = 0; + memcpy(k.k, &v, sizeof(int)); + return k; + } + static packed_key as_double(double v) { + packed_key k; + k.type = 1; + memcpy(k.k, &v, sizeof(double)); + return k; + } + size_t size() const { + assert(type == 0 || type == 1); + return type == 0 ? 5 : 9; + } +}; + +// the point is that keys can be packed as integers or doubles, but +// we'll treat them both as doubles for the sake of comparison. +// this means a 4 byte number could equal an 8 byte number. +static int packed_key_cmp(DB *UU(db), const DBT *a, const DBT *b) { + assert(a->size == 5 || a->size == 9); + assert(b->size == 5 || b->size == 9); + char *k1 = reinterpret_cast(a->data); + char *k2 = reinterpret_cast(b->data); + assert(*k1 == 0 || *k1 == 1); + assert(*k2 == 0 || *k2 == 1); + double v1 = *k1 == 0 ? static_cast(*reinterpret_cast(k1 + 1)) : + *reinterpret_cast(k1 + 1); + double v2 = *k2 == 0 ? 
static_cast(*reinterpret_cast(k2 + 1)) : + *reinterpret_cast(k2 + 1); + if (v1 > v2) { + return 1; + } else if (v1 < v2) { + return -1; + } else { + return 0; + } +} + +static int update_callback(DB *UU(db), const DBT *UU(key), const DBT *old_val, const DBT *extra, + void (*set_val)(const DBT *new_val, void *setval_extra), void *setval_extra) { + assert(extra != nullptr); + assert(old_val != nullptr); + assert(extra->size == 0); + assert(old_val->size == 0); + if (extra->data == nullptr) { + set_val(nullptr, setval_extra); + } else { + DBT new_val; + char empty_v; + dbt_init(&new_val, &empty_v, 0); + set_val(&new_val, setval_extra); + } + return 0; +} + +enum overwrite_method { + VIA_UPDATE_OVERWRITE_BROADCAST, + VIA_UPDATE_DELETE_BROADCAST, + VIA_UPDATE_OVERWRITE, + VIA_UPDATE_DELETE, + VIA_DELETE, + VIA_INSERT, + NUM_OVERWRITE_METHODS +}; + +static void test_keylen_diff(enum overwrite_method method, bool control_test) { + int r; + + DB_ENV *env; + r = db_env_create(&env, 0); CKERR(r); + r = env->set_default_bt_compare(env, packed_key_cmp); CKERR(r); + env->set_update(env, update_callback); CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, DB_CREATE+DB_PRIVATE+DB_INIT_MPOOL+DB_INIT_TXN, 0); CKERR(r); + + DB *db; + r = db_create(&db, env, 0); CKERR(r); + r = db->set_pagesize(db, 16 * 1024); // smaller pages so we get a more lush tree + r = db->set_readpagesize(db, 1 * 1024); // smaller basements so we get more per leaf + r = db->open(db, nullptr, "db", nullptr, DB_BTREE, DB_CREATE, 0666); CKERR(r); + + DBT null_dbt, empty_dbt; + char empty_v; + dbt_init(&empty_dbt, &empty_v, 0); + dbt_init(&null_dbt, nullptr, 0); + + const int num_keys = 256 * 1000; + + for (int i = 0; i < num_keys; i++) { + // insert it using a 4 byte key .. 
+ packed_key key = packed_key::as_int(i); + + DBT dbt; + dbt_init(&dbt, &key, key.size()); + r = db->put(db, nullptr, &dbt, &empty_dbt, 0); CKERR(r); + } + + // overwrite keys randomly, so we induce flushes and get better / realistic coverage + int *XMALLOC_N(num_keys, shuffled_keys); + for (int i = 0; i < num_keys; i++) { + shuffled_keys[i] = i; + } + for (int i = num_keys - 1; i >= 1; i--) { + long rnd = random64() % (i + 1); + int tmp = shuffled_keys[rnd]; + shuffled_keys[rnd] = shuffled_keys[i]; + shuffled_keys[i] = tmp; + } + + for (int i = 0; i < num_keys; i++) { + // for the control test, delete it using the same length key + // + // .. otherwise, delete it with an 8 byte key + packed_key key = control_test ? packed_key::as_int(shuffled_keys[i]) : + packed_key::as_double(shuffled_keys[i]); + + DBT dbt; + dbt_init(&dbt, &key, key.size()); + DB_TXN *txn; + env->txn_begin(env, nullptr, &txn, DB_TXN_NOSYNC); CKERR(r); + switch (method) { + case VIA_INSERT: { + r = db->put(db, txn, &dbt, &empty_dbt, 0); CKERR(r); + break; + } + case VIA_DELETE: { + // we purposefully do not pass DB_DELETE_ANY because the hidden query acts as + // a sanity check for the control test and, overall, gives better code coverage + r = db->del(db, txn, &dbt, 0); CKERR(r); + break; + } + case VIA_UPDATE_OVERWRITE: + case VIA_UPDATE_DELETE: { + r = db->update(db, txn, &dbt, method == VIA_UPDATE_DELETE ? &null_dbt : &empty_dbt, 0); CKERR(r); + break; + } + case VIA_UPDATE_OVERWRITE_BROADCAST: + case VIA_UPDATE_DELETE_BROADCAST: { + r = db->update_broadcast(db, txn, method == VIA_UPDATE_DELETE_BROADCAST ? 
&null_dbt : &empty_dbt, 0); CKERR(r); + if (i > 1 ) { // only need to test broadcast twice - one with abort, one without + txn->abort(txn); // we opened a txn so we should abort it before exiting + goto done; + } + break; + } + default: { + assert(false); + } + } + const bool abort = i % 2 == 0; + if (abort) { + txn->abort(txn); + } else { + txn->commit(txn, 0); + } + } + +done: + toku_free(shuffled_keys); + + // optimize before close to ensure that all messages are applied and any potential bugs are exposed + r = db->optimize(db); + r = db->close(db, 0); CKERR(r); + r = env->close(env, 0); CKERR(r); +} + +int +test_main(int argc, char *const argv[]) { + parse_args(argc, argv); + + toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); + + for (int i = 0; i < NUM_OVERWRITE_METHODS; i++) { + enum overwrite_method method = static_cast(i); + + // control test - must pass for the 'real' test below to be interesting + printf("testing method %d (control)\n", i); + test_keylen_diff(method, true); + + // real test, actually mixes key lengths + printf("testing method %d (real)\n", i); + test_keylen_diff(method, false); + } + + return 0; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_kv_gen.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_kv_gen.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_kv_gen.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_kv_gen.h 2014-10-08 13:19:52.000000000 +0000 @@ -2,10 +2,6 @@ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: #ident "$Id$" -#ifndef __TEST_KV_GEN_H -#define __TEST_KV_GEN_H - - /* COPYING CONDITIONS NOTICE: @@ -34,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,6 +87,8 @@ under this License. 
*/ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "test.h" @@ -279,6 +277,3 @@ } return 0; } - - -#endif // __TEST_KV_GEN_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_kv_limits.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_kv_limits.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_kv_limits.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_kv_limits.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_large_update_broadcast_small_cachetable.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_large_update_broadcast_small_cachetable.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_large_update_broadcast_small_cachetable.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_large_update_broadcast_small_cachetable.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_locking_with_read_txn.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_locking_with_read_txn.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_locking_with_read_txn.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_locking_with_read_txn.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_lock_timeout_callback.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_lock_timeout_callback.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_lock_timeout_callback.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_lock_timeout_callback.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_locktree_close.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_locktree_close.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_locktree_close.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_locktree_close.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log0.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log0.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log0.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log0.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +89,7 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "test.h" -/* Simple test of logging. Can I start a TokuDB with logging enabled? */ +/* Simple test of logging. Can I start TokuFT with logging enabled? 
*/ #include #include diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log10.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log10.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log10.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log10.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ /* This test_log10 inserts to a db, closes, reopens, and inserts more to db. We want to make sure that the recovery of the buffers works. */ /* Lots of stuff gets inserted. */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log1_abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log1_abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log1_abort.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log1_abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log1.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log1.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log1.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log1.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +89,7 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "test.h" -/* Simple test of logging. Can I start a TokuDB with logging enabled? */ +/* Simple test of logging. Can I start TokuFT with logging enabled? */ #include #include diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log2_abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log2_abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log2_abort.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log2_abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ /* Like test_log2 except abort. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ #include diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. 
*/ #include diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log3_abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log3_abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log3_abort.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log3_abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log3.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log3.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ #include diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log4_abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log4_abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log4_abort.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log4_abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log4.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log4.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log4.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log4.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ #include diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log5_abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log5_abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log5_abort.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log5_abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log5.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log5.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log5.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log5.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. 
It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ #include diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log6a_abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log6a_abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log6a_abort.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log6a_abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log6_abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log6_abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log6_abort.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log6_abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log6.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log6.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log6.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log6.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. 
*/ #include diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log7.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log7.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log7.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log7.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ /* This test_log7 is like test_log5 except maxcount is larger. */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log8.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log8.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log8.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log8.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ /* This test_log8 inserts to a db, closes, reopens, and inserts more to db. We want to make sure that the recovery of the buffers works. 
*/ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log9.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log9.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_log9.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_log9.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ /* This test_log8 inserts to a db, closes, reopens, and inserts more to db. We want to make sure that the recovery of the buffers works. */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_logflush.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_logflush.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_logflush.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_logflush.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_logmax.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_logmax.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_logmax.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_logmax.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_memcmp_magic.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_memcmp_magic.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_memcmp_magic.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_memcmp_magic.cc 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,219 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#include "test.h" + +#include "util/dbt.h" + +static void test_memcmp_magic(void) { + int r; + + DB_ENV *env; + r = db_env_create(&env, 0); CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, DB_CREATE+DB_PRIVATE+DB_INIT_MPOOL+DB_INIT_TXN, 0); CKERR(r); + + DB *db; + r = db_create(&db, env, 0); CKERR(r); + + // Can't set the memcmp magic to 0 (since it's used as a sentinel for `none') + r = db->set_memcmp_magic(db, 0); CKERR2(r, EINVAL); + + // Should be ok to set it more than once, even to different things, before opening. + r = db->set_memcmp_magic(db, 1); CKERR(r); + r = db->set_memcmp_magic(db, 2); CKERR(r); + r = db->open(db, NULL, "db", "db", DB_BTREE, DB_CREATE, 0666); CKERR(r); + + // Can't set the memcmp magic after opening. + r = db->set_memcmp_magic(db, 0); CKERR2(r, EINVAL); + r = db->set_memcmp_magic(db, 1); CKERR2(r, EINVAL); + + DB *db2; + r = db_create(&db2, env, 0); CKERR(r); + r = db2->set_memcmp_magic(db2, 3); CKERR(r); // ..we can try setting it to something different + // ..but it should fail to open + r = db2->open(db2, NULL, "db", "db", DB_BTREE, DB_CREATE, 0666); CKERR2(r, EINVAL); + r = db2->set_memcmp_magic(db2, 2); CKERR(r); + r = db2->open(db2, NULL, "db", "db", DB_BTREE, DB_CREATE, 0666); CKERR(r); + + r = db2->close(db2, 0); + r = db->close(db, 0); CKERR(r); + + // dbremove opens its own handle internally. 
ensure that the open + // operation succeeds (and so does dbremove) despite the fact the + // internal open does not set the memcmp magic + r = env->dbremove(env, NULL, "db", "db", 0); CKERR(r); + r = env->close(env, 0); CKERR(r); +} + +static int comparison_function_unused(DB *UU(db), const DBT *UU(a), const DBT *UU(b)) { + // We're testing that the memcmp magic gets used so the real + // comparison function should never get called. + invariant(false); + return 0; +} + +static int getf_key_cb(const DBT *key, const DBT *UU(val), void *extra) { + DBT *dbt = reinterpret_cast(extra); + toku_clone_dbt(dbt, *key); + return 0; +} + +static void test_memcmp_magic_sort_order(void) { + int r; + + // Verify that randomly generated integer keys are sorted in memcmp + // order when packed as little endian, even with an environment-wide + // comparison function that sorts as though keys are big-endian ints. + + DB_ENV *env; + r = db_env_create(&env, 0); CKERR(r); + r = env->set_default_bt_compare(env, comparison_function_unused); CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, DB_CREATE+DB_PRIVATE+DB_INIT_MPOOL+DB_INIT_TXN, 0); CKERR(r); + + const int magic = 49; + + DB *db; + r = db_create(&db, env, 0); CKERR(r); + r = db->set_memcmp_magic(db, magic); CKERR(r); + r = db->open(db, NULL, "db", "db", DB_BTREE, DB_CREATE, 0666); CKERR(r); + + for (int i = 0; i < 10000; i++) { + char buf[1 + sizeof(int)]; + // Serialize key to first have the magic byte, then the little-endian key. 
+ int k = toku_htonl(random()); + buf[0] = magic; + memcpy(&buf[1], &k, sizeof(int)); + + DBT key; + dbt_init(&key, buf, sizeof(buf)); + r = db->put(db, NULL, &key, &key, 0); CKERR(r); + } + + DB_TXN *txn; + env->txn_begin(env, NULL, &txn, 0); + DBC *dbc; + db->cursor(db, txn, &dbc, 0); + DBT prev_dbt, curr_dbt; + memset(&curr_dbt, 0, sizeof(DBT)); + memset(&prev_dbt, 0, sizeof(DBT)); + while (dbc->c_getf_next(dbc, 0, getf_key_cb, &curr_dbt)) { + invariant(curr_dbt.size == sizeof(int)); + if (prev_dbt.data != NULL) { + // Each key should be >= to the last using memcmp + int c = memcmp(prev_dbt.data, curr_dbt.data, sizeof(int)); + invariant(c <= 0); + } + toku_destroy_dbt(&prev_dbt); + prev_dbt = curr_dbt; + } + toku_destroy_dbt(&curr_dbt); + toku_destroy_dbt(&prev_dbt); + dbc->c_close(dbc); + txn->commit(txn, 0); + + r = db->close(db, 0); CKERR(r); + + // dbremove opens its own handle internally. ensure that the open + // operation succeeds (and so does dbremove) despite the fact the + // internal open does not set the memcmp magic + r = env->dbremove(env, NULL, "db", "db", 0); CKERR(r); + r = env->close(env, 0); CKERR(r); +} + +int +test_main(int argc, char *const argv[]) { + parse_args(argc, argv); + + toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); + + test_memcmp_magic(); + test_memcmp_magic_sort_order(); + + return 0; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_mostly_seq.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_mostly_seq.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_mostly_seq.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_mostly_seq.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_multiple_checkpoints_block_commit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_multiple_checkpoints_block_commit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_multiple_checkpoints_block_commit.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_multiple_checkpoints_block_commit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_nested.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_nested.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_nested.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_nested.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test-nested-xopen-eclose.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test-nested-xopen-eclose.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test-nested-xopen-eclose.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test-nested-xopen-eclose.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_nodup_set.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_nodup_set.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_nodup_set.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_nodup_set.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test-prepare2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test-prepare2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test-prepare2.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test-prepare2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test-prepare3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test-prepare3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test-prepare3.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test-prepare3.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test-prepare.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test-prepare.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test-prepare.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test-prepare.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_query.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_query.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_query.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_query.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_rand_insert.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_rand_insert.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_rand_insert.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_rand_insert.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_read_txn_invalid_ops.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_read_txn_invalid_ops.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_read_txn_invalid_ops.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_read_txn_invalid_ops.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_redirect_func.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_redirect_func.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_redirect_func.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_redirect_func.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_restrict.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_restrict.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_restrict.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_restrict.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_reverse_compare_fun.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_reverse_compare_fun.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_reverse_compare_fun.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_reverse_compare_fun.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test-rollinclude.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test-rollinclude.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test-rollinclude.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test-rollinclude.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_set_func_malloc.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_set_func_malloc.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_set_func_malloc.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_set_func_malloc.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_simple_read_txn.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_simple_read_txn.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_simple_read_txn.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_simple_read_txn.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress0.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress0.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress0.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress0.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress1.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress1.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress1.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress1.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress3.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress3.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress4.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress4.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress4.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress4.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress5.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress5.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress5.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress5.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress6.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress6.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress6.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress6.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress7.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress7.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress7.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress7.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -108,7 +108,7 @@ // if (verbose) printf("starting creation of pthreads\n"); - const int num_threads = 4 + cli_args->num_update_threads + cli_args->num_ptquery_threads; + const int num_threads = 5 + cli_args->num_update_threads + cli_args->num_ptquery_threads; struct arg myargs[num_threads]; for (int i = 0; i < num_threads; i++) { arg_init(&myargs[i], dbp, env, cli_args); @@ -129,19 +129,21 @@ myargs[1].operation_extra = &soe[1]; myargs[1].operation = scan_op; - // make the guy that runs HOT in the background + // make the guys that run hot optimize, keyrange, and frag stats in the background myargs[2].operation = hot_op; myargs[3].operation = keyrange_op; + myargs[4].operation = frag_op; + myargs[4].sleep_ms = 100; struct update_op_args uoe = get_update_op_args(cli_args, NULL); // make the guy that updates the db - for (int i = 4; i < 4 + cli_args->num_update_threads; ++i) { + for (int i = 5; i < 5 + cli_args->num_update_threads; ++i) { myargs[i].operation_extra = &uoe; myargs[i].operation = update_op; } // make the guy that does point queries - for (int i = 4 + cli_args->num_update_threads; i < num_threads; i++) { + for (int i = 5 + cli_args->num_update_threads; i < num_threads; i++) { myargs[i].operation = ptquery_op; } run_workers(myargs, num_threads, cli_args->num_seconds, false, cli_args); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress_hot_indexing.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress_hot_indexing.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress_hot_indexing.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress_hot_indexing.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress_openclose.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress_openclose.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress_openclose.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress_openclose.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress_with_verify.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress_with_verify.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_stress_with_verify.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_stress_with_verify.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_thread_flags.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_thread_flags.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_thread_flags.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_thread_flags.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_thread_insert.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_thread_insert.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_thread_insert.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_thread_insert.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_transactional_descriptor.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_transactional_descriptor.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_transactional_descriptor.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_transactional_descriptor.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt3.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt3.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt4.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt4.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt4.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt4.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_abort5a.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_abort5a.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_abort5a.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_abort5a.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_abort5.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_abort5.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_abort5.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_abort5.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_abort6.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_abort6.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_abort6.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_abort6.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_abort7.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_abort7.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_abort7.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_abort7.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_begin_commit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_begin_commit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_begin_commit.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_begin_commit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_close_before_commit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_close_before_commit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_close_before_commit.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_close_before_commit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_close_before_prepare_commit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_close_before_prepare_commit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_close_before_prepare_commit.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_close_before_prepare_commit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_cursor_last.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_cursor_last.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_cursor_last.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_cursor_last.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_nested1.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_nested1.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_nested1.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_nested1.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -95,7 +95,7 @@ #include #include #include -#include +#include #define MAX_NEST MAX_NESTED_TRANSACTIONS diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_nested2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_nested2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_nested2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_nested2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -87,15 +87,18 @@ */ #ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." -#include "test.h" -#include +#include +#include #include #include #include #include -#include -#include + +#include "src/tests/test.h" + +#include + #define MAX_NEST MAX_TRANSACTION_RECORDS #define MAX_SIZE MAX_TRANSACTION_RECORDS diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_nested3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_nested3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_nested3.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_nested3.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -95,7 +95,7 @@ #include #include #include -#include +#include #define MAX_NEST MAX_TRANSACTION_RECORDS #define MAX_SIZE MAX_TRANSACTION_RECORDS diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_nested4.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_nested4.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_nested4.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_nested4.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,7 +95,7 @@ #include #include #include -#include +#include #define MAX_NEST MAX_TRANSACTION_RECORDS #define MAX_SIZE MAX_TRANSACTION_RECORDS diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_nested5.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_nested5.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_nested5.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_nested5.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -95,7 +95,7 @@ #include #include #include -#include +#include #define MAX_NEST MAX_TRANSACTION_RECORDS #define MAX_SIZE (MAX_TRANSACTION_RECORDS + 1) diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_nested_abort2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_nested_abort2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_nested_abort2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_nested_abort2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_nested_abort3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_nested_abort3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_nested_abort3.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_nested_abort3.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_nested_abort4.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_nested_abort4.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_nested_abort4.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_nested_abort4.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_nested_abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_nested_abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_nested_abort.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_nested_abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_recover3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_recover3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_txn_recover3.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_txn_recover3.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_unused_memory_crash.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_unused_memory_crash.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_unused_memory_crash.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_unused_memory_crash.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_abort_works.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_abort_works.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_abort_works.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_abort_works.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_abort_works.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_abort_works.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_abort_works.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_abort_works.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_calls_back.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_calls_back.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_calls_back.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_calls_back.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_can_delete_elements.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_can_delete_elements.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_can_delete_elements.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_can_delete_elements.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_changes_values.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_changes_values.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_changes_values.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_changes_values.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_indexer.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_indexer.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_indexer.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_indexer.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_loader.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_loader.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_loader.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_loader.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_nested_updates.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_nested_updates.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_nested_updates.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_nested_updates.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_previously_deleted.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_previously_deleted.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_previously_deleted.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_previously_deleted.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_stress.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_stress.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_stress.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_stress.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_update_fun_has_choices.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_update_fun_has_choices.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_update_fun_has_choices.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_update_fun_has_choices.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_with_empty_table.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_with_empty_table.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_broadcast_with_empty_table.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_broadcast_with_empty_table.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_calls_back.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_calls_back.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_calls_back.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_calls_back.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_can_delete_elements.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_can_delete_elements.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_can_delete_elements.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_can_delete_elements.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_changes_values.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_changes_values.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_changes_values.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_changes_values.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_nested_updates.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_nested_updates.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_nested_updates.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_nested_updates.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_nonexistent_keys.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_nonexistent_keys.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_nonexistent_keys.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_nonexistent_keys.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_previously_deleted.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_previously_deleted.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_previously_deleted.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_previously_deleted.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_updates_single_key.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_updates_single_key.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_updates_single_key.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_updates_single_key.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_stress.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_stress.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_stress.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_stress.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_txn_snapshot_works_concurrently.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_txn_snapshot_works_concurrently.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_txn_snapshot_works_concurrently.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_txn_snapshot_works_concurrently.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_txn_snapshot_works_correctly_with_deletes.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_txn_snapshot_works_correctly_with_deletes.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_txn_snapshot_works_correctly_with_deletes.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_txn_snapshot_works_correctly_with_deletes.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_with_empty_table.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_with_empty_table.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_update_with_empty_table.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_update_with_empty_table.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_weakxaction.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_weakxaction.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_weakxaction.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_weakxaction.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test-xa-prepare.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test-xa-prepare.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test-xa-prepare.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test-xa-prepare.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test-xopen-eclose.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test-xopen-eclose.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test-xopen-eclose.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test-xopen-eclose.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_zero_length_keys.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_zero_length_keys.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/test_zero_length_keys.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/test_zero_length_keys.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/threaded_stress_test_helpers.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/threaded_stress_test_helpers.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/threaded_stress_test_helpers.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/threaded_stress_test_helpers.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,6 +86,8 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." #ident "$Id$" @@ -102,9 +104,6 @@ // with keys in the range [0, table_size - 1] unless disperse_keys is true, // then the keys are scrambled up in the integer key space. -#ifndef _THREADED_STRESS_TEST_HELPERS_H_ -#define _THREADED_STRESS_TEST_HELPERS_H_ - #include "toku_config.h" #include "test.h" @@ -123,7 +122,7 @@ #include -#include +#include #include #include @@ -209,6 +208,7 @@ bool nocrashstatus; // do not print engine status upon crash bool prelock_updates; // update threads perform serial updates on a prelocked range bool disperse_keys; // spread the keys out during a load (by reversing the bits in the loop index) to make a wide tree we can spread out random inserts into + bool memcmp_keys; // pack keys big endian and use the builtin key comparison function in the fractal tree bool direct_io; // use direct I/O const char *print_engine_status; // print engine status rows matching a simple regex "a|b|c", matching strings where a or b or c is a subtring. }; @@ -833,12 +833,13 @@ } invariant(key >= 0); if (args->key_size == sizeof(int)) { - const int key32 = key; + const int key32 = args->memcmp_keys ? 
toku_htonl(key) : key; memcpy(data, &key32, sizeof(key32)); } else { invariant(args->key_size >= sizeof(key)); - memcpy(data, &key, sizeof(key)); - memset(data + sizeof(key), 0, args->key_size - sizeof(key)); + const int64_t key64 = args->memcmp_keys ? toku_htonl(key) : key; + memcpy(data, &key64, sizeof(key64)); + memset(data + sizeof(key64), 0, args->key_size - sizeof(key64)); } } @@ -1076,6 +1077,16 @@ return r; } +static int UU() frag_op(DB_TXN *UU(txn), ARG arg, void* UU(operation_extra), void *UU(stats_extra)) { + int db_index = myrandom_r(arg->random_data)%arg->cli->num_DBs; + DB *db = arg->dbp[db_index]; + + TOKU_DB_FRAGMENTATION_S frag; + int r = db->get_fragmentation(db, &frag); + invariant_zero(r); + return r; +} + static void UU() get_key_after_bytes_callback(const DBT *UU(end_key), uint64_t UU(skipped), void *UU(extra)) { // nothing } @@ -1966,7 +1977,9 @@ db_env_set_num_bucket_mutexes(env_args.num_bucket_mutexes); r = db_env_create(&env, 0); assert(r == 0); r = env->set_redzone(env, 0); CKERR(r); - r = env->set_default_bt_compare(env, bt_compare); CKERR(r); + if (!cli_args->memcmp_keys) { + r = env->set_default_bt_compare(env, bt_compare); CKERR(r); + } r = env->set_lk_max_memory(env, env_args.lk_max_memory); CKERR(r); r = env->set_cachesize(env, env_args.cachetable_size / (1 << 30), env_args.cachetable_size % (1 << 30), 1); CKERR(r); r = env->set_lg_bsize(env, env_args.rollback_node_size); CKERR(r); @@ -2164,7 +2177,9 @@ db_env_set_num_bucket_mutexes(env_args.num_bucket_mutexes); r = db_env_create(&env, 0); assert(r == 0); r = env->set_redzone(env, 0); CKERR(r); - r = env->set_default_bt_compare(env, bt_compare); CKERR(r); + if (!cli_args->memcmp_keys) { + r = env->set_default_bt_compare(env, bt_compare); CKERR(r); + } r = env->set_lk_max_memory(env, env_args.lk_max_memory); CKERR(r); env->set_update(env, env_args.update_function); r = env->set_cachesize(env, env_args.cachetable_size / (1 << 30), env_args.cachetable_size % (1 << 30), 1); CKERR(r); @@ 
-2282,6 +2297,7 @@ .nocrashstatus = false, .prelock_updates = false, .disperse_keys = false, + .memcmp_keys = false, .direct_io = false, }; DEFAULT_ARGS.env_args.envdir = TOKU_TEST_FILENAME; @@ -2669,6 +2685,7 @@ BOOL_ARG("nocrashstatus", nocrashstatus), BOOL_ARG("prelock_updates", prelock_updates), BOOL_ARG("disperse_keys", disperse_keys), + BOOL_ARG("memcmp_keys", memcmp_keys), BOOL_ARG("direct_io", direct_io), STRING_ARG("--envdir", env_args.envdir), @@ -2924,5 +2941,3 @@ // We want to control the row size and its compressibility. open_and_stress_tables(args, false, cmp); } - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/time_create_db.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/time_create_db.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/time_create_db.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/time_create_db.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/transactional_fileops.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/transactional_fileops.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/transactional_fileops.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/transactional_fileops.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/update.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/update.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/update.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/update.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/update-multiple-data-diagonal.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/update-multiple-data-diagonal.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/update-multiple-data-diagonal.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/update-multiple-data-diagonal.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/update-multiple-key0.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/update-multiple-key0.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/update-multiple-key0.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/update-multiple-key0.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/update-multiple-nochange.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/update-multiple-nochange.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/update-multiple-nochange.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/update-multiple-nochange.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/update-multiple-with-indexer-array.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/update-multiple-with-indexer-array.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/update-multiple-with-indexer-array.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/update-multiple-with-indexer-array.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/update-multiple-with-indexer.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/update-multiple-with-indexer.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/update-multiple-with-indexer.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/update-multiple-with-indexer.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/upgrade_simple.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/upgrade_simple.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/upgrade_simple.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/upgrade_simple.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/upgrade-test-1.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/upgrade-test-1.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/upgrade-test-1.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/upgrade-test-1.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -213,7 +213,7 @@ } } else { - fprintf(stderr, "unsupported TokuDB version %d to upgrade\n", SRC_VERSION); + fprintf(stderr, "unsupported TokuFT version %d to upgrade\n", SRC_VERSION); assert(0); } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/upgrade-test-2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/upgrade-test-2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/upgrade-test-2.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/upgrade-test-2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -193,7 +193,7 @@ src_db_dir = db_v5_dir; } else { - fprintf(stderr, "unsupported TokuDB version %d to upgrade\n", SRC_VERSION); + fprintf(stderr, "unsupported TokuFT version %d to upgrade\n", SRC_VERSION); assert(0); } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/upgrade-test-3.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/upgrade-test-3.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/upgrade-test-3.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/upgrade-test-3.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -90,7 +90,7 @@ // Purpose of this test is to verify that dictionaries created with 4.2.0 -// can be properly truncated with TokuDB version 5.x or later. +// can be properly truncated with TokuFT version 5.x or later. #include "test.h" @@ -216,7 +216,7 @@ src_db_dir = db_v5_dir; } else { - fprintf(stderr, "unsupported TokuDB version %d to upgrade\n", SRC_VERSION); + fprintf(stderr, "unsupported TokuFT version %d to upgrade\n", SRC_VERSION); assert(0); } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/upgrade-test-4.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/upgrade-test-4.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/upgrade-test-4.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/upgrade-test-4.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -306,7 +306,7 @@ src_db_dir = db_v5_dir; } else { - fprintf(stderr, "unsupported TokuDB version %d to upgrade\n", SRC_VERSION); + fprintf(stderr, "unsupported TokuFT version %d to upgrade\n", SRC_VERSION); assert(0); } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/upgrade-test-5.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/upgrade-test-5.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/upgrade-test-5.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/upgrade-test-5.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -210,7 +210,7 @@ src_db_dir = db_v5_dir; } else { - fprintf(stderr, "unsupported TokuDB version %d to upgrade\n", SRC_VERSION); + fprintf(stderr, "unsupported TokuFT version %d to upgrade\n", SRC_VERSION); assert(0); } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/upgrade-test-6.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/upgrade-test-6.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/upgrade-test-6.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/upgrade-test-6.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/upgrade-test-7.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/upgrade-test-7.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/upgrade-test-7.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/upgrade-test-7.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,8 +89,8 @@ #ident "$Id$" -// Purpose of this test is to verify that an environment created by TokuDB 3.1.0 -// is properly rejected by the upgrade logic of TokuDB 5.x and later. +// Purpose of this test is to verify that an environment created by TokuFT 3.1.0 +// is properly rejected by the upgrade logic of TokuFT 5.x and later. #include "test.h" #include "toku_pthread.h" diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/xa-dirty-commit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/xa-dirty-commit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/xa-dirty-commit.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/xa-dirty-commit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,193 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT 
(below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. 
+ + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +#include "test.h" + +// Verify that a commit of a prepared txn in recovery retains a db that was created by it. +// The rollback file is dirty when the environment is closed. 
+ +static void create_foo(DB_ENV *env, DB_TXN *txn) { + int r; + DB *db; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, txn, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + r = db->close(db, 0); + CKERR(r); +} + +static void check_foo(DB_ENV *env) { + int r; + DB *db; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, 0, 0); + CKERR(r); + r = db->close(db, 0); + CKERR(r); +} + +static void create_prepared_txn(void) { + int r; + + DB_ENV *env = nullptr; + r = db_env_create(&env, 0); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + DB_TXN *txn = nullptr; + r = env->txn_begin(env, nullptr, &txn, 0); + CKERR(r); + + create_foo(env, txn); + + TOKU_XA_XID xid = { 0x1234, 8, 9 }; + for (int i = 0; i < 8+9; i++) { + xid.data[i] = i; + } + r = txn->xa_prepare(txn, &xid); + CKERR(r); + + // discard the txn so that we can close the env and run xa recovery later + r = txn->discard(txn, 0); + CKERR(r); + + r = env->close(env, TOKUFT_DIRTY_SHUTDOWN); + CKERR(r); +} + +static void run_xa_recovery(void) { + int r; + + DB_ENV *env; + r = db_env_create(&env, 0); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE | DB_RECOVER, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + // get prepared xid + long count; + TOKU_XA_XID xid; + r = env->txn_xa_recover(env, &xid, 1, &count, DB_FIRST); + CKERR(r); + + // commit it + DB_TXN *txn = nullptr; + r = env->get_txn_from_xid(env, &xid, &txn); + CKERR(r); + r = txn->commit(txn, 0); + CKERR(r); + + check_foo(env); + + r = env->close(env, 0); + CKERR(r); +} + +int test_main (int argc, char *const argv[]) { + default_parse_args(argc, argv); + + // init the env directory + toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = 
toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + // run the test + create_prepared_txn(); + run_xa_recovery(); + + return 0; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/xa-dirty-rollback.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/xa-dirty-rollback.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/xa-dirty-rollback.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/xa-dirty-rollback.cc 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,193 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. 
+ +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +#include "test.h" + +// Verify that an abort of a prepared txn in recovery deletes a db created by it. +// The rollback file is dirty when the environment is closed. 
+ +static void create_foo(DB_ENV *env, DB_TXN *txn) { + int r; + DB *db; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, txn, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + r = db->close(db, 0); + CKERR(r); +} + +static void check_foo(DB_ENV *env) { + int r; + DB *db; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, 0, 0); + CKERR2(r, ENOENT); + r = db->close(db, 0); + CKERR(r); +} + +static void create_prepared_txn(void) { + int r; + + DB_ENV *env = nullptr; + r = db_env_create(&env, 0); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + DB_TXN *txn = nullptr; + r = env->txn_begin(env, nullptr, &txn, 0); + CKERR(r); + + create_foo(env, txn); + + TOKU_XA_XID xid = { 0x1234, 8, 9 }; + for (int i = 0; i < 8+9; i++) { + xid.data[i] = i; + } + r = txn->xa_prepare(txn, &xid); + CKERR(r); + + // discard the txn so that we can close the env and run xa recovery later + r = txn->discard(txn, 0); + CKERR(r); + + r = env->close(env, TOKUFT_DIRTY_SHUTDOWN); + CKERR(r); +} + +static void run_xa_recovery(void) { + int r; + + DB_ENV *env; + r = db_env_create(&env, 0); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE | DB_RECOVER, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + // get prepared xid + long count; + TOKU_XA_XID xid; + r = env->txn_xa_recover(env, &xid, 1, &count, DB_FIRST); + CKERR(r); + + // abort it + DB_TXN *txn = nullptr; + r = env->get_txn_from_xid(env, &xid, &txn); + CKERR(r); + r = txn->abort(txn); + CKERR(r); + + check_foo(env); + + r = env->close(env, 0); + CKERR(r); +} + +int test_main (int argc, char *const argv[]) { + default_parse_args(argc, argv); + + // init the env directory + toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = 
toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + // run the test + create_prepared_txn(); + run_xa_recovery(); + + return 0; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/xa-txn-discard-abort.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/xa-txn-discard-abort.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/xa-txn-discard-abort.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/xa-txn-discard-abort.cc 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,195 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. 
+ +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +#include "test.h" + +// Verify that an abort of a prepared txn in recovery removes a db created by it. +// A checkpoint is taken between the db creation and the txn prepare. 
+ +static void create_foo(DB_ENV *env, DB_TXN *txn) { + int r; + DB *db; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, txn, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + r = db->close(db, 0); + CKERR(r); +} + +static void check_foo(DB_ENV *env) { + int r; + DB *db; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, 0, 0); + CKERR2(r, ENOENT); + r = db->close(db, 0); + CKERR(r); +} + +static void create_prepared_txn(void) { + int r; + + DB_ENV *env = nullptr; + r = db_env_create(&env, 0); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + DB_TXN *txn = nullptr; + r = env->txn_begin(env, nullptr, &txn, 0); + CKERR(r); + + create_foo(env, txn); + r = env->txn_checkpoint(env, 0, 0, 0); + CKERR(r); + + TOKU_XA_XID xid = { 0x1234, 8, 9 }; + for (int i = 0; i < 8+9; i++) { + xid.data[i] = i; + } + r = txn->xa_prepare(txn, &xid); + CKERR(r); + + // discard the txn so that we can close the env and run xa recovery later + r = txn->discard(txn, 0); + CKERR(r); + + r = env->close(env, TOKUFT_DIRTY_SHUTDOWN); + CKERR(r); +} + +static void run_xa_recovery(void) { + int r; + + DB_ENV *env; + r = db_env_create(&env, 0); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE | DB_RECOVER, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + // get prepared xid + long count; + TOKU_XA_XID xid; + r = env->txn_xa_recover(env, &xid, 1, &count, DB_FIRST); + CKERR(r); + + // abort it + DB_TXN *txn = nullptr; + r = env->get_txn_from_xid(env, &xid, &txn); + CKERR(r); + r = txn->abort(txn); + CKERR(r); + + check_foo(env); + + r = env->close(env, 0); + CKERR(r); +} + +int test_main (int argc, char *const argv[]) { + default_parse_args(argc, argv); + + // init the env directory + 
toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + // run the test + create_prepared_txn(); + run_xa_recovery(); + + return 0; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/xa-txn-discard-commit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/xa-txn-discard-commit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/xa-txn-discard-commit.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/xa-txn-discard-commit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,196 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. 
+ +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +#include "test.h" + +// Verify that a commit of a prepared txn in recovery retains a db created by it. +// A checkpoint is taken between the db creation and the txn prepare. 
+ +static void create_foo(DB_ENV *env, DB_TXN *txn) { + int r; + DB *db; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, txn, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + r = db->close(db, 0); + CKERR(r); +} + +static void check_foo(DB_ENV *env) { + int r; + DB *db; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, 0, 0); + CKERR(r); + r = db->close(db, 0); + CKERR(r); +} + +static void create_prepared_txn(void) { + int r; + + DB_ENV *env = nullptr; + r = db_env_create(&env, 0); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + DB_TXN *txn = nullptr; + r = env->txn_begin(env, nullptr, &txn, 0); + CKERR(r); + + create_foo(env, txn); + + r = env->txn_checkpoint(env, 0, 0, 0); + CKERR(r); + + TOKU_XA_XID xid = { 0x1234, 8, 9 }; + for (int i = 0; i < 8+9; i++) { + xid.data[i] = i; + } + r = txn->xa_prepare(txn, &xid); + CKERR(r); + + // discard the txn so that we can close the env and run xa recovery later + r = txn->discard(txn, 0); + CKERR(r); + + r = env->close(env, TOKUFT_DIRTY_SHUTDOWN); + CKERR(r); +} + +static void run_xa_recovery(void) { + int r; + + DB_ENV *env; + r = db_env_create(&env, 0); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE | DB_RECOVER, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + // get prepared xid + long count; + TOKU_XA_XID xid; + r = env->txn_xa_recover(env, &xid, 1, &count, DB_FIRST); + CKERR(r); + + // commit it + DB_TXN *txn = nullptr; + r = env->get_txn_from_xid(env, &xid, &txn); + CKERR(r); + r = txn->commit(txn, 0); + CKERR(r); + + check_foo(env); + + r = env->close(env, 0); + CKERR(r); +} + +int test_main (int argc, char *const argv[]) { + default_parse_args(argc, argv); + + // init the env directory + 
toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + // run the test + create_prepared_txn(); + run_xa_recovery(); + + return 0; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/zombie_db.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/zombie_db.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/tests/zombie_db.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/tests/zombie_db.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/toku_patent.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/toku_patent.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/toku_patent.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/toku_patent.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -115,7 +115,7 @@ \n\ COPYRIGHT NOTICE:\n\ \n\ - TokuDB, Tokutek Fractal Tree Indexing Library.\n\ + TokuFT, Tokutek Fractal Tree Indexing Library.\n\ Copyright (C) 2007-2013 Tokutek, Inc.\n\ \n\ DISCLAIMER:\n\ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb.cc 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb.cc 2014-10-08 13:19:51.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -92,51 +92,42 @@ extern const char *toku_patent_string; const char *toku_copyright_string = "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."; -#include -#include -#include +#include #include -#include #include -#include -#include -#include #include -#include -#include -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ydb.h" -#include "ydb-internal.h" -#include "ydb_cursor.h" -#include "ydb_row_lock.h" -#include "ydb_env_func.h" -#include "ydb_db.h" -#include "ydb_write.h" -#include "ydb_txn.h" -#include "loader.h" -#include "indexer.h" + +#include "portability/memory.h" +#include "portability/toku_assert.h" +#include "portability/toku_portability.h" +#include "portability/toku_pthread.h" +#include "portability/toku_stdlib.h" + +#include "ft/ft-flusher.h" +#include "ft/cachetable/cachetable.h" +#include "ft/cachetable/checkpoint.h" +#include "ft/logger/log.h" +#include "ft/loader/loader.h" +#include "ft/log_header.h" +#include "ft/ft.h" +#include "ft/txn/txn_manager.h" +#include "src/ydb.h" +#include "src/ydb-internal.h" +#include "src/ydb_cursor.h" +#include "src/ydb_row_lock.h" +#include "src/ydb_env_func.h" +#include "src/ydb_db.h" +#include "src/ydb_write.h" +#include "src/ydb_txn.h" +#include "src/loader.h" +#include "src/indexer.h" +#include "util/status.h" +#include "util/context.h" // Include ydb_lib.cc here so that its constructor/destructor gets put into // ydb.o, to make sure they don't get erased at link time (when linking to -// a static libtokudb.a that was compiled with gcc). See #5094. +// a static libtokufractaltree.a that was compiled with gcc). See #5094. 
#include "ydb_lib.cc" #ifdef TOKUTRACE @@ -197,7 +188,7 @@ static YDB_LAYER_STATUS_S ydb_layer_status; #define STATUS_VALUE(x) ydb_layer_status.status[x].value.num -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ydb_layer_status, k, c, t, l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ydb_layer_status, k, c, t, l, inc) static void ydb_layer_status_init (void) { @@ -263,14 +254,14 @@ env_fs_report_in_yellow(DB_ENV *UU(env)) { char tbuf[26]; time_t tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb file system space is low\n", ctime_r(&tnow, tbuf)); fflush(stderr); + fprintf(stderr, "%.24s TokuFT file system space is low\n", ctime_r(&tnow, tbuf)); fflush(stderr); } static void env_fs_report_in_red(DB_ENV *UU(env)) { char tbuf[26]; time_t tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb file system space is really low and access is restricted\n", ctime_r(&tnow, tbuf)); fflush(stderr); + fprintf(stderr, "%.24s TokuFT file system space is really low and access is restricted\n", ctime_r(&tnow, tbuf)); fflush(stderr); } static inline uint64_t @@ -279,7 +270,7 @@ } #define ZONEREPORTLIMIT 12 -// Check the available space in the file systems used by tokudb and erect barriers when available space gets low. +// Check the available space in the file systems used by tokuft and erect barriers when available space gets low. static int env_fs_poller(void *arg) { DB_ENV *env = (DB_ENV *) arg; @@ -456,7 +447,7 @@ static int ydb_do_recovery (DB_ENV *env) { assert(env->i->real_log_dir); - int r = tokudb_recover(env, + int r = tokuft_recover(env, toku_keep_prepared_txn_callback, keep_cachetable_callback, env->i->logger, @@ -470,33 +461,12 @@ static int needs_recovery (DB_ENV *env) { assert(env->i->real_log_dir); - int recovery_needed = tokudb_needs_recovery(env->i->real_log_dir, true); + int recovery_needed = tokuft_needs_recovery(env->i->real_log_dir, true); return recovery_needed ? 
DB_RUNRECOVERY : 0; } static int toku_env_txn_checkpoint(DB_ENV * env, uint32_t kbyte, uint32_t min, uint32_t flags); -// Instruct db to use the default (built-in) key comparison function -// by setting the flag bits in the db and ft structs -static int -db_use_builtin_key_cmp(DB *db) { - HANDLE_PANICKED_DB(db); - int r = 0; - if (db_opened(db)) - r = toku_ydb_do_error(db->dbenv, EINVAL, "Comparison functions cannot be set after DB open.\n"); - else if (db->i->key_compare_was_set) - r = toku_ydb_do_error(db->dbenv, EINVAL, "Key comparison function already set.\n"); - else { - uint32_t tflags; - toku_ft_get_flags(db->i->ft_handle, &tflags); - - tflags |= TOKU_DB_KEYCMP_BUILTIN; - toku_ft_set_flags(db->i->ft_handle, tflags); - db->i->key_compare_was_set = true; - } - return r; -} - // Keys used in persistent environment dictionary: // Following keys added in version 12 static const char * orig_env_ver_key = "original_version"; @@ -553,7 +523,7 @@ static PERSISTENT_UPGRADE_STATUS_S persistent_upgrade_status; -#define PERSISTENT_UPGRADE_STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(persistent_upgrade_status, k, c, t, "upgrade: " l, inc) +#define PERSISTENT_UPGRADE_STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(persistent_upgrade_status, k, c, t, "upgrade: " l, inc) static void persistent_upgrade_status_init (void) { @@ -703,7 +673,7 @@ // return 0 if log exists or ENOENT if log does not exist static int ydb_recover_log_exists(DB_ENV *env) { - int r = tokudb_recover_log_exists(env->i->real_log_dir); + int r = tokuft_recover_log_exists(env->i->real_log_dir); return r; } @@ -866,20 +836,20 @@ HANDLE_EXTRA_FLAGS(env, flags, DB_CREATE|DB_PRIVATE|DB_INIT_LOG|DB_INIT_TXN|DB_RECOVER|DB_INIT_MPOOL|DB_INIT_LOCK|DB_THREAD); - // DB_CREATE means create if env does not exist, and Tokudb requires it because - // Tokudb requries DB_PRIVATE. + // DB_CREATE means create if env does not exist, and TokuFT requires it because + // TokuFT requries DB_PRIVATE. 
if ((flags & DB_PRIVATE) && !(flags & DB_CREATE)) { r = toku_ydb_do_error(env, ENOENT, "DB_PRIVATE requires DB_CREATE (seems gratuitous to us, but that's BDB's behavior\n"); goto cleanup; } if (!(flags & DB_PRIVATE)) { - r = toku_ydb_do_error(env, ENOENT, "TokuDB requires DB_PRIVATE\n"); + r = toku_ydb_do_error(env, ENOENT, "TokuFT requires DB_PRIVATE\n"); goto cleanup; } if ((flags & DB_INIT_LOG) && !(flags & DB_INIT_TXN)) { - r = toku_ydb_do_error(env, EINVAL, "TokuDB requires transactions for logging\n"); + r = toku_ydb_do_error(env, EINVAL, "TokuFT requires transactions for logging\n"); goto cleanup; } @@ -991,13 +961,13 @@ // This is probably correct, but it will be pain... // if ((flags & DB_THREAD)==0) { -// r = toku_ydb_do_error(env, EINVAL, "TokuDB requires DB_THREAD"); +// r = toku_ydb_do_error(env, EINVAL, "TokuFT requires DB_THREAD"); // goto cleanup; // } unused_flags &= ~DB_THREAD; if (unused_flags!=0) { - r = toku_ydb_do_error(env, EINVAL, "Extra flags not understood by tokudb: %u\n", unused_flags); + r = toku_ydb_do_error(env, EINVAL, "Extra flags not understood by tokuft: %u\n", unused_flags); goto cleanup; } @@ -1036,7 +1006,7 @@ { r = toku_db_create(&env->i->persistent_environment, env, 0); assert_zero(r); - r = db_use_builtin_key_cmp(env->i->persistent_environment); + r = toku_db_use_builtin_key_cmp(env->i->persistent_environment); assert_zero(r); r = toku_db_open_iname(env->i->persistent_environment, txn, toku_product_name_strings.environmentdictionary, DB_CREATE, mode); if (r != 0) { @@ -1074,7 +1044,7 @@ { r = toku_db_create(&env->i->directory, env, 0); assert_zero(r); - r = db_use_builtin_key_cmp(env->i->directory); + r = toku_db_use_builtin_key_cmp(env->i->directory); assert_zero(r); r = toku_db_open_iname(env->i->directory, txn, toku_product_name_strings.fileopsdirectory, DB_CREATE, mode); if (r != 0) { @@ -1124,6 +1094,12 @@ env_close(DB_ENV * env, uint32_t flags) { int r = 0; const char * err_msg = NULL; + bool clean_shutdown = true; + + 
if (flags & TOKUFT_DIRTY_SHUTDOWN) { + clean_shutdown = false; + flags &= ~TOKUFT_DIRTY_SHUTDOWN; + } most_recent_env = NULL; // Set most_recent_env to NULL so that we don't have a dangling pointer (and if there's an error, the toku assert code would try to look at the env.) @@ -1162,24 +1138,30 @@ } env_fsync_log_cron_destroy(env); if (env->i->cachetable) { + toku_cachetable_prepare_close(env->i->cachetable); toku_cachetable_minicron_shutdown(env->i->cachetable); if (env->i->logger) { - CHECKPOINTER cp = toku_cachetable_get_checkpointer(env->i->cachetable); - r = toku_checkpoint(cp, env->i->logger, NULL, NULL, NULL, NULL, SHUTDOWN_CHECKPOINT); - if (r) { - err_msg = "Cannot close environment (error during checkpoint)\n"; - toku_ydb_do_error(env, r, "%s", err_msg); - goto panic_and_quit_early; - } - toku_logger_close_rollback(env->i->logger); - //Do a second checkpoint now that the rollback cachefile is closed. - r = toku_checkpoint(cp, env->i->logger, NULL, NULL, NULL, NULL, SHUTDOWN_CHECKPOINT); - if (r) { - err_msg = "Cannot close environment (error during checkpoint)\n"; - toku_ydb_do_error(env, r, "%s", err_msg); - goto panic_and_quit_early; + CHECKPOINTER cp = nullptr; + if (clean_shutdown) { + cp = toku_cachetable_get_checkpointer(env->i->cachetable); + r = toku_checkpoint(cp, env->i->logger, NULL, NULL, NULL, NULL, SHUTDOWN_CHECKPOINT); + if (r) { + err_msg = "Cannot close environment (error during checkpoint)\n"; + toku_ydb_do_error(env, r, "%s", err_msg); + goto panic_and_quit_early; + } + } + toku_logger_close_rollback_check_empty(env->i->logger, clean_shutdown); + if (clean_shutdown) { + //Do a second checkpoint now that the rollback cachefile is closed. 
+ r = toku_checkpoint(cp, env->i->logger, NULL, NULL, NULL, NULL, SHUTDOWN_CHECKPOINT); + if (r) { + err_msg = "Cannot close environment (error during checkpoint)\n"; + toku_ydb_do_error(env, r, "%s", err_msg); + goto panic_and_quit_early; + } + toku_logger_shutdown(env->i->logger); } - toku_logger_shutdown(env->i->logger); } toku_cachetable_close(&env->i->cachetable); } @@ -1230,7 +1212,7 @@ unlock_single_process(env); toku_free(env->i); toku_free(env); - toku_sync_fetch_and_add(&tokudb_num_envs, -1); + toku_sync_fetch_and_add(&tokuft_num_envs, -1); if (flags != 0) { r = EINVAL; } @@ -1405,7 +1387,7 @@ flags &= ~DB_AUTO_COMMIT; } if (flags != 0 && onoff) { - return toku_ydb_do_error(env, EINVAL, "TokuDB does not (yet) support any nonzero ENV flags other than DB_AUTO_COMMIT\n"); + return toku_ydb_do_error(env, EINVAL, "TokuFT does not (yet) support any nonzero ENV flags other than DB_AUTO_COMMIT\n"); } if (onoff) env->i->open_flags |= change; else env->i->open_flags &= ~change; @@ -1451,7 +1433,7 @@ static int env_set_lk_detect(DB_ENV * env, uint32_t UU(detect)) { HANDLE_PANICKED_ENV(env); - return toku_ydb_do_error(env, EINVAL, "TokuDB does not (yet) support set_lk_detect\n"); + return toku_ydb_do_error(env, EINVAL, "TokuFT does not (yet) support set_lk_detect\n"); } static int @@ -1796,7 +1778,7 @@ static FS_STATUS_S fsstat; -#define FS_STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(fsstat, k, c, t, "filesystem: " l, inc) +#define FS_STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(fsstat, k, c, t, "filesystem: " l, inc) static void fs_status_init(void) { @@ -1867,7 +1849,7 @@ static MEMORY_STATUS_S memory_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(memory_status, k, c, t, "memory: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(memory_status, k, c, t, "memory: " l, inc) static void memory_status_init(void) { @@ -2464,7 +2446,7 @@ const int r = lt_map->fetch(which_lt, &ranges); invariant_zero(r); current_db = locked_get_db_by_dict_id(env, 
ranges.lt->get_dict_id()); - iter.create(ranges.buffer); + iter = toku::range_buffer::iterator(ranges.buffer); } DB_ENV *env; @@ -2694,7 +2676,7 @@ *envp = result; r = 0; - toku_sync_fetch_and_add(&tokudb_num_envs, 1); + toku_sync_fetch_and_add(&tokuft_num_envs, 1); cleanup: if (r!=0) { if (result) { @@ -3079,15 +3061,15 @@ case TOKUDB_OUT_OF_LOCKS: return "Out of locks"; case TOKUDB_DICTIONARY_TOO_OLD: - return "Dictionary too old for this version of TokuDB"; + return "Dictionary too old for this version of TokuFT"; case TOKUDB_DICTIONARY_TOO_NEW: - return "Dictionary too new for this version of TokuDB"; + return "Dictionary too new for this version of TokuFT"; case TOKUDB_CANCELED: return "User cancelled operation"; case TOKUDB_NO_DATA: return "Ran out of data (not EOF)"; case TOKUDB_HUGE_PAGES_ENABLED: - return "Transparent huge pages are enabled but TokuDB's memory allocator will oversubscribe main memory with transparent huge pages. This check can be disabled by setting the environment variable TOKU_HUGE_PAGES_OK."; + return "Transparent huge pages are enabled but TokuFT's memory allocator will oversubscribe main memory with transparent huge pages. This check can be disabled by setting the environment variable TOKU_HUGE_PAGES_OK."; } static char unknown_result[100]; // Race condition if two threads call this at the same time. However even in a bad case, it should be some sort of null-terminated string. diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_cursor.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_cursor.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_cursor.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_cursor.cc 2014-10-08 13:19:51.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -89,6 +89,8 @@ #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #ident "$Id$" +#include + #include #include #include @@ -97,6 +99,7 @@ #include "ydb-internal.h" #include "ydb_cursor.h" #include "ydb_row_lock.h" +#include "ft/cursor.h" static YDB_C_LAYER_STATUS_S ydb_c_layer_status; #ifdef STATUS_VALUE @@ -104,7 +107,7 @@ #endif #define STATUS_VALUE(x) ydb_c_layer_status.status[x].value.num -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ydb_c_layer_status, k, c, t, l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ydb_c_layer_status, k, c, t, l, inc) static void ydb_c_layer_status_init (void) { @@ -133,8 +136,8 @@ } static inline bool -c_uninitialized(DBC* c) { - return toku_ft_cursor_uninitialized(dbc_struct_i(c)->c); +c_uninitialized(DBC *c) { + return toku_ft_cursor_uninitialized(dbc_ftcursor(c)); } typedef struct query_context_wrapped_t { @@ -200,7 +203,7 @@ static void query_context_base_init(QUERY_CONTEXT_BASE context, DBC *c, uint32_t flag, bool is_write_op, YDB_CALLBACK_FUNCTION f, void *extra) { - context->c = dbc_struct_i(c)->c; + context->c = dbc_ftcursor(c); context->txn = dbc_struct_i(c)->txn; context->db = c->dbp; context->f = f; @@ -247,7 +250,7 @@ context->input_val = val; } -static int c_getf_first_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_first_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static void c_query_context_init(QUERY_CONTEXT context, DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -277,7 +280,7 @@ c_query_context_init(&context, c, flag, f, extra); while (r == 0) { //toku_ft_cursor_first will call 
c_getf_first_callback(..., context) (if query is successful) - r = toku_ft_cursor_first(dbc_struct_i(c)->c, c_getf_first_callback, &context); + r = toku_ft_cursor_first(dbc_ftcursor(c), c_getf_first_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -290,7 +293,7 @@ //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) static int -c_getf_first_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_first_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT super_context = (QUERY_CONTEXT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -317,7 +320,7 @@ return r; } -static int c_getf_last_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_last_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static int c_getf_last(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -328,7 +331,7 @@ c_query_context_init(&context, c, flag, f, extra); while (r == 0) { //toku_ft_cursor_last will call c_getf_last_callback(..., context) (if query is successful) - r = toku_ft_cursor_last(dbc_struct_i(c)->c, c_getf_last_callback, &context); + r = toku_ft_cursor_last(dbc_ftcursor(c), c_getf_last_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -341,7 +344,7 @@ //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) 
static int -c_getf_last_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_last_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT super_context = (QUERY_CONTEXT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -368,7 +371,7 @@ return r; } -static int c_getf_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_next_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static int c_getf_next(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -383,7 +386,7 @@ c_query_context_init(&context, c, flag, f, extra); while (r == 0) { //toku_ft_cursor_next will call c_getf_next_callback(..., context) (if query is successful) - r = toku_ft_cursor_next(dbc_struct_i(c)->c, c_getf_next_callback, &context); + r = toku_ft_cursor_next(dbc_ftcursor(c), c_getf_next_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -397,7 +400,7 @@ //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) 
static int -c_getf_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_next_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT super_context = (QUERY_CONTEXT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -427,7 +430,7 @@ return r; } -static int c_getf_prev_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_prev_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static int c_getf_prev(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -442,7 +445,7 @@ c_query_context_init(&context, c, flag, f, extra); while (r == 0) { //toku_ft_cursor_prev will call c_getf_prev_callback(..., context) (if query is successful) - r = toku_ft_cursor_prev(dbc_struct_i(c)->c, c_getf_prev_callback, &context); + r = toku_ft_cursor_prev(dbc_ftcursor(c), c_getf_prev_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -456,7 +459,7 @@ //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) 
static int -c_getf_prev_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_prev_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT super_context = (QUERY_CONTEXT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -485,7 +488,7 @@ return r; } -static int c_getf_current_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_current_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static int c_getf_current(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -495,14 +498,14 @@ QUERY_CONTEXT_S context; //Describes the context of this query. c_query_context_init(&context, c, flag, f, extra); //toku_ft_cursor_current will call c_getf_current_callback(..., context) (if query is successful) - int r = toku_ft_cursor_current(dbc_struct_i(c)->c, DB_CURRENT, c_getf_current_callback, &context); + int r = toku_ft_cursor_current(dbc_ftcursor(c), DB_CURRENT, c_getf_current_callback, &context); c_query_context_destroy(&context); return r; } //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) 
static int -c_getf_current_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_current_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT super_context = (QUERY_CONTEXT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -522,7 +525,7 @@ return r; } -static int c_getf_set_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_set_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); int toku_c_getf_set(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -534,7 +537,7 @@ query_context_with_input_init(&context, c, flag, key, NULL, f, extra); while (r == 0) { //toku_ft_cursor_set will call c_getf_set_callback(..., context) (if query is successful) - r = toku_ft_cursor_set(dbc_struct_i(c)->c, key, c_getf_set_callback, &context); + r = toku_ft_cursor_set(dbc_ftcursor(c), key, c_getf_set_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -547,7 +550,7 @@ //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) 
static int -c_getf_set_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_set_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT_WITH_INPUT super_context = (QUERY_CONTEXT_WITH_INPUT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -575,7 +578,7 @@ return r; } -static int c_getf_set_range_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_set_range_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static int c_getf_set_range(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -587,7 +590,7 @@ query_context_with_input_init(&context, c, flag, key, NULL, f, extra); while (r == 0) { //toku_ft_cursor_set_range will call c_getf_set_range_callback(..., context) (if query is successful) - r = toku_ft_cursor_set_range(dbc_struct_i(c)->c, key, nullptr, c_getf_set_range_callback, &context); + r = toku_ft_cursor_set_range(dbc_ftcursor(c), key, nullptr, c_getf_set_range_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -600,7 +603,7 @@ //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) 
static int -c_getf_set_range_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_set_range_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT_WITH_INPUT super_context = (QUERY_CONTEXT_WITH_INPUT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -641,7 +644,7 @@ query_context_with_input_init(&context, c, flag, key, NULL, f, extra); while (r == 0) { //toku_ft_cursor_set_range will call c_getf_set_range_callback(..., context) (if query is successful) - r = toku_ft_cursor_set_range(dbc_struct_i(c)->c, key, key_bound, c_getf_set_range_callback, &context); + r = toku_ft_cursor_set_range(dbc_ftcursor(c), key, key_bound, c_getf_set_range_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -652,7 +655,7 @@ return r; } -static int c_getf_set_range_reverse_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_set_range_reverse_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static int c_getf_set_range_reverse(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -664,7 +667,7 @@ query_context_with_input_init(&context, c, flag, key, NULL, f, extra); while (r == 0) { //toku_ft_cursor_set_range_reverse will call c_getf_set_range_reverse_callback(..., context) (if query is successful) - r = toku_ft_cursor_set_range_reverse(dbc_struct_i(c)->c, key, c_getf_set_range_reverse_callback, &context); + r = toku_ft_cursor_set_range_reverse(dbc_ftcursor(c), key, c_getf_set_range_reverse_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -677,7 +680,7 @@ //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) 
static int -c_getf_set_range_reverse_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_set_range_reverse_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT_WITH_INPUT super_context = (QUERY_CONTEXT_WITH_INPUT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -708,14 +711,19 @@ return r; } -// Close a cursor. -int -toku_c_close(DBC * c) { + +int toku_c_close_internal(DBC *c) { HANDLE_PANICKED_DB(c->dbp); HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c); - toku_ft_cursor_close(dbc_struct_i(c)->c); + toku_ft_cursor_destroy(dbc_ftcursor(c)); toku_sdbt_cleanup(&dbc_struct_i(c)->skey_s); toku_sdbt_cleanup(&dbc_struct_i(c)->sval_s); + return 0; +} + +// Close a cursor. +int toku_c_close(DBC *c) { + toku_c_close_internal(c); toku_free(c); return 0; } @@ -739,7 +747,7 @@ DB *db = dbc->dbp; DB_TXN *txn = dbc_struct_i(dbc)->txn; HANDLE_PANICKED_DB(db); - toku_ft_cursor_set_range_lock(dbc_struct_i(dbc)->c, left_key, right_key, + toku_ft_cursor_set_range_lock(dbc_ftcursor(dbc), left_key, right_key, (left_key == toku_dbt_negative_infinity()), (right_key == toku_dbt_positive_infinity()), out_of_range_error); @@ -757,12 +765,12 @@ static void c_remove_restriction(DBC *dbc) { - toku_ft_cursor_remove_restriction(dbc_struct_i(dbc)->c); + toku_ft_cursor_remove_restriction(dbc_ftcursor(dbc)); } static void c_set_check_interrupt_callback(DBC* dbc, bool (*interrupt_callback)(void*), void *extra) { - toku_ft_cursor_set_check_interrupt_cb(dbc_struct_i(dbc)->c, interrupt_callback, extra); + toku_ft_cursor_set_check_interrupt_cb(dbc_ftcursor(dbc), interrupt_callback, extra); } int @@ -828,7 +836,7 @@ } int -toku_db_cursor_internal(DB * db, DB_TXN * txn, DBC ** c, uint32_t flags, int is_temporary_cursor) { +toku_db_cursor_internal(DB * db, DB_TXN * txn, DBC *c, uint32_t flags, int is_temporary_cursor) { HANDLE_PANICKED_DB(db); 
HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn); DB_ENV* env = db->dbenv; @@ -841,13 +849,7 @@ ); } - int r = 0; - - struct __toku_dbc_external *XMALLOC(eresult); // so the internal stuff is stuck on the end - memset(eresult, 0, sizeof(*eresult)); - DBC *result = &eresult->external_part; - -#define SCRS(name) result->name = name +#define SCRS(name) c->name = name SCRS(c_getf_first); SCRS(c_getf_last); SCRS(c_getf_next); @@ -861,59 +863,49 @@ SCRS(c_set_check_interrupt_callback); #undef SCRS - result->c_get = toku_c_get; - result->c_getf_set = toku_c_getf_set; - result->c_close = toku_c_close; - - result->dbp = db; - - dbc_struct_i(result)->txn = txn; - dbc_struct_i(result)->skey_s = (struct simple_dbt){0,0}; - dbc_struct_i(result)->sval_s = (struct simple_dbt){0,0}; + c->c_get = toku_c_get; + c->c_getf_set = toku_c_getf_set; + c->c_close = toku_c_close; + + c->dbp = db; + + dbc_struct_i(c)->txn = txn; + dbc_struct_i(c)->skey_s = (struct simple_dbt){0,0}; + dbc_struct_i(c)->sval_s = (struct simple_dbt){0,0}; if (is_temporary_cursor) { - dbc_struct_i(result)->skey = &db->i->skey; - dbc_struct_i(result)->sval = &db->i->sval; + dbc_struct_i(c)->skey = &db->i->skey; + dbc_struct_i(c)->sval = &db->i->sval; } else { - dbc_struct_i(result)->skey = &dbc_struct_i(result)->skey_s; - dbc_struct_i(result)->sval = &dbc_struct_i(result)->sval_s; + dbc_struct_i(c)->skey = &dbc_struct_i(c)->skey_s; + dbc_struct_i(c)->sval = &dbc_struct_i(c)->sval_s; } if (flags & DB_SERIALIZABLE) { - dbc_struct_i(result)->iso = TOKU_ISO_SERIALIZABLE; + dbc_struct_i(c)->iso = TOKU_ISO_SERIALIZABLE; } else { - dbc_struct_i(result)->iso = txn ? db_txn_struct_i(txn)->iso : TOKU_ISO_SERIALIZABLE; + dbc_struct_i(c)->iso = txn ? 
db_txn_struct_i(txn)->iso : TOKU_ISO_SERIALIZABLE; } - dbc_struct_i(result)->rmw = (flags & DB_RMW) != 0; + dbc_struct_i(c)->rmw = (flags & DB_RMW) != 0; bool is_snapshot_read = false; if (txn) { - is_snapshot_read = (dbc_struct_i(result)->iso == TOKU_ISO_READ_COMMITTED || - dbc_struct_i(result)->iso == TOKU_ISO_SNAPSHOT); + is_snapshot_read = (dbc_struct_i(c)->iso == TOKU_ISO_READ_COMMITTED || + dbc_struct_i(c)->iso == TOKU_ISO_SNAPSHOT); } - r = toku_ft_cursor( + int r = toku_ft_cursor_create( db->i->ft_handle, - &dbc_struct_i(result)->c, + dbc_ftcursor(c), txn ? db_txn_struct_i(txn)->tokutxn : NULL, is_snapshot_read, - ((flags & DBC_DISABLE_PREFETCHING) != 0) + ((flags & DBC_DISABLE_PREFETCHING) != 0), + is_temporary_cursor != 0 ); - assert(r == 0 || r == TOKUDB_MVCC_DICTIONARY_TOO_NEW); - if (r == 0) { - // Set the is_temporary_cursor boolean inside the ftnode so - // that a query only needing one cursor will not perform - // unecessary malloc calls. - if (is_temporary_cursor) { - toku_ft_cursor_set_temporary(dbc_struct_i(result)->c); - } - - *c = result; - } - else { - toku_free(result); + if (r != 0) { + invariant(r == TOKUDB_MVCC_DICTIONARY_TOO_NEW); } return r; } static inline int -autotxn_db_cursor(DB *db, DB_TXN *txn, DBC **c, uint32_t flags) { +autotxn_db_cursor(DB *db, DB_TXN *txn, DBC *c, uint32_t flags) { if (!txn && (db->dbenv->i->open_flags & DB_INIT_TXN)) { return toku_ydb_do_error(db->dbenv, EINVAL, "Cursors in a transaction environment must have transactions.\n"); @@ -922,9 +914,14 @@ } // Create a cursor on a db. 
-int -toku_db_cursor(DB *db, DB_TXN *txn, DBC **c, uint32_t flags) { - int r = autotxn_db_cursor(db, txn, c, flags); +int toku_db_cursor(DB *db, DB_TXN *txn, DBC **c, uint32_t flags) { + DBC *XMALLOC(cursor); + int r = autotxn_db_cursor(db, txn, cursor, flags); + if (r == 0) { + *c = cursor; + } else { + toku_free(cursor); + } return r; } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_cursor.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_cursor.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_cursor.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_cursor.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,14 +86,12 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
// This file defines the public interface to the ydb library -#if !defined(TOKU_YDB_CURSOR_H) -#define TOKU_YDB_CURSOR_H - - typedef enum { YDB_C_LAYER_STATUS_NUM_ROWS = 0 /* number of rows in this status array */ } ydb_c_lock_layer_status_entry; @@ -107,10 +105,9 @@ int toku_c_get(DBC * c, DBT * key, DBT * data, uint32_t flag); int toku_c_getf_set(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra); -int toku_c_close(DBC * c); -int toku_db_cursor_internal(DB *db, DB_TXN * txn, DBC **c, uint32_t flags, int is_temporary_cursor); -int toku_db_cursor(DB *db, DB_TXN *txn, DBC **c, uint32_t flags); - +int toku_db_cursor(DB *db, DB_TXN *txn, DBC **c, uint32_t flags); +int toku_db_cursor_internal(DB *db, DB_TXN * txn, DBC *c, uint32_t flags, int is_temporary_cursor); -#endif +int toku_c_close(DBC *c); +int toku_c_close_internal(DBC *c); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_db.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_db.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_db.cc 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_db.cc 2014-10-08 13:19:51.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,14 +89,15 @@ #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
#ident "$Id$" +#include + #include #include #include #include #include -#include -#include +#include #include "ydb_cursor.h" #include "ydb_row_lock.h" @@ -115,7 +116,7 @@ #endif #define STATUS_VALUE(x) ydb_db_layer_status.status[x].value.num -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ydb_db_layer_status, k, c, t, l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ydb_db_layer_status, k, c, t, l, inc) static void ydb_db_layer_status_init (void) { @@ -225,13 +226,13 @@ db_getf_set(DB *db, DB_TXN *txn, uint32_t flags, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra) { HANDLE_PANICKED_DB(db); HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn); - DBC *c; + DBC c; uint32_t create_flags = flags & (DB_ISOLATION_FLAGS | DB_RMW); flags &= ~DB_ISOLATION_FLAGS; int r = toku_db_cursor_internal(db, txn, &c, create_flags | DBC_DISABLE_PREFETCHING, 1); if (r==0) { - r = toku_c_getf_set(c, flags, key, f, extra); - int r2 = toku_c_close(c); + r = toku_c_getf_set(&c, flags, key, f, extra); + int r2 = toku_c_close_internal(&c); if (r==0) r = r2; } return r; @@ -258,12 +259,12 @@ // And DB_GET_BOTH is no longer supported. #2862. if (flags != 0) return EINVAL; - DBC *dbc; + DBC dbc; r = toku_db_cursor_internal(db, txn, &dbc, iso_flags | DBC_DISABLE_PREFETCHING, 1); if (r!=0) return r; uint32_t c_get_flags = DB_SET; - r = toku_c_get(dbc, key, data, c_get_flags | lock_flags); - int r2 = toku_c_close(dbc); + r = toku_c_get(&dbc, key, data, c_get_flags | lock_flags); + int r2 = toku_c_close_internal(&dbc); return r ? 
r : r2; } @@ -390,10 +391,12 @@ // locktree's descriptor pointer if necessary static void db_set_descriptors(DB *db, FT_HANDLE ft_handle) { + const toku::comparator &cmp = toku_ft_get_comparator(ft_handle); db->descriptor = toku_ft_get_descriptor(ft_handle); db->cmp_descriptor = toku_ft_get_cmp_descriptor(ft_handle); + invariant(db->cmp_descriptor == cmp.get_descriptor()); if (db->i->lt) { - db->i->lt->set_descriptor(db->cmp_descriptor); + db->i->lt->set_comparator(cmp); } } @@ -430,8 +433,27 @@ toku_ft_handle_close(ft_handle); } -int -toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, uint32_t flags, int mode) { +// Instruct db to use the default (built-in) key comparison function +// by setting the flag bits in the db and ft structs +int toku_db_use_builtin_key_cmp(DB *db) { + HANDLE_PANICKED_DB(db); + int r = 0; + if (db_opened(db)) { + r = toku_ydb_do_error(db->dbenv, EINVAL, "Comparison functions cannot be set after DB open.\n"); + } else if (db->i->key_compare_was_set) { + r = toku_ydb_do_error(db->dbenv, EINVAL, "Key comparison function already set.\n"); + } else { + uint32_t tflags; + toku_ft_get_flags(db->i->ft_handle, &tflags); + + tflags |= TOKU_DB_KEYCMP_BUILTIN; + toku_ft_set_flags(db->i->ft_handle, tflags); + db->i->key_compare_was_set = true; + } + return r; +} + +int toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, uint32_t flags, int mode) { //Set comparison functions if not yet set. HANDLE_READ_ONLY_TXN(txn); if (!db->i->key_compare_was_set && db->dbenv->i->bt_compare) { @@ -474,9 +496,9 @@ int r = toku_ft_handle_open(ft_handle, iname_in_env, is_db_create, is_db_excl, db->dbenv->i->cachetable, - txn ? db_txn_struct_i(txn)->tokutxn : NULL_TXN); + txn ? 
db_txn_struct_i(txn)->tokutxn : nullptr); if (r != 0) { - goto error_cleanup; + goto out; } // if the dictionary was opened as a blackhole, mark the @@ -497,26 +519,27 @@ .txn = txn, .ft_handle = db->i->ft_handle, }; - db->i->lt = db->dbenv->i->ltm.get_lt( - db->i->dict_id, - db->cmp_descriptor, - toku_ft_get_bt_compare(db->i->ft_handle), - &on_create_extra); + db->i->lt = db->dbenv->i->ltm.get_lt(db->i->dict_id, + toku_ft_get_comparator(db->i->ft_handle), + &on_create_extra); if (db->i->lt == nullptr) { r = errno; - if (r == 0) + if (r == 0) { r = EINVAL; - goto error_cleanup; + } + goto out; } } - return 0; + r = 0; -error_cleanup: - db->i->dict_id = DICTIONARY_ID_NONE; - db->i->opened = 0; - if (db->i->lt) { - db->dbenv->i->ltm.release_lt(db->i->lt); - db->i->lt = NULL; +out: + if (r != 0) { + db->i->dict_id = DICTIONARY_ID_NONE; + db->i->opened = 0; + if (db->i->lt) { + db->dbenv->i->ltm.release_lt(db->i->lt); + db->i->lt = nullptr; + } } return r; } @@ -565,11 +588,12 @@ HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn); int r = 0; TOKUTXN ttxn = txn ? 
db_txn_struct_i(txn)->tokutxn : NULL; - DBT old_descriptor; bool is_db_hot_index = ((flags & DB_IS_HOT_INDEX) != 0); bool update_cmp_descriptor = ((flags & DB_UPDATE_CMP_DESCRIPTOR) != 0); - toku_init_dbt(&old_descriptor); + DBT old_descriptor_dbt; + toku_init_dbt(&old_descriptor_dbt); + if (!db_opened(db) || !descriptor || (descriptor->size>0 && !descriptor->data)){ r = EINVAL; goto cleanup; @@ -582,23 +606,12 @@ if (r != 0) { goto cleanup; } } - // TODO: use toku_clone_dbt(&old-descriptor, db->descriptor); - old_descriptor.size = db->descriptor->dbt.size; - old_descriptor.data = toku_memdup(db->descriptor->dbt.data, db->descriptor->dbt.size); - - toku_ft_change_descriptor( - db->i->ft_handle, - &old_descriptor, - descriptor, - true, - ttxn, - update_cmp_descriptor - ); + toku_clone_dbt(&old_descriptor_dbt, db->descriptor->dbt); + toku_ft_change_descriptor(db->i->ft_handle, &old_descriptor_dbt, descriptor, + true, ttxn, update_cmp_descriptor); cleanup: - if (old_descriptor.data) { - toku_free(old_descriptor.data); - } + toku_destroy_dbt(&old_descriptor_dbt); return r; } @@ -713,6 +726,15 @@ } static int +toku_db_set_memcmp_magic(DB *db, uint8_t magic) { + HANDLE_PANICKED_DB(db); + if (db_opened(db)) { + return EINVAL; + } + return toku_ft_handle_set_memcmp_magic(db->i->ft_handle, magic); +} + +static int toku_db_get_fractal_tree_info64(DB *db, uint64_t *num_blocks_allocated, uint64_t *num_blocks_in_use, uint64_t *size_allocated, uint64_t *size_in_use) { HANDLE_PANICKED_DB(db); struct ftinfo64 ftinfo; @@ -950,7 +972,7 @@ }; static int -db_get_last_key_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen UU(), bytevec val UU(), void *extra, bool lock_only) { +db_get_last_key_callback(uint32_t keylen, const void *key, uint32_t vallen UU(), const void *val UU(), void *extra, bool lock_only) { if (!lock_only) { DBT keydbt; toku_fill_dbt(&keydbt, key, keylen); @@ -1109,6 +1131,7 @@ USDB(change_compression_method); USDB(set_fanout); USDB(get_fanout); + 
USDB(set_memcmp_magic); USDB(change_fanout); USDB(set_flags); USDB(get_flags); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_db.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_db.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_db.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_db.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,8 +89,7 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_YDB_DB_H -#define TOKU_YDB_DB_H +#pragma once #include @@ -128,11 +127,11 @@ return db->i->opened != 0; } -static inline ft_compare_func -toku_db_get_compare_fun(DB* db) { - return toku_ft_get_bt_compare(db->i->ft_handle); +static inline const toku::comparator &toku_db_get_comparator(DB *db) { + return toku_ft_get_comparator(db->i->ft_handle); } +int toku_db_use_builtin_key_cmp(DB *db); int toku_db_pre_acquire_fileops_lock(DB *db, DB_TXN *txn); int toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname, uint32_t flags, int mode); int toku_db_pre_acquire_table_lock(DB *db, DB_TXN *txn); @@ -173,5 +172,3 @@ } return r; } - -#endif /* TOKU_YDB_DB_H */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_env_func.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_env_func.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_env_func.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_env_func.cc 2014-10-08 13:19:51.000000000 +0000 @@ 
-28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,17 +89,19 @@ #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #ident "$Id$" +#include + #include #include #include +#include #include #include #include -#include -#include -#include +#include +#include #include "ydb_env_func.h" diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_env_func.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_env_func.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_env_func.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_env_func.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,10 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-// This file defines the public interface to the ydb library - -#if !defined(TOKU_YDB_ENV_FUNC_H) -#define TOKU_YDB_ENV_FUNC_H extern void (*checkpoint_callback_f)(void*); extern void * checkpoint_callback_extra; @@ -105,5 +103,3 @@ // Test-only function void toku_env_increase_last_xid(DB_ENV *env, uint64_t increment); - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,11 +88,8 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -// This file defines the public interface to the ydb library - -#if !defined(TOKU_YDB_INTERFACE_H) -#define TOKU_YDB_INTERFACE_H +#pragma once // Initialize the ydb library globals. // Called when the ydb library is loaded. 
@@ -114,5 +111,3 @@ // test-only function extern "C" int toku_test_get_checkpointing_user_data_status(void) __attribute__((__visibility__("default"))); - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb-internal.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb-internal.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb-internal.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb-internal.h 2014-10-08 13:19:52.000000000 +0000 @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef YDB_INTERNAL_H -#define YDB_INTERNAL_H /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,17 +86,22 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "$Id$" #include #include -#include -#include -#include +#include +#include +#include +#include +#include #include +#include #include #include @@ -276,7 +279,7 @@ #define db_txn_struct_i(x) (&((struct __toku_db_txn_external *)x)->internal_part) struct __toku_dbc_internal { - struct ft_cursor *c; + struct ft_cursor ftcursor; DB_TXN *txn; TOKU_ISOLATION iso; struct simple_dbt skey_s,sval_s; @@ -287,12 +290,21 @@ bool rmw; }; -struct __toku_dbc_external { - struct __toku_dbc external_part; - struct __toku_dbc_internal internal_part; -}; - -#define dbc_struct_i(x) (&((struct __toku_dbc_external *)x)->internal_part) +static_assert(sizeof(__toku_dbc_internal) <= sizeof(((DBC *) nullptr)->_internal), + "__toku_dbc_internal doesn't fit in the internal portion of a DBC"); + +static inline __toku_dbc_internal *dbc_struct_i(DBC *c) { + union dbc_union { + __toku_dbc_internal *dbc_internal; + char *buf; + } u; + u.buf = c->_internal; + return u.dbc_internal; +} + +static inline struct ft_cursor *dbc_ftcursor(DBC *c) { + return &dbc_struct_i(c)->ftcursor; +} static inline int env_opened(DB_ENV *env) { @@ -312,5 +324,3 @@ void env_panic(DB_ENV * env, int cause, const char * msg); void env_note_db_opened(DB_ENV *env, DB *db); void env_note_db_closed(DB_ENV *env, DB *db); - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_lib.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_lib.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_lib.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_lib.cc 2014-10-08 13:19:52.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -98,14 +98,12 @@ #if defined(__GNUC__) -static void __attribute__((constructor)) libtokudb_init(void) { - // printf("%s:%s:%d\n", __FILE__, __FUNCTION__, __LINE__); +static void __attribute__((constructor)) libtokuft_init(void) { int r = toku_ydb_init(); assert(r==0); } -static void __attribute__((destructor)) libtokudb_destroy(void) { - // printf("%s:%s:%d\n", __FILE__, __FUNCTION__, __LINE__); +static void __attribute__((destructor)) libtokuft_destroy(void) { toku_ydb_destroy(); } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_load.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_load.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_load.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_load.h 2014-10-08 13:19:52.000000000 +0000 @@ -1,8 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: #ident "$Id$" -#ifndef YDB_LOAD_H -#define YDB_LOAD_H /* COPYING CONDITIONS NOTICE: @@ -32,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +87,7 @@ under this License. */ -#ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." +#pragma once /* ydb functions used by loader */ @@ -113,5 +111,3 @@ char * new_inames_in_env[/*N*/], /* out */ LSN *load_lsn, bool mark_as_loader); - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_row_lock.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_row_lock.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_row_lock.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_row_lock.cc 2014-10-08 13:19:51.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,6 +89,8 @@ #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #ident "$Id$" +#include + #include #include @@ -144,11 +146,11 @@ } // add a new lock range to this txn's row lock buffer - size_t old_num_bytes = ranges.buffer->get_num_bytes(); + size_t old_mem_size = ranges.buffer->total_memory_size(); ranges.buffer->append(left_key, right_key); - size_t new_num_bytes = ranges.buffer->get_num_bytes(); - invariant(new_num_bytes > old_num_bytes); - lt->get_manager()->note_mem_used(new_num_bytes - old_num_bytes); + size_t new_mem_size = ranges.buffer->total_memory_size(); + invariant(new_mem_size > old_mem_size); + lt->get_manager()->note_mem_used(new_mem_size - old_mem_size); toku_mutex_unlock(&db_txn_struct_i(txn)->txn_mutex); } @@ -201,17 +203,16 @@ // // We could theoretically steal the memory from the caller instead of copying // it, but it's simpler to have a callback API that doesn't transfer memory ownership. 
- lt->get_manager()->note_mem_released(ranges.buffer->get_num_bytes()); + lt->get_manager()->note_mem_released(ranges.buffer->total_memory_size()); ranges.buffer->destroy(); ranges.buffer->create(); - toku::range_buffer::iterator iter; + toku::range_buffer::iterator iter(&buffer); toku::range_buffer::iterator::record rec; - iter.create(&buffer); while (iter.current(&rec)) { ranges.buffer->append(rec.get_left_key(), rec.get_right_key()); iter.next(); } - lt->get_manager()->note_mem_used(ranges.buffer->get_num_bytes()); + lt->get_manager()->note_mem_used(ranges.buffer->total_memory_size()); } else { // In rare cases, we may not find the associated locktree, because we are // racing with the transaction trying to add this locktree to the lt map @@ -315,7 +316,7 @@ // release all of the locks this txn has ever successfully // acquired and stored in the range buffer for this locktree lt->release_locks(txnid, ranges->buffer); - lt->get_manager()->note_mem_released(ranges->buffer->get_num_bytes()); + lt->get_manager()->note_mem_released(ranges->buffer->total_memory_size()); ranges->buffer->destroy(); toku_free(ranges->buffer); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_row_lock.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_row_lock.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_row_lock.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_row_lock.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,8 +89,7 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_YDB_ROW_LOCK_H -#define TOKU_YDB_ROW_LOCK_H +#pragma once #include @@ -113,5 +112,3 @@ void toku_db_grab_write_lock(DB *db, DBT *key, TOKUTXN tokutxn); void toku_db_release_lt_key_ranges(DB_TXN *txn, txn_lt_key_ranges *ranges); - -#endif /* TOKU_YDB_ROW_LOCK_H */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_txn.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_txn.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_txn.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_txn.cc 2014-10-08 13:19:51.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,15 +89,18 @@ #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
#ident "$Id$" -#include +#include #include -#include -#include -#include +#include #include +#include +#include +#include + + #include "ydb-internal.h" #include "ydb_txn.h" #include "ydb_row_lock.h" @@ -205,12 +208,6 @@ return r; } -static uint32_t toku_txn_id(DB_TXN * txn) { - HANDLE_PANICKED_ENV(txn->mgrp); - abort(); - return (uint32_t) -1; -} - static int toku_txn_abort(DB_TXN * txn, TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra) { HANDLE_PANICKED_ENV(txn->mgrp); @@ -387,6 +384,44 @@ return toku_txn_get_client_id(db_txn_struct_i(txn)->tokutxn); } +static int toku_txn_discard(DB_TXN *txn, uint32_t flags) { + // check parameters + if (flags != 0) + return EINVAL; + TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn; + if (toku_txn_get_state(ttxn) != TOKUTXN_PREPARING) + return EINVAL; + + bool low_priority; + if (toku_is_big_tokutxn(ttxn)) { + low_priority = true; + toku_low_priority_multi_operation_client_lock(); + } else { + low_priority = false; + toku_multi_operation_client_lock(); + } + + // discard + toku_txn_discard_txn(ttxn); + + // complete + toku_txn_complete_txn(ttxn); + + // release locks + toku_txn_release_locks(txn); + + if (low_priority) { + toku_low_priority_multi_operation_client_unlock(); + } else { + toku_multi_operation_client_unlock(); + } + + // destroy + toku_txn_destroy(txn); + + return 0; +} + static inline void txn_func_init(DB_TXN *txn) { #define STXN(name) txn->name = locked_txn_ ## name STXN(abort); @@ -400,8 +435,8 @@ #define SUTXN(name) txn->name = toku_txn_ ## name SUTXN(prepare); SUTXN(xa_prepare); + SUTXN(discard); #undef SUTXN - txn->id = toku_txn_id; txn->id64 = toku_txn_id64; } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_txn.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_txn.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_txn.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_txn.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - 
TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,10 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -// This file defines the public interface to the ydb library - -#if !defined(TOKU_YDB_TXN_H) -#define TOKU_YDB_TXN_H // begin, commit, and abort use the multi operation lock // internally to synchronize with begin checkpoint. callers @@ -112,5 +110,3 @@ // Test-only function extern "C" void toku_increase_last_xid(DB_ENV *env, uint64_t increment) __attribute__((__visibility__("default"))); - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_write.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_write.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_write.cc 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_write.cc 2014-10-08 13:19:51.000000000 +0000 @@ -28,7 +28,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,11 +89,13 @@ #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
#ident "$Id$" +#include + #include #include "ydb-internal.h" #include "indexer.h" #include -#include +#include #include "ydb_row_lock.h" #include "ydb_write.h" #include "ydb_db.h" @@ -106,7 +108,7 @@ #endif #define STATUS_VALUE(x) ydb_write_layer_status.status[x].value.num -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ydb_write_layer_status, k, c, t, l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ydb_write_layer_status, k, c, t, l, inc) static void ydb_write_layer_status_init (void) { @@ -951,8 +953,8 @@ } else if (idx_old == old_keys.size) { cmp = +1; } else { - ft_compare_func cmpfun = toku_db_get_compare_fun(db); - cmp = cmpfun(db, curr_old_key, curr_new_key); + const toku::comparator &cmpfn = toku_db_get_comparator(db); + cmp = cmpfn(curr_old_key, curr_new_key); } bool do_del = false; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_write.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_write.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/src/ydb_write.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/src/ydb_write.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,11 +88,8 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-// This file defines the public interface to the ydb library - -#if !defined(TOKU_YDB_WRITE_H) -#define TOKU_YDB_WRITE_H +#pragma once typedef enum { YDB_LAYER_NUM_INSERTS = 0, @@ -119,7 +116,6 @@ void ydb_write_layer_get_status(YDB_WRITE_LAYER_STATUS statp); - int toku_db_del(DB *db, DB_TXN *txn, DBT *key, uint32_t flags, bool holds_mo_lock); int toku_db_put(DB *db, DB_TXN *txn, DBT *key, DBT *val, uint32_t flags, bool holds_mo_lock); int autotxn_db_del(DB* db, DB_TXN* txn, DBT* key, uint32_t flags); @@ -159,8 +155,3 @@ uint32_t num_keys, DBT_ARRAY keys[], uint32_t num_vals, DBT_ARRAY vals[] ); - - - - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/ba_replay.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/ba_replay.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/ba_replay.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/ba_replay.cc 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,679 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +// Replay a block allocator trace against different strategies and compare +// the results + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "ft/serialize/block_allocator.h" + +using std::map; +using std::set; +using std::string; +using std::vector; + +static int verbose = false; + +static void ba_replay_assert(bool pred, const char *msg, const char *line, int line_num) { + if (!pred) { + fprintf(stderr, "%s, line (#%d): %s\n", msg, line_num, line); + abort(); + } +} + +static char *trim_whitespace(char *line) { + // skip leading whitespace + while (isspace(*line)) { + line++; + } + return line; +} + +static int64_t parse_number(char **ptr, int line_num, int base) { + *ptr = trim_whitespace(*ptr); + char *line = *ptr; + + char *new_ptr; + int64_t n = strtoll(line, &new_ptr, base); + ba_replay_assert(n >= 0, "malformed trace (bad numeric token)", line, line_num); + ba_replay_assert(new_ptr > *ptr, "malformed trace (missing numeric token)", line, line_num); + *ptr = new_ptr; + return n; +} + +static uint64_t parse_uint64(char **ptr, int line_num) { + int64_t n = parse_number(ptr, line_num, 10); + // we happen to know that the uint64's we deal with will + // take less than 63 bits (they come from pointers) + return static_cast(n); +} + +static string parse_token(char **ptr, int line_num) { + *ptr = trim_whitespace(*ptr); + char *line = *ptr; + + // parse the first token, which represents the traced function + char token[64]; + int r = sscanf(*ptr, "%64s", token); + ba_replay_assert(r == 1, "malformed trace (missing string token)", line, line_num); + *ptr += strlen(token); + return string(token); +} + +static block_allocator::blockpair parse_blockpair(char **ptr, int line_num) { + *ptr = trim_whitespace(*ptr); + char *line = *ptr; + + uint64_t offset, size; + int bytes_read; + int r = sscanf(line, "[%" PRIu64 " %" PRIu64 "]%n", &offset, &size, &bytes_read); + 
ba_replay_assert(r == 2, "malformed trace (bad offset/size pair)", line, line_num); + *ptr += bytes_read; + return block_allocator::blockpair(offset, size); +} + +static char *strip_newline(char *line, bool *found) { + char *ptr = strchr(line, '\n'); + if (ptr != nullptr) { + if (found != nullptr) { + *found = true; + } + *ptr = '\0'; + } + return line; +} + +static char *read_trace_line(FILE *file) { + const int buf_size = 4096; + char buf[buf_size]; + std::stringstream ss; + while (true) { + if (fgets(buf, buf_size, file) == nullptr) { + break; + } + bool has_newline = false; + ss << strip_newline(buf, &has_newline); + if (has_newline) { + // end of the line, we're done out + break; + } + } + std::string s = ss.str(); + return s.size() ? toku_strdup(s.c_str()) : nullptr; +} + +static vector canonicalize_trace_from(FILE *file) { + // new trace, canonicalized from a raw trace + vector canonicalized_trace; + + // raw allocator id -> canonical allocator id + // + // keeps track of allocators that were created as part of the trace, + // and therefore will be part of the canonicalized trace. 
+ uint64_t allocator_id_seq_num = 0; + map allocator_ids; + + // allocated offset -> allocation seq num + // + uint64_t allocation_seq_num = 0; + static const uint64_t ASN_NONE = (uint64_t) -1; + typedef map offset_seq_map; + + // raw allocator id -> offset_seq_map that tracks its allocations + map offset_to_seq_num_maps; + + int line_num = 0; + char *line; + while ((line = read_trace_line(file)) != nullptr) { + line_num++; + char *ptr = line; + + string fn = parse_token(&ptr, line_num); + int64_t allocator_id = parse_number(&ptr, line_num, 16); + + std::stringstream ss; + if (fn.find("ba_trace_create") != string::npos) { + ba_replay_assert(allocator_ids.count(allocator_id) == 0, "corrupted trace: double create", line, line_num); + ba_replay_assert(fn == "ba_trace_create" || fn == "ba_trace_create_from_blockpairs", + "corrupted trace: bad fn", line, line_num); + + // we only convert the allocator_id to an allocator_id_seq_num + // in the canonical trace and leave the rest of the line as-is. + allocator_ids[allocator_id] = allocator_id_seq_num; + ss << fn << ' ' << allocator_id_seq_num << ' ' << trim_whitespace(ptr) << std::endl; + allocator_id_seq_num++; + + // First, read passed the reserve / alignment values. + (void) parse_uint64(&ptr, line_num); + (void) parse_uint64(&ptr, line_num); + if (fn == "ba_trace_create_from_blockpairs") { + // For each blockpair created by this traceline, add its offset to the offset seq map + // with asn ASN_NONE so that later canonicalizations of `free' know whether to write + // down the asn or the raw offset. 
+ offset_seq_map *map = &offset_to_seq_num_maps[allocator_id]; + while (*trim_whitespace(ptr) != '\0') { + const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num); + (*map)[bp.offset] = ASN_NONE; + } + } + } else { + ba_replay_assert(allocator_ids.count(allocator_id) > 0, "corrupted trace: unknown allocator", line, line_num); + uint64_t canonical_allocator_id = allocator_ids[allocator_id]; + + // this is the map that tracks allocations for this allocator + offset_seq_map *map = &offset_to_seq_num_maps[allocator_id]; + + if (fn == "ba_trace_alloc") { + const uint64_t size = parse_uint64(&ptr, line_num); + const uint64_t heat = parse_uint64(&ptr, line_num); + const uint64_t offset = parse_uint64(&ptr, line_num); + ba_replay_assert(map->count(offset) == 0, "corrupted trace: double alloc", line, line_num); + + // remember that an allocation at `offset' has the current alloc seq num + (*map)[offset] = allocation_seq_num; + + // translate `offset = alloc(size)' to `asn = alloc(size)' + ss << fn << ' ' << canonical_allocator_id << ' ' << size << ' ' << heat << ' ' << allocation_seq_num << std::endl; + allocation_seq_num++; + } else if (fn == "ba_trace_free") { + const uint64_t offset = parse_uint64(&ptr, line_num); + ba_replay_assert(map->count(offset) != 0, "corrupted trace: invalid free", line, line_num); + + // get the alloc seq num for an allcation that occurred at `offset' + const uint64_t asn = (*map)[offset]; + map->erase(offset); + + // if there's an asn, then a corresponding ba_trace_alloc occurred and we should + // write `free(asn)'. otherwise, the blockpair was initialized from create_from_blockpairs + // and we write the original offset. 
+ if (asn != ASN_NONE) { + ss << "ba_trace_free_asn" << ' ' << canonical_allocator_id << ' ' << asn << std::endl; + } else { + ss << "ba_trace_free_offset" << ' ' << canonical_allocator_id << ' ' << offset << std::endl; + } + } else if (fn == "ba_trace_destroy") { + // Remove this allocator from both maps + allocator_ids.erase(allocator_id); + offset_to_seq_num_maps.erase(allocator_id); + + // translate `destroy(ptr_id) to destroy(canonical_id)' + ss << fn << ' ' << canonical_allocator_id << ' ' << std::endl; + } else { + ba_replay_assert(false, "corrupted trace: bad fn", line, line_num); + } + } + canonicalized_trace.push_back(ss.str()); + + toku_free(line); + } + + if (allocator_ids.size() != 0) { + fprintf(stderr, "warning: leaked allocators. this might be ok if the tracing process is still running"); + } + + return canonicalized_trace; +} + +struct streaming_variance_calculator { + int64_t n_samples; + int64_t mean; + int64_t variance; + + // math credit: AoCP, Donald Knuth, '62 + void add_sample(int64_t x) { + n_samples++; + if (n_samples == 1) { + mean = x; + variance = 0; + } else { + int64_t old_mean = mean; + mean = old_mean + ((x - old_mean) / n_samples); + variance = (((n_samples - 1) * variance) + + ((x - old_mean) * (x - mean))) / n_samples; + } + } +}; + +struct canonical_trace_stats { + uint64_t n_lines_replayed; + + uint64_t n_create; + uint64_t n_create_from_blockpairs; + uint64_t n_alloc_hot; + uint64_t n_alloc_cold; + uint64_t n_free; + uint64_t n_destroy; + + struct streaming_variance_calculator alloc_hot_bytes; + struct streaming_variance_calculator alloc_cold_bytes; + + canonical_trace_stats() { + memset(this, 0, sizeof(*this)); + } +}; + +struct fragmentation_report { + TOKU_DB_FRAGMENTATION_S beginning; + TOKU_DB_FRAGMENTATION_S end; + fragmentation_report() { + memset(this, 0, sizeof(*this)); + } + void merge(const struct fragmentation_report &src_report) { + for (int i = 0; i < 2; i++) { + TOKU_DB_FRAGMENTATION_S *dst = i == 0 ? 
&beginning : &end; + const TOKU_DB_FRAGMENTATION_S *src = i == 0 ? &src_report.beginning : &src_report.end; + dst->file_size_bytes += src->file_size_bytes; + dst->data_bytes += src->data_bytes; + dst->data_blocks += src->data_blocks; + dst->checkpoint_bytes_additional += src->checkpoint_bytes_additional; + dst->checkpoint_blocks_additional += src->checkpoint_blocks_additional; + dst->unused_bytes += src->unused_bytes; + dst->unused_blocks += src->unused_blocks; + dst->largest_unused_block += src->largest_unused_block; + } + } +}; + +static void replay_canonicalized_trace(const vector &canonicalized_trace, + block_allocator::allocation_strategy strategy, + map *reports, + struct canonical_trace_stats *stats) { + // maps an allocator id to its block allocator + map allocator_map; + + // maps allocation seq num to allocated offset + map seq_num_to_offset; + + for (vector::const_iterator it = canonicalized_trace.begin(); + it != canonicalized_trace.end(); it++) { + const int line_num = stats->n_lines_replayed++; + + char *line = toku_strdup(it->c_str()); + line = strip_newline(line, nullptr); + + char *ptr = trim_whitespace(line); + + // canonical allocator id is in base 10, not 16 + string fn = parse_token(&ptr, line_num); + int64_t allocator_id = parse_number(&ptr, line_num, 10); + + if (fn.find("ba_trace_create") != string::npos) { + const uint64_t reserve_at_beginning = parse_uint64(&ptr, line_num); + const uint64_t alignment = parse_uint64(&ptr, line_num); + ba_replay_assert(allocator_map.count(allocator_id) == 0, + "corrupted canonical trace: double create", line, line_num); + + block_allocator *ba = new block_allocator(); + if (fn == "ba_trace_create") { + ba->create(reserve_at_beginning, alignment); + stats->n_create++; + } else { + ba_replay_assert(fn == "ba_trace_create_from_blockpairs", + "corrupted canonical trace: bad create fn", line, line_num); + vector pairs; + while (*trim_whitespace(ptr) != '\0') { + const block_allocator::blockpair bp = 
parse_blockpair(&ptr, line_num); + pairs.push_back(bp); + } + ba->create_from_blockpairs(reserve_at_beginning, alignment, &pairs[0], pairs.size()); + stats->n_create_from_blockpairs++; + } + ba->set_strategy(strategy); + + TOKU_DB_FRAGMENTATION_S report; + ba->get_statistics(&report); + (*reports)[allocator_id].beginning = report; + allocator_map[allocator_id] = ba; + } else { + ba_replay_assert(allocator_map.count(allocator_id) > 0, + "corrupted canonical trace: no such allocator", line, line_num); + + block_allocator *ba = allocator_map[allocator_id]; + if (fn == "ba_trace_alloc") { + // replay an `alloc' whose result will be associated with a certain asn + const uint64_t size = parse_uint64(&ptr, line_num); + const uint64_t heat = parse_uint64(&ptr, line_num); + const uint64_t asn = parse_uint64(&ptr, line_num); + ba_replay_assert(seq_num_to_offset.count(asn) == 0, + "corrupted canonical trace: double alloc (asn in use)", line, line_num); + + uint64_t offset; + ba->alloc_block(size, heat, &offset); + seq_num_to_offset[asn] = offset; + heat ? stats->n_alloc_hot++ : stats->n_alloc_cold++; + heat ? 
stats->alloc_hot_bytes.add_sample(size) : stats->alloc_cold_bytes.add_sample(size); + } else if (fn == "ba_trace_free_asn") { + // replay a `free' on a block whose offset is the result of an alloc with an asn + const uint64_t asn = parse_uint64(&ptr, line_num); + ba_replay_assert(seq_num_to_offset.count(asn) == 1, + "corrupted canonical trace: double free (asn unused)", line, line_num); + + const uint64_t offset = seq_num_to_offset[asn]; + ba->free_block(offset); + seq_num_to_offset.erase(asn); + stats->n_free++; + } else if (fn == "ba_trace_free_offset") { + // replay a `free' on a block whose offset was explicitly set during a create_from_blockpairs + const uint64_t offset = parse_uint64(&ptr, line_num); + ba->free_block(offset); + stats->n_free++; + } else if (fn == "ba_trace_destroy") { + TOKU_DB_FRAGMENTATION_S report; + ba->get_statistics(&report); + ba->destroy(); + (*reports)[allocator_id].end = report; + allocator_map.erase(allocator_id); + stats->n_destroy++; + } else { + ba_replay_assert(false, "corrupted canonical trace: bad fn", line, line_num); + } + } + + toku_free(line); + } +} + +static const char *strategy_to_cstring(block_allocator::allocation_strategy strategy) { + switch (strategy) { + case block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT: + return "first-fit"; + case block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT: + return "best-fit"; + case block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE: + return "heat-zone"; + case block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT: + return "padded-fit"; + default: + abort(); + } +} + +static block_allocator::allocation_strategy cstring_to_strategy(const char *str) { + if (strcmp(str, "first-fit") == 0) { + return block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT; + } + if (strcmp(str, "best-fit") == 0) { + return block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT; + } + if (strcmp(str, "heat-zone") == 0) { + return 
block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE; + } + if (strcmp(str, "padded-fit") != 0) { + fprintf(stderr, "bad strategy string: %s\n", str); + abort(); + } + return block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT; +} + +static void print_result_verbose(uint64_t allocator_id, + block_allocator::allocation_strategy strategy, + const struct fragmentation_report &report) { + if (report.end.data_bytes + report.end.unused_bytes + + report.beginning.data_bytes + report.beginning.unused_bytes + < 32UL * 1024 * 1024) { + printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); + return; + } + + printf(" allocator_id: %20" PRId64 "\n", allocator_id); + printf(" strategy: %20s\n", strategy_to_cstring(strategy)); + + for (int i = 0; i < 2; i++) { + const TOKU_DB_FRAGMENTATION_S *r = i == 0 ? &report.beginning : &report.end; + printf("%s\n", i == 0 ? "BEFORE" : "AFTER"); + + uint64_t total_bytes = r->data_bytes + r->unused_bytes; + uint64_t total_blocks = r->data_blocks + r->unused_blocks; + + // byte statistics + printf(" total bytes: %20" PRId64 "\n", total_bytes); + printf(" used bytes: %20" PRId64 " (%.3lf)\n", r->data_bytes, + static_cast(r->data_bytes) / total_bytes); + printf(" unused bytes: %20" PRId64 " (%.3lf)\n", r->unused_bytes, + static_cast(r->unused_bytes) / total_bytes); + + // block statistics + printf(" total blocks: %20" PRId64 "\n", total_blocks); + printf(" used blocks: %20" PRId64 " (%.3lf)\n", r->data_blocks, + static_cast(r->data_blocks) / total_blocks); + printf(" unused blocks: %20" PRId64 " (%.3lf)\n", r->unused_blocks, + static_cast(r->unused_blocks) / total_blocks); + + // misc + printf(" largest unused: %20" PRId64 "\n", r->largest_unused_block); + } +} + +static void print_result(uint64_t allocator_id, + block_allocator::allocation_strategy strategy, + const struct fragmentation_report &report) { + const TOKU_DB_FRAGMENTATION_S *beginning = &report.beginning; + const 
TOKU_DB_FRAGMENTATION_S *end = &report.end; + + uint64_t total_beginning_bytes = beginning->data_bytes + beginning->unused_bytes; + uint64_t total_end_bytes = end->data_bytes + end->unused_bytes; + if (total_end_bytes + total_beginning_bytes < 32UL * 1024 * 1024) { + if (verbose) { + printf("\n"); + printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); + } + return; + } + printf("\n"); + if (verbose) { + print_result_verbose(allocator_id, strategy, report); + } else { + printf(" %-15s: allocator %" PRId64 ", %.3lf used bytes (%.3lf before)\n", + strategy_to_cstring(strategy), allocator_id, + static_cast(report.end.data_bytes) / total_end_bytes, + static_cast(report.beginning.data_bytes) / total_beginning_bytes); + } +} + +static int only_aggregate_reports; + +static struct option getopt_options[] = { + { "verbose", no_argument, &verbose, 1 }, + { "only-aggregate-reports", no_argument, &only_aggregate_reports, 1 }, + { "include-strategy", required_argument, nullptr, 'i' }, + { "exclude-strategy", required_argument, nullptr, 'x' }, + { nullptr, 0, nullptr, 0 }, +}; + +int main(int argc, char *argv[]) { + int opt; + set candidate_strategies, excluded_strategies; + while ((opt = getopt_long(argc, argv, "", getopt_options, nullptr)) != -1) { + switch (opt) { + case 0: + break; + case 'i': + candidate_strategies.insert(cstring_to_strategy(optarg)); + break; + case 'x': + excluded_strategies.insert(cstring_to_strategy(optarg)); + break; + case '?': + default: + abort(); + }; + } + // Default to everything if nothing was explicitly included. 
+ if (candidate_strategies.empty()) { + candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT); + candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT); + candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT); + candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE); + } + // ..but remove anything that was explicitly excluded + for (set::const_iterator it = excluded_strategies.begin(); + it != excluded_strategies.end(); it++) { + candidate_strategies.erase(*it); + } + + // Run the real trace + // + // First, read the raw trace from stdin + vector canonicalized_trace = canonicalize_trace_from(stdin); + + if (!only_aggregate_reports) { + printf("\n"); + printf("Individual reports, by allocator:\n"); + } + + struct canonical_trace_stats stats; + map reports_by_strategy; + for (set::const_iterator it = candidate_strategies.begin(); + it != candidate_strategies.end(); it++) { + const block_allocator::allocation_strategy strategy(*it); + + // replay the canonicalized trace against the current strategy. + // + // we provided the allocator map so we can gather statistics later + struct canonical_trace_stats dummy_stats; + map reports; + replay_canonicalized_trace(canonicalized_trace, strategy, &reports, + // Only need to gather canonical trace stats once + it == candidate_strategies.begin() ? 
&stats : &dummy_stats); + + struct fragmentation_report aggregate_report; + memset(&aggregate_report, 0, sizeof(aggregate_report)); + for (map::iterator rp = reports.begin(); + rp != reports.end(); rp++) { + const struct fragmentation_report &report = rp->second; + aggregate_report.merge(report); + if (!only_aggregate_reports) { + print_result(rp->first, strategy, report); + } + } + reports_by_strategy[strategy] = aggregate_report; + } + + printf("\n"); + printf("Aggregate reports, by strategy:\n"); + + for (map::iterator it = reports_by_strategy.begin(); + it != reports_by_strategy.end(); it++) { + print_result(0, it->first, it->second); + } + + printf("\n"); + printf("Overall trace stats:\n"); + printf("\n"); + printf(" n_lines_played: %15" PRIu64 "\n", stats.n_lines_replayed); + printf(" n_create: %15" PRIu64 "\n", stats.n_create); + printf(" n_create_from_blockpairs: %15" PRIu64 "\n", stats.n_create_from_blockpairs); + printf(" n_alloc_hot: %15" PRIu64 "\n", stats.n_alloc_hot); + printf(" n_alloc_cold: %15" PRIu64 "\n", stats.n_alloc_cold); + printf(" n_free: %15" PRIu64 "\n", stats.n_free); + printf(" n_destroy: %15" PRIu64 "\n", stats.n_destroy); + printf("\n"); + printf(" avg_alloc_hot: %15" PRIu64 "\n", stats.alloc_hot_bytes.mean); + printf(" stddev_alloc_hot: %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_hot_bytes.variance)); + printf(" avg_alloc_cold: %15" PRIu64 "\n", stats.alloc_cold_bytes.mean); + printf(" stddev_alloc_cold: %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_cold_bytes.variance)); + printf("\n"); + + return 0; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/CMakeLists.txt mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/CMakeLists.txt --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/CMakeLists.txt 2014-08-03 12:00:33.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/CMakeLists.txt 2014-10-08 13:19:52.000000000 +0000 @@ -1,11 +1,20 @@ set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _GNU_SOURCE 
DONT_DEPRECATE_ERRNO) -set(utils tokudb_gen tokudb_load tokudb_dump) -foreach(util ${utils}) - add_executable(${util} ${util}.cc) - set_target_properties(${util} PROPERTIES - COMPILE_DEFINITIONS "IS_TDB=1;USE_TDB=1;TDB_IS_STATIC=1") - target_link_libraries(${util} ${LIBTOKUDB}_static ft_static z lzma ${LIBTOKUPORTABILITY}_static ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) +set(tools tokudb_dump tokuftdump tdb_logprint tdb-recover ftverify ba_replay) +foreach(tool ${tools}) + add_executable(${tool} ${tool}.cc) + add_dependencies(${tool} install_tdb_h) + target_link_libraries(${tool} ${LIBTOKUDB}_static ft_static z lzma ${LIBTOKUPORTABILITY}_static ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) + + add_space_separated_property(TARGET ${tool} COMPILE_FLAGS -fvisibility=hidden) +endforeach(tool) + +# link in math.h library just for this tool. +target_link_libraries(ftverify m) + +install( + TARGETS tokuftdump + DESTINATION bin + COMPONENT Server + ) - add_space_separated_property(TARGET ${util} COMPILE_FLAGS -fvisibility=hidden) -endforeach(util) diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/ftverify.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/ftverify.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/ftverify.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/ftverify.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,507 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE 
(below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. 
+ + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +//////////////////////////////////////////////////////////////////// +// ftverify - Command line tool that checks the validity of a given +// fractal tree file, one block at a time. 
+//////////////////////////////////////////////////////////////////// + +#include + +#include "portability/toku_assert.h" +#include "portability/toku_list.h" +#include "portability/toku_portability.h" + +#include "ft/serialize/block_allocator.h" +#include "ft/ft-internal.h" +#include "ft/serialize/ft-serialize.h" +#include "ft/serialize/ft_layout_version.h" +#include "ft/serialize/ft_node-serialize.h" +#include "ft/node.h" +#include "ft/serialize/rbuf.h" +#include "ft/serialize/sub_block.h" +#include "util/threadpool.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +static int num_cores = 0; // cache the number of cores for the parallelization +static struct toku_thread_pool *ft_pool = NULL; +static FILE *outf; +static double pct = 0.5; + +// Struct for reporting sub block stats. +struct verify_block_extra { + BLOCKNUM b; + int n_sub_blocks; + uint32_t header_length; + uint32_t calc_xsum; + uint32_t stored_xsum; + bool header_valid; + bool sub_blocks_valid; + struct sub_block_info *sub_block_results; +}; + +// Initialization function for the sub block stats. +static void +init_verify_block_extra(BLOCKNUM b, struct verify_block_extra *e) +{ + static const struct verify_block_extra default_vbe = + { + .b = { 0 }, + .n_sub_blocks = 0, + .header_length = 0, + .calc_xsum = 0, + .stored_xsum = 0, + .header_valid = true, + .sub_blocks_valid = true, + .sub_block_results = NULL + }; + *e = default_vbe; + e->b = b; +} + +// Reports percentage of completed blocks. 
+static void +report(int64_t blocks_done, int64_t blocks_failed, int64_t total_blocks) +{ + int64_t blocks_per_report = llrint(pct * total_blocks / 100.0); + if (blocks_per_report < 1) { + blocks_per_report = 1; + } + if (blocks_done % blocks_per_report == 0) { + double pct_actually_done = (100.0 * blocks_done) / total_blocks; + printf("% 3.3lf%% | %" PRId64 " blocks checked, %" PRId64 " bad block(s) detected\n", + pct_actually_done, blocks_done, blocks_failed); + fflush(stdout); + } +} + +// Helper function to deserialize one of the two headers for the ft +// we are checking. +static void +deserialize_headers(int fd, struct ft **h1p, struct ft **h2p) +{ + struct rbuf rb_0; + struct rbuf rb_1; + uint64_t checkpoint_count_0; + uint64_t checkpoint_count_1; + LSN checkpoint_lsn_0; + LSN checkpoint_lsn_1; + uint32_t version_0, version_1; + bool h0_acceptable = false; + bool h1_acceptable = false; + int r0, r1; + int r; + + { + toku_off_t header_0_off = 0; + r0 = deserialize_ft_from_fd_into_rbuf( + fd, + header_0_off, + &rb_0, + &checkpoint_count_0, + &checkpoint_lsn_0, + &version_0 + ); + if ((r0==0) && (checkpoint_lsn_0.lsn <= MAX_LSN.lsn)) { + h0_acceptable = true; + } + } + { + toku_off_t header_1_off = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; + r1 = deserialize_ft_from_fd_into_rbuf( + fd, + header_1_off, + &rb_1, + &checkpoint_count_1, + &checkpoint_lsn_1, + &version_1 + ); + if ((r1==0) && (checkpoint_lsn_1.lsn <= MAX_LSN.lsn)) { + h1_acceptable = true; + } + } + + // If either header is too new, the dictionary is unreadable + if (r0 == TOKUDB_DICTIONARY_TOO_NEW || r1 == TOKUDB_DICTIONARY_TOO_NEW) { + fprintf(stderr, "This dictionary was created with a version of TokuFT that is too new. 
Aborting.\n"); + abort(); + } + if (h0_acceptable) { + printf("Found dictionary header 1 with LSN %" PRIu64 "\n", checkpoint_lsn_0.lsn); + r = deserialize_ft_versioned(fd, &rb_0, h1p, version_0); + + if (r != 0) { + printf("---Header Error----\n"); + } + + } else { + *h1p = NULL; + } + if (h1_acceptable) { + printf("Found dictionary header 2 with LSN %" PRIu64 "\n", checkpoint_lsn_1.lsn); + r = deserialize_ft_versioned(fd, &rb_1, h2p, version_1); + if (r != 0) { + printf("---Header Error----\n"); + } + } else { + *h2p = NULL; + } + + if (rb_0.buf) toku_free(rb_0.buf); + if (rb_1.buf) toku_free(rb_1.buf); +} + +// Helper struct for tracking block checking progress. +struct check_block_table_extra { + int fd; + int64_t blocks_done, blocks_failed, total_blocks; + struct ft *h; +}; + +// Check non-upgraded (legacy) node. +// NOTE: These nodes have less checksumming than more +// recent nodes. This effectively means that we are +// skipping over these nodes. +static int +check_old_node(FTNODE node, struct rbuf *rb, int version) +{ + int r = 0; + read_legacy_node_info(node, rb, version); + // For version 14 nodes, advance the buffer to the end + // and verify the checksum. + if (version == FT_FIRST_LAYOUT_VERSION_WITH_END_TO_END_CHECKSUM) { + // Advance the buffer to the end. + rb->ndone = rb->size - 4; + r = check_legacy_end_checksum(rb); + } + + return r; +} + +// Read, decompress, and check the given block. +static int +check_block(BLOCKNUM blocknum, int64_t UU(blocksize), int64_t UU(address), void *extra) +{ + int r = 0; + int failure = 0; + struct check_block_table_extra *CAST_FROM_VOIDP(cbte, extra); + int fd = cbte->fd; + FT ft = cbte->h; + + struct verify_block_extra be; + init_verify_block_extra(blocknum, &be); + + // Let's read the block off of disk and fill a buffer with that + // block. + struct rbuf rb = RBUF_INITIALIZER; + read_block_from_fd_into_rbuf(fd, blocknum, ft, &rb); + + // Allocate the node. 
+ FTNODE XMALLOC(node); + + initialize_ftnode(node, blocknum); + + r = read_and_check_magic(&rb); + if (r == DB_BADFORMAT) { + printf(" Magic failed.\n"); + failure++; + } + + r = read_and_check_version(node, &rb); + if (r != 0) { + printf(" Version check failed.\n"); + failure++; + } + + int version = node->layout_version_read_from_disk; + + //////////////////////////// + // UPGRADE FORK GOES HERE // + //////////////////////////// + + // Check nodes before major layout changes in version 15. + // All newer versions should follow the same layout, for now. + // This predicate would need to be changed if the layout + // of the nodes on disk does indeed change in the future. + if (version < FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) + { + struct rbuf nrb; + // Use old decompression method for legacy nodes. + r = decompress_from_raw_block_into_rbuf(rb.buf, rb.size, &nrb, blocknum); + if (r != 0) { + failure++; + goto cleanup; + } + + // Check the end-to-end checksum. + r = check_old_node(node, &nrb, version); + if (r != 0) { + failure++; + } + goto cleanup; + } + + read_node_info(node, &rb, version); + + FTNODE_DISK_DATA ndd; + allocate_and_read_partition_offsets(node, &rb, &ndd); + + r = check_node_info_checksum(&rb); + if (r == TOKUDB_BAD_CHECKSUM) { + printf(" Node info checksum failed.\n"); + failure++; + } + + // Get the partition info sub block. + struct sub_block sb; + sub_block_init(&sb); + r = read_compressed_sub_block(&rb, &sb); + if (r != 0) { + printf(" Partition info checksum failed.\n"); + failure++; + } + + just_decompress_sub_block(&sb); + + // If we want to inspect the data inside the partitions, we need + // to call setup_ftnode_partitions(node, bfe, true) + + // TODO: Create function for this. + // Using the node info, decompress all the keys and pivots to + // detect any corruptions. 
+ for (int i = 0; i < node->n_children; ++i) { + uint32_t curr_offset = BP_START(ndd,i); + uint32_t curr_size = BP_SIZE(ndd,i); + struct rbuf curr_rbuf = {.buf = NULL, .size = 0, .ndone = 0}; + rbuf_init(&curr_rbuf, rb.buf + curr_offset, curr_size); + struct sub_block curr_sb; + sub_block_init(&curr_sb); + + r = read_compressed_sub_block(&rb, &sb); + if (r != 0) { + printf(" Compressed child partition %d checksum failed.\n", i); + failure++; + } + just_decompress_sub_block(&sb); + + r = verify_ftnode_sub_block(&sb); + if (r != 0) { + printf(" Uncompressed child partition %d checksum failed.\n", i); + failure++; + } + + // If needed, we can print row and/or pivot info at this + // point. + } + +cleanup: + // Cleanup and error incrementing. + if (failure) { + cbte->blocks_failed++; + } + + cbte->blocks_done++; + + if (node) { + toku_free(node); + } + + // Print the status of this block to the console. + report(cbte->blocks_done, cbte->blocks_failed, cbte->total_blocks); + // We need to ALWAYS return 0 if we want to continue iterating + // through the nodes in the file. + r = 0; + return r; +} + +// This calls toku_blocktable_iterate on the given block table. +// Passes our check_block() function to be called as we iterate over +// the block table. This will print any interesting failures and +// update us on our progress. +static void check_block_table(int fd, block_table *bt, struct ft *h) { + int64_t num_blocks = bt->get_blocks_in_use_unlocked(); + printf("Starting verification of checkpoint containing"); + printf(" %" PRId64 " blocks.\n", num_blocks); + fflush(stdout); + + struct check_block_table_extra extra = { .fd = fd, + .blocks_done = 0, + .blocks_failed = 0, + .total_blocks = num_blocks, + .h = h }; + int r = bt->iterate(block_table::TRANSLATION_CURRENT, + check_block, + &extra, + true, + true); + if (r != 0) { + // We can print more information here if necessary. + } + + assert(extra.blocks_done == extra.total_blocks); + printf("Finished verification. 
"); + printf(" %" PRId64 " blocks checked,", extra.blocks_done); + printf(" %" PRId64 " bad block(s) detected\n", extra.blocks_failed); + fflush(stdout); +} + +int +main(int argc, char const * const argv[]) +{ + // open the file + int r = 0; + int dictfd; + const char *dictfname, *outfname; + if (argc < 3 || argc > 4) { + fprintf(stderr, "%s: Invalid arguments.\n", argv[0]); + fprintf(stderr, "Usage: %s [report%%]\n", argv[0]); + r = EX_USAGE; + goto exit; + } + + assert(argc == 3 || argc == 4); + dictfname = argv[1]; + outfname = argv[2]; + if (argc == 4) { + set_errno(0); + pct = strtod(argv[3], NULL); + assert_zero(get_maybe_error_errno()); + assert(pct > 0.0 && pct <= 100.0); + } + + // Open the file as read-only. + dictfd = open(dictfname, O_RDONLY | O_BINARY, S_IRWXU | S_IRWXG | S_IRWXO); + if (dictfd < 0) { + perror(dictfname); + fflush(stderr); + abort(); + } + outf = fopen(outfname, "w"); + if (!outf) { + perror(outfname); + fflush(stderr); + abort(); + } + + // body of toku_ft_serialize_init(); + num_cores = toku_os_get_number_active_processors(); + r = toku_thread_pool_create(&ft_pool, num_cores); lazy_assert_zero(r); + assert_zero(r); + + // deserialize the header(s) + struct ft *h1, *h2; + deserialize_headers(dictfd, &h1, &h2); + + // walk over the block table and check blocks + if (h1) { + printf("Checking dictionary from header 1.\n"); + check_block_table(dictfd, &h1->blocktable, h1); + } + if (h2) { + printf("Checking dictionary from header 2.\n"); + check_block_table(dictfd, &h2->blocktable, h2); + } + if (h1 == NULL && h2 == NULL) { + printf("Both headers have a corruption and could not be used.\n"); + } + + toku_thread_pool_destroy(&ft_pool); +exit: + return r; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/parseTraceFiles.py mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/parseTraceFiles.py --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/parseTraceFiles.py 2014-08-03 12:00:36.000000000 +0000 +++ 
mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/parseTraceFiles.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,82 +0,0 @@ -#!/usr/bin/env python - -import sys -try: - data = open(sys.argv[1]) -except: - print "Could not open '%s'" % (sys.argv[1][0]) - exit(0) - -ts_factor = 1. -ts_prev = 0. - -threadlist = [] - -for line in data: - line = line.rstrip("\n") - vals = line.split() - [n, tid, ts, funcline] = vals[0:4] - # 'note' is all text following funcline - note = '' - for v in vals[4:-1]: - note += v+' ' - note += vals[-1] - - if ( note == 'calibrate done' ): - ts_factor = float(ts) - ts_prev - print "Factor = ", ts_factor, "("+str(ts_factor/1000000000)[0:4]+"GHz)" - - time = (float(ts)-ts_prev)/ts_factor - - # create a list of threads - # - each thread has a list of pairs, where time is the accumulated time for that note - # - search threadlist for thread_id (tid) - # - if found, search corresponding list of pairs for the current note - # - if found, update (+=) the time - # - if not found, create a new pair - # - if not found, create a new thread, entry - found_thread = 0 - for thread in threadlist: - if tid == thread[0]: - found_thread = 1 - notetimelist = thread[1] - found_note = 0 - for notetime in notetimelist: - if note == notetime[0]: - found_note = 1 - notetime[1] += time - break - if found_note == 0: - thread[1].append([note, time]) - break - if found_thread == 0: - notetime = [] - notetime.append([note, time]) - threadlist.append([tid, notetime]) - - ts_prev = float(ts) - -# trim out unneeded -for thread in threadlist: - trimlist = [] - for notetime in thread[1]: - if notetime[0][0:9] == 'calibrate': - trimlist.append(notetime) - for notetime in trimlist: - thread[1].remove(notetime) -print '' - -# sum times to calculate percent (of 100) -total_time = 0 -for thread in threadlist: - for [note, time] in thread[1]: - total_time += time - -print ' thread operation time(sec) percent' -for thread in threadlist: - print 'tid : %5s' % thread[0] - for [note, 
time] in thread[1]: - print ' %20s %f %5d' % (note, time, 100. * time/total_time) - - - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/tdb_logprint.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/tdb_logprint.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/tdb_logprint.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/tdb_logprint.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,128 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +/* Dump the log from stdin to stdout. 
*/ + +#include + +#include "ft/log_header.h" +#include "ft/logger/logger.h" + +static void newmain (int count) { + int i; + uint32_t version; + int r = toku_read_and_print_logmagic(stdin, &version); + for (i=0; i!=count; i++) { + r = toku_logprint_one_record(stdout, stdin); + if (r==EOF) break; + if (r!=0) { + fflush(stdout); + fprintf(stderr, "Problem in log err=%d\n", r); + exit(1); + } + } +} + +int main (int argc, char *const argv[]) { + int count=-1; + while (argc>1) { + if (strcmp(argv[1], "--oldcode")==0) { + fprintf(stderr,"Old code no longer works.\n"); + exit(1); + } else { + count = atoi(argv[1]); + } + argc--; argv++; + } + newmain(count); + return 0; +} + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/tdb-recover.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/tdb-recover.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/tdb-recover.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/tdb-recover.cc 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,133 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +/* Recover an env. The logs are in argv[1]. The new database is created in the cwd. 
*/ + +// Test: +// cd ../src/tests/tmpdir +// ../../../ft/recover ../dir.test_log2.c.tdb + +#include "ft/ft-ops.h" +#include "ft/logger/recover.h" + +static int recovery_main(int argc, const char *const argv[]); + +int main(int argc, const char *const argv[]) { + int r = toku_ft_layer_init(); + assert(r == 0); + r = recovery_main(argc, argv); + toku_ft_layer_destroy(); + return r; +} + +int recovery_main (int argc, const char *const argv[]) { + const char *data_dir, *log_dir; + if (argc==3) { + data_dir = argv[1]; + log_dir = argv[2]; + } else if (argc==2) { + data_dir = log_dir = argv[1]; + } else { + printf("Usage: %s [ ]\n", argv[0]); + return(1); + } + + int r = tokuft_recover(nullptr, + nullptr, + nullptr, + nullptr, + data_dir, log_dir, nullptr, nullptr, nullptr, nullptr, 0); + if (r!=0) { + fprintf(stderr, "Recovery failed\n"); + return(1); + } + return 0; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/tokudb_common_funcs.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/tokudb_common_funcs.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/tokudb_common_funcs.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/tokudb_common_funcs.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,337 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -#if !defined(TOKUDB_COMMON_FUNCS_H) -#define TOKUDB_COMMON_FUNCS_H - -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT 
(below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. 
- - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." - -#include "tokudb_common.h" - -//DB_ENV->err disabled since it does not use db_strerror -#define PRINT_ERROR(retval, ...) \ -do { \ -if (0) g.dbenv->err(g.dbenv, retval, __VA_ARGS__); \ -else { \ - fprintf(stderr, "\tIn %s:%d %s()\n", __FILE__, __LINE__, __FUNCTION__); \ - fprintf(stderr, "%s: %s:", g.progname, db_strerror(retval)); \ - fprintf(stderr, __VA_ARGS__); \ - fprintf(stderr, "\n"); \ - fflush(stderr); \ -} \ -} while (0) - -//DB_ENV->err disabled since it does not use db_strerror, errx does not exist. -#define PRINT_ERRORX(...) 
\ -do { \ -if (0) g.dbenv->err(g.dbenv, 0, __VA_ARGS__); \ -else { \ - fprintf(stderr, "\tIn %s:%d %s()\n", __FILE__, __LINE__, __FUNCTION__); \ - fprintf(stderr, "%s: ", g.progname); \ - fprintf(stderr, __VA_ARGS__); \ - fprintf(stderr, "\n"); \ - fflush(stderr); \ -} \ -} while (0) - -int strtoint32 (char* str, int32_t* num, int32_t min, int32_t max, int base); -int strtouint32 (char* str, uint32_t* num, uint32_t min, uint32_t max, int base); -int strtoint64 (char* str, int64_t* num, int64_t min, int64_t max, int base); -int strtouint64 (char* str, uint64_t* num, uint64_t min, uint64_t max, int base); - -/* - * Convert a string to an integer of type "type". - * - * - * Sets errno and returns: - * EINVAL: str == NULL, num == NULL, or string not of the form [ \t]*[+-]?[0-9]+ - * ERANGE: value out of range specified. (Range of [min, max]) - * - * *num is unchanged on error. - * Returns: - * - */ -#define DEF_STR_TO(name, type, bigtype, strtofunc, frmt) \ -int name(char* str, type* num, type min, type max, int base) \ -{ \ - char* test; \ - bigtype value; \ - \ - assert(str); \ - assert(num); \ - assert(min <= max); \ - assert(g.dbenv || g.progname); \ - assert(base == 0 || (base >= 2 && base <= 36)); \ - \ - errno = 0; \ - while (isspace(*str)) str++; \ - value = strtofunc(str, &test, base); \ - if ((*test != '\0' && *test != '\n') || test == str) { \ - PRINT_ERRORX("%s: Invalid numeric argument\n", str); \ - errno = EINVAL; \ - goto error; \ - } \ - if (errno != 0) { \ - PRINT_ERROR(errno, "%s\n", str); \ - } \ - if (value < min) { \ - PRINT_ERRORX("%s: Less than minimum value (%" frmt ")\n", str, min); \ - goto error; \ - } \ - if (value > max) { \ - PRINT_ERRORX("%s: Greater than maximum value (%" frmt ")\n", str, max); \ - goto error; \ - } \ - *num = value; \ - return EXIT_SUCCESS; \ -error: \ - return errno; \ -} - -DEF_STR_TO(strtoint32, int32_t, int64_t, strtoll, PRId32) -DEF_STR_TO(strtouint32, uint32_t, uint64_t, strtoull, PRIu32) -DEF_STR_TO(strtoint64, 
int64_t, int64_t, strtoll, PRId64) -DEF_STR_TO(strtouint64, uint64_t, uint64_t, strtoull, PRIu64) - -static inline void -outputbyte(uint8_t ch) -{ - if (g.plaintext) { - if (ch == '\\') printf("\\\\"); - else if (isprint(ch)) printf("%c", ch); - else printf("\\%02x", ch); - } - else printf("%02x", ch); -} - -static inline void -outputstring(char* str) -{ - char* p; - - for (p = str; *p != '\0'; p++) { - outputbyte((uint8_t)*p); - } -} - -static inline void -outputplaintextstring(char* str) -{ - bool old_plaintext = g.plaintext; - g.plaintext = true; - outputstring(str); - g.plaintext = old_plaintext; -} - -static inline int -hextoint(int ch) -{ - if (ch >= '0' && ch <= '9') { - return ch - '0'; - } - if (ch >= 'a' && ch <= 'z') { - return ch - 'a' + 10; - } - if (ch >= 'A' && ch <= 'Z') { - return ch - 'A' + 10; - } - return EOF; -} - -static inline int -printabletocstring(char* inputstr, char** poutputstr) -{ - char highch; - char lowch; - char nextch; - char* cstring; - - assert(inputstr); - assert(poutputstr); - assert(*poutputstr == NULL); - - cstring = (char*)toku_malloc((strlen(inputstr) + 1) * sizeof(char)); - if (cstring == NULL) { - PRINT_ERROR(errno, "printabletocstring"); - goto error; - } - - for (*poutputstr = cstring; *inputstr != '\0'; inputstr++) { - if (*inputstr == '\\') { - if ((highch = *++inputstr) == '\\') { - *cstring++ = '\\'; - continue; - } - if (highch == '\0' || (lowch = *++inputstr) == '\0') { - PRINT_ERROR(0, "unexpected end of input data or key/data pair"); - goto error; - } - if (!isxdigit(highch)) { - PRINT_ERROR(0, "Unexpected '%c' (non-hex) input.\n", highch); - goto error; - } - if (!isxdigit(lowch)) { - PRINT_ERROR(0, "Unexpected '%c' (non-hex) input.\n", lowch); - goto error; - } - nextch = (char)((hextoint(highch) << 4) | hextoint(lowch)); - if (nextch == '\0') { - /* Database names are c strings, and cannot have extra NULL terminators. 
*/ - PRINT_ERROR(0, "Unexpected '\\00' in input.\n"); - goto error; - } - *cstring++ = nextch; - } - else *cstring++ = *inputstr; - } - /* Terminate the string. */ - *cstring = '\0'; - return EXIT_SUCCESS; - -error: - PRINT_ERROR(0, "Quitting out due to errors.\n"); - return EXIT_FAILURE; -} - -static inline int -verify_library_version(void) -{ - int major; - int minor; - - db_version(&major, &minor, NULL); - if (major != DB_VERSION_MAJOR || minor != DB_VERSION_MINOR) { - PRINT_ERRORX("version %d.%d doesn't match library version %d.%d\n", - DB_VERSION_MAJOR, DB_VERSION_MINOR, major, minor); - return EXIT_FAILURE; - } - return EXIT_SUCCESS; -} - -static int last_caught = 0; - -static void catch_signal(int which_signal) { - last_caught = which_signal; - if (last_caught == 0) last_caught = SIGINT; -} - -static inline void -init_catch_signals(void) { - signal(SIGINT, catch_signal); - signal(SIGTERM, catch_signal); -#ifdef SIGHUP - signal(SIGHUP, catch_signal); -#endif -#ifdef SIGPIPE - signal(SIGPIPE, catch_signal); -#endif -} - -static inline int -caught_any_signals(void) { - return last_caught != 0; -} - -static inline void -resend_signals(void) { - if (last_caught) { - signal(last_caught, SIG_DFL); - raise(last_caught); - } -} - -#include -static int test_main (int argc, char *const argv[]); -int -main(int argc, char *const argv[]) { - int r; - r = test_main(argc, argv); - return r; -} - -#endif /* #if !defined(TOKUDB_COMMON_H) */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/tokudb_common.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/tokudb_common.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/tokudb_common.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/tokudb_common.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,109 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -#if !defined(TOKUDB_COMMON_H) -#define 
TOKUDB_COMMON_H - -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. 
- -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
- -#include -#include -#include -#include -#include -#include -#include - -#define SET_BITS(bitvector, bits) ((bitvector) |= (bits)) -#define REMOVE_BITS(bitvector, bits) ((bitvector) &= ~(bits)) -#define IS_SET_ANY(bitvector, bits) ((bitvector) & (bits)) -#define IS_SET_ALL(bitvector, bits) (((bitvector) & (bits)) == (bits)) - -#define IS_POWER_OF_2(num) ((num) > 0 && ((num) & ((num) - 1)) == 0) - -#endif /* #if !defined(TOKUDB_COMMON_H) */ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/tokudb_dump.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/tokudb_dump.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/tokudb_dump.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/tokudb_dump.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,6 +88,10 @@ #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." +#include + +#include +#include #include #include #include @@ -97,8 +101,8 @@ #include #include #include -#include -#include "tokudb_common.h" +#include +#include typedef struct { bool leadingspace; @@ -120,7 +124,245 @@ } dump_globals; dump_globals g; -#include "tokudb_common_funcs.h" + +#define SET_BITS(bitvector, bits) ((bitvector) |= (bits)) +#define REMOVE_BITS(bitvector, bits) ((bitvector) &= ~(bits)) +#define IS_SET_ANY(bitvector, bits) ((bitvector) & (bits)) +#define IS_SET_ALL(bitvector, bits) (((bitvector) & (bits)) == (bits)) + +#define IS_POWER_OF_2(num) ((num) > 0 && ((num) & ((num) - 1)) == 0) + +//DB_ENV->err disabled since it does not use db_strerror +#define PRINT_ERROR(retval, ...) 
\ +do { \ +if (0) g.dbenv->err(g.dbenv, retval, __VA_ARGS__); \ +else { \ + fprintf(stderr, "\tIn %s:%d %s()\n", __FILE__, __LINE__, __FUNCTION__); \ + fprintf(stderr, "%s: %s:", g.progname, db_strerror(retval)); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + fflush(stderr); \ +} \ +} while (0) + +//DB_ENV->err disabled since it does not use db_strerror, errx does not exist. +#define PRINT_ERRORX(...) \ +do { \ +if (0) g.dbenv->err(g.dbenv, 0, __VA_ARGS__); \ +else { \ + fprintf(stderr, "\tIn %s:%d %s()\n", __FILE__, __LINE__, __FUNCTION__); \ + fprintf(stderr, "%s: ", g.progname); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + fflush(stderr); \ +} \ +} while (0) + +int strtoint32 (char* str, int32_t* num, int32_t min, int32_t max, int base); +int strtouint32 (char* str, uint32_t* num, uint32_t min, uint32_t max, int base); +int strtoint64 (char* str, int64_t* num, int64_t min, int64_t max, int base); +int strtouint64 (char* str, uint64_t* num, uint64_t min, uint64_t max, int base); + +/* + * Convert a string to an integer of type "type". + * + * + * Sets errno and returns: + * EINVAL: str == NULL, num == NULL, or string not of the form [ \t]*[+-]?[0-9]+ + * ERANGE: value out of range specified. (Range of [min, max]) + * + * *num is unchanged on error. 
+ * Returns: + * + */ +#define DEF_STR_TO(name, type, bigtype, strtofunc, frmt) \ +int name(char* str, type* num, type min, type max, int base) \ +{ \ + char* test; \ + bigtype value; \ + \ + assert(str); \ + assert(num); \ + assert(min <= max); \ + assert(g.dbenv || g.progname); \ + assert(base == 0 || (base >= 2 && base <= 36)); \ + \ + errno = 0; \ + while (isspace(*str)) str++; \ + value = strtofunc(str, &test, base); \ + if ((*test != '\0' && *test != '\n') || test == str) { \ + PRINT_ERRORX("%s: Invalid numeric argument\n", str); \ + errno = EINVAL; \ + goto error; \ + } \ + if (errno != 0) { \ + PRINT_ERROR(errno, "%s\n", str); \ + } \ + if (value < min) { \ + PRINT_ERRORX("%s: Less than minimum value (%" frmt ")\n", str, min); \ + goto error; \ + } \ + if (value > max) { \ + PRINT_ERRORX("%s: Greater than maximum value (%" frmt ")\n", str, max); \ + goto error; \ + } \ + *num = value; \ + return EXIT_SUCCESS; \ +error: \ + return errno; \ +} + +DEF_STR_TO(strtoint32, int32_t, int64_t, strtoll, PRId32) +DEF_STR_TO(strtouint32, uint32_t, uint64_t, strtoull, PRIu32) +DEF_STR_TO(strtoint64, int64_t, int64_t, strtoll, PRId64) +DEF_STR_TO(strtouint64, uint64_t, uint64_t, strtoull, PRIu64) + +static inline void +outputbyte(uint8_t ch) +{ + if (g.plaintext) { + if (ch == '\\') printf("\\\\"); + else if (isprint(ch)) printf("%c", ch); + else printf("\\%02x", ch); + } + else printf("%02x", ch); +} + +static inline void +outputstring(char* str) +{ + char* p; + + for (p = str; *p != '\0'; p++) { + outputbyte((uint8_t)*p); + } +} + +static inline void +outputplaintextstring(char* str) +{ + bool old_plaintext = g.plaintext; + g.plaintext = true; + outputstring(str); + g.plaintext = old_plaintext; +} + +static inline int +hextoint(int ch) +{ + if (ch >= '0' && ch <= '9') { + return ch - '0'; + } + if (ch >= 'a' && ch <= 'z') { + return ch - 'a' + 10; + } + if (ch >= 'A' && ch <= 'Z') { + return ch - 'A' + 10; + } + return EOF; +} + +static inline int 
+printabletocstring(char* inputstr, char** poutputstr) +{ + char highch; + char lowch; + char nextch; + char* cstring; + + assert(inputstr); + assert(poutputstr); + assert(*poutputstr == NULL); + + cstring = (char*)toku_malloc((strlen(inputstr) + 1) * sizeof(char)); + if (cstring == NULL) { + PRINT_ERROR(errno, "printabletocstring"); + goto error; + } + + for (*poutputstr = cstring; *inputstr != '\0'; inputstr++) { + if (*inputstr == '\\') { + if ((highch = *++inputstr) == '\\') { + *cstring++ = '\\'; + continue; + } + if (highch == '\0' || (lowch = *++inputstr) == '\0') { + PRINT_ERROR(0, "unexpected end of input data or key/data pair"); + goto error; + } + if (!isxdigit(highch)) { + PRINT_ERROR(0, "Unexpected '%c' (non-hex) input.\n", highch); + goto error; + } + if (!isxdigit(lowch)) { + PRINT_ERROR(0, "Unexpected '%c' (non-hex) input.\n", lowch); + goto error; + } + nextch = (char)((hextoint(highch) << 4) | hextoint(lowch)); + if (nextch == '\0') { + /* Database names are c strings, and cannot have extra NULL terminators. */ + PRINT_ERROR(0, "Unexpected '\\00' in input.\n"); + goto error; + } + *cstring++ = nextch; + } + else *cstring++ = *inputstr; + } + /* Terminate the string. 
*/ + *cstring = '\0'; + return EXIT_SUCCESS; + +error: + PRINT_ERROR(0, "Quitting out due to errors.\n"); + return EXIT_FAILURE; +} + +static inline int +verify_library_version(void) +{ + int major; + int minor; + + db_version(&major, &minor, NULL); + if (major != DB_VERSION_MAJOR || minor != DB_VERSION_MINOR) { + PRINT_ERRORX("version %d.%d doesn't match library version %d.%d\n", + DB_VERSION_MAJOR, DB_VERSION_MINOR, major, minor); + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} + +static int last_caught = 0; + +static void catch_signal(int which_signal) { + last_caught = which_signal; + if (last_caught == 0) last_caught = SIGINT; +} + +static inline void +init_catch_signals(void) { + signal(SIGINT, catch_signal); + signal(SIGTERM, catch_signal); +#ifdef SIGHUP + signal(SIGHUP, catch_signal); +#endif +#ifdef SIGPIPE + signal(SIGPIPE, catch_signal); +#endif +} + +static inline int +caught_any_signals(void) { + return last_caught != 0; +} + +static inline void +resend_signals(void) { + if (last_caught) { + signal(last_caught, SIG_DFL); + raise(last_caught); + } +} static int usage (void); static int create_init_env(void); @@ -131,7 +373,7 @@ static int dump_header (void); static int close_database (void); -int test_main(int argc, char *const argv[]) { +int main(int argc, char *const argv[]) { int ch; int retval; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/tokudb_gen.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/tokudb_gen.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/tokudb_gen.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/tokudb_gen.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,471 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public 
License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. 
- - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." 
- -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "tokudb_common.h" - -typedef struct { - DB_ENV* dbenv; - bool plaintext; - char* progname; -} gen_globals; - -gen_globals g; -#include "tokudb_common_funcs.h" - -static int usage(void); -static void generate_keys(void); -static int get_delimiter(char* str); - - - -char dbt_delimiter = '\n'; -char sort_delimiter[3]; -uint32_t lengthmin = 0; -bool set_lengthmin = false; -uint32_t lengthlimit = 0; -bool set_lengthlimit= false; -uint64_t numkeys = 0; -bool set_numkeys = false; -bool header = true; -bool footer = true; -bool justheader = false; -bool justfooter = false; -bool outputkeys = true; -uint32_t seed = 1; -bool set_seed = false; -bool printableonly = false; -bool leadingspace = true; -bool force_unique = true; -bool dupsort = false; - -static int test_main (int argc, char *const argv[]) { - int ch; - - /* Set up the globals. */ - memset(&g, 0, sizeof(g)); - - g.progname = argv[0]; - - if (verify_library_version() != 0) goto error; - - strcpy(sort_delimiter, ""); - - while ((ch = getopt(argc, argv, "PpTo:r:m:M:n:uVhHfFd:s:DS")) != EOF) { - switch (ch) { - case ('P'): { - printableonly = true; - break; - } - case ('p'): { - g.plaintext = true; - leadingspace = true; - break; - } - case ('T'): { - g.plaintext = true; - leadingspace = false; - header = false; - footer = false; - break; - } - case ('o'): { - if (freopen(optarg, "w", stdout) == NULL) { - PRINT_ERROR(errno, "%s: reopen\n", optarg); - goto error; - } - break; - } - case ('r'): { - if (strtouint32(optarg, &seed, 0, UINT32_MAX, 10)) { - PRINT_ERRORX("%s: (-r) Random seed invalid.", optarg); - goto error; - } - set_seed = true; - break; - } - case ('m'): { - if (strtouint32(optarg, &lengthmin, 0, UINT32_MAX, 10)) { - PRINT_ERRORX("%s: (-m) Min length of keys/values invalid.", optarg); - goto error; - } - set_lengthmin = true; - break; - } - case ('M'): { - if (strtouint32(optarg, 
&lengthlimit, 1, UINT32_MAX, 10)) { - PRINT_ERRORX("%s: (-M) Limit of key/value length invalid.", optarg); - goto error; - } - set_lengthlimit = true; - break; - } - case ('n'): { - if (strtouint64(optarg, &numkeys, 0, UINT64_MAX, 10)) { - PRINT_ERRORX("%s: (-n) Number of keys to generate invalid.", optarg); - goto error; - } - set_numkeys = true; - break; - } - case ('u'): { - force_unique = false; - break; - } - case ('h'): { - header = false; - break; - } - case ('H'): { - justheader = true; - break; - } - case ('f'): { - footer = false; - break; - } - case ('F'): { - justfooter = true; - break; - } - case ('d'): { - int temp = get_delimiter(optarg); - if (temp == EOF) { - PRINT_ERRORX("%s: (-d) Key (or value) delimiter must be one character.", - optarg); - goto error; - } - if (isxdigit(temp)) { - PRINT_ERRORX("%c: (-d) Key (or value) delimiter cannot be a hex digit.", - temp); - goto error; - } - dbt_delimiter = (char)temp; - break; - } - case ('s'): { - int temp = get_delimiter(optarg); - if (temp == EOF) { - PRINT_ERRORX("%s: (-s) Sorting (Between key/value pairs) delimiter must be one character.", - optarg); - goto error; - } - if (isxdigit(temp)) { - PRINT_ERRORX("%c: (-s) Sorting (Between key/value pairs) delimiter cannot be a hex digit.", - temp); - goto error; - } - sort_delimiter[0] = (char)temp; - sort_delimiter[1] = '\0'; - break; - } - case ('V'): { - printf("%s\n", db_version(NULL, NULL, NULL)); - return EXIT_SUCCESS; - } - case 'D': { - fprintf(stderr, "Duplicates no longer supported by tokudb\n"); - return EXIT_FAILURE; - } - case 'S': { - fprintf(stderr, "Dupsort no longer supported by tokudb\n"); - return EXIT_FAILURE; - } - case ('?'): - default: { - return (usage()); - } - } - } - argc -= optind; - argv += optind; - - if (justheader && !header) { - PRINT_ERRORX("The -h and -H options may not both be specified.\n"); - goto error; - } - if (justfooter && !footer) { - PRINT_ERRORX("The -f and -F options may not both be specified.\n"); - goto 
error; - } - if (justfooter && justheader) { - PRINT_ERRORX("The -H and -F options may not both be specified.\n"); - goto error; - } - if (justfooter && header) { - PRINT_ERRORX("-F implies -h\n"); - header = false; - } - if (justheader && footer) { - PRINT_ERRORX("-H implies -f\n"); - footer = false; - } - if (!leadingspace) { - if (footer) { - PRINT_ERRORX("-p implies -f\n"); - footer = false; - } - if (header) { - PRINT_ERRORX("-p implies -h\n"); - header = false; - } - } - if (justfooter || justheader) outputkeys = false; - else if (!set_numkeys) - { - PRINT_ERRORX("Using default number of keys. (-n 1024).\n"); - numkeys = 1024; - } - if (outputkeys && !set_seed) { - PRINT_ERRORX("Using default seed. (-r 1).\n"); - seed = 1; - } - if (outputkeys && !set_lengthmin) { - PRINT_ERRORX("Using default lengthmin. (-m 0).\n"); - lengthmin = 0; - } - if (outputkeys && !set_lengthlimit) { - PRINT_ERRORX("Using default lengthlimit. (-M 1024).\n"); - lengthlimit = 1024; - } - if (outputkeys && lengthmin >= lengthlimit) { - PRINT_ERRORX("Max key size must be greater than min key size.\n"); - goto error; - } - - if (argc != 0) { - return usage(); - } - if (header) { - printf("VERSION=3\n"); - printf("format=%s\n", g.plaintext ? "print" : "bytevalue"); - printf("type=btree\n"); - // printf("db_pagesize=%d\n", 4096); //Don't write pagesize which would be useless. 
- if (dupsort) - printf("dupsort=%d\n", dupsort); - printf("HEADER=END\n"); - } - if (outputkeys) generate_keys(); - if (footer) printf("DATA=END\n"); - return EXIT_SUCCESS; - -error: - fprintf(stderr, "Quitting out due to errors.\n"); - return EXIT_FAILURE; -} - -static int usage() -{ - fprintf(stderr, - "usage: %s [-PpTuVhHfFDS] [-o output] [-r seed] [-m minsize] [-M limitsize]\n" - " %*s[-n numpairs] [-d delimiter] [-s delimiter]\n", - g.progname, (int)strlen(g.progname) + 1, ""); - return EXIT_FAILURE; -} - -static uint8_t randbyte(void) -{ - static uint32_t numsavedbits = 0; - static uint64_t savedbits = 0; - uint8_t retval; - - if (numsavedbits < 8) { - savedbits |= ((uint64_t)random()) << numsavedbits; - numsavedbits += 31; /* Random generates 31 random bits. */ - } - retval = savedbits & 0xff; - numsavedbits -= 8; - savedbits >>= 8; - return retval; -} - -/* Almost-uniformly random int from [0,limit) */ -static int32_t random_below(int32_t limit) -{ - assert(limit > 0); - return random() % limit; -} - -static void generate_keys() -{ - bool usedemptykey = false; - uint64_t numgenerated = 0; - uint64_t totalsize = 0; - char identifier[24]; /* 8 bytes * 2 = 16; 16+1=17; 17+null terminator = 18. Extra padding. */ - int length; - int i; - uint8_t ch; - - srandom(seed); - while (numgenerated < numkeys) { - numgenerated++; - - /* Each key is preceded by a space (unless using -T). */ - if (leadingspace) printf(" "); - - /* Generate a key. */ - { - /* Pick a key length. */ - length = random_below(lengthlimit - lengthmin) + lengthmin; - - /* Output 'length' random bytes. */ - for (i = 0; i < length; i++) { - do {ch = randbyte();} - while (printableonly && !isprint(ch)); - outputbyte(ch); - } - totalsize += length; - if (force_unique) { - if (length == 0 && !usedemptykey) usedemptykey = true; - else { - /* Append identifier to ensure uniqueness. 
*/ - sprintf(identifier, "x%" PRIx64, numgenerated); - outputstring(identifier); - totalsize += strlen(identifier); - } - } - } - printf("%c", dbt_delimiter); - - /* Each value is preceded by a space (unless using -T). */ - if (leadingspace) printf(" "); - - /* Generate a value. */ - { - /* Pick a key length. */ - length = random_below(lengthlimit - lengthmin) + lengthmin; - - /* Output 'length' random bytes. */ - for (i = 0; i < length; i++) { - do {ch = randbyte();} - while (printableonly && !isprint(ch)); - outputbyte(ch); - } - totalsize += length; - } - printf("%c", dbt_delimiter); - - printf("%s", sort_delimiter); - } -} - -int get_delimiter(char* str) -{ - if (strlen(str) == 2 && str[0] == '\\') { - switch (str[1]) { - case ('a'): return '\a'; - case ('b'): return '\b'; -#ifndef __ICL - case ('e'): return '\e'; -#endif - case ('f'): return '\f'; - case ('n'): return '\n'; - case ('r'): return '\r'; - case ('t'): return '\t'; - case ('v'): return '\v'; - case ('0'): return '\0'; - case ('\\'): return '\\'; - default: return EOF; - } - } - if (strlen(str) == 1) return str[0]; - return EOF; -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/tokudb_load.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/tokudb_load.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/tokudb_load.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/tokudb_load.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,977 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), 
the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. 
- - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." 
- -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "tokudb_common.h" - -typedef struct { - bool leadingspace; - bool plaintext; - bool overwritekeys; - bool header; - bool eof; - bool keys; - bool is_private; - char* progname; - char* homedir; - char* database; - char* subdatabase; - char** config_options; - int32_t version; - int exitcode; - uint64_t linenumber; - DBTYPE dbtype; - DB* db; - DB_ENV* dbenv; - struct { - char* data[2]; - } get_dbt; - struct { - char* data; - } read_header; -} load_globals; - -load_globals g; -#include "tokudb_common_funcs.h" - -static int usage (void); -static int load_database (void); -static int create_init_env(void); -static int read_header (void); -static int open_database (void); -static int read_keys (void); -static int apply_commandline_options(void); -static int close_database (void); -static int doublechararray(char** pmem, uint64_t* size); - -int test_main(int argc, char *const argv[]) { - int ch; - int retval; - char** next_config_option; - - /* Set up the globals. 
*/ - memset(&g, 0, sizeof(g)); - g.leadingspace = true; - g.overwritekeys = true; - g.dbtype = DB_UNKNOWN; - //g.dbtype = DB_BTREE; - g.progname = argv[0]; - g.header = true; - - if (verify_library_version() != 0) goto error; - - next_config_option = g.config_options = (char**) calloc(argc, sizeof(char*)); - if (next_config_option == NULL) { - PRINT_ERROR(errno, "main: calloc\n"); - goto error; - } - while ((ch = getopt(argc, argv, "c:f:h:nP:r:Tt:V")) != EOF) { - switch (ch) { - case ('c'): { - *next_config_option++ = optarg; - break; - } - case ('f'): { - if (freopen(optarg, "r", stdin) == NULL) { - fprintf(stderr, - "%s: %s: reopen: %s\n", - g.progname, optarg, strerror(errno)); - goto error; - } - break; - } - case ('h'): { - g.homedir = optarg; - break; - } - case ('n'): { - /* g.overwritekeys = false; */ - PRINT_ERRORX("-%c option not supported.\n", ch); - goto error; - } - case ('P'): { - /* Clear password. */ - memset(optarg, 0, strlen(optarg)); - PRINT_ERRORX("-%c option not supported.\n", ch); - goto error; - } - case ('r'): { - PRINT_ERRORX("-%c option not supported.\n", ch); - goto error; - } - case ('T'): { - g.plaintext = true; - g.leadingspace = false; - g.header = false; - break; - } - case ('t'): { - if (!strcmp(optarg, "btree")) { - g.dbtype = DB_BTREE; - break; - } - if (!strcmp(optarg, "hash") || !strcmp(optarg, "recno") || !strcmp(optarg, "queue")) { - fprintf(stderr, "%s: db type %s not supported.\n", g.progname, optarg); - goto error; - } - fprintf(stderr, "%s: Unrecognized db type %s.\n", g.progname, optarg); - goto error; - } - case ('V'): { - printf("%s\n", db_version(NULL, NULL, NULL)); - goto cleanup; - } - case ('?'): - default: { - g.exitcode = usage(); - goto cleanup; - } - } - } - argc -= optind; - argv += optind; - - if (argc != 1) { - g.exitcode = usage(); - goto cleanup; - } - init_catch_signals(); - - g.database = argv[0]; - if (create_init_env() != 0) goto error; - if (caught_any_signals()) goto cleanup; - while (!g.eof) { - if 
(load_database() != 0) goto error; - if (caught_any_signals()) goto cleanup; - } - if (false) { -error: - g.exitcode = EXIT_FAILURE; - fprintf(stderr, "%s: Quitting out due to errors.\n", g.progname); - } -cleanup: - if (g.dbenv && (retval = g.dbenv->close(g.dbenv, 0)) != 0) { - g.exitcode = EXIT_FAILURE; - fprintf(stderr, "%s: dbenv->close: %s\n", g.progname, db_strerror(retval)); - } - if (g.config_options) toku_free(g.config_options); - if (g.subdatabase) toku_free(g.subdatabase); - if (g.read_header.data) toku_free(g.read_header.data); - if (g.get_dbt.data[0]) toku_free(g.get_dbt.data[0]); - if (g.get_dbt.data[1]) toku_free(g.get_dbt.data[1]); - resend_signals(); - - return g.exitcode; -} - -int load_database() -{ - int retval; - - /* Create a database handle. */ - retval = db_create(&g.db, g.dbenv, 0); - if (retval != 0) { - PRINT_ERROR(retval, "db_create"); - return EXIT_FAILURE; - } - - if (g.header && read_header() != 0) goto error; - if (g.eof) goto cleanup; - if (caught_any_signals()) goto cleanup; - if (apply_commandline_options() != 0) goto error; - if (g.eof) goto cleanup; - if (caught_any_signals()) goto cleanup; - - /* - TODO: If/when supporting encryption - if (g.password && (retval = db->set_flags(db, DB_ENCRYPT))) { - PRINT_ERROR(ret, "DB->set_flags: DB_ENCRYPT"); - goto error; - } - */ - if (open_database() != 0) goto error; - if (g.eof) goto cleanup; - if (caught_any_signals()) goto cleanup; - if (read_keys() != 0) goto error; - if (g.eof) goto cleanup; - if (caught_any_signals()) goto cleanup; - - if (false) { -error: - g.exitcode = EXIT_FAILURE; - } -cleanup: - - if (close_database() != 0) g.exitcode = EXIT_FAILURE; - - return g.exitcode; -} - -int usage() -{ - fprintf(stderr, - "usage: %s [-TV] [-c name=value] [-f file] [-h home] [-t btree] db_file\n", - g.progname); - return EXIT_FAILURE; -} - -int create_init_env() -{ - int retval; - DB_ENV* dbenv; - int flags; - //TODO: Experiments to determine right cache size for tokudb, or maybe command 
line argument. - //int cache = 1 << 20; /* 1 megabyte */ - - retval = db_env_create(&dbenv, 0); - if (retval) { - fprintf(stderr, "%s: db_dbenv_create: %s\n", g.progname, db_strerror(retval)); - goto error; - } - ///TODO: UNCOMMENT/IMPLEMENT dbenv->set_errfile(dbenv, stderr); - dbenv->set_errpfx(dbenv, g.progname); - /* - TODO: If/when supporting encryption - if (g.password && (retval = dbenv->set_encrypt(dbenv, g.password, DB_ENCRYPT_AES))) { - PRINT_ERROR(retval, "set_passwd"); - goto error; - } - */ - - /* Open the dbenvironment. */ - g.is_private = false; - flags = DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL|DB_INIT_TXN|DB_INIT_LOG; ///TODO: UNCOMMENT/IMPLEMENT | DB_USE_ENVIRON; - //TODO: Transactions.. SET_BITS(flags, DB_INIT_TXN); - - /* - ///TODO: UNCOMMENT/IMPLEMENT Notes: We require DB_PRIVATE - if (!dbenv->open(dbenv, g.homedir, flags, 0)) goto success; - */ - - /* - ///TODO: UNCOMMENT/IMPLEMENT - retval = dbenv->set_cachesize(dbenv, 0, cache, 1); - if (retval) { - PRINT_ERROR(retval, "DB_ENV->set_cachesize"); - goto error; - } - */ - g.is_private = true; - //TODO: Do we want to support transactions/logging even in single-process mode? - //Maybe if the db already exists. - //If db does not exist.. makes sense not to log or have transactions - //REMOVE_BITS(flags, DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_TXN); - SET_BITS(flags, DB_CREATE | DB_PRIVATE); - - retval = dbenv->open(dbenv, g.homedir ? 
g.homedir : ".", flags, 0); - if (retval) { - PRINT_ERROR(retval, "DB_ENV->open"); - goto error; - } - g.dbenv = dbenv; - return EXIT_SUCCESS; - -error: - return EXIT_FAILURE; -} - -#define PARSE_NUMBER(match, dbfunction) \ -if (!strcmp(field, match)) { \ - if (strtoint32(value, &num, 1, INT32_MAX, 10)) goto error; \ - if ((retval = dbfunction(db, num)) != 0) goto printerror; \ - continue; \ -} -#define PARSE_UNSUPPORTEDNUMBER(match, dbfunction) \ -if (!strcmp(field, match)) { \ - if (strtoint32(value, &num, 1, INT32_MAX, 10)) goto error; \ - PRINT_ERRORX("%s option not supported.\n", field); \ - goto error; \ -} -#define PARSE_IGNOREDNUMBER(match, dbfunction) \ -if (!strcmp(field, match)) { \ - if (strtoint32(value, &num, 1, INT32_MAX, 10)) goto error; \ - PRINT_ERRORX("%s option not supported yet (ignored).\n", field); \ - continue; \ -} - -#define PARSE_FLAG(match, flag) \ -if (!strcmp(field, match)) { \ - if (strtoint32(value, &num, 0, 1, 10)) { \ - PRINT_ERRORX("%s: boolean name=value pairs require a value of 0 or 1", \ - field); \ - goto error; \ - } \ - if ((retval = db->set_flags(db, flag)) != 0) { \ - PRINT_ERROR(retval, "set_flags: %s", field); \ - goto error; \ - } \ - continue; \ -} - -#define PARSE_UNSUPPORTEDFLAG(match, flag) \ -if (!strcmp(field, match)) { \ - if (strtoint32(value, &num, 0, 1, 10)) { \ - PRINT_ERRORX("%s: boolean name=value pairs require a value of 0 or 1", \ - field); \ - goto error; \ - } \ - PRINT_ERRORX("%s option not supported.\n", field); \ - goto error; \ -} - -#define PARSE_IGNOREDFLAG(match, flag) \ -if (!strcmp(field, match)) { \ - if (strtoint32(value, &num, 0, 1, 10)) { \ - PRINT_ERRORX("%s: boolean name=value pairs require a value of 0 or 1", \ - field); \ - goto error; \ - } \ - PRINT_ERRORX("%s option not supported yet (ignored).\n", field); \ - continue; \ -} - -#define PARSE_CHAR(match, dbfunction) \ -if (!strcmp(field, match)) { \ - if (strlen(value) != 1) { \ - PRINT_ERRORX("%s=%s: Expected 1-byte value", \ - 
field, value); \ - goto error; \ - } \ - if ((retval = dbfunction(db, value[0])) != 0) { \ - goto printerror; \ - } \ - continue; \ -} - -#define PARSE_UNSUPPORTEDCHAR(match, dbfunction) \ -if (!strcmp(field, match)) { \ - if (strlen(value) != 1) { \ - PRINT_ERRORX("%s=%s: Expected 1-byte value", \ - field, value); \ - goto error; \ - } \ - PRINT_ERRORX("%s option not supported.\n", field); \ - goto error; \ -} - -#define PARSE_COMMON_CONFIGURATIONS() \ - PARSE_IGNOREDNUMBER( "bt_minkey", db->set_bt_minkey); \ - PARSE_IGNOREDFLAG( "chksum", DB_CHKSUM); \ - PARSE_IGNOREDNUMBER( "db_lorder", db->set_lorder); \ - PARSE_IGNOREDNUMBER( "db_pagesize", db->set_pagesize); \ - PARSE_UNSUPPORTEDNUMBER("extentsize", db->set_q_extentsize); \ - PARSE_UNSUPPORTEDNUMBER("h_ffactor", db->set_h_ffactor); \ - PARSE_UNSUPPORTEDNUMBER("h_nelem", db->set_h_nelem); \ - PARSE_UNSUPPORTEDNUMBER("re_len", db->set_re_len); \ - PARSE_UNSUPPORTEDCHAR( "re_pad", db->set_re_pad); \ - PARSE_UNSUPPORTEDFLAG( "recnum", DB_RECNUM); \ - PARSE_UNSUPPORTEDFLAG( "renumber", DB_RENUMBER); - - - -int read_header() -{ - static uint64_t datasize = 1 << 10; - uint64_t idx = 0; - char* field; - char* value; - int ch; - int32_t num; - int retval; - int r; - - assert(g.header); - - if (g.read_header.data == NULL && (g.read_header.data = (char*)toku_malloc(datasize * sizeof(char))) == NULL) { - PRINT_ERROR(errno, "read_header: malloc"); - goto error; - } - while (!g.eof) { - if (caught_any_signals()) goto success; - g.linenumber++; - idx = 0; - /* Read a line. */ - while (true) { - if ((ch = getchar()) == EOF) { - g.eof = true; - if (ferror(stdin)) goto formaterror; - break; - } - if (ch == '\n') break; - - g.read_header.data[idx] = (char)ch; - idx++; - - /* Ensure room exists for next character/null terminator. 
*/ - if (idx == datasize && doublechararray(&g.read_header.data, &datasize)) goto error; - } - if (idx == 0 && g.eof) goto success; - g.read_header.data[idx] = '\0'; - - field = g.read_header.data; - if ((value = strchr(g.read_header.data, '=')) == NULL) goto formaterror; - value[0] = '\0'; - value++; - - if (field[0] == '\0' || value[0] == '\0') goto formaterror; - - if (!strcmp(field, "HEADER")) break; - if (!strcmp(field, "VERSION")) { - if (strtoint32(value, &g.version, 1, INT32_MAX, 10)) goto error; - if (g.version != 3) { - PRINT_ERRORX("line %" PRIu64 ": VERSION %d is unsupported", g.linenumber, g.version); - goto error; - } - continue; - } - if (!strcmp(field, "format")) { - if (!strcmp(value, "bytevalue")) { - g.plaintext = false; - continue; - } - if (!strcmp(value, "print")) { - g.plaintext = true; - continue; - } - goto formaterror; - } - if (!strcmp(field, "type")) { - if (!strcmp(value, "btree")) { - g.dbtype = DB_BTREE; - continue; - } - if (!strcmp(value, "hash") || strcmp(value, "recno") || strcmp(value, "queue")) { - PRINT_ERRORX("db type %s not supported.\n", value); - goto error; - } - PRINT_ERRORX("line %" PRIu64 ": unknown type %s", g.linenumber, value); - goto error; - } - if (!strcmp(field, "database") || !strcmp(field, "subdatabase")) { - if (g.subdatabase != NULL) { - toku_free(g.subdatabase); - g.subdatabase = NULL; - } - if ((retval = printabletocstring(value, &g.subdatabase))) { - PRINT_ERROR(retval, "error reading db name"); - goto error; - } - continue; - } - if (!strcmp(field, "keys")) { - int32_t temp; - if (strtoint32(value, &temp, 0, 1, 10)) { - PRINT_ERROR(0, - "%s: boolean name=value pairs require a value of 0 or 1", - field); - goto error; - } - g.keys = (bool)temp; - if (!g.keys) { - PRINT_ERRORX("keys=0 not supported"); - goto error; - } - continue; - } - PARSE_COMMON_CONFIGURATIONS(); - - PRINT_ERRORX("unknown input-file header configuration keyword \"%s\"", field); - goto error; - } -success: - r = 0; - - if (false) { 
-formaterror: - r = EXIT_FAILURE; - PRINT_ERRORX("line %" PRIu64 ": unexpected format", g.linenumber); - } - if (false) { -error: - r = EXIT_FAILURE; - } - return r; -} - -int apply_commandline_options() -{ - int r = -1; - unsigned idx; - char* field; - char* value = NULL; - int32_t num; - int retval; - - for (idx = 0; g.config_options[idx]; idx++) { - if (value) { - /* Restore the field=value format. */ - value[-1] = '='; - value = NULL; - } - field = g.config_options[idx]; - - if ((value = strchr(field, '=')) == NULL) { - PRINT_ERRORX("command-line configuration uses name=value format"); - goto error; - } - value[0] = '\0'; - value++; - - if (field[0] == '\0' || value[0] == '\0') { - PRINT_ERRORX("command-line configuration uses name=value format"); - goto error; - } - - if (!strcmp(field, "database") || !strcmp(field, "subdatabase")) { - if (g.subdatabase != NULL) { - toku_free(g.subdatabase); - g.subdatabase = NULL; - } - if ((retval = printabletocstring(value, &g.subdatabase))) { - PRINT_ERROR(retval, "error reading db name"); - goto error; - } - continue; - } - if (!strcmp(field, "keys")) { - int32_t temp; - if (strtoint32(value, &temp, 0, 1, 10)) { - PRINT_ERROR(0, - "%s: boolean name=value pairs require a value of 0 or 1", - field); - goto error; - } - g.keys = (bool)temp; - if (!g.keys) { - PRINT_ERRORX("keys=0 not supported"); - goto error; - } - continue; - } - PARSE_COMMON_CONFIGURATIONS(); - - PRINT_ERRORX("unknown input-file header configuration keyword \"%s\"", field); - goto error; - } - if (value) { - /* Restore the field=value format. */ - value[-1] = '='; - value = NULL; - } - r = 0; - -error: - return r; -} - -int open_database() -{ - DB* db = g.db; - int retval; - - int open_flags = 0; - //TODO: Transaction auto commit stuff - //if (TXN_ON(dbenv)) SET_BITS(open_flags, DB_AUTO_COMMIT); - - //Try to see if it exists first. 
- retval = db->open(db, NULL, g.database, g.subdatabase, g.dbtype, open_flags, 0666); - if (retval == ENOENT) { - //Does not exist and we did not specify a type. - //TODO: Uncomment when DB_UNKNOWN + db->get_type are implemented. - /* - if (g.dbtype == DB_UNKNOWN) { - PRINT_ERRORX("no database type specified"); - goto error; - }*/ - SET_BITS(open_flags, DB_CREATE); - //Try creating it. - retval = db->open(db, NULL, g.database, g.subdatabase, g.dbtype, open_flags, 0666); - } - if (retval != 0) { - PRINT_ERROR(retval, "DB->open: %s", g.database); - goto error; - } - //TODO: Uncomment when DB_UNKNOWN + db->get_type are implemented. - /* - if ((retval = db->get_type(db, &opened_type)) != 0) { - PRINT_ERROR(retval, "DB->get_type"); - goto error; - } - if (opened_type != DB_BTREE) { - PRINT_ERRORX("Unsupported db type %d\n", opened_type); - goto error; - } - if (g.dbtype != DB_UNKNOWN && opened_type != g.dbtype) { - PRINT_ERRORX("DBTYPE %d does not match opened DBTYPE %d.\n", g.dbtype, opened_type); - goto error; - }*/ - return EXIT_SUCCESS; -error: - fprintf(stderr, "Quitting out due to errors.\n"); - return EXIT_FAILURE; -} - -int doublechararray(char** pmem, uint64_t* size) -{ - assert(pmem); - assert(size); - assert(IS_POWER_OF_2(*size)); - - *size <<= 1; - if (*size == 0) { - /* Overflowed uint64_t. */ - PRINT_ERRORX("Line %" PRIu64 ": Line too long.\n", g.linenumber); - goto error; - } - if ((*pmem = (char*)toku_realloc(*pmem, *size)) == NULL) { - PRINT_ERROR(errno, "doublechararray: realloc"); - goto error; - } - return EXIT_SUCCESS; - -error: - return EXIT_FAILURE; -} - -static int get_dbt(DBT* pdbt) -{ - /* Need to store a key and value. */ - static uint64_t datasize[2] = {1 << 10, 1 << 10}; - static int which = 0; - char* datum; - uint64_t idx = 0; - int highch; - int lowch; - - /* *pdbt should have been memset to 0 before being called. 
*/ - which = 1 - which; - if (g.get_dbt.data[which] == NULL && - (g.get_dbt.data[which] = (char*)toku_malloc(datasize[which] * sizeof(char))) == NULL) { - PRINT_ERROR(errno, "get_dbt: malloc"); - goto error; - } - - datum = g.get_dbt.data[which]; - - if (g.plaintext) { - int firstch; - int nextch = EOF; - - for (firstch = getchar(); firstch != EOF; firstch = getchar()) { - switch (firstch) { - case ('\n'): { - /* Done reading this key/value. */ - nextch = EOF; - break; - } - case ('\\'): { - /* Escaped \ or two hex digits. */ - highch = getchar(); - if (highch == '\\') { - nextch = '\\'; - break; - } - else if (highch == EOF) { - g.eof = true; - PRINT_ERRORX("Line %" PRIu64 ": Unexpected end of file (2 hex digits per byte).\n", g.linenumber); - goto error; - } - else if (!isxdigit(highch)) { - PRINT_ERRORX("Line %" PRIu64 ": Unexpected '%c' (non-hex) input.\n", g.linenumber, highch); - goto error; - } - - lowch = getchar(); - if (lowch == EOF) { - g.eof = true; - PRINT_ERRORX("Line %" PRIu64 ": Unexpected end of file (2 hex digits per byte).\n", g.linenumber); - goto error; - } - else if (!isxdigit(lowch)) { - PRINT_ERRORX("Line %" PRIu64 ": Unexpected '%c' (non-hex) input.\n", g.linenumber, lowch); - goto error; - } - - nextch = (hextoint(highch) << 4) | hextoint(lowch); - break; - } - default: { - if (isprint(firstch)) { - nextch = firstch; - break; - } - PRINT_ERRORX("Line %" PRIu64 ": Nonprintable character found.", g.linenumber); - goto error; - } - } - if (nextch == EOF) { - break; - } - if (idx == datasize[which]) { - /* Overflow, double the memory. */ - if (doublechararray(&g.get_dbt.data[which], &datasize[which])) goto error; - datum = g.get_dbt.data[which]; - } - datum[idx] = (char)nextch; - idx++; - } - if (firstch == EOF) g.eof = true; - } - else { - for (highch = getchar(); highch != EOF; highch = getchar()) { - if (highch == '\n') { - /* Done reading this key/value. 
*/ - break; - } - - lowch = getchar(); - if (lowch == EOF) { - g.eof = true; - PRINT_ERRORX("Line %" PRIu64 ": Unexpected end of file (2 hex digits per byte).\n", g.linenumber); - goto error; - } - if (!isxdigit(highch)) { - PRINT_ERRORX("Line %" PRIu64 ": Unexpected '%c' (non-hex) input.\n", g.linenumber, highch); - goto error; - } - if (!isxdigit(lowch)) { - PRINT_ERRORX("Line %" PRIu64 ": Unexpected '%c' (non-hex) input.\n", g.linenumber, lowch); - goto error; - } - if (idx == datasize[which]) { - /* Overflow, double the memory. */ - if (doublechararray(&g.get_dbt.data[which], &datasize[which])) goto error; - datum = g.get_dbt.data[which]; - } - datum[idx] = (char)((hextoint(highch) << 4) | hextoint(lowch)); - idx++; - } - if (highch == EOF) g.eof = true; - } - - /* Done reading. */ - pdbt->size = idx; - pdbt->data = (void*)datum; - return EXIT_SUCCESS; -error: - return EXIT_FAILURE; -} - -static int insert_pair(DBT* key, DBT* data) -{ - DB* db = g.db; - - int retval = db->put(db, NULL, key, data, g.overwritekeys ? 0 : DB_NOOVERWRITE); - if (retval != 0) { - //TODO: Check for transaction failures/etc.. retry if necessary. - PRINT_ERROR(retval, "DB->put"); - if (!(retval == DB_KEYEXIST && g.overwritekeys)) goto error; - } - return EXIT_SUCCESS; -error: - return EXIT_FAILURE; -} - -int read_keys() -{ - DBT key; - DBT data; - int spacech; - - char footer[sizeof("ATA=END\n")]; - - memset(&key, 0, sizeof(key)); - memset(&data, 0, sizeof(data)); - - - //TODO: Start transaction/end transaction/abort/retry/etc - - if (!g.leadingspace) { - assert(g.plaintext); - while (!g.eof) { - if (caught_any_signals()) goto success; - g.linenumber++; - if (get_dbt(&key) != 0) goto error; - if (g.eof) { - if (key.size == 0) { - //Last entry had no newline. Done. 
- break; - } - PRINT_ERRORX("Line %" PRIu64 ": Key exists but value missing.", g.linenumber); - goto error; - } - g.linenumber++; - if (get_dbt(&data) != 0) goto error; - if (insert_pair(&key, &data) != 0) goto error; - } - } - else while (!g.eof) { - if (caught_any_signals()) goto success; - g.linenumber++; - spacech = getchar(); - switch (spacech) { - case (EOF): { - /* Done. */ - g.eof = true; - goto success; - } - case (' '): { - /* Time to read a key. */ - if (get_dbt(&key) != 0) goto error; - break; - } - case ('D'): { - if (fgets(footer, sizeof("ATA=END\n"), stdin) != NULL && - (!strcmp(footer, "ATA=END") || !strcmp(footer, "ATA=END\n"))) - { - goto success; - } - goto unexpectedinput; - } - default: { -unexpectedinput: - PRINT_ERRORX("Line %" PRIu64 ": Unexpected input while reading key.\n", g.linenumber); - goto error; - } - } - - if (g.eof) { - PRINT_ERRORX("Line %" PRIu64 ": Key exists but value missing.", g.linenumber); - goto error; - } - g.linenumber++; - spacech = getchar(); - switch (spacech) { - case (EOF): { - g.eof = true; - PRINT_ERRORX("Line %" PRIu64 ": Unexpected end of file while reading value.\n", g.linenumber); - goto error; - } - case (' '): { - /* Time to read a key. 
*/ - if (get_dbt(&data) != 0) goto error; - break; - } - default: { - PRINT_ERRORX("Line %" PRIu64 ": Unexpected input while reading value.\n", g.linenumber); - goto error; - } - } - if (insert_pair(&key, &data) != 0) goto error; - } -success: - return EXIT_SUCCESS; -error: - return EXIT_FAILURE; -} - -int close_database() -{ - DB* db = g.db; - int retval; - - assert(db); - if ((retval = db->close(db, 0)) != 0) { - PRINT_ERROR(retval, "DB->close"); - goto error; - } - return EXIT_SUCCESS; -error: - return EXIT_FAILURE; -} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/tokuftdump.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/tokuftdump.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/tools/tokuftdump.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/tools/tokuftdump.cc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,726 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +// Dump a fractal tree file + +#include + +#include +#include +#include +#include +#include +#include + +#include "ft/serialize/block_table.h" +#include "ft/cachetable/cachetable.h" +#include "ft/ft.h" +#include "ft/ft-internal.h" +#include "ft/serialize/ft-serialize.h" +#include "ft/serialize/ft_node-serialize.h" +#include "ft/node.h" + +static int do_dump_data = 1; +static int do_interactive = 0; +static int do_header = 0; +static int do_fragmentation = 0; +static int do_garbage = 0; +static int do_translation_table = 0; +static int do_rootnode = 0; +static int do_node = 0; +static BLOCKNUM do_node_num; +static int do_tsv = 0; + +static const char *arg0; +static const char *fname; + +static void format_time(const uint64_t time_int, char *buf) { + time_t timer = (time_t) time_int; + ctime_r(&timer, buf); + assert(buf[24] == '\n'); + buf[24] = 0; +} + +static void print_item(const void *val, uint32_t len) { + printf("\""); + uint32_t i; + for (i=0; idbt.size); + simple_hex_dump((unsigned char*) d->dbt.data, d->dbt.size); + printf("\n"); +} + +static void open_header(int fd, FT *header, CACHEFILE cf) { + FT ft = NULL; + int r; + r = toku_deserialize_ft_from (fd, MAX_LSN, &ft); + if (r != 0) { + fprintf(stderr, "%s: can not deserialize from %s error %d\n", arg0, fname, r); + exit(1); + } + assert_zero(r); + ft->cf = cf; + *header = ft; +} + +static void dump_header(FT ft) { + char timestr[26]; + printf("ft:\n"); + printf(" layout_version=%d\n", ft->h->layout_version); + printf(" layout_version_original=%d\n", ft->h->layout_version_original); + printf(" layout_version_read_from_disk=%d\n", ft->layout_version_read_from_disk); + printf(" build_id=%d\n", ft->h->build_id); + printf(" build_id_original=%d\n", ft->h->build_id_original); + format_time(ft->h->time_of_creation, timestr); + printf(" time_of_creation= %" PRIu64 " %s\n", ft->h->time_of_creation, timestr); + format_time(ft->h->time_of_last_modification, timestr); + printf(" time_of_last_modification=%" PRIu64 " 
%s\n", ft->h->time_of_last_modification, timestr); + printf(" dirty=%d\n", ft->h->dirty); + printf(" checkpoint_count=%" PRId64 "\n", ft->h->checkpoint_count); + printf(" checkpoint_lsn=%" PRId64 "\n", ft->h->checkpoint_lsn.lsn); + printf(" nodesize=%u\n", ft->h->nodesize); + printf(" basementnodesize=%u\n", ft->h->basementnodesize); + printf(" compression_method=%u\n", (unsigned) ft->h->compression_method); + printf(" unnamed_root=%" PRId64 "\n", ft->h->root_blocknum.b); + printf(" flags=%u\n", ft->h->flags); + dump_descriptor(&ft->descriptor); + printf(" estimated numrows=%" PRId64 "\n", ft->in_memory_stats.numrows); + printf(" estimated numbytes=%" PRId64 "\n", ft->in_memory_stats.numbytes); +} + +static int print_le(const void* key, const uint32_t keylen, const LEAFENTRY &le, const uint32_t idx UU(), void *const ai UU()) { + print_klpair(stdout, key, keylen, le); + printf("\n"); + return 0; +} + +static void dump_node(int fd, BLOCKNUM blocknum, FT ft) { + FTNODE n; + FTNODE_DISK_DATA ndd = nullptr; + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + int r = toku_deserialize_ftnode_from (fd, blocknum, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); + assert_zero(r); + assert(n!=0); + printf("ftnode\n"); + DISKOFF disksize, diskoffset; + ft->blocktable.translate_blocknum_to_offset_size(blocknum, &diskoffset, &disksize); + printf(" diskoffset =%" PRId64 "\n", diskoffset); + printf(" disksize =%" PRId64 "\n", disksize); + printf(" serialize_size =%u\n", toku_serialize_ftnode_size(n)); + printf(" flags =%u\n", n->flags); + printf(" blocknum=%" PRId64 "\n", n->blocknum.b); + //printf(" log_lsn =%lld\n", n->log_lsn.lsn); // The log_lsn is a memory-only value. 
+ printf(" height =%d\n", n->height); + printf(" layout_version=%d\n", n->layout_version); + printf(" layout_version_original=%d\n", n->layout_version_original); + printf(" layout_version_read_from_disk=%d\n", n->layout_version_read_from_disk); + printf(" build_id=%d\n", n->build_id); + printf(" max_msn_applied_to_node_on_disk=%" PRId64 " (0x%" PRIx64 ")\n", n->max_msn_applied_to_node_on_disk.msn, n->max_msn_applied_to_node_on_disk.msn); + printf(" io time %lf decompress time %lf deserialize time %lf\n", + tokutime_to_seconds(bfe.io_time), + tokutime_to_seconds(bfe.decompress_time), + tokutime_to_seconds(bfe.deserialize_time)); + + printf(" n_children=%d\n", n->n_children); + printf(" pivotkeys.total_size()=%u\n", (unsigned) n->pivotkeys.total_size()); + + printf(" pivots:\n"); + for (int i=0; in_children-1; i++) { + const DBT piv = n->pivotkeys.get_pivot(i); + printf(" pivot %2d:", i); + if (n->flags) + printf(" flags=%x ", n->flags); + print_item(piv.data, piv.size); + printf("\n"); + } + printf(" children:\n"); + for (int i=0; in_children; i++) { + printf(" child %d: ", i); + if (n->height > 0) { + printf("%" PRId64 "\n", BP_BLOCKNUM(n, i).b); + NONLEAF_CHILDINFO bnc = BNC(n, i); + unsigned int n_bytes = toku_bnc_nbytesinbuf(bnc); + int n_entries = toku_bnc_n_entries(bnc); + if (n_bytes > 0 || n_entries > 0) { + printf(" buffer contains %u bytes (%d items)\n", n_bytes, n_entries); + } + if (do_dump_data) { + struct dump_data_fn { + int operator()(const ft_msg &msg, bool UU(is_fresh)) { + enum ft_msg_type type = (enum ft_msg_type) msg.type(); + MSN msn = msg.msn(); + XIDS xids = msg.xids(); + const void *key = msg.kdbt()->data; + const void *data = msg.vdbt()->data; + uint32_t keylen = msg.kdbt()->size; + uint32_t datalen = msg.vdbt()->size; + printf(" msn=%" PRIu64 " (0x%" PRIx64 ") ", msn.msn, msn.msn); + printf(" TYPE="); + switch (type) { + case FT_NONE: printf("NONE"); goto ok; + case FT_INSERT: printf("INSERT"); goto ok; + case FT_INSERT_NO_OVERWRITE: 
printf("INSERT_NO_OVERWRITE"); goto ok; + case FT_DELETE_ANY: printf("DELETE_ANY"); goto ok; + case FT_ABORT_ANY: printf("ABORT_ANY"); goto ok; + case FT_COMMIT_ANY: printf("COMMIT_ANY"); goto ok; + case FT_COMMIT_BROADCAST_ALL: printf("COMMIT_BROADCAST_ALL"); goto ok; + case FT_COMMIT_BROADCAST_TXN: printf("COMMIT_BROADCAST_TXN"); goto ok; + case FT_ABORT_BROADCAST_TXN: printf("ABORT_BROADCAST_TXN"); goto ok; + case FT_OPTIMIZE: printf("OPTIMIZE"); goto ok; + case FT_OPTIMIZE_FOR_UPGRADE: printf("OPTIMIZE_FOR_UPGRADE"); goto ok; + case FT_UPDATE: printf("UPDATE"); goto ok; + case FT_UPDATE_BROADCAST_ALL: printf("UPDATE_BROADCAST_ALL"); goto ok; + } + printf("HUH?"); +ok: + printf(" xid="); + toku_xids_fprintf(stdout, xids); + printf(" "); + print_item(key, keylen); + if (datalen>0) { + printf(" "); + print_item(data, datalen); + } + printf("\n"); + return 0; + } + } dump_fn; + bnc->msg_buffer.iterate(dump_fn); + } + } else { + printf(" n_bytes_in_buffer= %" PRIu64 "", BLB_DATA(n, i)->get_disk_size()); + printf(" items_in_buffer=%u\n", BLB_DATA(n, i)->num_klpairs()); + if (do_dump_data) { + BLB_DATA(n, i)->iterate(NULL); + } + } + } + toku_ftnode_free(&n); + toku_free(ndd); +} + +static void dump_block_translation(FT ft, uint64_t offset) { + ft->blocktable.blocknum_dump_translation(make_blocknum(offset)); +} + +static void dump_fragmentation(int UU(f), FT ft, int tsv) { + int64_t used_space; + int64_t total_space; + ft->blocktable.internal_fragmentation(&total_space, &used_space); + int64_t fragsizes = total_space - used_space; + + if (tsv) { + printf("%" PRId64 "\t%" PRId64 "\t%" PRId64 "\t%.1f\n", used_space, total_space, fragsizes, + 100. * ((double)fragsizes / (double)(total_space))); + } else { + printf("used_size\t%" PRId64 "\n", used_space); + printf("total_size\t%" PRId64 "\n", total_space); + printf("fragsizes\t%" PRId64 "\n", fragsizes); + printf("fragmentation\t%.1f\n", 100. 
* ((double)fragsizes / (double)(total_space))); + } +} + +typedef struct { + int fd; + FT ft; + uint64_t blocksizes; + uint64_t leafsizes; + uint64_t leafblocks; +} frag_help_extra; + +static int nodesizes_helper(BLOCKNUM b, int64_t size, int64_t UU(address), void *extra) { + frag_help_extra *CAST_FROM_VOIDP(info, extra); + FTNODE n; + FTNODE_DISK_DATA ndd = NULL; + ftnode_fetch_extra bfe; + bfe.create_for_full_read(info->ft); + int r = toku_deserialize_ftnode_from(info->fd, b, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); + if (r==0) { + info->blocksizes += size; + if (n->height == 0) { + info->leafsizes += size; + info->leafblocks++; + } + toku_ftnode_free(&n); + toku_free(ndd); + } + return 0; +} + +static void dump_nodesizes(int fd, FT ft) { + frag_help_extra info; + memset(&info, 0, sizeof(info)); + info.fd = fd; + info.ft = ft; + ft->blocktable.iterate(block_table::TRANSLATION_CHECKPOINTED, + nodesizes_helper, &info, true, true); + printf("leafblocks\t%" PRIu64 "\n", info.leafblocks); + printf("blocksizes\t%" PRIu64 "\n", info.blocksizes); + printf("leafsizes\t%" PRIu64 "\n", info.leafsizes); +} + +static void dump_garbage_stats(int fd, FT ft) { + assert(fd == toku_cachefile_get_fd(ft->cf)); + uint64_t total_space = 0; + uint64_t used_space = 0; + toku_ft_get_garbage(ft, &total_space, &used_space); + printf("garbage total size\t%" PRIu64 "\n", total_space); + printf("garbage used size\t%" PRIu64 "\n", used_space); +} + +typedef struct __dump_node_extra { + int fd; + FT ft; +} dump_node_extra; + +static int dump_node_wrapper(BLOCKNUM b, int64_t UU(size), int64_t UU(address), void *extra) { + dump_node_extra *CAST_FROM_VOIDP(info, extra); + dump_node(info->fd, b, info->ft); + return 0; +} + +static uint32_t get_unaligned_uint32(unsigned char *p) { + uint32_t n; + memcpy(&n, p, sizeof n); + return n; +} + +struct dump_sub_block { + uint32_t compressed_size; + uint32_t uncompressed_size; + uint32_t xsum; +}; + +static void 
sub_block_deserialize(struct dump_sub_block *sb, unsigned char *sub_block_header) { + sb->compressed_size = toku_dtoh32(get_unaligned_uint32(sub_block_header+0)); + sb->uncompressed_size = toku_dtoh32(get_unaligned_uint32(sub_block_header+4)); + sb->xsum = toku_dtoh32(get_unaligned_uint32(sub_block_header+8)); +} + +static void verify_block(unsigned char *cp, uint64_t file_offset, uint64_t size) { + // verify the header checksum + const size_t node_header = 8 + sizeof (uint32_t) + sizeof (uint32_t) + sizeof (uint32_t); + + printf("%.8s layout_version=%u %u build=%d\n", cp, get_unaligned_uint32(cp+8), get_unaligned_uint32(cp+12), get_unaligned_uint32(cp+16)); + + unsigned char *sub_block_header = &cp[node_header]; + uint32_t n_sub_blocks = toku_dtoh32(get_unaligned_uint32(&sub_block_header[0])); + uint32_t header_length = node_header + n_sub_blocks * sizeof (struct dump_sub_block); + header_length += sizeof (uint32_t); // CRC + if (header_length > size) { + printf("header length too big: %u\n", header_length); + return; + } + uint32_t header_xsum = toku_x1764_memory(cp, header_length); + uint32_t expected_xsum = toku_dtoh32(get_unaligned_uint32(&cp[header_length])); + if (header_xsum != expected_xsum) { + printf("header checksum failed: %u %u\n", header_xsum, expected_xsum); + return; + } + + // deserialize the sub block header + struct dump_sub_block sub_block[n_sub_blocks]; + sub_block_header += sizeof (uint32_t); + for (uint32_t i = 0 ; i < n_sub_blocks; i++) { + sub_block_deserialize(&sub_block[i], sub_block_header); + sub_block_header += sizeof (struct dump_sub_block); + } + + // verify the sub block header + uint32_t offset = header_length + 4; + for (uint32_t i = 0 ; i < n_sub_blocks; i++) { + uint32_t xsum = toku_x1764_memory(cp + offset, sub_block[i].compressed_size); + printf("%u: %u %u %u", i, sub_block[i].compressed_size, sub_block[i].uncompressed_size, sub_block[i].xsum); + if (xsum != sub_block[i].xsum) + printf(" fail %u offset %" PRIu64, xsum, 
file_offset + offset); + printf("\n"); + offset += sub_block[i].compressed_size; + } + if (offset != size) + printf("offset %u expected %" PRIu64 "\n", offset, size); +} + +static void dump_block(int fd, BLOCKNUM blocknum, FT ft) { + DISKOFF offset, size; + ft->blocktable.translate_blocknum_to_offset_size(blocknum, &offset, &size); + printf("%" PRId64 " at %" PRId64 " size %" PRId64 "\n", blocknum.b, offset, size); + + unsigned char *CAST_FROM_VOIDP(vp, toku_malloc(size)); + uint64_t r = pread(fd, vp, size, offset); + if (r == (uint64_t)size) { + verify_block(vp, offset, size); + } + toku_free(vp); +} + +static void dump_file(int fd, uint64_t offset, uint64_t size, FILE *outfp) { + unsigned char *XMALLOC_N(size, vp); + uint64_t r = pread(fd, vp, size, offset); + if (r == size) { + if (outfp == stdout) { + hex_dump(vp, offset, size); + } else { + size_t wrote = fwrite(vp, size, 1, outfp); + assert(wrote == 1); + } + } + toku_free(vp); +} + +static void set_file(int fd, uint64_t offset, unsigned char newc) { + toku_os_pwrite(fd, &newc, sizeof newc, offset); +} + +static int readline(char *line, int maxline) { + int i = 0; + int c; + while ((c = getchar()) != EOF && c != '\n' && i < maxline) { + line[i++] = (char)c; + } + line[i++] = 0; + return c == EOF ? 
EOF : i; +} + +static int split_fields(char *line, char *fields[], int maxfields) { + int i; + for (i=0; i"); fflush(stdout); + enum { maxline = 64}; + char line[maxline+1]; + int r = readline(line, maxline); + if (r == EOF) + break; + const int maxfields = 4; + char *fields[maxfields]; + int nfields = split_fields(line, fields, maxfields); + if (nfields == 0) + continue; + if (strcmp(fields[0], "help") == 0) { + interactive_help(); + } else if (strcmp(fields[0], "header") == 0) { + toku_ft_free(ft); + open_header(fd, &ft, cf); + dump_header(ft); + } else if (strcmp(fields[0], "block") == 0 && nfields == 2) { + BLOCKNUM blocknum = make_blocknum(getuint64(fields[1])); + dump_block(fd, blocknum, ft); + } else if (strcmp(fields[0], "node") == 0 && nfields == 2) { + BLOCKNUM off = make_blocknum(getuint64(fields[1])); + dump_node(fd, off, ft); + } else if (strcmp(fields[0], "dumpdata") == 0 && nfields == 2) { + do_dump_data = strtol(fields[1], NULL, 10); + } else if (strcmp(fields[0], "block_translation") == 0 || strcmp(fields[0], "bx") == 0) { + uint64_t offset = 0; + if (nfields == 2) + offset = getuint64(fields[1]); + dump_block_translation(ft, offset); + } else if (strcmp(fields[0], "fragmentation") == 0) { + dump_fragmentation(fd, ft, do_tsv); + } else if (strcmp(fields[0], "nodesizes") == 0) { + dump_nodesizes(fd, ft); + } else if (strcmp(fields[0], "garbage") == 0) { + dump_garbage_stats(fd, ft); + } else if (strcmp(fields[0], "file") == 0 && nfields >= 3) { + uint64_t offset = getuint64(fields[1]); + uint64_t size = getuint64(fields[2]); + FILE *outfp = stdout; + if (nfields >= 4) + outfp = fopen(fields[3], "w"); + dump_file(fd, offset, size, outfp); + } else if (strcmp(fields[0], "setfile") == 0 && nfields == 3) { + uint64_t offset = getuint64(fields[1]); + unsigned char newc = getuint64(fields[2]); + set_file(fd, offset, newc); + } else if (strcmp(fields[0], "quit") == 0 || strcmp(fields[0], "q") == 0) { + break; + } + } +} + +static int usage(void) { + 
fprintf(stderr, "Usage: %s ", arg0); + fprintf(stderr, "--interactive "); + fprintf(stderr, "--nodata "); + fprintf(stderr, "--dumpdata 0|1 "); + fprintf(stderr, "--header "); + fprintf(stderr, "--rootnode "); + fprintf(stderr, "--node N "); + fprintf(stderr, "--fragmentation "); + fprintf(stderr, "--garbage "); + fprintf(stderr, "--tsv "); + fprintf(stderr, "--translation-table "); + fprintf(stderr, "--tsv "); + fprintf(stderr, "filename \n"); + return 1; +} + +int main (int argc, const char *const argv[]) { + arg0 = argv[0]; + argc--; argv++; + while (argc>0) { + if (strcmp(argv[0], "--interactive") == 0 || strcmp(argv[0], "--i") == 0) { + do_interactive = 1; + } else if (strcmp(argv[0], "--nodata") == 0) { + do_dump_data = 0; + } else if (strcmp(argv[0], "--dumpdata") == 0 && argc > 1) { + argc--; argv++; + do_dump_data = atoi(argv[0]); + } else if (strcmp(argv[0], "--header") == 0) { + do_header = 1; + } else if (strcmp(argv[0], "--rootnode") == 0) { + do_rootnode = 1; + } else if (strcmp(argv[0], "--node") == 0 && argc > 1) { + argc--; argv++; + do_node = 1; + do_node_num = make_blocknum(getuint64(argv[0])); + } else if (strcmp(argv[0], "--fragmentation") == 0) { + do_fragmentation = 1; + } else if (strcmp(argv[0], "--garbage") == 0) { + do_garbage = 1; + } else if (strcmp(argv[0], "--tsv") == 0) { + do_tsv = 1; + } else if (strcmp(argv[0], "--translation-table") == 0) { + do_translation_table = 1; + } else if (strcmp(argv[0], "--help") == 0 || strcmp(argv[0], "-?") == 0 || strcmp(argv[0], "-h") == 0) { + return usage(); + } else { + break; + } + argc--; argv++; + } + if (argc != 1) + return usage(); + + int r = toku_ft_layer_init(); + assert_zero(r); + + fname = argv[0]; + int fd = open(fname, O_RDWR + O_BINARY); + if (fd < 0) { + fprintf(stderr, "%s: can not open %s errno %d\n", arg0, fname, errno); + return 1; + } + + // create a cachefile for the header + CACHETABLE ct = NULL; + toku_cachetable_create(&ct, 1<<25, (LSN){0}, 0); + + CACHEFILE cf = NULL; + r 
= toku_cachetable_openfd (&cf, ct, fd, fname); + assert_zero(r); + + FT ft = NULL; + open_header(fd, &ft, cf); + + if (do_interactive) { + run_iteractive_loop(fd, ft, cf); + } else { + if (do_header) { + dump_header(ft); + } + if (do_rootnode) { + dump_node(fd, ft->h->root_blocknum, ft); + } + if (do_node) { + dump_node(fd, do_node_num, ft); + } + if (do_fragmentation) { + dump_fragmentation(fd, ft, do_tsv); + } + if (do_translation_table) { + ft->blocktable.dump_translation_table_pretty(stdout); + } + if (do_garbage) { + dump_garbage_stats(fd, ft); + } + if (!do_header && !do_rootnode && !do_fragmentation && !do_translation_table && !do_garbage) { + printf("Block translation:"); + ft->blocktable.dump_translation_table(stdout); + + dump_header(ft); + + struct __dump_node_extra info; + info.fd = fd; + info.ft = ft; + ft->blocktable.iterate(block_table::TRANSLATION_CHECKPOINTED, + dump_node_wrapper, &info, true, true); + } + } + toku_cachefile_close(&cf, false, ZERO_LSN); + toku_cachetable_close(&ct); + toku_ft_free(ft); + toku_ft_layer_destroy(); + return 0; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/bytestring.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/bytestring.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/bytestring.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/bytestring.h 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,96 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), 
the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. 
+ + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#include "portability/toku_stdint.h" + +struct BYTESTRING { + uint32_t len; + char *data; +}; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/circular_buffer.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/circular_buffer.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/circular_buffer.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/circular_buffer.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/circular_buffer.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/circular_buffer.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/circular_buffer.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/circular_buffer.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef UTIL_CIRCULAR_BUFFER_H -#define UTIL_CIRCULAR_BUFFER_H - #include #include #include @@ -210,5 +209,3 @@ } #include "circular_buffer.cc" - -#endif // UTIL_CIRCULAR_BUFFER_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/CMakeLists.txt mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/CMakeLists.txt --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/CMakeLists.txt 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/CMakeLists.txt 2014-10-08 13:19:52.000000000 +0000 @@ -1,10 +1,13 @@ set(util_srcs context + dbt frwlock kibbutz memarena mempool + minicron partitioned_counter + queue threadpool scoped_malloc x1764 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/constexpr.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/constexpr.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/constexpr.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/constexpr.h 2014-10-08 13:19:52.000000000 +0000 @@ 
-29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,11 +86,11 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#pragma once - constexpr char UU() static_tolower(const char a) { return a >= 'A' && a <= 'Z' ? a - 'A' + 'a' : a; } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/context.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/context.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/context.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/context.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2014 Tokutek, Inc. 
DISCLAIMER: @@ -121,7 +121,7 @@ // engine status static struct context_status context_status; -#define CONTEXT_STATUS_INIT(key, legend) TOKUDB_STATUS_INIT(context_status, key, nullptr, PARCOUNT, "context: " legend, TOKU_ENGINE_STATUS) +#define CONTEXT_STATUS_INIT(key, legend) TOKUFT_STATUS_INIT(context_status, key, nullptr, PARCOUNT, "context: " legend, TOKU_ENGINE_STATUS) static void context_status_init(void) { diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/context.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/context.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/context.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/context.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2014 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/dbt.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/dbt.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/dbt.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/dbt.cc 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,343 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include +#include + +#include "portability/memory.h" + +#include "util/dbt.h" + +DBT *toku_init_dbt(DBT *dbt) { + memset(dbt, 0, sizeof(*dbt)); + return dbt; +} + +DBT toku_empty_dbt(void) { + static const DBT empty_dbt = { .data = 0, .size = 0, .ulen = 0, .flags = 0 }; + return empty_dbt; +} + +DBT *toku_init_dbt_flags(DBT *dbt, uint32_t flags) { + toku_init_dbt(dbt); + dbt->flags = flags; + return dbt; +} + +DBT_ARRAY *toku_dbt_array_init(DBT_ARRAY *dbts, uint32_t size) { + uint32_t capacity = 1; + while (capacity < size) { capacity *= 2; } + + XMALLOC_N(capacity, dbts->dbts); + for (uint32_t i = 0; i < capacity; i++) { + toku_init_dbt_flags(&dbts->dbts[i], DB_DBT_REALLOC); + } + dbts->size = size; + dbts->capacity = capacity; + return dbts; +} + +void toku_dbt_array_resize(DBT_ARRAY *dbts, uint32_t size) { + if (size != dbts->size) { + if (size > dbts->capacity) { + const uint32_t old_capacity = dbts->capacity; + uint32_t new_capacity = dbts->capacity; + while (new_capacity < size) { + new_capacity *= 2; + } + dbts->capacity = new_capacity; + XREALLOC_N(new_capacity, dbts->dbts); + for (uint32_t i = old_capacity; i < new_capacity; i++) { + toku_init_dbt_flags(&dbts->dbts[i], DB_DBT_REALLOC); + } + } else if (size < dbts->size) { + if (dbts->capacity >= 8 && size < dbts->capacity / 4) { + const int old_capacity = dbts->capacity; + const int new_capacity = dbts->capacity / 2; + for (int i = new_capacity; i < old_capacity; i++) { + toku_destroy_dbt(&dbts->dbts[i]); + } + XREALLOC_N(new_capacity, dbts->dbts); + dbts->capacity = new_capacity; + } + } + dbts->size = size; + } +} + +void toku_dbt_array_destroy_shallow(DBT_ARRAY *dbts) { + toku_free(dbts->dbts); + ZERO_STRUCT(*dbts); +} + +void toku_dbt_array_destroy(DBT_ARRAY *dbts) { + for (uint32_t i = 0; i < dbts->capacity; i++) { + toku_destroy_dbt(&dbts->dbts[i]); + } + toku_dbt_array_destroy_shallow(dbts); +} + + + +void toku_destroy_dbt(DBT *dbt) { + switch (dbt->flags) { + case DB_DBT_MALLOC: + case 
DB_DBT_REALLOC: + toku_free(dbt->data); + toku_init_dbt(dbt); + break; + } +} + +DBT *toku_fill_dbt(DBT *dbt, const void *k, uint32_t len) { + toku_init_dbt(dbt); + dbt->size=len; + dbt->data=(char*)k; + return dbt; +} + +DBT *toku_memdup_dbt(DBT *dbt, const void *k, size_t len) { + toku_init_dbt_flags(dbt, DB_DBT_MALLOC); + dbt->size = len; + dbt->data = toku_xmemdup(k, len); + return dbt; +} + +DBT *toku_copyref_dbt(DBT *dst, const DBT src) { + dst->flags = 0; + dst->ulen = 0; + dst->size = src.size; + dst->data = src.data; + return dst; +} + +DBT *toku_clone_dbt(DBT *dst, const DBT &src) { + return toku_memdup_dbt(dst, src.data, src.size); +} + +void +toku_sdbt_cleanup(struct simple_dbt *sdbt) { + if (sdbt->data) toku_free(sdbt->data); + memset(sdbt, 0, sizeof(*sdbt)); +} + +static inline int sdbt_realloc(struct simple_dbt *sdbt) { + void *new_data = toku_realloc(sdbt->data, sdbt->len); + int r; + if (new_data == NULL) { + r = get_error_errno(); + } else { + sdbt->data = new_data; + r = 0; + } + return r; +} + +static inline int dbt_realloc(DBT *dbt) { + void *new_data = toku_realloc(dbt->data, dbt->ulen); + int r; + if (new_data == NULL) { + r = get_error_errno(); + } else { + dbt->data = new_data; + r = 0; + } + return r; +} + +// sdbt is the static value used when flags==0 +// Otherwise malloc or use the user-supplied memory, as according to the flags in d->flags. +int toku_dbt_set(uint32_t len, const void *val, DBT *d, struct simple_dbt *sdbt) { + int r; + if (d == nullptr) { + r = 0; + } else { + switch (d->flags) { + case (DB_DBT_USERMEM): + d->size = len; + if (d->ulendata, val, len); + r = 0; + } + break; + case (DB_DBT_MALLOC): + d->data = NULL; + d->ulen = 0; + //Fall through to DB_DBT_REALLOC + case (DB_DBT_REALLOC): + if (d->ulen < len) { + d->ulen = len*2; + r = dbt_realloc(d); + } + else if (d->ulen > 16 && d->ulen > len*4) { + d->ulen = len*2 < 16 ? 
16 : len*2; + r = dbt_realloc(d); + } + else if (d->data==NULL) { + d->ulen = len; + r = dbt_realloc(d); + } + else r=0; + + if (r==0) { + memcpy(d->data, val, len); + d->size = len; + } + break; + case (0): + if (sdbt->len < len) { + sdbt->len = len*2; + r = sdbt_realloc(sdbt); + } + else if (sdbt->len > 16 && sdbt->len > len*4) { + sdbt->len = len*2 < 16 ? 16 : len*2; + r = sdbt_realloc(sdbt); + } + else r=0; + + if (r==0) { + memcpy(sdbt->data, val, len); + d->data = sdbt->data; + d->size = len; + } + break; + default: + r = EINVAL; + break; + } + } + return r; +} + +const DBT *toku_dbt_positive_infinity(void) { + static DBT positive_infinity_dbt = {}; + return &positive_infinity_dbt; +} + +const DBT *toku_dbt_negative_infinity(void) { + static DBT negative_infinity_dbt = {}; + return &negative_infinity_dbt; +} + +bool toku_dbt_is_infinite(const DBT *dbt) { + return dbt == toku_dbt_positive_infinity() || dbt == toku_dbt_negative_infinity(); +} + +bool toku_dbt_is_empty(const DBT *dbt) { + // can't have a null data field with a non-zero size + paranoid_invariant(dbt->data != nullptr || dbt->size == 0); + return dbt->data == nullptr; +} + +int toku_dbt_infinite_compare(const DBT *a, const DBT *b) { + if (a == b) { + return 0; + } else if (a == toku_dbt_positive_infinity()) { + return 1; + } else if (b == toku_dbt_positive_infinity()) { + return -1; + } else if (a == toku_dbt_negative_infinity()) { + return -1; + } else { + invariant(b == toku_dbt_negative_infinity()); + return 1; + } +} + +bool toku_dbt_equals(const DBT *a, const DBT *b) { + if (!toku_dbt_is_infinite(a) && !toku_dbt_is_infinite(b)) { + return a->data == b->data && a->size == b->size; + } else { + // a or b is infinite, so they're equal if they are the same infinite + return a == b ? 
true : false; + } +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/dbt.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/dbt.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/dbt.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/dbt.h 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,141 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include + +// TODO: John +// Document this API a little better so that DBT +// memory management can be morm widely understood. 
+ +DBT *toku_init_dbt(DBT *); + +// returns: an initialized but empty dbt (for which toku_dbt_is_empty() is true) +DBT toku_empty_dbt(void); + +DBT *toku_init_dbt_flags(DBT *, uint32_t flags); + +void toku_destroy_dbt(DBT *); + +DBT *toku_fill_dbt(DBT *dbt, const void *k, uint32_t len); + +DBT *toku_memdup_dbt(DBT *dbt, const void *k, size_t len); + +DBT *toku_copyref_dbt(DBT *dst, const DBT src); + +DBT *toku_clone_dbt(DBT *dst, const DBT &src); + +int toku_dbt_set(uint32_t len, const void *val, DBT *d, struct simple_dbt *sdbt); + +int toku_dbt_set_value(DBT *, const void **val, uint32_t vallen, void **staticptrp, bool dbt1_disposable); + +void toku_sdbt_cleanup(struct simple_dbt *sdbt); + +// returns: special DBT pointer representing positive infinity +const DBT *toku_dbt_positive_infinity(void); + +// returns: special DBT pointer representing negative infinity +const DBT *toku_dbt_negative_infinity(void); + +// returns: true if the given dbt is either positive or negative infinity +bool toku_dbt_is_infinite(const DBT *dbt); + +// returns: true if the given dbt has no data (ie: dbt->data == nullptr) +bool toku_dbt_is_empty(const DBT *dbt); + +// effect: compares two potentially infinity-valued dbts +// requires: at least one is infinite (assert otherwise) +int toku_dbt_infinite_compare(const DBT *a, const DBT *b); + +// returns: true if the given dbts have the same data pointer and size +bool toku_dbt_equals(const DBT *a, const DBT *b); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/dmt.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/dmt.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/dmt.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/dmt.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -130,7 +130,7 @@ toku_mempool_construct(&this->mp, aligned_memsize); if (aligned_memsize > 0) { paranoid_invariant(numvalues > 0); - void *ptr = toku_mempool_malloc(&this->mp, aligned_memsize, 1); + void *ptr = toku_mempool_malloc(&this->mp, aligned_memsize); paranoid_invariant_notnull(ptr); uint8_t * const CAST_FROM_VOIDP(dest, ptr); const uint8_t * const CAST_FROM_VOIDP(src, mem); @@ -261,7 +261,7 @@ paranoid_invariant(this->values_same_size); this->d.a.num_values++; - void *ptr = toku_mempool_malloc(&this->mp, align(this->value_length), 1); + void *ptr = toku_mempool_malloc(&this->mp, align(this->value_length)); paranoid_invariant_notnull(ptr); paranoid_invariant(reinterpret_cast(ptr) % ALIGNMENT == 0); dmtdata_t *CAST_FROM_VOIDP(n, ptr); @@ -302,7 +302,7 @@ paranoid_invariant(copy_bytes <= toku_mempool_get_used_size(&this->mp)); // Copy over to new mempool if (this->d.a.num_values > 0) { - void* dest = toku_mempool_malloc(&new_kvspace, copy_bytes, 1); + void* dest = toku_mempool_malloc(&new_kvspace, copy_bytes); invariant(dest!=nullptr); memcpy(dest, get_array_value(0), copy_bytes); } @@ -344,7 +344,7 @@ const uint32_t fixed_aligned_len = align(this->value_length); size_t mem_needed = num_values * fixed_aligned_len; toku_mempool_construct(&new_mp, mem_needed); - uint8_t* CAST_FROM_VOIDP(dest, toku_mempool_malloc(&new_mp, mem_needed, 1)); + uint8_t* CAST_FROM_VOIDP(dest, toku_mempool_malloc(&new_mp, mem_needed)); paranoid_invariant_notnull(dest); for (uint32_t i = 0; i < num_values; i++) { const dmt_node &n = get_node(tmp_array[i]); @@ -588,7 +588,7 @@ size_t val_size = value.get_size(); size_t size_to_alloc = __builtin_offsetof(dmt_node, value) + val_size; size_to_alloc = align(size_to_alloc); - void* np = toku_mempool_malloc(&this->mp, size_to_alloc, 1); + void* np = toku_mempool_malloc(&this->mp, size_to_alloc); paranoid_invariant_notnull(np); dmt_node *CAST_FROM_VOIDP(n, np); node_set_value(n, value); @@ -645,7 +645,7 @@ dmt_node &node = 
get_node(tmp_array[i]); const size_t bytes_to_copy = __builtin_offsetof(dmt_node, value) + node.value_length; const size_t bytes_to_alloc = align(bytes_to_copy); - void* newdata = toku_mempool_malloc(&new_kvspace, bytes_to_alloc, 1); + void* newdata = toku_mempool_malloc(&new_kvspace, bytes_to_alloc); memcpy(newdata, &node, bytes_to_copy); tmp_array[i] = toku_mempool_get_offset_from_pointer_and_base(&new_kvspace, newdata); } @@ -1251,7 +1251,7 @@ invariant_zero(toku_mempool_get_frag_size(&this->temp.mp)); struct mempool new_mp; toku_mempool_construct(&new_mp, used); - void * newbase = toku_mempool_malloc(&new_mp, used, 1); + void * newbase = toku_mempool_malloc(&new_mp, used); invariant_notnull(newbase); memcpy(newbase, toku_mempool_get_base(&this->temp.mp), used); toku_mempool_destroy(&this->temp.mp); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/dmt.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/dmt.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/dmt.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/dmt.h 2014-10-08 13:19:52.000000000 +0000 @@ -1,6 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#pragma once /* COPYING CONDITIONS NOTICE: @@ -30,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,17 +85,22 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include -#include -#include -#include -#include "growable_array.h" -#include "../ft/wbuf.h" #include +#include "portability/memory.h" +#include "portability/toku_portability.h" +#include "portability/toku_race_tools.h" +#include "portability/toku_stdint.h" + +#include "ft/serialize/wbuf.h" +#include "util/growable_array.h" +#include "util/mempool.h" + namespace toku { typedef uint32_t node_offset; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/doubly_linked_list.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/doubly_linked_list.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/doubly_linked_list.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/doubly_linked_list.h 2014-10-08 13:19:52.000000000 +0000 @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef UTIL_DOUBLY_LINKED_LIST_H -#define UTIL_DOUBLY_LINKED_LIST_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,6 +86,8 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
@@ -225,5 +225,3 @@ } } - -#endif // UTIL_DOUBLY_LINKED_LIST_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/fmutex.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/fmutex.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/fmutex.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/fmutex.h 2014-10-08 13:19:52.000000000 +0000 @@ -1,5 +1,4 @@ -#ifndef FMUTEX_H -#define FMUTEX_H +#pragma once // fair mutex struct fmutex { @@ -105,5 +104,3 @@ int fmutex_blocked_users(struct fmutex *fm) const { return fm->num_want_mutex; } - -#endif // FMUTEX_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/frwlock.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/frwlock.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/frwlock.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/frwlock.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/frwlock.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/frwlock.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/frwlock.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/frwlock.h 2014-10-08 13:19:52.000000000 +0000 @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef UTIL_FRWLOCK_H -#define UTIL_FRWLOCK_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,6 +86,8 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -176,5 +176,3 @@ // include the implementation here // #include "frwlock.cc" - -#endif // UTIL_FRWLOCK_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/growable_array.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/growable_array.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/growable_array.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/growable_array.h 2014-10-08 13:19:52.000000000 +0000 @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef UTIL_GROWABLE_ARRAY_H -#define UTIL_GROWABLE_ARRAY_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,6 +86,8 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
@@ -175,5 +175,3 @@ }; } - -#endif // UTIL_GROWABLE_ARRAY_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/kibbutz.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/kibbutz.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/kibbutz.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/kibbutz.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/kibbutz.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/kibbutz.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/kibbutz.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/kibbutz.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef UTIL_KIBBUTZ_H -#define UTIL_KIBBUTZ_H - // // The kibbutz is another threadpool meant to do arbitrary work. 
// @@ -116,5 +115,3 @@ // destroys the kibbutz // void toku_kibbutz_destroy (KIBBUTZ k); - -#endif // UTIL_KIBBUTZ_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/memarena.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/memarena.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/memarena.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/memarena.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,157 +89,142 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +#include #include #include #include -struct memarena { - char *buf; - size_t buf_used, buf_size; - size_t size_of_other_bufs; // the buf_size of all the other bufs. - size_t footprint_of_other_bufs; // the footprint of all the other bufs. - char **other_bufs; - int n_other_bufs; -}; - -MEMARENA toku_memarena_create_presized (size_t initial_size) { - MEMARENA XMALLOC(result); - result->buf_size = initial_size; - result->buf_used = 0; - result->other_bufs = NULL; - result->size_of_other_bufs = 0; - result->footprint_of_other_bufs = 0; - result->n_other_bufs = 0; - XMALLOC_N(result->buf_size, result->buf); - return result; -} - -MEMARENA toku_memarena_create (void) { - return toku_memarena_create_presized(1024); -} - -void toku_memarena_clear (MEMARENA ma) { - // Free the other bufs. 
- int i; - for (i=0; in_other_bufs; i++) { - toku_free(ma->other_bufs[i]); - ma->other_bufs[i]=0; - } - ma->n_other_bufs=0; - // But reuse the main buffer - ma->buf_used = 0; - ma->size_of_other_bufs = 0; - ma->footprint_of_other_bufs = 0; -} - -static size_t -round_to_page (size_t size) { - const size_t _PAGE_SIZE = 4096; - const size_t result = _PAGE_SIZE+((size-1)&~(_PAGE_SIZE-1)); - assert(0==(result&(_PAGE_SIZE-1))); // make sure it's aligned - assert(result>=size); // make sure it's not too small - assert(result 0) { + XMALLOC_N(_current_chunk.size, _current_chunk.buf); + } +} + +void memarena::destroy(void) { + if (_current_chunk.buf) { + toku_free(_current_chunk.buf); + } + for (int i = 0; i < _n_other_chunks; i++) { + toku_free(_other_chunks[i].buf); + } + if (_other_chunks) { + toku_free(_other_chunks); + } + _current_chunk = arena_chunk(); + _other_chunks = nullptr; + _n_other_chunks = 0; +} + +static size_t round_to_page(size_t size) { + const size_t page_size = 4096; + const size_t r = page_size + ((size - 1) & ~(page_size - 1)); + assert((r & (page_size - 1)) == 0); // make sure it's aligned + assert(r >= size); // make sure it's not too small + assert(r < size + page_size); // make sure we didn't grow by more than a page. + return r; } -void* toku_memarena_malloc (MEMARENA ma, size_t size) { - if (ma->buf_size < ma->buf_used + size) { +static const size_t MEMARENA_MAX_CHUNK_SIZE = 64 * 1024 * 1024; + +void *memarena::malloc_from_arena(size_t size) { + if (_current_chunk.buf == nullptr || _current_chunk.size < _current_chunk.used + size) { // The existing block isn't big enough. // Add the block to the vector of blocks. 
- if (ma->buf) { - int old_n = ma->n_other_bufs; - REALLOC_N(old_n+1, ma->other_bufs); - assert(ma->other_bufs); - ma->other_bufs[old_n]=ma->buf; - ma->n_other_bufs = old_n+1; - ma->size_of_other_bufs += ma->buf_size; - ma->footprint_of_other_bufs += toku_memory_footprint(ma->buf, ma->buf_used); + if (_current_chunk.buf) { + invariant(_current_chunk.size > 0); + int old_n = _n_other_chunks; + XREALLOC_N(old_n + 1, _other_chunks); + _other_chunks[old_n] = _current_chunk; + _n_other_chunks = old_n + 1; + _size_of_other_chunks += _current_chunk.size; + _footprint_of_other_chunks += toku_memory_footprint(_current_chunk.buf, _current_chunk.used); } - // Make a new one - { - size_t new_size = 2*ma->buf_size; - if (new_sizebuf); - ma->buf_used = 0; - ma->buf_size = new_size; + + // Make a new one. Grow the buffer size exponentially until we hit + // the max chunk size, but make it at least `size' bytes so the + // current allocation always fit. + size_t new_size = std::min(MEMARENA_MAX_CHUNK_SIZE, 2 * _current_chunk.size); + if (new_size < size) { + new_size = size; } + new_size = round_to_page(new_size); // at least size, but round to the next page size + XMALLOC_N(new_size, _current_chunk.buf); + _current_chunk.used = 0; + _current_chunk.size = new_size; } + invariant(_current_chunk.buf != nullptr); + // allocate in the existing block. 
- char *result=ma->buf+ma->buf_used; - ma->buf_used+=size; - return result; + char *p = _current_chunk.buf + _current_chunk.used; + _current_chunk.used += size; + return p; } -void *toku_memarena_memdup (MEMARENA ma, const void *v, size_t len) { - void *r=toku_memarena_malloc(ma, len); - memcpy(r,v,len); - return r; +void memarena::move_memory(memarena *dest) { + // Move memory to dest + XREALLOC_N(dest->_n_other_chunks + _n_other_chunks + 1, dest->_other_chunks); + dest->_size_of_other_chunks += _size_of_other_chunks + _current_chunk.size; + dest->_footprint_of_other_chunks += _footprint_of_other_chunks + toku_memory_footprint(_current_chunk.buf, _current_chunk.used); + for (int i = 0; i < _n_other_chunks; i++) { + dest->_other_chunks[dest->_n_other_chunks++] = _other_chunks[i]; + } + dest->_other_chunks[dest->_n_other_chunks++] = _current_chunk; + + // Clear out this memarena's memory + toku_free(_other_chunks); + _current_chunk = arena_chunk(); + _other_chunks = nullptr; + _size_of_other_chunks = 0; + _footprint_of_other_chunks = 0; + _n_other_chunks = 0; +} + +size_t memarena::total_memory_size(void) const { + return sizeof(*this) + + total_size_in_use() + + _n_other_chunks * sizeof(*_other_chunks); +} + +size_t memarena::total_size_in_use(void) const { + return _size_of_other_chunks + _current_chunk.used; +} + +size_t memarena::total_footprint(void) const { + return sizeof(*this) + + _footprint_of_other_chunks + + toku_memory_footprint(_current_chunk.buf, _current_chunk.used) + + _n_other_chunks * sizeof(*_other_chunks); } -void toku_memarena_destroy(MEMARENA *map) { - MEMARENA ma=*map; - if (ma->buf) { - toku_free(ma->buf); - ma->buf=0; - } - int i; - for (i=0; in_other_bufs; i++) { - toku_free(ma->other_bufs[i]); - } - if (ma->other_bufs) toku_free(ma->other_bufs); - ma->other_bufs=0; - ma->n_other_bufs=0; - toku_free(ma); - *map = 0; -} - -void toku_memarena_move_buffers(MEMARENA dest, MEMARENA source) { - int i; - char **other_bufs = dest->other_bufs; - 
static int move_counter = 0; - move_counter++; - REALLOC_N(dest->n_other_bufs + source->n_other_bufs + 1, other_bufs); - - dest ->size_of_other_bufs += source->size_of_other_bufs + source->buf_size; - dest ->footprint_of_other_bufs += source->footprint_of_other_bufs + toku_memory_footprint(source->buf, source->buf_used); - source->size_of_other_bufs = 0; - source->footprint_of_other_bufs = 0; - - assert(other_bufs); - dest->other_bufs = other_bufs; - for (i=0; in_other_bufs; i++) { - dest->other_bufs[dest->n_other_bufs++] = source->other_bufs[i]; - } - dest->other_bufs[dest->n_other_bufs++] = source->buf; - source->n_other_bufs = 0; - toku_free(source->other_bufs); - source->other_bufs = 0; - source->buf = 0; - source->buf_size = 0; - source->buf_used = 0; - -} - -size_t -toku_memarena_total_memory_size (MEMARENA m) -{ - return (toku_memarena_total_size_in_use(m) + - sizeof(*m) + - m->n_other_bufs * sizeof(*m->other_bufs)); -} - -size_t -toku_memarena_total_size_in_use (MEMARENA m) -{ - return m->size_of_other_bufs + m->buf_used; -} - -size_t -toku_memarena_total_footprint (MEMARENA m) -{ - return m->footprint_of_other_bufs + toku_memory_footprint(m->buf, m->buf_used) + - sizeof(*m) + - m->n_other_bufs * sizeof(*m->other_bufs); +//////////////////////////////////////////////////////////////////////////////// + +const void *memarena::chunk_iterator::current(size_t *used) const { + if (_chunk_idx < 0) { + *used = _ma->_current_chunk.used; + return _ma->_current_chunk.buf; + } else if (_chunk_idx < _ma->_n_other_chunks) { + *used = _ma->_other_chunks[_chunk_idx].used; + return _ma->_other_chunks[_chunk_idx].buf; + } + *used = 0; + return nullptr; +} + +void memarena::chunk_iterator::next() { + _chunk_idx++; +} + +bool memarena::chunk_iterator::more() const { + if (_chunk_idx < 0) { + return _ma->_current_chunk.buf != nullptr; + } + return _chunk_idx < _ma->_n_other_chunks; } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/memarena.h 
mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/memarena.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/memarena.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/memarena.h 2014-10-08 13:19:52.000000000 +0000 @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_MEMARENA_H -#define TOKU_MEMARENA_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,48 +87,90 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -/* We have too many memory management tricks: - * memarena (this code) is for a collection of objects that cannot be moved. - * The pattern is allocate more and more stuff. - * Don't free items as you go. - * Free all the items at once. - * Then reuse the same buffer again. - * Allocated objects never move. - * A memarena (as currently implemented) is not suitable for interprocess memory sharing. No reason it couldn't be made to work though. +/* + * A memarena is used to efficiently store a collection of objects that never move + * The pattern is allocate more and more stuff and free all of the items at once. + * The underlying memory will store 1 or more objects per chunk. Each chunk is + * contiguously laid out in memory but chunks are not necessarily contiguous with + * each other. 
*/ +class memarena { +public: + memarena() : + _current_chunk(arena_chunk()), + _other_chunks(nullptr), + _n_other_chunks(0), + _size_of_other_chunks(0), + _footprint_of_other_chunks(0) { + } + + // Effect: Create a memarena with the specified initial size + void create(size_t initial_size); + + void destroy(void); + + // Effect: Allocate some memory. The returned value remains valid until the memarena is cleared or closed. + // In case of ENOMEM, aborts. + void *malloc_from_arena(size_t size); + + // Effect: Move all the memory from this memarena into DEST. + // When SOURCE is closed the memory won't be freed. + // When DEST is closed, the memory will be freed, unless DEST moves its memory to another memarena... + void move_memory(memarena *dest); + + // Effect: Calculate the amount of memory used by a memory arena. + size_t total_memory_size(void) const; + + // Effect: Calculate the used space of the memory arena (ie: excludes unused space) + size_t total_size_in_use(void) const; + + // Effect: Calculate the amount of memory used, according to toku_memory_footprint(), + // which is a more expensive but more accurate count of memory used. + size_t total_footprint(void) const; + + // iterator over the underlying chunks that store objects in the memarena. + // a chunk is represented by a pointer to const memory and a usable byte count. 
+ class chunk_iterator { + public: + chunk_iterator(const memarena *ma) : + _ma(ma), _chunk_idx(-1) { + } + + // returns: base pointer to the current chunk + // *used set to the number of usable bytes + // if more() is false, returns nullptr and *used = 0 + const void *current(size_t *used) const; + + // requires: more() is true + void next(); + + bool more() const; + + private: + // -1 represents the 'initial' chunk in a memarena, ie: ma->_current_chunk + // >= 0 represents the i'th chunk in the ma->_other_chunks array + const memarena *_ma; + int _chunk_idx; + }; + +private: + struct arena_chunk { + arena_chunk() : buf(nullptr), used(0), size(0) { } + char *buf; + size_t used; + size_t size; + }; + + struct arena_chunk _current_chunk; + struct arena_chunk *_other_chunks; + int _n_other_chunks; + size_t _size_of_other_chunks; // the buf_size of all the other chunks. + size_t _footprint_of_other_chunks; // the footprint of all the other chunks. -struct memarena; - -typedef struct memarena *MEMARENA; - -MEMARENA toku_memarena_create_presized (size_t initial_size); -// Effect: Create a memarena with initial size. In case of ENOMEM, aborts. - -MEMARENA toku_memarena_create (void); -// Effect: Create a memarena with default initial size. In case of ENOMEM, aborts. - -void toku_memarena_clear (MEMARENA ma); -// Effect: Reset the internal state so that the allocated memory can be used again. - -void* toku_memarena_malloc (MEMARENA ma, size_t size); -// Effect: Allocate some memory. The returned value remains valid until the memarena is cleared or closed. -// In case of ENOMEM, aborts. - -void *toku_memarena_memdup (MEMARENA ma, const void *v, size_t len); - -void toku_memarena_destroy(MEMARENA *ma); - -void toku_memarena_move_buffers(MEMARENA dest, MEMARENA source); -// Effect: Move all the memory from SOURCE into DEST. When SOURCE is closed the memory won't be freed. When DEST is closed, the memory will be freed. (Unless DEST moves its memory to another memarena...) 
- -size_t toku_memarena_total_memory_size (MEMARENA); -// Effect: Calculate the amount of memory used by a memory arena. - -size_t toku_memarena_total_size_in_use (MEMARENA); - -size_t toku_memarena_total_footprint (MEMARENA); - -#endif + friend class memarena_unit_test; +}; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/mempool.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/mempool.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/mempool.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/mempool.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -207,24 +207,20 @@ return mp->free_offset; } -void *toku_mempool_malloc(struct mempool *mp, size_t size, int alignment) { +void *toku_mempool_malloc(struct mempool *mp, size_t size) { paranoid_invariant(size < (1U<<31)); paranoid_invariant(mp->size < (1U<<31)); paranoid_invariant(mp->free_offset < (1U<<31)); paranoid_invariant(mp->free_offset <= mp->size); void *vp; - size_t offset = (mp->free_offset + (alignment-1)) & ~(alignment-1); - //printf("mempool_malloc size=%ld base=%p free_offset=%ld mp->size=%ld offset=%ld\n", size, mp->base, mp->free_offset, mp->size, offset); - if (offset + size > mp->size) { - vp = 0; + if (mp->free_offset + size > mp->size) { + vp = nullptr; } else { - vp = (char *)mp->base + offset; - mp->free_offset = offset + size; + vp = reinterpret_cast(mp->base) + mp->free_offset; + mp->free_offset += size; } paranoid_invariant(mp->free_offset <= mp->size); - paranoid_invariant(((long)vp & (alignment-1)) == 0); paranoid_invariant(vp == 0 || toku_mempool_inrange(mp, vp, size)); - //printf("mempool returning %p\n", vp); return vp; } @@ -232,7 +228,8 @@ void toku_mempool_mfree(struct mempool *mp, void *vp, size_t size) { if (vp) { paranoid_invariant(toku_mempool_inrange(mp, 
vp, size)); } mp->frag_size += size; - paranoid_invariant(mp->frag_size <= mp->size); + invariant(mp->frag_size <= mp->free_offset); + invariant(mp->frag_size <= mp->size); } diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/mempool.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/mempool.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/mempool.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/mempool.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef UTIL_MEMPOOL_H -#define UTIL_MEMPOOL_H - /* a memory pool is a contiguous region of memory that supports single allocations from the pool. these allocated regions are never recycled. when the memory pool no longer has free space, the allocated chunks @@ -164,8 +163,8 @@ /* get the amount of space that has been allocated for use (wasted or not) */ size_t toku_mempool_get_allocated_size(const struct mempool *mp); -/* allocate a chunk of memory from the memory pool suitably aligned */ -void *toku_mempool_malloc(struct mempool *mp, size_t size, int alignment); +/* allocate a chunk of memory from the memory pool */ +void *toku_mempool_malloc(struct mempool *mp, size_t size); /* free a previously allocated chunk of memory. the free only updates a count of the amount of free space in the memory pool. 
the memory @@ -181,7 +180,3 @@ size_t toku_mempool_footprint(struct mempool *mp); void toku_mempool_clone(const struct mempool* orig_mp, struct mempool* new_mp); - - - -#endif // UTIL_MEMPOOL_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/minicron.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/minicron.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/minicron.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/minicron.cc 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,247 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. 
+ +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "$Id$" + +#include +#include +#include + +#include "portability/toku_assert.h" +#include "util/minicron.h" + +static void +toku_gettime (toku_timespec_t *a) { + struct timeval tv; + gettimeofday(&tv, 0); + a->tv_sec = tv.tv_sec; + a->tv_nsec = tv.tv_usec * 1000LL; +} + + +static int +timespec_compare (toku_timespec_t *a, toku_timespec_t *b) { + if (a->tv_sec > b->tv_sec) return 1; + if (a->tv_sec < b->tv_sec) return -1; + if (a->tv_nsec > b->tv_nsec) return 1; + if (a->tv_nsec < b->tv_nsec) return -1; + return 0; +} + +// Implementation notes: +// When calling do_shutdown or change_period, the mutex is obtained, the variables in the minicron struct are modified, and +// the condition variable is signalled. Possibly the minicron thread will miss the signal. To avoid this problem, whenever +// the minicron thread acquires the mutex, it must check to see what the variables say to do (e.g., should it shut down?). + +static void* +minicron_do (void *pv) +{ + struct minicron *CAST_FROM_VOIDP(p, pv); + toku_mutex_lock(&p->mutex); + while (1) { + if (p->do_shutdown) { + toku_mutex_unlock(&p->mutex); + return 0; + } + if (p->period_in_ms == 0) { + // if we aren't supposed to do it then just do an untimed wait. 
+ toku_cond_wait(&p->condvar, &p->mutex); + } + else if (p->period_in_ms <= 1000) { + toku_mutex_unlock(&p->mutex); + usleep(p->period_in_ms * 1000); + toku_mutex_lock(&p->mutex); + } + else { + // Recompute the wakeup time every time (instead of once per call to f) in case the period changges. + toku_timespec_t wakeup_at = p->time_of_last_call_to_f; + wakeup_at.tv_sec += (p->period_in_ms/1000); + wakeup_at.tv_nsec += (p->period_in_ms % 1000) * 1000000; + toku_timespec_t now; + toku_gettime(&now); + int compare = timespec_compare(&wakeup_at, &now); + // if the time to wakeup has yet to come, then we sleep + // otherwise, we continue + if (compare > 0) { + int r = toku_cond_timedwait(&p->condvar, &p->mutex, &wakeup_at); + if (r!=0 && r!=ETIMEDOUT) fprintf(stderr, "%s:%d r=%d (%s)", __FILE__, __LINE__, r, strerror(r)); + assert(r==0 || r==ETIMEDOUT); + } + } + // Now we woke up, and we should figure out what to do + if (p->do_shutdown) { + toku_mutex_unlock(&p->mutex); + return 0; + } + if (p->period_in_ms > 1000) { + toku_timespec_t now; + toku_gettime(&now); + toku_timespec_t time_to_call = p->time_of_last_call_to_f; + time_to_call.tv_sec += p->period_in_ms/1000; + time_to_call.tv_nsec += (p->period_in_ms % 1000) * 1000000; + int compare = timespec_compare(&time_to_call, &now); + if (compare <= 0) { + toku_gettime(&p->time_of_last_call_to_f); // the measured period includes the time to make the call. 
+ toku_mutex_unlock(&p->mutex); + int r = p->f(p->arg); + assert(r==0); + toku_mutex_lock(&p->mutex); + + } + } + else if (p->period_in_ms != 0) { + toku_mutex_unlock(&p->mutex); + int r = p->f(p->arg); + assert(r==0); + toku_mutex_lock(&p->mutex); + } + } +} + +int +toku_minicron_setup(struct minicron *p, uint32_t period_in_ms, int(*f)(void *), void *arg) +{ + p->f = f; + p->arg = arg; + toku_gettime(&p->time_of_last_call_to_f); + //printf("now=%.6f", p->time_of_last_call_to_f.tv_sec + p->time_of_last_call_to_f.tv_nsec*1e-9); + p->period_in_ms = period_in_ms; + p->do_shutdown = false; + toku_mutex_init(&p->mutex, 0); + toku_cond_init (&p->condvar, 0); + return toku_pthread_create(&p->thread, 0, minicron_do, p); +} + +void +toku_minicron_change_period(struct minicron *p, uint32_t new_period) +{ + toku_mutex_lock(&p->mutex); + p->period_in_ms = new_period; + toku_cond_signal(&p->condvar); + toku_mutex_unlock(&p->mutex); +} + +/* unlocked function for use by engine status which takes no locks */ +uint32_t +toku_minicron_get_period_in_seconds_unlocked(struct minicron *p) +{ + uint32_t retval = p->period_in_ms/1000; + return retval; +} + +/* unlocked function for use by engine status which takes no locks */ +uint32_t +toku_minicron_get_period_in_ms_unlocked(struct minicron *p) +{ + uint32_t retval = p->period_in_ms; + return retval; +} + +int +toku_minicron_shutdown(struct minicron *p) { + toku_mutex_lock(&p->mutex); + assert(!p->do_shutdown); + p->do_shutdown = true; + //printf("%s:%d signalling\n", __FILE__, __LINE__); + toku_cond_signal(&p->condvar); + toku_mutex_unlock(&p->mutex); + void *returned_value; + //printf("%s:%d joining\n", __FILE__, __LINE__); + int r = toku_pthread_join(p->thread, &returned_value); + if (r!=0) fprintf(stderr, "%s:%d r=%d (%s)\n", __FILE__, __LINE__, r, strerror(r)); + assert(r==0); assert(returned_value==0); + toku_cond_destroy(&p->condvar); + toku_mutex_destroy(&p->mutex); + //printf("%s:%d shutdowned\n", __FILE__, __LINE__); + return 
0; +} + +bool +toku_minicron_has_been_shutdown(struct minicron *p) { + return p->do_shutdown; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/minicron.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/minicron.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/minicron.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/minicron.h 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,126 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "$Id$" + +#pragma once + +#include +#include + +// Specification: +// A minicron is a miniature cron job for executing a job periodically inside a pthread. +// To create a minicron, +// 1) allocate a "struct minicron" somewhere. +// Rationale: This struct can be stored inside another struct (such as the cachetable), avoiding a malloc/free pair. +// 2) call toku_minicron_setup, specifying a period (in milliseconds), a function, and some arguments. +// If the period is positive then the function is called periodically (with the period specified) +// Note: The period is measured from when the previous call to f finishes to when the new call starts. +// Thus, if the period is 5 minutes, and it takes 8 minutes to run f, then the actual periodicity is 13 minutes. +// Rationale: If f always takes longer than f to run, then it will get "behind". This module makes getting behind explicit. +// 3) When finished, call toku_minicron_shutdown. +// 4) If you want to change the period, then call toku_minicron_change_period. The time since f finished is applied to the new period +// and the call is rescheduled. (If the time since f finished is more than the new period, then f is called immediately). 
+ +struct minicron { + toku_pthread_t thread; + toku_timespec_t time_of_last_call_to_f; + toku_mutex_t mutex; + toku_cond_t condvar; + int (*f)(void*); + void *arg; + uint32_t period_in_ms; + bool do_shutdown; +}; + +int toku_minicron_setup (struct minicron *s, uint32_t period_in_ms, int(*f)(void *), void *arg); +void toku_minicron_change_period(struct minicron *p, uint32_t new_period); +uint32_t toku_minicron_get_period_in_seconds_unlocked(struct minicron *p); +uint32_t toku_minicron_get_period_in_ms_unlocked(struct minicron *p); +int toku_minicron_shutdown(struct minicron *p); +bool toku_minicron_has_been_shutdown(struct minicron *p); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/nb_mutex.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/nb_mutex.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/nb_mutex.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/nb_mutex.h 2014-10-08 13:19:52.000000000 +0000 @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef UTIL_NB_MUTEX_H -#define UTIL_NB_MUTEX_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,6 +86,8 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
@@ -155,5 +155,3 @@ static inline int nb_mutex_users(NB_MUTEX nb_mutex) { return rwlock_users(&nb_mutex->lock); } - -#endif // UTIL_NB_MUTEX_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/omt.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/omt.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/omt.cc 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/omt.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/omt.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/omt.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/omt.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/omt.h 2014-10-08 13:19:52.000000000 +0000 @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef UTIL_OMT_H -#define UTIL_OMT_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,6 +87,8 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
@@ -813,5 +813,3 @@ // include the implementation here #include "omt.cc" - -#endif // UTIL_OMT_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/partitioned_counter.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/partitioned_counter.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/partitioned_counter.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/partitioned_counter.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/partitioned_counter.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/partitioned_counter.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/partitioned_counter.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/partitioned_counter.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef UTIL_PARTITIONED_COUNTER_H -#define UTIL_PARTITIONED_COUNTER_H - // Overview: A partitioned_counter provides a counter that can be incremented and the running sum can be read at any time. // We assume that increments are frequent, whereas reading is infrequent. 
// Implementation hint: Use thread-local storage so each thread increments its own data. The increment does not require a lock or atomic operation. @@ -187,5 +186,3 @@ friend void destroy_thread_local_part_of_partitioned_counters (void *); }; #endif - -#endif // UTIL_PARTITIONED_COUNTER_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/queue.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/queue.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/queue.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/queue.cc 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,232 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. 
+ +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include +#include "toku_os.h" +#include +#include +#include "queue.h" +#include "memory.h" +#include + +struct qitem; + +struct qitem { + void *item; + struct qitem *next; + uint64_t weight; +}; + +struct queue { + uint64_t contents_weight; // how much stuff is in there? + uint64_t weight_limit; // Block enqueueing when the contents gets to be bigger than the weight. + struct qitem *head, *tail; + + bool eof; + + toku_mutex_t mutex; + toku_cond_t cond; +}; + +// Representation invariant: +// q->contents_weight is the sum of the weights of everything in the queue. +// q->weight_limit is the limit on the weight before we block. +// q->head is the oldest thing in the queue. q->tail is the newest. (If nothing is in the queue then both are NULL) +// If q->head is not null: +// q->head->item is the oldest item. +// q->head->weight is the weight of that item. +// q->head->next is the next youngest thing. +// q->eof indicates that the producer has said "that's all". 
+// q->mutex and q->cond are used as condition variables. + + +int toku_queue_create (QUEUE *q, uint64_t weight_limit) +{ + QUEUE CALLOC(result); + if (result==NULL) return get_error_errno(); + result->contents_weight = 0; + result->weight_limit = weight_limit; + result->head = NULL; + result->tail = NULL; + result->eof = false; + toku_mutex_init(&result->mutex, NULL); + toku_cond_init(&result->cond, NULL); + *q = result; + return 0; +} + +int toku_queue_destroy (QUEUE q) +{ + if (q->head) return EINVAL; + assert(q->contents_weight==0); + toku_mutex_destroy(&q->mutex); + toku_cond_destroy(&q->cond); + toku_free(q); + return 0; +} + +int toku_queue_enq (QUEUE q, void *item, uint64_t weight, uint64_t *total_weight_after_enq) +{ + toku_mutex_lock(&q->mutex); + assert(!q->eof); + // Go ahead and put it in, even if it's too much. + struct qitem *MALLOC(qi); + if (qi==NULL) { + int r = get_error_errno(); + toku_mutex_unlock(&q->mutex); + return r; + } + q->contents_weight += weight; + qi->item = item; + qi->weight = weight; + qi->next = NULL; + if (q->tail) { + q->tail->next = qi; + } else { + assert(q->head==NULL); + q->head = qi; + } + q->tail = qi; + // Wake up the consumer. + toku_cond_signal(&q->cond); + // Now block if there's too much stuff in there. + while (q->weight_limit < q->contents_weight) { + toku_cond_wait(&q->cond, &q->mutex); + } + // we are allowed to return. 
+ if (total_weight_after_enq) { + *total_weight_after_enq = q->contents_weight; + } + toku_mutex_unlock(&q->mutex); + return 0; +} + +int toku_queue_eof (QUEUE q) +{ + toku_mutex_lock(&q->mutex); + assert(!q->eof); + q->eof = true; + toku_cond_signal(&q->cond); + toku_mutex_unlock(&q->mutex); + return 0; +} + +int toku_queue_deq (QUEUE q, void **item, uint64_t *weight, uint64_t *total_weight_after_deq) +{ + toku_mutex_lock(&q->mutex); + int result; + while (q->head==NULL && !q->eof) { + toku_cond_wait(&q->cond, &q->mutex); + } + if (q->head==NULL) { + assert(q->eof); + result = EOF; + } else { + struct qitem *head = q->head; + q->contents_weight -= head->weight; + *item = head->item; + if (weight) + *weight = head->weight; + if (total_weight_after_deq) + *total_weight_after_deq = q->contents_weight; + q->head = head->next; + toku_free(head); + if (q->head==NULL) { + q->tail = NULL; + } + // wake up the producer, since we decreased the contents_weight. + toku_cond_signal(&q->cond); + // Successful result. 
+ result = 0; + } + toku_mutex_unlock(&q->mutex); + return result; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/queue.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/queue.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/queue.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/queue.h 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,137 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +// The abstraction: +// +// queue.h implements a queue suitable for a producer-consumer relationship between two pthreads. +// The enqueue/dequeue operation is fairly heavyweight (involving pthread condition variables) so it may be useful +// to enqueue large chunks rather than small chunks. +// It probably won't work right to have two consumer threads. +// +// Every item inserted into the queue has a weight. If the weight +// gets too big, then the queue blocks on trying to insert more items. +// The weight can be used to limit the total number of items in the +// queue (weight of each item=1) or the total memory consumed by queue +// items (weight of each item is its size). Or the weight's could all be +// zero for an unlimited queue. + +typedef struct queue *QUEUE; + +int toku_queue_create (QUEUE *q, uint64_t weight_limit); +// Effect: Create a queue with a given weight limit. The queue is initially empty. 
+ +int toku_queue_enq (QUEUE q, void *item, uint64_t weight, uint64_t *total_weight_after_enq); +// Effect: Insert ITEM of weight WEIGHT into queue. If the resulting contents weigh too much then block (don't return) until the total weight is low enough. +// If total_weight_after_enq!=NULL then return the current weight of the items in the queue (after finishing blocking on overweight, and after enqueueing the item). +// If successful return 0. +// If an error occurs, return the error number, and the state of the queue is undefined. The item may have been enqueued or not, and in fact the queue may be badly corrupted if the condition variables go awry. If it's just a matter of out-of-memory, then the queue is probably OK. +// Requires: There is only a single consumer. (We wake up the consumer using pthread_cond_signal, which is suitable only for single consumers.) + +int toku_queue_eof (QUEUE q); +// Effect: Inform the queue that no more values will be inserted. After all the values that have been inserted are dequeued, further dequeue operations will return EOF. +// Returns 0 on success. On failure, things are pretty bad (likely to be some sort of mutex failure). + +int toku_queue_deq (QUEUE q, void **item, uint64_t *weight, uint64_t *total_weight_after_deq); +// Effect: Wait until the queue becomes nonempty. Then dequeue and return the oldest item. The item and its weight are returned in *ITEM. +// If weight!=NULL then return the item's weight in *weight. +// If total_weight_after_deq!=NULL then return the current weight of the items in the queue (after dequeuing the item). +// Return 0 if an item is returned. +// Return EOF if no more items will be returned. +// Usage note: The queue should be destroyed only after any consumers will no longer look at it (for example, they saw EOF). + +int toku_queue_destroy (QUEUE q); +// Effect: Destroy the queue. 
+// Requires: The queue must be empty and no consumer should try to dequeue after this (one way to do this is to make sure the consumer saw EOF). +// Returns 0 on success. If the queue is not empty, returns EINVAL. Other errors are likely to be bad (some sort of mutex or condvar failure). + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/rwlock.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/rwlock.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/rwlock.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/rwlock.h 2014-10-08 13:19:52.000000000 +0000 @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef UTIL_RWLOCK_H -#define UTIL_RWLOCK_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,6 +86,8 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -99,7 +99,7 @@ * Overview ***************************************** * - * TokuDB employs readers/writers locks for the ephemeral locks (e.g., + * TokuFT employs readers/writers locks for the ephemeral locks (e.g., * on FT nodes) Why not just use the toku_pthread_rwlock API? 
* * 1) we need multiprocess rwlocks (not just multithreaded) @@ -353,4 +353,3 @@ toku_cond_destroy(&cond); } -#endif // UTIL_RWLOCK_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/scoped_malloc.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/scoped_malloc.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/scoped_malloc.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/scoped_malloc.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/scoped_malloc.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/scoped_malloc.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/scoped_malloc.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/scoped_malloc.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/sort.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/sort.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/sort.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/sort.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,22 +89,11 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef UTIL_SORT_H -#define UTIL_SORT_H +#pragma once #include #include -#if defined(HAVE_CILK) -#include -#define cilk_worker_count (__cilkrts_get_nworkers()) -#else -#define cilk_spawn -#define cilk_sync -#define cilk_for for -#define cilk_worker_count 1 -#endif - namespace toku { template @@ -148,9 +137,8 @@ } const int mid = n / 2; sortdata_t *right_as[2] = { &(as[0])[mid], &(as[1])[mid] }; - const int r1 = cilk_spawn mergesort_internal(as, which, mid, extra); + const int r1 = mergesort_internal(as, which, mid, extra); const int r2 = mergesort_internal(right_as, which, n - mid, extra); - cilk_sync; if (r1 != r2) { // move everything to the same place (r2) memcpy(as[r2], as[r1], mid * (sizeof as[r2][0])); @@ -222,9 +210,8 @@ const int a2 = an / 2; const sortdata_t *akey = &a[a2]; const int b2 = binsearch(*akey, b, bn, 0, extra); - cilk_spawn merge(dest, a, a2, b, b2, extra); + merge(dest, a, a2, b, b2, extra); merge(&dest[a2 + b2], akey, an - a2, &b[b2], bn - b2, extra); - cilk_sync; } } @@ -272,5 +259,3 @@ }; }; - -#endif // UTIL_SORT_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/status.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/status.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/status.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/status.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -90,10 +90,11 @@ #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #pragma once + #include #include -#define TOKUDB_STATUS_INIT(array,k,c,t,l,inc) do { \ +#define TOKUFT_STATUS_INIT(array,k,c,t,l,inc) do { \ array.status[k].keyname = #k; \ array.status[k].columnname = #c; \ array.status[k].type = t; \ @@ -104,7 +105,7 @@ constexpr_static_assert((inc) == TOKU_ENGINE_STATUS \ || strcmp(#c, "nullptr"), "Missing column name."); \ constexpr_static_assert(static_strncasecmp(#c, "TOKU", strlen("TOKU")), \ - "Do not start column names with toku/tokudb. Names get TOKUDB_ prefix automatically."); \ + "Do not start column names with toku."); \ array.status[k].include = static_cast(inc); \ if (t == PARCOUNT) { \ array.status[k].value.parcount = create_partitioned_counter(); \ diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/marked-omt-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/marked-omt-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/marked-omt-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/marked-omt-test.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/memarena-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/memarena-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/memarena-test.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/memarena-test.cc 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,234 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#include + +#include "portability/toku_assert.h" + +#include "util/memarena.h" + +class memarena_unit_test { +private: + static const int magic = 37; + + template + void iterate_chunks(memarena *ma, F &fn) { + for (memarena::chunk_iterator it(ma); it.more(); it.next()) { + size_t used = 0; + const void *buf = it.current(&used); + fn(buf, used); + } + } + + void test_create(size_t size) { + memarena ma; + ma.create(size); + invariant(ma._current_chunk.size == size); + invariant(ma._current_chunk.used == 0); + if (size == 0) { + invariant_null(ma._current_chunk.buf); + } else { + invariant_notnull(ma._current_chunk.buf); + } + + // make sure memory was allocated ok by + // writing to buf and reading it back + if (size > 0) { + memset(ma._current_chunk.buf, magic, size); + } + for (size_t i = 0; i < size; i++) { + const char *buf = reinterpret_cast(ma._current_chunk.buf); + invariant(buf[i] == magic); + } + ma.destroy(); + } + + void test_malloc(size_t size) { + memarena ma; + ma.create(14); + void *v = ma.malloc_from_arena(size); + invariant_notnull(v); + + // make sure memory was allocated ok by + // writing to buf and reading it back + if (size > 0) { + memset(ma._current_chunk.buf, magic, size); + } + for (size_t i = 0; i < size; i++) { + const char *c = reinterpret_cast(ma._current_chunk.buf); + invariant(c[i] == magic); + } + ma.destroy(); + } + + 
static void test_iterate_fn(const void *buf, size_t used) { + for (size_t i = 0; i < used; i++) { + const char *c = reinterpret_cast(buf); + invariant(c[i] == (char) ((intptr_t) &c[i])); + } + } + + void test_iterate(size_t size) { + memarena ma; + ma.create(14); + for (size_t k = 0; k < size / 64; k += 64) { + void *v = ma.malloc_from_arena(64); + for (size_t i = 0; i < 64; i++) { + char *c = reinterpret_cast(v); + c[i] = (char) ((intptr_t) &c[i]); + } + } + size_t rest = size % 64; + if (rest != 0) { + void *v = ma.malloc_from_arena(64); + for (size_t i = 0; i < 64; i++) { + char *c = reinterpret_cast(v); + c[i] = (char) ((intptr_t) &c[i]); + } + } + + iterate_chunks(&ma, test_iterate_fn); + ma.destroy(); + } + + void test_move_memory(size_t size) { + memarena ma; + ma.create(14); + for (size_t k = 0; k < size / 64; k += 64) { + void *v = ma.malloc_from_arena(64); + for (size_t i = 0; i < 64; i++) { + char *c = reinterpret_cast(v); + c[i] = (char) ((intptr_t) &c[i]); + } + } + size_t rest = size % 64; + if (rest != 0) { + void *v = ma.malloc_from_arena(64); + for (size_t i = 0; i < 64; i++) { + char *c = reinterpret_cast(v); + c[i] = (char) ((intptr_t) &c[i]); + } + } + + memarena ma2; + ma.move_memory(&ma2); + iterate_chunks(&ma2, test_iterate_fn); + + ma.destroy(); + ma2.destroy(); + } + +public: + void test() { + test_create(0); + test_create(64); + test_create(128 * 1024 * 1024); + test_malloc(0); + test_malloc(63); + test_malloc(64); + test_malloc(64 * 1024 * 1024); + test_malloc((64 * 1024 * 1024) + 1); + test_iterate(0); + test_iterate(63); + test_iterate(128 * 1024); + test_iterate(64 * 1024 * 1024); + test_iterate((64 * 1024 * 1024) + 1); + test_move_memory(0); + test_move_memory(1); + test_move_memory(63); + test_move_memory(65); + test_move_memory(65 * 1024 * 1024); + test_move_memory(101 * 1024 * 1024); + } +}; + +int main(void) { + memarena_unit_test test; + test.test(); + return 0; +} diff -Nru 
mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/minicron-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/minicron-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/minicron-test.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/minicron-test.cc 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,272 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +#include +#include "test.h" +#include "util/minicron.h" +#include + +#include +#include + +static double +tdiff (struct timeval *a, struct timeval *b) { + return (a->tv_sec-b->tv_sec) + (a->tv_usec-b->tv_usec)*1e-6; +} + +struct timeval starttime; +static double elapsed (void) { + struct timeval now; + gettimeofday(&now, 0); + return tdiff(&now, &starttime); +} + +static int +#ifndef GCOV +__attribute__((__noreturn__)) +#endif +never_run (void *a) { + assert(a==0); + assert(0); +#if defined(GCOV) + return 0; +#endif +} + +// Can we start something with period=0 (the function should never run) and shut it down. +static void* +test1 (void* v) +{ + struct minicron m; + memset(&m, 0, sizeof(struct minicron)); + int r = toku_minicron_setup(&m, 0, never_run, 0); assert(r==0); + sleep(1); + r = toku_minicron_shutdown(&m); assert(r==0); + return v; +} + +// Can we start something with period=10 and shut it down after 2 seconds (the function should never run) . 
+static void* +test2 (void* v) +{ + struct minicron m; + memset(&m, 0, sizeof(struct minicron)); + int r = toku_minicron_setup(&m, 10000, never_run, 0); assert(r==0); + sleep(2); + r = toku_minicron_shutdown(&m); assert(r==0); + return v; +} + +struct tenx { + struct timeval tv; + int counter; +}; + +static int +run_5x (void *v) { + struct tenx *CAST_FROM_VOIDP(tx, v); + struct timeval now; + gettimeofday(&now, 0); + double diff = tdiff(&now, &tx->tv); + if (verbose) printf("T=%f tx->counter=%d\n", diff, tx->counter); + // We only verify that the timer was not premature. + // Sometimes it will be delayed, but there's no good way to test it and nothing we can do about it. + if (!(diff>0.5 + tx->counter)) { + printf("T=%f tx->counter=%d\n", diff, tx->counter); + assert(0); + } + tx->counter++; + return 0; +} + +// Start something with period=1 and run it a few times +static void* +test3 (void* v) +{ + struct minicron m; + struct tenx tx; + gettimeofday(&tx.tv, 0); + tx.counter=0; + memset(&m, 0, sizeof(struct minicron)); + int r = toku_minicron_setup(&m, 1000, run_5x, &tx); assert(r==0); + sleep(5); + r = toku_minicron_shutdown(&m); assert(r==0); + assert(tx.counter>=4 && tx.counter<=5); // after 5 seconds it could have run 4 or 5 times. 
+ return v; +} + +static int +run_3sec (void *v) { + if (verbose) printf("start3sec at %.6f\n", elapsed()); + int *CAST_FROM_VOIDP(counter, v); + (*counter)++; + sleep(3); + if (verbose) printf("end3sec at %.6f\n", elapsed()); + return 0; +} + +// make sure that if f is really slow that it doesn't run too many times +static void* +test4 (void *v) { + struct minicron m; + int counter = 0; + memset(&m, 0, sizeof(struct minicron)); + int r = toku_minicron_setup(&m, 2000, run_3sec, &counter); assert(r==0); + sleep(10); + r = toku_minicron_shutdown(&m); assert(r==0); + assert(counter==3); + return v; +} + +static void* +test5 (void *v) { + struct minicron m; + int counter = 0; + memset(&m, 0, sizeof(struct minicron)); + int r = toku_minicron_setup(&m, 10000, run_3sec, &counter); assert(r==0); + toku_minicron_change_period(&m, 2000); + sleep(10); + r = toku_minicron_shutdown(&m); assert(r==0); + assert(counter==3); + return v; +} + +static void* +test6 (void *v) { + struct minicron m; + memset(&m, 0, sizeof(struct minicron)); + int r = toku_minicron_setup(&m, 5000, never_run, 0); assert(r==0); + toku_minicron_change_period(&m, 0); + sleep(7); + r = toku_minicron_shutdown(&m); assert(r==0); + return v; +} + +// test that we actually run once per period, even if the execution is long +static void* +test7 (void *v) { + struct minicron m; + memset(&m, 0, sizeof(struct minicron)); + int counter = 0; + int r = toku_minicron_setup(&m, 5000, run_3sec, &counter); assert(r==0); + sleep(17); + r = toku_minicron_shutdown(&m); assert(r==0); + assert(counter==3); + return v; +} + +typedef void*(*ptf)(void*); +int +test_main (int argc, const char *argv[]) { + default_parse_args(argc,argv); + gettimeofday(&starttime, 0); + + ptf testfuns[] = {test1, test2, test3, + test4, + test5, + test6, + test7 + }; +#define N (sizeof(testfuns)/sizeof(testfuns[0])) + toku_pthread_t tests[N]; + + unsigned int i; + for (i=0; i +#include "toku_os.h" +#include +#include +#include +#include +#include 
+#include "util/queue.h" + +static int verbose=1; + +static int count_0 = 0; +static uint64_t e_max_weight=0, d_max_weight = 0; // max weight seen by enqueue thread and dequeue thread respectively. + +static void *start_0 (void *arg) { + QUEUE q = (QUEUE)arg; + void *item; + uint64_t weight; + long count = 0; + while (1) { + uint64_t this_max_weight; + int r=toku_queue_deq(q, &item, &weight, &this_max_weight); + if (r==EOF) break; + assert(r==0); + if (this_max_weight>d_max_weight) d_max_weight=this_max_weight; + long v = (long)item; + //printf("D(%ld)=%ld %ld\n", v, this_max_weight, d_max_weight); + assert(v==count); + count_0++; + count++; + } + return NULL; +} + +static void enq (QUEUE q, long v, uint64_t weight) { + uint64_t this_max_weight; + int r = toku_queue_enq(q, (void*)v, (weight==0)?0:1, &this_max_weight); + assert(r==0); + if (this_max_weight>e_max_weight) e_max_weight=this_max_weight; + //printf("E(%ld)=%ld %ld\n", v, this_max_weight, e_max_weight); +} + +static void queue_test_0 (uint64_t weight) +// Test a queue that can hold WEIGHT items. 
+{ + //printf("\n"); + count_0 = 0; + e_max_weight = 0; + d_max_weight = 0; + QUEUE q; + int r; + r = toku_queue_create(&q, weight); assert(r==0); + toku_pthread_t thread; + r = toku_pthread_create(&thread, NULL, start_0, q); assert(r==0); + enq(q, 0L, weight); + enq(q, 1L, weight); + enq(q, 2L, weight); + enq(q, 3L, weight); + sleep(1); + enq(q, 4L, weight); + enq(q, 5L, weight); + r = toku_queue_eof(q); assert(r==0); + void *result; + r = toku_pthread_join(thread, &result); assert(r==0); + assert(result==NULL); + assert(count_0==6); + r = toku_queue_destroy(q); + assert(d_max_weight <= weight); + assert(e_max_weight <= weight); +} + + +static void parse_args (int argc, const char *argv[]) { + const char *progname=argv[0]; + argc--; argv++; + while (argc>0) { + if (strcmp(argv[0],"-v")==0) { + verbose++; + } else if (strcmp(argv[0],"-q")==0) { + verbose--; + } else { + fprintf(stderr, "Usage:\n %s [-v] [-q]\n", progname); + exit(1); + } + argc--; argv++; + } + if (verbose<0) verbose=0; +} + +int main (int argc, const char *argv[]) { + parse_args(argc, argv); + queue_test_0(0LL); + queue_test_0(1LL); + queue_test_0(2LL); + return 0; +} diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/rwlock_condvar.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/rwlock_condvar.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/rwlock_condvar.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/rwlock_condvar.h 2014-10-08 13:19:52.000000000 +0000 @@ -33,7 +33,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/sort-tmpl-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/sort-tmpl-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/sort-tmpl-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/sort-tmpl-test.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/test_circular_buffer.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/test_circular_buffer.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/test_circular_buffer.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/test_circular_buffer.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/test_doubly_linked_list.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/test_doubly_linked_list.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/test_doubly_linked_list.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/test_doubly_linked_list.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/test.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/test.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/test.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/test.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/test-kibbutz2.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/test-kibbutz2.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/test-kibbutz2.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/test-kibbutz2.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/test-kibbutz.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/test-kibbutz.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/test-kibbutz.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/test-kibbutz.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/test_partitioned_counter_5833.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/test_partitioned_counter_5833.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/test_partitioned_counter_5833.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/test_partitioned_counter_5833.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/test_partitioned_counter.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/test_partitioned_counter.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/test_partitioned_counter.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/test_partitioned_counter.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -201,9 +201,6 @@ head->prev = cp; } head = cp; -#ifdef __INTEL_COMPILER - __memory_barrier(); // for some reason I don't understand, ICC needs a memory barrier here. -Bradley -#endif cp->counter = 0; cp->inited = true; cp->myid = idcounter++; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/test-rwlock.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/test-rwlock.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/test-rwlock.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/test-rwlock.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/test-rwlock-cheapness.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/test-rwlock-cheapness.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/test-rwlock-cheapness.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/test-rwlock-cheapness.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/threadpool-nproc-limit.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/threadpool-nproc-limit.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/threadpool-nproc-limit.cc 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/threadpool-nproc-limit.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/threadpool-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/threadpool-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/threadpool-test.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/threadpool-test.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/threadpool-testrunf.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/threadpool-testrunf.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/threadpool-testrunf.cc 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/threadpool-testrunf.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/x1764-test.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/x1764-test.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/tests/x1764-test.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/tests/x1764-test.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/threadpool.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/threadpool.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/threadpool.cc 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/threadpool.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/threadpool.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/threadpool.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/threadpool.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/threadpool.h 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,8 +89,7 @@ #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef UTIL_THREADPOOL_H -#define UTIL_THREADPOOL_H +#pragma once #include @@ -137,5 +136,3 @@ // Print the state of the thread pool void toku_thread_pool_print(struct toku_thread_pool *pool, FILE *out); - -#endif // UTIL_THREADPOOL_H diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/x1764.cc mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/x1764.cc --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/x1764.cc 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/x1764.cc 2014-10-08 13:19:52.000000000 +0000 @@ -29,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/x1764.h mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/x1764.h --- mariadb-5.5-5.5.39/storage/tokudb/ft-index/util/x1764.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ft-index/util/x1764.h 2014-10-08 13:19:52.000000000 +0000 @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef X1764_H -#define X1764_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,6 +86,8 @@ under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -121,6 +121,3 @@ uint32_t toku_x1764_finish (struct x1764 *l); // Effect: Return the final 32-bit result. 
- - -#endif diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ha_tokudb_alter_56.cc mariadb-5.5-5.5.40/storage/tokudb/ha_tokudb_alter_56.cc --- mariadb-5.5-5.5.39/storage/tokudb/ha_tokudb_alter_56.cc 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ha_tokudb_alter_56.cc 2014-10-08 13:19:52.000000000 +0000 @@ -720,27 +720,6 @@ tokudb_alter_ctx *ctx = static_cast(ha_alter_info->handler_ctx); bool result = false; // success THD *thd = ha_thd(); - MDL_ticket *ticket = table->mdl_ticket; - if (ticket->get_type() != MDL_EXCLUSIVE) { - // get exclusive lock no matter what -#if defined(MARIADB_BASE_VERSION) - killed_state saved_killed_state = thd->killed; - thd->killed = NOT_KILLED; - while (wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED) && thd->killed) - thd->killed = NOT_KILLED; - assert(ticket->get_type() == MDL_EXCLUSIVE); - if (thd->killed == NOT_KILLED) - thd->killed = saved_killed_state; -#else - THD::killed_state saved_killed_state = thd->killed; - thd->killed = THD::NOT_KILLED; - while (wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED) && thd->killed) - thd->killed = THD::NOT_KILLED; - assert(ticket->get_type() == MDL_EXCLUSIVE); - if (thd->killed == THD::NOT_KILLED) - thd->killed = saved_killed_state; -#endif - } if (commit) { #if (50613 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \ @@ -768,6 +747,35 @@ } if (!commit) { + if (table->mdl_ticket->get_type() != MDL_EXCLUSIVE && + (ctx->add_index_changed || ctx->drop_index_changed || ctx->compression_changed)) { + + // get exclusive lock no matter what +#if defined(MARIADB_BASE_VERSION) + killed_state saved_killed_state = thd->killed; + thd->killed = NOT_KILLED; + for (volatile uint i = 0; wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED); i++) { + if (thd->killed != NOT_KILLED) + thd->killed = NOT_KILLED; + sleep(1); + } + assert(table->mdl_ticket->get_type() == MDL_EXCLUSIVE); + if (thd->killed == NOT_KILLED) + thd->killed = saved_killed_state; +#else + 
THD::killed_state saved_killed_state = thd->killed; + thd->killed = THD::NOT_KILLED; + for (volatile uint i = 0; wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED); i++) { + if (thd->killed != THD::NOT_KILLED) + thd->killed = THD::NOT_KILLED; + sleep(1); + } + assert(table->mdl_ticket->get_type() == MDL_EXCLUSIVE); + if (thd->killed == THD::NOT_KILLED) + thd->killed = saved_killed_state; +#endif + } + // abort the alter transaction NOW so that any alters are rolled back. this allows the following restores to work. tokudb_trx_data *trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton); assert(ctx->alter_txn == trx->stmt); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ha_tokudb.cc mariadb-5.5-5.5.40/storage/tokudb/ha_tokudb.cc --- mariadb-5.5-5.5.39/storage/tokudb/ha_tokudb.cc 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ha_tokudb.cc 2014-10-08 13:19:51.000000000 +0000 @@ -92,6 +92,8 @@ #pragma implementation // gcc: Class implementation #endif +#include // must be first! 
+ extern "C" { #include "stdint.h" #define __STDC_FORMAT_MACROS @@ -136,8 +138,8 @@ #include "tokudb_buffer.h" #include "tokudb_status.h" #include "tokudb_card.h" -#include "hatoku_hton.h" #include "ha_tokudb.h" +#include "hatoku_hton.h" #include static const char *ha_tokudb_exts[] = { @@ -471,7 +473,7 @@ static int ai_poll_fun(void *extra, float progress) { LOADER_CONTEXT context = (LOADER_CONTEXT)extra; - if (context->thd->killed) { + if (thd_killed(context->thd)) { sprintf(context->write_status_msg, "The process has been killed, aborting add index."); return ER_ABORTING_CONNECTION; } @@ -486,7 +488,7 @@ static int loader_poll_fun(void *extra, float progress) { LOADER_CONTEXT context = (LOADER_CONTEXT)extra; - if (context->thd->killed) { + if (thd_killed(context->thd)) { sprintf(context->write_status_msg, "The process has been killed, aborting bulk load."); return ER_ABORTING_CONNECTION; } @@ -1249,6 +1251,7 @@ tokudb_active_index = MAX_KEY; invalidate_icp(); trx_handler_list.data = this; + in_rpl_write_rows = in_rpl_delete_rows = in_rpl_update_rows = false; TOKUDB_HANDLER_DBUG_VOID_RETURN; } @@ -1638,8 +1641,7 @@ #if WITH_PARTITION_STORAGE_ENGINE // verify frm data for non-partitioned tables - if (TOKU_PARTITION_WRITE_FRM_DATA || - IF_PARTITIONING(table->part_info, NULL) == NULL) { + if (TOKU_PARTITION_WRITE_FRM_DATA || table->part_info == NULL) { error = verify_frm_data(table->s->path.str, txn); if (error) goto exit; @@ -3331,7 +3333,7 @@ ai_metadata_update_required = false; loader_error = 0; if (loader) { - if (!abort_loader && !thd->killed) { + if (!abort_loader && !thd_killed(thd)) { DBUG_EXECUTE_IF("tokudb_end_bulk_insert_sleep", { const char *orig_proc_info = tokudb_thd_get_proc_info(thd); thd_proc_info(thd, "DBUG sleep"); @@ -3341,7 +3343,7 @@ error = loader->close(loader); loader = NULL; if (error) { - if (thd->killed) { + if (thd_killed(thd)) { my_error(ER_QUERY_INTERRUPTED, MYF(0)); } goto cleanup; @@ -3476,7 +3478,7 @@ share->rows, key_info->name); 
thd_proc_info(thd, status_msg); - if (thd->killed) { + if (thd_killed(thd)) { my_error(ER_QUERY_INTERRUPTED, MYF(0)); error = ER_QUERY_INTERRUPTED; goto cleanup; @@ -3562,12 +3564,27 @@ return error; } +static void maybe_do_unique_checks_delay(THD *thd) { + if (thd->slave_thread) { + uint64_t delay_ms = THDVAR(thd, rpl_unique_checks_delay); + if (delay_ms) + usleep(delay_ms * 1000); + } +} + +static bool do_unique_checks(THD *thd, bool do_rpl_event) { + if (do_rpl_event && thd->slave_thread && opt_readonly && !THDVAR(thd, rpl_unique_checks)) + return false; + else + return !thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS); +} + int ha_tokudb::do_uniqueness_checks(uchar* record, DB_TXN* txn, THD* thd) { - int error; + int error = 0; // // first do uniqueness checks // - if (share->has_unique_keys && !thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) { + if (share->has_unique_keys && do_unique_checks(thd, in_rpl_write_rows)) { for (uint keynr = 0; keynr < table_share->keys; keynr++) { bool is_unique_key = (table->key_info[keynr].flags & HA_NOSAME) || (keynr == primary_key); bool is_unique = false; @@ -3580,13 +3597,18 @@ if (!is_unique_key) { continue; } + + maybe_do_unique_checks_delay(thd); + // // if unique key, check uniqueness constraint // but, we do not need to check it if the key has a null // and we do not need to check it if unique_checks is off // error = is_val_unique(&is_unique, record, &table->key_info[keynr], keynr, txn); - if (error) { goto cleanup; } + if (error) { + goto cleanup; + } if (!is_unique) { error = DB_KEYEXIST; last_dup_key = keynr; @@ -3594,7 +3616,6 @@ } } } - error = 0; cleanup: return error; } @@ -3697,15 +3718,8 @@ tokudb_my_free(tmp_pk_val_data); } -// // set the put flags for the main dictionary -// -void ha_tokudb::set_main_dict_put_flags( - THD* thd, - bool opt_eligible, - uint32_t* put_flags - ) -{ +void ha_tokudb::set_main_dict_put_flags(THD* thd, bool opt_eligible, uint32_t* put_flags) { uint32_t old_prelock_flags = 0; 
uint curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key); bool in_hot_index = share->num_DBs > curr_num_DBs; @@ -3725,8 +3739,7 @@ { *put_flags = old_prelock_flags; } - else if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS) - && !is_replace_into(thd) && !is_insert_ignore(thd)) + else if (!do_unique_checks(thd, in_rpl_write_rows | in_rpl_update_rows) && !is_replace_into(thd) && !is_insert_ignore(thd)) { *put_flags = old_prelock_flags; } @@ -3748,22 +3761,18 @@ int ha_tokudb::insert_row_to_main_dictionary(uchar* record, DBT* pk_key, DBT* pk_val, DB_TXN* txn) { int error = 0; - uint32_t put_flags = mult_put_flags[primary_key]; - THD *thd = ha_thd(); uint curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key); - assert(curr_num_DBs == 1); - + + uint32_t put_flags = mult_put_flags[primary_key]; + THD *thd = ha_thd(); set_main_dict_put_flags(thd, true, &put_flags); - error = share->file->put( - share->file, - txn, - pk_key, - pk_val, - put_flags - ); + // for test, make unique checks have a very long duration + if ((put_flags & DB_OPFLAGS_MASK) == DB_NOOVERWRITE) + maybe_do_unique_checks_delay(thd); + error = share->file->put(share->file, txn, pk_key, pk_val, put_flags); if (error) { last_dup_key = primary_key; goto cleanup; @@ -3777,14 +3786,18 @@ int error = 0; uint curr_num_DBs = share->num_DBs; set_main_dict_put_flags(thd, true, &mult_put_flags[primary_key]); - uint32_t i, flags = mult_put_flags[primary_key]; + uint32_t flags = mult_put_flags[primary_key]; + + // for test, make unique checks have a very long duration + if ((flags & DB_OPFLAGS_MASK) == DB_NOOVERWRITE) + maybe_do_unique_checks_delay(thd); // the insert ignore optimization uses DB_NOOVERWRITE_NO_ERROR, // which is not allowed with env->put_multiple. // we have to insert the rows one by one in this case. 
if (flags & DB_NOOVERWRITE_NO_ERROR) { DB * src_db = share->key_file[primary_key]; - for (i = 0; i < curr_num_DBs; i++) { + for (uint32_t i = 0; i < curr_num_DBs; i++) { DB * db = share->key_file[i]; if (i == primary_key) { // if it's the primary key, insert the rows @@ -3845,7 +3858,7 @@ // error otherwise // int ha_tokudb::write_row(uchar * record) { - TOKUDB_HANDLER_DBUG_ENTER(""); + TOKUDB_HANDLER_DBUG_ENTER("%p", record); DBT row, prim_key; int error; @@ -3883,10 +3896,7 @@ if (share->has_auto_inc && record == table->record[0]) { tokudb_pthread_mutex_lock(&share->mutex); ulonglong curr_auto_inc = retrieve_auto_increment( - table->field[share->ai_field_index]->key_type(), - field_offset(table->field[share->ai_field_index], table), - record - ); + table->field[share->ai_field_index]->key_type(), field_offset(table->field[share->ai_field_index], table), record); if (curr_auto_inc > share->last_auto_increment) { share->last_auto_increment = curr_auto_inc; if (delay_updating_ai_metadata) { @@ -4054,7 +4064,6 @@ memset((void *) &prim_row, 0, sizeof(prim_row)); memset((void *) &old_prim_row, 0, sizeof(old_prim_row)); - ha_statistic_increment(&SSV::ha_update_count); #if MYSQL_VERSION_ID < 50600 if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) { @@ -4101,7 +4110,6 @@ } txn = using_ignore ? 
sub_trans : transaction; - if (hidden_primary_key) { memset((void *) &prim_key, 0, sizeof(prim_key)); prim_key.data = (void *) current_ident; @@ -4113,10 +4121,8 @@ create_dbt_key_from_table(&old_prim_key, primary_key, primary_key_buff, old_row, &has_null); } - // // do uniqueness checks - // - if (share->has_unique_keys && !thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) { + if (share->has_unique_keys && do_unique_checks(thd, in_rpl_update_rows)) { for (uint keynr = 0; keynr < table_share->keys; keynr++) { bool is_unique_key = (table->key_info[keynr].flags & HA_NOSAME) || (keynr == primary_key); if (keynr == primary_key && !share->pk_has_string) { @@ -4157,6 +4163,10 @@ set_main_dict_put_flags(thd, false, &mult_put_flags[primary_key]); + // for test, make unique checks have a very long duration + if ((mult_put_flags[primary_key] & DB_OPFLAGS_MASK) == DB_NOOVERWRITE) + maybe_do_unique_checks_delay(thd); + error = db_env->update_multiple( db_env, share->key_file[primary_key], @@ -4375,6 +4385,20 @@ return key_can_be_null && key_len > 0 && key[0] != 0; } +// Return true if bulk fetch can be used +static bool tokudb_do_bulk_fetch(THD *thd) { + switch (thd_sql_command(thd)) { + case SQLCOM_SELECT: + case SQLCOM_CREATE_TABLE: + case SQLCOM_INSERT_SELECT: + case SQLCOM_REPLACE_SELECT: + case SQLCOM_DELETE: + return THDVAR(thd, bulk_fetch) != 0; + default: + return false; + } +} + // // Notification that a range query getting all elements that equal a key // to take place. 
Will pre acquire read lock @@ -4383,7 +4407,7 @@ // error otherwise // int ha_tokudb::prepare_index_key_scan(const uchar * key, uint key_len) { - TOKUDB_HANDLER_DBUG_ENTER(""); + TOKUDB_HANDLER_DBUG_ENTER("%p %u", key, key_len); int error = 0; DBT start_key, end_key; THD* thd = ha_thd(); @@ -4407,7 +4431,7 @@ range_lock_grabbed = true; range_lock_grabbed_null = index_key_is_null(table, tokudb_active_index, key, key_len); - doing_bulk_fetch = (thd_sql_command(thd) == SQLCOM_SELECT); + doing_bulk_fetch = tokudb_do_bulk_fetch(thd); bulk_fetch_iteration = 0; rows_fetched_using_bulk_fetch = 0; error = 0; @@ -4519,6 +4543,7 @@ } invalidate_bulk_fetch(); doing_bulk_fetch = false; + maybe_index_scan = false; error = 0; exit: TOKUDB_HANDLER_DBUG_RETURN(error); @@ -5261,86 +5286,91 @@ } int ha_tokudb::get_next(uchar* buf, int direction, DBT* key_to_compare, bool do_key_read) { - int error = 0; - uint32_t flags = SET_PRELOCK_FLAG(0); - THD* thd = ha_thd(); - tokudb_trx_data* trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton);; - bool need_val; + int error = 0; HANDLE_INVALID_CURSOR(); - // we need to read the val of what we retrieve if - // we do NOT have a covering index AND we are using a clustering secondary - // key - need_val = (do_key_read == 0) && - (tokudb_active_index == primary_key || - key_is_clustering(&table->key_info[tokudb_active_index]) - ); - - if ((bytes_used_in_range_query_buff - curr_range_query_buff_offset) > 0) { - error = read_data_from_range_query_buff(buf, need_val, do_key_read); - } - else if (icp_went_out_of_range) { - icp_went_out_of_range = false; - error = HA_ERR_END_OF_FILE; + if (maybe_index_scan) { + maybe_index_scan = false; + if (!range_lock_grabbed) { + error = prepare_index_scan(); + } } - else { - invalidate_bulk_fetch(); - if (doing_bulk_fetch) { - struct smart_dbt_bf_info bf_info; - bf_info.ha = this; - // you need the val if you have a clustering index and key_read is not 0; - bf_info.direction = direction; - bf_info.thd = 
ha_thd(); - bf_info.need_val = need_val; - bf_info.buf = buf; - bf_info.key_to_compare = key_to_compare; - // - // call c_getf_next with purpose of filling in range_query_buff - // - rows_fetched_using_bulk_fetch = 0; - // it is expected that we can do ICP in the smart_dbt_bf_callback - // as a result, it's possible we don't return any data because - // none of the rows matched the index condition. Therefore, we need - // this while loop. icp_out_of_range will be set if we hit a row that - // the index condition states is out of our range. When that hits, - // we know all the data in the buffer is the last data we will retrieve - while (bytes_used_in_range_query_buff == 0 && !icp_went_out_of_range && error == 0) { - if (direction > 0) { - error = cursor->c_getf_next(cursor, flags, smart_dbt_bf_callback, &bf_info); - } else { - error = cursor->c_getf_prev(cursor, flags, smart_dbt_bf_callback, &bf_info); - } - } - // if there is no data set and we went out of range, - // then there is nothing to return - if (bytes_used_in_range_query_buff == 0 && icp_went_out_of_range) { - icp_went_out_of_range = false; - error = HA_ERR_END_OF_FILE; - } - if (bulk_fetch_iteration < HA_TOKU_BULK_FETCH_ITERATION_MAX) { - bulk_fetch_iteration++; - } + + if (!error) { + uint32_t flags = SET_PRELOCK_FLAG(0); - error = handle_cursor_error(error, HA_ERR_END_OF_FILE,tokudb_active_index); - if (error) { goto cleanup; } - - // - // now that range_query_buff is filled, read an element - // + // we need to read the val of what we retrieve if + // we do NOT have a covering index AND we are using a clustering secondary + // key + bool need_val = (do_key_read == 0) && + (tokudb_active_index == primary_key || key_is_clustering(&table->key_info[tokudb_active_index])); + + if ((bytes_used_in_range_query_buff - curr_range_query_buff_offset) > 0) { error = read_data_from_range_query_buff(buf, need_val, do_key_read); } + else if (icp_went_out_of_range) { + icp_went_out_of_range = false; + error = 
HA_ERR_END_OF_FILE; + } else { - struct smart_dbt_info info; - info.ha = this; - info.buf = buf; - info.keynr = tokudb_active_index; + invalidate_bulk_fetch(); + if (doing_bulk_fetch) { + struct smart_dbt_bf_info bf_info; + bf_info.ha = this; + // you need the val if you have a clustering index and key_read is not 0; + bf_info.direction = direction; + bf_info.thd = ha_thd(); + bf_info.need_val = need_val; + bf_info.buf = buf; + bf_info.key_to_compare = key_to_compare; + // + // call c_getf_next with purpose of filling in range_query_buff + // + rows_fetched_using_bulk_fetch = 0; + // it is expected that we can do ICP in the smart_dbt_bf_callback + // as a result, it's possible we don't return any data because + // none of the rows matched the index condition. Therefore, we need + // this while loop. icp_out_of_range will be set if we hit a row that + // the index condition states is out of our range. When that hits, + // we know all the data in the buffer is the last data we will retrieve + while (bytes_used_in_range_query_buff == 0 && !icp_went_out_of_range && error == 0) { + if (direction > 0) { + error = cursor->c_getf_next(cursor, flags, smart_dbt_bf_callback, &bf_info); + } else { + error = cursor->c_getf_prev(cursor, flags, smart_dbt_bf_callback, &bf_info); + } + } + // if there is no data set and we went out of range, + // then there is nothing to return + if (bytes_used_in_range_query_buff == 0 && icp_went_out_of_range) { + icp_went_out_of_range = false; + error = HA_ERR_END_OF_FILE; + } + if (bulk_fetch_iteration < HA_TOKU_BULK_FETCH_ITERATION_MAX) { + bulk_fetch_iteration++; + } - if (direction > 0) { - error = cursor->c_getf_next(cursor, flags, SMART_DBT_CALLBACK(do_key_read), &info); - } else { - error = cursor->c_getf_prev(cursor, flags, SMART_DBT_CALLBACK(do_key_read), &info); + error = handle_cursor_error(error, HA_ERR_END_OF_FILE,tokudb_active_index); + if (error) { goto cleanup; } + + // + // now that range_query_buff is filled, read an element + 
// + error = read_data_from_range_query_buff(buf, need_val, do_key_read); + } + else { + struct smart_dbt_info info; + info.ha = this; + info.buf = buf; + info.keynr = tokudb_active_index; + + if (direction > 0) { + error = cursor->c_getf_next(cursor, flags, SMART_DBT_CALLBACK(do_key_read), &info); + } else { + error = cursor->c_getf_prev(cursor, flags, SMART_DBT_CALLBACK(do_key_read), &info); + } + error = handle_cursor_error(error, HA_ERR_END_OF_FILE, tokudb_active_index); } - error = handle_cursor_error(error, HA_ERR_END_OF_FILE, tokudb_active_index); } } @@ -5352,12 +5382,15 @@ // read the full row by doing a point query into the // main table. // - if (!error && !do_key_read && (tokudb_active_index != primary_key) && !key_is_clustering(&table->key_info[tokudb_active_index])) { error = read_full_row(buf); } - trx->stmt_progress.queried++; - track_progress(thd); + + if (!error) { + tokudb_trx_data* trx = (tokudb_trx_data *) thd_get_ha_data(ha_thd(), tokudb_hton); + trx->stmt_progress.queried++; + track_progress(ha_thd()); + } cleanup: return error; } @@ -5426,8 +5459,7 @@ info.buf = buf; info.keynr = tokudb_active_index; - error = cursor->c_getf_first(cursor, flags, - SMART_DBT_CALLBACK(key_read), &info); + error = cursor->c_getf_first(cursor, flags, SMART_DBT_CALLBACK(key_read), &info); error = handle_cursor_error(error,HA_ERR_END_OF_FILE,tokudb_active_index); // @@ -5437,9 +5469,11 @@ if (!error && !key_read && (tokudb_active_index != primary_key) && !key_is_clustering(&table->key_info[tokudb_active_index])) { error = read_full_row(buf); } - trx->stmt_progress.queried++; + if (trx) { + trx->stmt_progress.queried++; + } track_progress(thd); - + maybe_index_scan = true; cleanup: TOKUDB_HANDLER_DBUG_RETURN(error); } @@ -5469,8 +5503,7 @@ info.buf = buf; info.keynr = tokudb_active_index; - error = cursor->c_getf_last(cursor, flags, - SMART_DBT_CALLBACK(key_read), &info); + error = cursor->c_getf_last(cursor, flags, SMART_DBT_CALLBACK(key_read), &info); error = 
handle_cursor_error(error,HA_ERR_END_OF_FILE,tokudb_active_index); // // still need to get entire contents of the row if operation done on @@ -5484,6 +5517,7 @@ trx->stmt_progress.queried++; } track_progress(thd); + maybe_index_scan = true; cleanup: TOKUDB_HANDLER_DBUG_RETURN(error); } @@ -5607,13 +5641,11 @@ DBUG_RETURN(to); } -// // Retrieves a row with based on the primary key saved in pos // Returns: // 0 on success // HA_ERR_KEY_NOT_FOUND if not found // error otherwise -// int ha_tokudb::rnd_pos(uchar * buf, uchar * pos) { TOKUDB_HANDLER_DBUG_ENTER(""); DBT db_pos; @@ -5626,12 +5658,20 @@ ha_statistic_increment(&SSV::ha_read_rnd_count); tokudb_active_index = MAX_KEY; + // test rpl slave by inducing a delay before the point query + THD *thd = ha_thd(); + if (thd->slave_thread && (in_rpl_delete_rows || in_rpl_update_rows)) { + uint64_t delay_ms = THDVAR(thd, rpl_lookup_rows_delay); + if (delay_ms) + usleep(delay_ms * 1000); + } + info.ha = this; info.buf = buf; info.keynr = primary_key; error = share->file->getf_set(share->file, transaction, - get_cursor_isolation_flags(lock.type, ha_thd()), + get_cursor_isolation_flags(lock.type, thd), key, smart_dbt_callback_rowread_ptquery, &info); if (error == DB_NOTFOUND) { @@ -5643,8 +5683,8 @@ TOKUDB_HANDLER_DBUG_RETURN(error); } -int ha_tokudb::prelock_range( const key_range *start_key, const key_range *end_key) { - TOKUDB_HANDLER_DBUG_ENTER(""); +int ha_tokudb::prelock_range(const key_range *start_key, const key_range *end_key) { + TOKUDB_HANDLER_DBUG_ENTER("%p %p", start_key, end_key); THD* thd = ha_thd(); int error = 0; @@ -5709,11 +5749,8 @@ goto cleanup; } - // // at this point, determine if we will be doing bulk fetch - // as of now, only do it if we are doing a select - // - doing_bulk_fetch = (thd_sql_command(thd) == SQLCOM_SELECT); + doing_bulk_fetch = tokudb_do_bulk_fetch(thd); bulk_fetch_iteration = 0; rows_fetched_using_bulk_fetch = 0; @@ -5728,7 +5765,7 @@ // Forward scans use 
read_range_first()/read_range_next(). // int ha_tokudb::prepare_range_scan( const key_range *start_key, const key_range *end_key) { - TOKUDB_HANDLER_DBUG_ENTER(""); + TOKUDB_HANDLER_DBUG_ENTER("%p %p", start_key, end_key); int error = prelock_range(start_key, end_key); if (!error) { range_lock_grabbed = true; @@ -5742,7 +5779,7 @@ bool eq_range, bool sorted) { - TOKUDB_HANDLER_DBUG_ENTER(""); + TOKUDB_HANDLER_DBUG_ENTER("%p %p %u %u", start_key, end_key, eq_range, sorted); int error = prelock_range(start_key, end_key); if (error) { goto cleanup; } range_lock_grabbed = true; @@ -6846,7 +6883,7 @@ if (error) { goto cleanup; } #if WITH_PARTITION_STORAGE_ENGINE - if (TOKU_PARTITION_WRITE_FRM_DATA || IF_PARTITIONING(form->part_info, NULL) == NULL) { + if (TOKU_PARTITION_WRITE_FRM_DATA || form->part_info == NULL) { error = write_frm_data(status_block, txn, form->s->path.str); if (error) { goto cleanup; } } @@ -7712,7 +7749,7 @@ thd_progress_report(thd, num_processed, (long long unsigned) share->rows); #endif - if (thd->killed) { + if (thd_killed(thd)) { error = ER_ABORTING_CONNECTION; goto cleanup; } @@ -8157,6 +8194,37 @@ trx->handlers = list_delete(trx->handlers, &trx_handler_list); } +void ha_tokudb::rpl_before_write_rows() { + in_rpl_write_rows = true; +} + +void ha_tokudb::rpl_after_write_rows() { + in_rpl_write_rows = false; +} + +void ha_tokudb::rpl_before_delete_rows() { + in_rpl_delete_rows = true; +} + +void ha_tokudb::rpl_after_delete_rows() { + in_rpl_delete_rows = false; +} + +void ha_tokudb::rpl_before_update_rows() { + in_rpl_update_rows = true; +} + +void ha_tokudb::rpl_after_update_rows() { + in_rpl_update_rows = false; +} + +bool ha_tokudb::rpl_lookup_rows() { + if (!in_rpl_delete_rows && !in_rpl_update_rows) + return true; + else + return THDVAR(ha_thd(), rpl_lookup_rows); +} + // table admin #include "ha_tokudb_admin.cc" diff -Nru mariadb-5.5-5.5.39/storage/tokudb/ha_tokudb.h mariadb-5.5-5.5.40/storage/tokudb/ha_tokudb.h --- 
mariadb-5.5-5.5.39/storage/tokudb/ha_tokudb.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/ha_tokudb.h 2014-10-08 13:19:52.000000000 +0000 @@ -251,6 +251,7 @@ uint64_t bulk_fetch_iteration; uint64_t rows_fetched_using_bulk_fetch; bool doing_bulk_fetch; + bool maybe_index_scan; // // buffer used to temporarily store a "packed key" @@ -796,6 +797,19 @@ private: int do_optimize(THD *thd); int map_to_handler_error(int error); + +public: + void rpl_before_write_rows(); + void rpl_after_write_rows(); + void rpl_before_delete_rows(); + void rpl_after_delete_rows(); + void rpl_before_update_rows(); + void rpl_after_update_rows(); + bool rpl_lookup_rows(); +private: + bool in_rpl_write_rows; + bool in_rpl_delete_rows; + bool in_rpl_update_rows; }; #if TOKU_INCLUDE_OPTION_STRUCTS diff -Nru mariadb-5.5-5.5.39/storage/tokudb/hatoku_hton.cc mariadb-5.5-5.5.40/storage/tokudb/hatoku_hton.cc --- mariadb-5.5-5.5.39/storage/tokudb/hatoku_hton.cc 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/hatoku_hton.cc 2014-10-08 13:19:52.000000000 +0000 @@ -92,6 +92,7 @@ #define MYSQL_SERVER 1 #include "hatoku_defines.h" #include +#include #include "stdint.h" #if defined(_WIN32) @@ -330,9 +331,25 @@ sql_print_error(" "); sql_print_error("************************************************************"); break; + case TOKUDB_UPGRADE_FAILURE: + sql_print_error("%s upgrade failed. 
A clean shutdown of the previous version is required.", tokudb_hton_name); + break; + default: + sql_print_error("%s unknown error %d", tokudb_hton_name, error); + break; } } +static int tokudb_set_product_name(void) { + size_t n = strlen(tokudb_hton_name); + char tokudb_product_name[n+1]; + memset(tokudb_product_name, 0, sizeof tokudb_product_name); + for (size_t i = 0; i < n; i++) + tokudb_product_name[i] = tolower(tokudb_hton_name[i]); + int r = db_env_set_toku_product_name(tokudb_product_name); + return r; +} + static int tokudb_init_func(void *p) { TOKUDB_DBUG_ENTER("%p", p); int r; @@ -346,11 +363,17 @@ #if TOKUDB_CHECK_JEMALLOC if (tokudb_check_jemalloc && dlsym(RTLD_DEFAULT, "mallctl") == NULL) { - sql_print_error("%s not initialized because jemalloc is not loaded", tokudb_hton_name); + sql_print_error("%s is not initialized because jemalloc is not loaded", tokudb_hton_name); goto error; } #endif + r = tokudb_set_product_name(); + if (r) { + sql_print_error("%s can not set product name error %d", tokudb_hton_name, r); + goto error; + } + tokudb_pthread_mutex_init(&tokudb_mutex, MY_MUTEX_INIT_FAST); (void) my_hash_init(&tokudb_open_tables, table_alias_charset, 32, 0, 0, (my_hash_get_key) tokudb_get_key, 0, 0); @@ -532,6 +555,7 @@ if (r) { DBUG_PRINT("info", ("env->open %d", r)); + handle_ydb_error(r); goto error; } @@ -610,8 +634,35 @@ if (db_env) { if (tokudb_init_flags & DB_INIT_LOG) tokudb_cleanup_log_files(); - error = db_env->close(db_env, 0); // Error is logged - assert(error==0); +#if TOKU_INCLUDE_XA + long total_prepared = 0; // count the total number of prepared txn's that we discard + while (1) { + // get xid's + const long n_xid = 1; + TOKU_XA_XID xids[n_xid]; + long n_prepared = 0; + error = db_env->txn_xa_recover(db_env, xids, n_xid, &n_prepared, total_prepared == 0 ? 
DB_FIRST : DB_NEXT); + assert(error == 0); + if (n_prepared == 0) + break; + // discard xid's + for (long i = 0; i < n_xid; i++) { + DB_TXN *txn = NULL; + error = db_env->get_txn_from_xid(db_env, &xids[i], &txn); + assert(error == 0); + error = txn->discard(txn, 0); + assert(error == 0); + } + total_prepared += n_prepared; + } +#endif + error = db_env->close(db_env, total_prepared > 0 ? TOKUFT_DIRTY_SHUTDOWN : 0); +#if TOKU_INCLUDE_XA + if (error != 0 && total_prepared > 0) { + sql_print_error("%s: %ld prepared txns still live, please shutdown, error %d", tokudb_hton_name, total_prepared, error); + } else +#endif + assert(error == 0); db_env = NULL; } @@ -703,7 +754,7 @@ info.thd = thd; int r = txn->commit_with_progress(txn, flags, txn_progress_func, &info); if (r != 0) { - sql_print_error("tried committing transaction %p and got error code %d", txn, r); + sql_print_error("%s: tried committing transaction %p and got error code %d", tokudb_hton_name, txn, r); } assert(r == 0); thd_proc_info(thd, orig_proc_info); @@ -715,7 +766,7 @@ info.thd = thd; int r = txn->abort_with_progress(txn, txn_progress_func, &info); if (r != 0) { - sql_print_error("tried aborting transaction %p and got error code %d", txn, r); + sql_print_error("%s: tried aborting transaction %p and got error code %d", tokudb_hton_name, txn, r); } assert(r == 0); thd_proc_info(thd, orig_proc_info); @@ -792,6 +843,12 @@ static int tokudb_xa_prepare(handlerton* hton, THD* thd, bool all) { TOKUDB_DBUG_ENTER(""); int r = 0; + + /* if support_xa is disable, just return */ + if (!THDVAR(thd, support_xa)) { + TOKUDB_DBUG_RETURN(r); + } + DBUG_PRINT("trans", ("preparing transaction %s", all ? "all" : "stmt")); tokudb_trx_data *trx = (tokudb_trx_data *) thd_get_ha_data(thd, hton); DB_TXN* txn = all ? 
trx->all : trx->stmt; @@ -814,7 +871,7 @@ TOKUDB_DBUG_RETURN(r); } -static int tokudb_xa_recover(handlerton* hton, XID* xid_list, uint len) { +static int tokudb_xa_recover(handlerton* hton, XID* xid_list, uint len) { TOKUDB_DBUG_ENTER(""); int r = 0; if (len == 0 || xid_list == NULL) { @@ -1215,7 +1272,7 @@ #endif static void tokudb_print_error(const DB_ENV * db_env, const char *db_errpfx, const char *buffer) { - sql_print_error("%s: %s", db_errpfx, buffer); + sql_print_error("%s: %s", db_errpfx, buffer); } static void tokudb_cleanup_log_files(void) { @@ -1394,9 +1451,36 @@ #if TOKUDB_CHECK_JEMALLOC MYSQL_SYSVAR(check_jemalloc), #endif + MYSQL_SYSVAR(bulk_fetch), +#if TOKU_INCLUDE_XA + MYSQL_SYSVAR(support_xa), +#endif + MYSQL_SYSVAR(rpl_unique_checks), + MYSQL_SYSVAR(rpl_unique_checks_delay), + MYSQL_SYSVAR(rpl_lookup_rows), + MYSQL_SYSVAR(rpl_lookup_rows_delay), NULL }; +// Split ./database/table-dictionary into database, table and dictionary strings +static void tokudb_split_dname(const char *dname, String &database_name, String &table_name, String &dictionary_name) { + const char *splitter = strchr(dname, '/'); + if (splitter) { + const char *database_ptr = splitter+1; + const char *table_ptr = strchr(database_ptr, '/'); + if (table_ptr) { + database_name.append(database_ptr, table_ptr - database_ptr); + table_ptr += 1; + const char *dictionary_ptr = strchr(table_ptr, '-'); + if (dictionary_ptr) { + table_name.append(table_ptr, dictionary_ptr - table_ptr); + dictionary_ptr += 1; + dictionary_name.append(dictionary_ptr); + } + } + } +} + struct st_mysql_storage_engine tokudb_storage_engine = { MYSQL_HANDLERTON_INTERFACE_VERSION }; static struct st_mysql_information_schema tokudb_file_map_information_schema = { MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION }; @@ -1442,31 +1526,12 @@ assert(iname_len == curr_val.size - 1); table->field[1]->store(iname, iname_len, system_charset_info); - // denormalize the dname - const char *database_name = NULL; - size_t 
database_len = 0; - const char *table_name = NULL; - size_t table_len = 0; - const char *dictionary_name = NULL; - size_t dictionary_len = 0; - database_name = strchr(dname, '/'); - if (database_name) { - database_name += 1; - table_name = strchr(database_name, '/'); - if (table_name) { - database_len = table_name - database_name; - table_name += 1; - dictionary_name = strchr(table_name, '-'); - if (dictionary_name) { - table_len = dictionary_name - table_name; - dictionary_name += 1; - dictionary_len = strlen(dictionary_name); - } - } - } - table->field[2]->store(database_name, database_len, system_charset_info); - table->field[3]->store(table_name, table_len, system_charset_info); - table->field[4]->store(dictionary_name, dictionary_len, system_charset_info); + // split the dname + String database_name, table_name, dictionary_name; + tokudb_split_dname(dname, database_name, table_name, dictionary_name); + table->field[2]->store(database_name.c_ptr(), database_name.length(), system_charset_info); + table->field[3]->store(table_name.c_ptr(), table_name.length(), system_charset_info); + table->field[4]->store(dictionary_name.c_ptr(), dictionary_name.length(), system_charset_info); error = schema_table_store_record(thd, table); } @@ -1497,10 +1562,12 @@ rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), "TokuDB"); - error = -1; + error = ER_PLUGIN_IS_NOT_LOADED; + my_error(error, MYF(0), tokudb_hton_name); } else { error = tokudb_file_map(table, thd); + if (error) + my_error(error, MYF(0)); } rw_unlock(&tokudb_hton_initialized_lock); @@ -1527,6 +1594,9 @@ {"bt_num_blocks_in_use", 0, MYSQL_TYPE_LONGLONG, 0, 0, NULL, SKIP_OPEN_TABLE }, {"bt_size_allocated", 0, MYSQL_TYPE_LONGLONG, 0, 0, NULL, SKIP_OPEN_TABLE }, {"bt_size_in_use", 0, MYSQL_TYPE_LONGLONG, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"table_schema", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"table_name", 256, MYSQL_TYPE_STRING, 0, 0, 
NULL, SKIP_OPEN_TABLE }, + {"table_dictionary_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE} }; @@ -1564,25 +1634,25 @@ // Recalculate and check just to be safe. { size_t dname_len = strlen((const char *)dname->data); - size_t iname_len = strlen((const char *)iname->data); assert(dname_len == dname->size - 1); + table->field[0]->store((char *)dname->data, dname_len, system_charset_info); + size_t iname_len = strlen((const char *)iname->data); assert(iname_len == iname->size - 1); - table->field[0]->store( - (char *)dname->data, - dname_len, - system_charset_info - ); - table->field[1]->store( - (char *)iname->data, - iname_len, - system_charset_info - ); + table->field[1]->store((char *)iname->data, iname_len, system_charset_info); } table->field[2]->store(bt_num_blocks_allocated, false); table->field[3]->store(bt_num_blocks_in_use, false); table->field[4]->store(bt_size_allocated, false); table->field[5]->store(bt_size_in_use, false); + // split the dname + { + String database_name, table_name, dictionary_name; + tokudb_split_dname((const char *)dname->data, database_name, table_name, dictionary_name); + table->field[6]->store(database_name.c_ptr(), database_name.length(), system_charset_info); + table->field[7]->store(table_name.c_ptr(), table_name.length(), system_charset_info); + table->field[8]->store(dictionary_name.c_ptr(), dictionary_name.length(), system_charset_info); + } error = schema_table_store_record(thd, table); exit: @@ -1606,12 +1676,7 @@ goto cleanup; } while (error == 0) { - error = tmp_cursor->c_get( - tmp_cursor, - &curr_key, - &curr_val, - DB_NEXT - ); + error = tmp_cursor->c_get(tmp_cursor, &curr_key, &curr_val, DB_NEXT); if (!error) { error = tokudb_report_fractal_tree_info_for_db(&curr_key, &curr_val, table, thd); } @@ -1644,10 +1709,12 @@ rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), "TokuDB"); - 
error = -1; + error = ER_PLUGIN_IS_NOT_LOADED; + my_error(error, MYF(0), tokudb_hton_name); } else { error = tokudb_fractal_tree_info(table, thd); + if (error) + my_error(error, MYF(0)); } //3938: unlock the status flag lock @@ -1675,6 +1742,9 @@ {"blocknum", 0, MYSQL_TYPE_LONGLONG, 0, 0, NULL, SKIP_OPEN_TABLE }, {"offset", 0, MYSQL_TYPE_LONGLONG, 0, MY_I_S_MAYBE_NULL, NULL, SKIP_OPEN_TABLE }, {"size", 0, MYSQL_TYPE_LONGLONG, 0, MY_I_S_MAYBE_NULL, NULL, SKIP_OPEN_TABLE }, + {"table_schema", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"table_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"table_dictionary_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE} }; @@ -1747,19 +1817,13 @@ // See #5789 // Recalculate and check just to be safe. size_t dname_len = strlen((const char *)dname->data); - size_t iname_len = strlen((const char *)iname->data); assert(dname_len == dname->size - 1); + table->field[0]->store((char *)dname->data, dname_len, system_charset_info); + + size_t iname_len = strlen((const char *)iname->data); assert(iname_len == iname->size - 1); - table->field[0]->store( - (char *)dname->data, - dname_len, - system_charset_info - ); - table->field[1]->store( - (char *)iname->data, - iname_len, - system_charset_info - ); + table->field[1]->store((char *)iname->data, iname_len, system_charset_info); + table->field[2]->store(e.checkpoint_counts[i], false); table->field[3]->store(e.blocknums[i], false); static const int64_t freelist_null = -1; @@ -1778,6 +1842,13 @@ table->field[5]->store(e.sizes[i], false); } + // split the dname + String database_name, table_name, dictionary_name; + tokudb_split_dname((const char *)dname->data, database_name, table_name,dictionary_name); + table->field[6]->store(database_name.c_ptr(), database_name.length(), system_charset_info); + table->field[7]->store(table_name.c_ptr(), table_name.length(), system_charset_info); + 
table->field[8]->store(dictionary_name.c_ptr(), dictionary_name.length(), system_charset_info); + error = schema_table_store_record(thd, table); } @@ -1818,12 +1889,7 @@ goto cleanup; } while (error == 0) { - error = tmp_cursor->c_get( - tmp_cursor, - &curr_key, - &curr_val, - DB_NEXT - ); + error = tmp_cursor->c_get(tmp_cursor, &curr_key, &curr_val, DB_NEXT); if (!error) { error = tokudb_report_fractal_tree_block_map_for_db(&curr_key, &curr_val, table, thd); } @@ -1856,10 +1922,12 @@ rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), "TokuDB"); - error = -1; + error = ER_PLUGIN_IS_NOT_LOADED; + my_error(error, MYF(0), tokudb_hton_name); } else { error = tokudb_fractal_tree_block_map(table, thd); + if (error) + my_error(error, MYF(0)); } //3938: unlock the status flag lock @@ -1968,7 +2036,7 @@ } // dump to stderr if (lock_timeout_debug & 2) { - TOKUDB_TRACE("%s", log_str.c_ptr()); + sql_print_error("%s: %s", tokudb_hton_name, log_str.c_ptr()); } } } @@ -2007,11 +2075,13 @@ rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), "TokuDB"); - error = -1; + error = ER_PLUGIN_IS_NOT_LOADED; + my_error(error, MYF(0), tokudb_hton_name); } else { struct tokudb_trx_extra e = { thd, tables->table }; error = db_env->iterate_live_transactions(db_env, tokudb_trx_callback, &e); + if (error) + my_error(error, MYF(0)); } rw_unlock(&tokudb_hton_initialized_lock); @@ -2038,6 +2108,9 @@ {"lock_waits_key_left", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {"lock_waits_key_right", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {"lock_waits_start_time", 0, MYSQL_TYPE_LONGLONG, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"lock_waits_table_schema", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"lock_waits_table_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"lock_waits_table_dictionary_name", 256, MYSQL_TYPE_STRING, 0, 0, 
NULL, SKIP_OPEN_TABLE }, {NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE} }; @@ -2063,6 +2136,13 @@ tokudb_pretty_right_key(db, right_key, &right_str); table->field[4]->store(right_str.ptr(), right_str.length(), system_charset_info); table->field[5]->store(start_time, false); + + String database_name, table_name, dictionary_name; + tokudb_split_dname(dname, database_name, table_name, dictionary_name); + table->field[6]->store(database_name.c_ptr(), database_name.length(), system_charset_info); + table->field[7]->store(table_name.c_ptr(), table_name.length(), system_charset_info); + table->field[8]->store(dictionary_name.c_ptr(), dictionary_name.length(), system_charset_info); + int error = schema_table_store_record(thd, table); return error; } @@ -2078,11 +2158,13 @@ rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), "TokuDB"); - error = -1; + error = ER_PLUGIN_IS_NOT_LOADED; + my_error(error, MYF(0), tokudb_hton_name); } else { struct tokudb_lock_waits_extra e = { thd, tables->table }; error = db_env->iterate_pending_lock_requests(db_env, tokudb_lock_waits_callback, &e); + if (error) + my_error(error, MYF(0)); } rw_unlock(&tokudb_hton_initialized_lock); @@ -2108,6 +2190,9 @@ {"locks_dname", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {"locks_key_left", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {"locks_key_right", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"locks_table_schema", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"locks_table_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"locks_table_dictionary_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE} }; @@ -2139,6 +2224,12 @@ tokudb_pretty_right_key(db, &right_key, &right_str); table->field[4]->store(right_str.ptr(), right_str.length(), system_charset_info); + String database_name, table_name, 
dictionary_name; + tokudb_split_dname(dname, database_name, table_name, dictionary_name); + table->field[5]->store(database_name.c_ptr(), database_name.length(), system_charset_info); + table->field[6]->store(table_name.c_ptr(), table_name.length(), system_charset_info); + table->field[7]->store(dictionary_name.c_ptr(), dictionary_name.length(), system_charset_info); + error = schema_table_store_record(thd, table); } return error; @@ -2155,11 +2246,13 @@ rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), "TokuDB"); - error = -1; + error = ER_PLUGIN_IS_NOT_LOADED; + my_error(error, MYF(0), tokudb_hton_name); } else { struct tokudb_locks_extra e = { thd, tables->table }; error = db_env->iterate_live_transactions(db_env, tokudb_locks_callback, &e); + if (error) + my_error(error, MYF(0)); } rw_unlock(&tokudb_hton_initialized_lock); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/hatoku_hton.h mariadb-5.5-5.5.40/storage/tokudb/hatoku_hton.h --- mariadb-5.5-5.5.39/storage/tokudb/hatoku_hton.h 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/hatoku_hton.h 2014-10-08 13:19:52.000000000 +0000 @@ -450,17 +450,42 @@ NULL }; -static MYSQL_THDVAR_ENUM(empty_scan, - PLUGIN_VAR_OPCMDARG, +static MYSQL_THDVAR_ENUM(empty_scan, PLUGIN_VAR_OPCMDARG, "TokuDB algorithm to check if the table is empty when opened. 
", NULL, NULL, TOKUDB_EMPTY_SCAN_RL, &tokudb_empty_scan_typelib ); #if TOKUDB_CHECK_JEMALLOC static uint tokudb_check_jemalloc; -static MYSQL_SYSVAR_UINT(check_jemalloc, tokudb_check_jemalloc, 0, "Check if jemalloc is linked", NULL, NULL, 1, 0, 1, 0); +static MYSQL_SYSVAR_UINT(check_jemalloc, tokudb_check_jemalloc, 0, "Check if jemalloc is linked", + NULL, NULL, 1, 0, 1, 0); +#endif + +static MYSQL_THDVAR_BOOL(bulk_fetch, PLUGIN_VAR_THDLOCAL, "enable bulk fetch", + NULL /*check*/, NULL /*update*/, true /*default*/); + +#if TOKU_INCLUDE_XA +static MYSQL_THDVAR_BOOL(support_xa, + PLUGIN_VAR_OPCMDARG, + "Enable TokuDB support for the XA two-phase commit", + NULL, // check + NULL, // update + true // default +); #endif +static MYSQL_THDVAR_BOOL(rpl_unique_checks, PLUGIN_VAR_THDLOCAL, "enable unique checks on replication slave", + NULL /*check*/, NULL /*update*/, true /*default*/); + +static MYSQL_THDVAR_ULONGLONG(rpl_unique_checks_delay, PLUGIN_VAR_THDLOCAL, "time in milliseconds to add to unique checks test on replication slave", + NULL, NULL, 0 /*default*/, 0 /*min*/, ~0ULL /*max*/, 1 /*blocksize*/); + +static MYSQL_THDVAR_BOOL(rpl_lookup_rows, PLUGIN_VAR_THDLOCAL, "lookup a row on rpl slave", + NULL /*check*/, NULL /*update*/, true /*default*/); + +static MYSQL_THDVAR_ULONGLONG(rpl_lookup_rows_delay, PLUGIN_VAR_THDLOCAL, "time in milliseconds to add to lookups on replication slave", + NULL, NULL, 0 /*default*/, 0 /*min*/, ~0ULL /*max*/, 1 /*blocksize*/); + extern HASH tokudb_open_tables; extern pthread_mutex_t tokudb_mutex; extern uint32_t tokudb_write_status_frequency; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/disabled.def mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/disabled.def --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/disabled.def 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/disabled.def 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,12 @@ +rpl_tokudb_delete_pk: unreliable, uses 
timestamp differences +rpl_tokudb_delete_pk_lookup1: unreliable, uses timestamp differences +rpl_tokudb_update_pk_uc0_lookup0: unreliable, uses timestamp differences +rpl_tokudb_update_pk_uc0_lookup1: unreliable, uses timestamp differences +rpl_tokudb_update_pk_uc1_lookup0: unreliable, uses timestamp differences +rpl_tokudb_update_pk_uc1_lookup1: unreliable, uses timestamp differences +rpl_tokudb_update_unique_uc0_lookup0: unreliable, uses timestamp differences +rpl_tokudb_update_unique_uc0_lookup1: unreliable, uses timestamp differences +rpl_tokudb_write_pk: unreliable, uses timestamp differences +rpl_tokudb_write_pk_uc1: unreliable, uses timestamp differences +rpl_tokudb_write_unique: unreliable, uses timestamp differences +rpl_tokudb_write_unique_uc1: unreliable, uses timestamp differences diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/include/have_tokudb.opt mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/include/have_tokudb.opt --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/include/have_tokudb.opt 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/include/have_tokudb.opt 2014-10-08 13:19:51.000000000 +0000 @@ -1 +1 @@ ---loose-tokudb --plugin-load=$HA_TOKUDB_SO +--loose-tokudb --plugin-load=$HA_TOKUDB_SO --loose-tokudb-check-jemalloc=0 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_delete_pk_lookup1.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_delete_pk_lookup1.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_delete_pk_lookup1.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_delete_pk_lookup1.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,17 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, primary key(a)) engine=tokudb; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); 
+include/diff_tables.inc [master:test.t, slave:test.t] +delete from t where a=2; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart > 5; +@tend-@tstart > 5 +1 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_delete_pk.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_delete_pk.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_delete_pk.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_delete_pk.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,17 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, primary key(a)) engine=tokudb; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); +include/diff_tables.inc [master:test.t, slave:test.t] +delete from t where a=2; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +1 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc0_lookup0.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc0_lookup0.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc0_lookup0.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc0_lookup0.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,27 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, primary key(a)) engine=tokudb; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); 
+include/diff_tables.inc [master:test.t, slave:test.t] +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +1 +select * from t; +a b +1 3 +2 2 +3 5 +4 3 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc0_lookup1.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc0_lookup1.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc0_lookup1.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc0_lookup1.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,27 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, primary key(a)) engine=tokudb; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); +include/diff_tables.inc [master:test.t, slave:test.t] +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +0 +select * from t; +a b +1 3 +2 2 +3 5 +4 3 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc1_lookup0.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc1_lookup0.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc1_lookup0.result 
1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc1_lookup0.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,27 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, primary key(a)) engine=tokudb; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); +include/diff_tables.inc [master:test.t, slave:test.t] +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +0 +select * from t; +a b +1 3 +2 2 +3 5 +4 3 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc1_lookup1.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc1_lookup1.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc1_lookup1.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc1_lookup1.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,27 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, primary key(a)) engine=tokudb; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); +include/diff_tables.inc [master:test.t, slave:test.t] +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +0 
+select * from t; +a b +1 3 +2 2 +3 5 +4 3 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_unique_uc0_lookup0.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_unique_uc0_lookup0.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_unique_uc0_lookup0.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_unique_uc0_lookup0.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,27 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, c bigint not null, primary key(a), unique key(c)) engine=tokudb; +insert into t values (1,0,-1); +insert into t values (2,0,-2),(3,0,-3); +insert into t values (4,0,-4); +include/diff_tables.inc [master:test.t, slave:test.t] +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +1 +select * from t; +a b c +1 3 -1 +2 2 -2 +3 5 -3 +4 3 -4 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_unique_uc0_lookup1.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_unique_uc0_lookup1.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_unique_uc0_lookup1.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_unique_uc0_lookup1.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,27 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create 
table t (a bigint not null, b bigint not null, c bigint not null, primary key(a), unique key(c)) engine=tokudb; +insert into t values (1,0,-1); +insert into t values (2,0,-2),(3,0,-3); +insert into t values (4,0,-4); +include/diff_tables.inc [master:test.t, slave:test.t] +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +0 +select * from t; +a b c +1 3 -1 +2 2 -2 +3 5 -3 +4 3 -4 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_pk.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_pk.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_pk.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_pk.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,14 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, primary key(a)) engine=tokudb; +select unix_timestamp() into @tstart; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); +select unix_timestamp()-@tstart <= 10; +unix_timestamp()-@tstart <= 10 +1 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_pk_uc1.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_pk_uc1.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_pk_uc1.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_pk_uc1.result 2014-10-08 13:19:51.000000000 +0000 
@@ -0,0 +1,14 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, primary key(a)) engine=tokudb; +select unix_timestamp() into @tstart; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); +select unix_timestamp()-@tstart <= 10; +unix_timestamp()-@tstart <= 10 +0 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_unique.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_unique.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_unique.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_unique.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,14 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, primary key(a), unique key(b)) engine=tokudb; +select unix_timestamp() into @tstart; +insert into t values (1,2); +insert into t values (2,3),(3,4); +insert into t values (4,5); +select unix_timestamp()-@tstart <= 10; +unix_timestamp()-@tstart <= 10 +1 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_unique_uc1.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_unique_uc1.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_unique_uc1.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_unique_uc1.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,14 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, primary key(a), unique key(b)) engine=tokudb; 
+select unix_timestamp() into @tstart; +insert into t values (1,2); +insert into t values (2,3),(3,4); +insert into t values (4,5); +select unix_timestamp()-@tstart <= 10; +unix_timestamp()-@tstart <= 10 +0 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk_lookup1-slave.opt mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk_lookup1-slave.opt --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk_lookup1-slave.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk_lookup1-slave.opt 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=0 --tokudb-rpl-unique-checks=ON --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=ON diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk_lookup1.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk_lookup1.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk_lookup1.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk_lookup1.test 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,66 @@ +# test replicated delete rows log events on a table with a primary key. +# the slave is read only with tokudb rpl row lookups ON. +# this will cause SLOW deletes. 
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, primary key(a)) engine=$engine; +# show create table t; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# delete a row +connection master; +delete from t where a=2; +select unix_timestamp() into @tstart; + +# wait for the delete to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart > 5; # assert big delay in the delete time + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk-slave.opt mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk-slave.opt --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk-slave.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk-slave.opt 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 
--tokudb-rpl-unique-checks=OFF --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=OFF diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk.test 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,63 @@ +# test replicated delete rows log events on a table with a primary key. +# the slave is read only with tokudb rpl row lookups OFF. + +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, primary key(a)) engine=$engine; +# show create table t; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# delete a row +connection master; +delete from t where a=2; +select unix_timestamp() into @tstart; + +# wait for the delete to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; # assert no delay in the delete time + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup 
+connection master; +drop table if exists t; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup0-slave.opt mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup0-slave.opt --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup0-slave.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup0-slave.opt 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=OFF --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=OFF diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup0.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup0.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup0.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup0.test 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,70 @@ +# test replicated update rows log events on a table with a primary key. 
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, primary key(a)) engine=$engine; +# show create table t; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# delete a row +connection master; +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; + +# wait for the delete to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; # assert no delay in the delete time + +connection slave; +select * from t; + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup1-slave.opt mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup1-slave.opt --- 
mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup1-slave.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup1-slave.opt 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=OFF --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=ON diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup1.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup1.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup1.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup1.test 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,70 @@ +# test replicated update rows log events on a table with a primary key. + +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, primary key(a)) engine=$engine; +# show create table t; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# delete a row +connection master; +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t 
set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; + +# wait for the delete to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; # assert no delay in the delete time + +connection slave; +select * from t; + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup0-slave.opt mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup0-slave.opt --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup0-slave.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup0-slave.opt 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=ON --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=OFF diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup0.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup0.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup0.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup0.test 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,70 @@ +# test replicated update rows log events on a table with a primary key. 
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, primary key(a)) engine=$engine; +# show create table t; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# delete a row +connection master; +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; + +# wait for the delete to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; # assert no delay in the delete time + +connection slave; +select * from t; + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup1-slave.opt mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup1-slave.opt --- 
mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup1-slave.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup1-slave.opt 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=ON --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=ON diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup1.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup1.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup1.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup1.test 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,70 @@ +# test replicated update rows log events on a table with a primary key. + +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, primary key(a)) engine=$engine; +# show create table t; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# delete a row +connection master; +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t 
set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; + +# wait for the delete to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; # assert no delay in the delete time + +connection slave; +select * from t; + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup0-slave.opt mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup0-slave.opt --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup0-slave.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup0-slave.opt 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=OFF --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=OFF diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup0.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup0.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup0.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup0.test 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,70 @@ +# test replicated update rows log events on a table with a primary key. 
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, c bigint not null, primary key(a), unique key(c)) engine=$engine; +# show create table t; +insert into t values (1,0,-1); +insert into t values (2,0,-2),(3,0,-3); +insert into t values (4,0,-4); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# delete a row +connection master; +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; + +# wait for the delete to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; # assert no delay in the delete time + +connection slave; +select * from t; + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup1-slave.opt mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup1-slave.opt --- 
mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup1-slave.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup1-slave.opt 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=OFF --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=ON diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup1.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup1.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup1.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup1.test 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,69 @@ +# test replicated update rows log events on a table with a primary key. + +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, c bigint not null, primary key(a), unique key(c)) engine=$engine; +# show create table t; +insert into t values (1,0,-1); +insert into t values (2,0,-2),(3,0,-3); +insert into t values (4,0,-4); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# delete a row +connection master; +update t set b=b+1 where a=2; 
+update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; + +# wait for the delete to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; # assert no delay in the delete time + +connection slave; +select * from t; + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk-slave.opt mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk-slave.opt --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk-slave.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk-slave.opt 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=5000 --tokudb-rpl-unique-checks=OFF diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk.test 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,53 @@ +# test replicated write rows log events on a table with a primary key. +# the slave is read only with tokudb unique checks disabled. 
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_unique_checks%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, primary key(a)) engine=$engine; +# show create table t; +select unix_timestamp() into @tstart; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +connection master; +select unix_timestamp()-@tstart <= 10; + +connection slave; +# insert into t values (5); # test read-only +# show create table t; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk_uc1-slave.opt mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk_uc1-slave.opt --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk_uc1-slave.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk_uc1-slave.opt 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=ON diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk_uc1.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk_uc1.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk_uc1.test 1970-01-01 00:00:00.000000000 +0000 +++ 
mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk_uc1.test 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,53 @@ +# test replicated write rows log events on a table with a primary key. +# the slave is read only with tokudb unique checks disabled. + +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_unique_checks%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, primary key(a)) engine=$engine; +# show create table t; +select unix_timestamp() into @tstart; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +connection master; +select unix_timestamp()-@tstart <= 10; + +connection slave; +# insert into t values (5); # test read-only +# show create table t; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique-slave.opt mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique-slave.opt --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique-slave.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique-slave.opt 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=5000 --tokudb-rpl-unique-checks=OFF diff -Nru 
mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique.test 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,52 @@ +# test replicated write rows log events on a table with a primary key and a unique secondary key. +# the slave is read only with tokudb unique checks disabled. + +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_unique_checks%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, primary key(a), unique key(b)) engine=$engine; +# show create table t; +select unix_timestamp() into @tstart; +insert into t values (1,2); +insert into t values (2,3),(3,4); +insert into t values (4,5); + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +connection master; +select unix_timestamp()-@tstart <= 10; + +connection slave; +# show create table t; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique_uc1-slave.opt mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique_uc1-slave.opt --- 
mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique_uc1-slave.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique_uc1-slave.opt 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=5000 --tokudb-rpl-unique-checks=ON diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique_uc1.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique_uc1.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique_uc1.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique_uc1.test 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,52 @@ +# test replicated write rows log events on a table with a primary key and a unique secondary key. +# the slave is read only with tokudb unique checks disabled. + +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_unique_checks%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, primary key(a), unique key(b)) engine=$engine; +# show create table t; +select unix_timestamp() into @tstart; +insert into t values (1,2); +insert into t values (2,3),(3,4); +insert into t values (4,5); + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +connection master; +select unix_timestamp()-@tstart <= 10; + +connection slave; +# show create table t; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop 
table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/tokudb_innodb_xa_crash.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/tokudb_innodb_xa_crash.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/rpl/t/tokudb_innodb_xa_crash.test 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/rpl/t/tokudb_innodb_xa_crash.test 2014-10-08 13:19:51.000000000 +0000 @@ -1,6 +1,6 @@ ---source include/master-slave.inc --source include/have_tokudb.inc --source include/have_innodb.inc +--source include/master-slave.inc eval CREATE TABLE t1(`a` INT) ENGINE=TokuDB; eval CREATE TABLE t2(`a` INT) ENGINE=InnoDB; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/disabled.def mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/disabled.def --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/disabled.def 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/disabled.def 2014-10-08 13:19:51.000000000 +0000 @@ -58,3 +58,6 @@ mvcc-19: No online ALTER in MariaDB 5.5 mvcc-20: No online ALTER in MariaDB 5.5 mvcc-27: No online OPTIMIZE in MariaDB 5.5 +cluster_key_part: engine options on partitioned tables +i_s_tokudb_lock_waits_released: unstable, race conditions +i_s_tokudb_locks_released: unstable, race conditions diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/include/have_tokudb.inc mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/include/have_tokudb.inc --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/include/have_tokudb.inc 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/include/have_tokudb.inc 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1 @@ +let $datadir=`select @@datadir`; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_create_select_hash_part.result 
mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_create_select_hash_part.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_create_select_hash_part.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_create_select_hash_part.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,328 @@ +set default_storage_engine='tokudb'; +drop table if exists t,t1,t2,t3; +CREATE TABLE `t` ( +`num` int(10) unsigned auto_increment NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +); +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +8388608 +CREATE TABLE `t1` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) as select * from t; +CREATE TABLE `t2` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) PARTITION BY HASH (num) +PARTITIONS 8 as select * from t; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; 
+count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2; +DROP TABLE t3; +1 +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` 
bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; 
+count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT 
count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t3; +1 +drop table t,t1,t2; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_create_select_range_part.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_create_select_range_part.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_create_select_range_part.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_create_select_range_part.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,335 @@ +set default_storage_engine='tokudb'; +drop table if exists t,t1,t2; +CREATE TABLE `t` ( +`num` int(10) unsigned auto_increment NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +); +INSERT INTO t values (null,null); +INSERT 
INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +8388608 +CREATE TABLE `t1` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) as select * from t; +CREATE TABLE `t2` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) PARTITION BY RANGE (num) +(PARTITION p0 VALUES LESS THAN (1000000), +PARTITION p1 VALUES LESS THAN (2000000), +PARTITION p2 VALUES LESS THAN (3000000), +PARTITION p3 VALUES LESS THAN (4000000), +PARTITION p4 VALUES LESS THAN (5000000), +PARTITION p5 VALUES LESS THAN (6000000), +PARTITION p6 VALUES LESS THAN (7000000), +PARTITION p7 VALUES LESS THAN MAXVALUE) as select * from t; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; 
+count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1; +count(*) +8388608 +DROP TABLE t3; +CREATE TABLE t4 AS SELECT count(*) from t2; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2; +DROP TABLE t4; +1 +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; 
+count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` 
bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE `t3` (`x` bigint); +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +DROP TABLE t3; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE 
t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; +DROP TABLE t4; +1 +drop table t,t1,t2; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_create_select.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_create_select.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_create_select.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_create_select.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,242 @@ +set default_storage_engine='tokudb'; +drop table if exists t,t1,t2; +CREATE TABLE `t` ( +`num` int(10) unsigned auto_increment NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +); +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM 
t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +8388608 +CREATE TABLE `t1` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) as select * from t; +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +CREATE TABLE t2 AS SELECT count(*) from t1; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1; +DROP TABLE t2; +1 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) 
from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 
AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP 
TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TABLE t2; +1 +drop table t,t1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_create_temp_select.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_create_temp_select.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_create_temp_select.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_create_temp_select.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,242 @@ +set default_storage_engine='tokudb'; +drop table if exists t,t1,t2; +CREATE TABLE `t` ( +`num` int(10) unsigned auto_increment NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +); +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +8388608 +CREATE TABLE `t1` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) as select * from t; +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) 
from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; +DROP TEMPORARY TABLE t2; +1 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) 
from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT 
count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE 
TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; +DROP TEMPORARY TABLE t2; +1 +drop table t,t1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_delete.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_delete.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_delete.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_delete.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,54 @@ +set default_storage_engine='tokudb'; +drop table if exists t; +CREATE TABLE `t` (id bigint not null auto_increment primary key, val bigint not null default 0); +INSERT INTO t (id) values (null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +8388608 +set tokudb_bulk_fetch = ON; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t 
where val > 0; +delete from t where val > 0; +delete from t where val > 0; +set tokudb_bulk_fetch = OFF; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +delete from t where val > 0; +1 +drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_delete_trigger.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_delete_trigger.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_delete_trigger.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_delete_trigger.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,54 @@ +set default_storage_engine='tokudb'; +drop table if exists t; +create table t (id bigint not null primary key, x bigint not null); +insert into t values (1,0),(2,0),(3,0),(4,0); +create trigger t_delete before delete on t for each row insert into t values (1000000,0); +begin; +delete from t where x=0; +ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_delete; +create trigger t_delete after delete on t for each row insert into t values (1000000,0); +begin; +delete from t where x=0; +ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_delete; +create trigger t_delete before delete on t for each row delete from t where id=1000000; +begin; +delete from t where x=0; +ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. 
+rollback; +drop trigger t_delete; +create trigger t_delete after delete on t for each row delete from t where id=1000000; +begin; +delete from t where x=0; +ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_delete; +create trigger t_delete before delete on t for each row update t set x=x+1 where id=1000000; +begin; +delete from t where x=0; +ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_delete; +create trigger t_delete after delete on t for each row update t set x=x+1 where id=10000000; +begin; +delete from t where x=0; +ERROR HY000: Can't update table 't' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_delete; +create table count (count bigint not null); +create trigger t_delete before delete on t for each row insert into count select count(*) from t; +begin; +delete from t where x=0; +select * from count; +count +4 +3 +2 +1 +rollback; +drop trigger t_delete; +drop table count; +drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_dup_key.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_dup_key.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_dup_key.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_dup_key.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,207 @@ +set default_storage_engine='tokudb'; +drop table if exists t,t1,t2; +CREATE TABLE `t` ( +`num` int(10) unsigned auto_increment NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +); +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT 
null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +8388608 +CREATE TABLE `t1` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) as select * from t; +CREATE TABLE `t2` ( +`num` int(10) unsigned auto_increment NOT NULL, +`count` bigint(20) NOT NULL, +UNIQUE (num) +) ENGINE=TokuDB DEFAULT CHARSET=latin1; +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE 
count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; +1 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 
7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 
where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; +INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on 
DUPLICATE KEY UPDATE count=count+1; +1 +drop table t,t1,t2; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_insert_select.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_insert_select.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_insert_select.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_insert_select.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,205 @@ +set default_storage_engine='tokudb'; +drop table if exists t,t1,t2; +CREATE TABLE `t` ( +`num` int(10) unsigned auto_increment NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +); +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +8388608 +CREATE TABLE `t1` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) as select * from t; +CREATE TABLE `t2` ( +`count` bigint(20) NOT NULL +) ENGINE=TokuDB DEFAULT CHARSET=latin1; +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; 
+count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +INSERT into t2 SELECT count(*) from t1; +INSERT into t2 SELECT count(*) from t1; +INSERT into t2 SELECT count(*) from t1; +INSERT into t2 SELECT count(*) from t1; +INSERT into t2 SELECT count(*) from t1; +INSERT into t2 SELECT count(*) from t1; +INSERT into t2 SELECT count(*) from t1; +INSERT into t2 SELECT count(*) from t1; +INSERT into t2 SELECT count(*) from t1; +INSERT into t2 SELECT count(*) from t1; +1 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) 
from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 
SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +INSERT into t2 SELECT count(*) from t1 where num > 7000000; +1 +drop table t,t1,t2; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_trigger.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_trigger.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_trigger.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_trigger.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,45 @@ +set default_storage_engine='tokudb'; +drop table if exists s,t; +create table s (id bigint not null primary key, x bigint); +insert into s values (1,0),(2,0),(3,0),(4,0); +create table t like s; +begin; +insert into t select * from s; +rollback; +create trigger t_trigger before insert on t for each row insert into s values (1000000,0); +begin; +insert into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row insert into s values (1000000,0); +begin; +insert into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. 
+rollback; +drop trigger t_trigger; +create trigger t_trigger before insert on t for each row delete from s where id=1000000; +begin; +insert into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row delete from s where id=1000000; +begin; +insert into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000; +begin; +insert into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000; +begin; +insert into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. 
+rollback; +drop trigger t_trigger; +drop table s,t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_update_trigger.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_update_trigger.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_update_trigger.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_update_trigger.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,121 @@ +set default_storage_engine='tokudb'; +drop table if exists s,t; +create table s (id bigint not null primary key, x bigint); +insert into s values (1,0),(2,0),(3,0),(4,0); +create table t like s; +begin; +insert into t select * from s; +rollback; +create trigger t_trigger before insert on t for each row insert into s values (1000000,0); +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row insert into s values (1000000,0); +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before insert on t for each row delete from s where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. 
+rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row delete from s where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +truncate table t; +insert into t values (1,0); +create trigger t_trigger before insert on t for each row insert into s values (1000000,0); +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row insert into s values (1000000,0); +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. 
+rollback; +drop trigger t_trigger; +create trigger t_trigger before insert on t for each row delete from s where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row delete from s where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +truncate table t; +insert into t values (1,0); +create trigger t_trigger before update on t for each row insert into s values (1000000,0); +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. 
+rollback; +drop trigger t_trigger; +create trigger t_trigger after update on t for each row insert into s values (1000000,0); +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before update on t for each row delete from s where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after update on t for each row delete from s where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before update on t for each row update s set x=x+1 where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after update on t for each row update s set x=x+1 where id=1000000; +begin; +insert into t select * from s on duplicate key update x=t.x+1; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. 
+rollback; +drop trigger t_trigger; +drop table s,t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_replace_select.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_replace_select.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_replace_select.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_replace_select.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,367 @@ +set default_storage_engine='tokudb'; +drop table if exists t,t1,t2; +CREATE TABLE `t` ( +`num` int(10) unsigned auto_increment NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +); +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +8388608 +CREATE TABLE `t1` ( +`num` int(10) unsigned NOT NULL, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) as select * from t; +CREATE TABLE `t2` ( +`count` bigint(20) NOT NULL +) ENGINE=TokuDB DEFAULT CHARSET=latin1; +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) 
+8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +REPLACE into t2 SELECT count(*) from t1; +REPLACE into t2 SELECT count(*) from t1; +REPLACE into t2 SELECT count(*) from t1; +REPLACE into t2 SELECT count(*) from t1; +REPLACE into t2 SELECT count(*) from t1; +REPLACE into t2 SELECT count(*) from t1; +REPLACE into t2 SELECT count(*) from t1; +REPLACE into t2 SELECT count(*) from t1; +REPLACE into t2 SELECT count(*) from t1; +REPLACE into t2 SELECT count(*) from t1; +1 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +SELECT count(*) from t1; +count(*) +8388608 +INSERT IGNORE into t2 SELECT count(*) from t1; +INSERT IGNORE into t2 SELECT count(*) from t1; +INSERT IGNORE into t2 SELECT count(*) from t1; +INSERT IGNORE into t2 SELECT count(*) from t1; +INSERT IGNORE into t2 SELECT count(*) from t1; +INSERT IGNORE into t2 SELECT count(*) from t1; +INSERT IGNORE into t2 SELECT count(*) from t1; +INSERT IGNORE into t2 SELECT count(*) from t1; +INSERT IGNORE into t2 SELECT count(*) from t1; +INSERT IGNORE into t2 SELECT count(*) from t1; +1 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) 
+1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 
7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +REPLACE into t2 SELECT count(*) from t1 where num > 7000000; +1 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 
7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +SELECT count(*) from t1 where num > 7000000; +count(*) +1847274 +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 
7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; +1 +drop table t,t1,t2; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_replace_select_trigger.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_replace_select_trigger.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_replace_select_trigger.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_replace_select_trigger.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,121 @@ +set 
default_storage_engine='tokudb'; +drop table if exists s,t; +create table s (id bigint not null primary key, x bigint); +insert into s values (1,0),(2,0),(3,0),(4,0); +create table t like s; +begin; +replace into t select * from s; +rollback; +create trigger t_trigger before insert on t for each row replace into s values (1000000,0); +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row replace into s values (1000000,0); +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before insert on t for each row delete from s where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row delete from s where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. 
+rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +truncate table t; +insert into t values (1,1); +create trigger t_trigger before insert on t for each row replace into s values (1000000,0); +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row replace into s values (1000000,0); +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before insert on t for each row delete from s where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row delete from s where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. 
+rollback; +drop trigger t_trigger; +create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +truncate table t; +insert into t values (1,1); +create trigger t_trigger before delete on t for each row replace into s values (1000000,0); +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after delete on t for each row replace into s values (1000000,0); +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before delete on t for each row delete from s where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger after delete on t for each row delete from s where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +create trigger t_trigger before delete on t for each row update s set x=x+1 where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. 
+rollback; +drop trigger t_trigger; +create trigger t_trigger after delete on t for each row update s set x=x+1 where id=1000000; +begin; +replace into t select * from s; +ERROR HY000: Can't update table 's' in stored function/trigger because it is already used by statement which invoked this stored function/trigger. +rollback; +drop trigger t_trigger; +drop table s,t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_select_hash_part.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_select_hash_part.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_select_hash_part.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_select_hash_part.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,278 @@ +set default_storage_engine='tokudb'; +drop table if exists t; +CREATE TABLE `t` ( +`num` int(10) unsigned NOT NULL auto_increment, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) PARTITION BY HASH (num) PARTITIONS 8; +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +1048576 +set tokudb_bulk_fetch=ON; +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) 
+1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +set tokudb_bulk_fetch=OFF; +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +1 +set tokudb_bulk_fetch=ON; +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT 
count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +set tokudb_bulk_fetch=OFF; +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) 
+548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +1 +drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_select_range_part.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_select_range_part.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/bf_select_range_part.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/bf_select_range_part.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,286 @@ +set default_storage_engine='tokudb'; +drop table if exists t; +CREATE TABLE `t` ( +`num` int(10) unsigned NOT NULL auto_increment, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) PARTITION BY RANGE (num) +(PARTITION p0 VALUES LESS THAN (100000), +PARTITION p1 VALUES LESS THAN (200000), +PARTITION p2 VALUES LESS THAN (300000), +PARTITION p3 VALUES LESS THAN (400000), +PARTITION p4 VALUES LESS THAN (500000), +PARTITION p5 VALUES LESS THAN (600000), +PARTITION p6 VALUES LESS THAN (700000), +PARTITION p7 VALUES LESS THAN MAXVALUE); +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT 
INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +1048576 +set tokudb_bulk_fetch=ON; +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +set tokudb_bulk_fetch=OFF; +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +1 +set tokudb_bulk_fetch=ON; +SELECT count(*) from t where num > 
700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +set tokudb_bulk_fetch=OFF; +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t 
where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +1 +drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/cluster_key_part.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/cluster_key_part.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/cluster_key_part.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/cluster_key_part.result 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,28 @@ +set default_storage_engine='tokudb'; +drop table if exists t; +create table t ( +x int not null, +y int not null, +primary key(x)) +partition by hash(x) partitions 2; +show create table t; +Table Create Table +t CREATE TABLE `t` ( + `x` int(11) NOT NULL, + `y` int(11) NOT NULL, + PRIMARY KEY (`x`) +) ENGINE=TokuDB DEFAULT CHARSET=latin1 +/*!50100 PARTITION BY HASH (x) +PARTITIONS 2 */ +alter table t add clustering key(y); +show create table t; +Table Create Table +t CREATE TABLE `t` ( + `x` int(11) NOT NULL, + `y` int(11) NOT NULL, + PRIMARY KEY (`x`), + CLUSTERING KEY `y` (`y`) +) ENGINE=TokuDB DEFAULT CHARSET=latin1 +/*!50100 PARTITION BY HASH (x) +PARTITIONS 2 */ +drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/ext_key_1_innodb.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/ext_key_1_innodb.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/ext_key_1_innodb.result 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/ext_key_1_innodb.result 1970-01-01 00:00:00.000000000 
+0000 @@ -1,107 +0,0 @@ -drop table if exists t; -set session optimizer_switch='extended_keys=on'; -select @@optimizer_switch; -@@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=on -create table t (id int not null, x int not null, y int not null, primary key(id), key(x)) engine=innodb; -insert into t values (0,0,0),(1,1,1),(2,2,2),(3,2,3),(4,2,4); -explain select x,id from t force index (x) where x=0 and id=0; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 Using index -flush status; -select x,id from t force index (x) where x=0 and id=0; -x id -0 0 -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select y,id from t force index (x) where x=0 and id=0; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 -flush status; -select y,id from t force index (x) where x=0 and id=0; -y id -0 0 -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select x,id from t force index (x) where x=0 and id=1; -id select_type table type 
possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 Using index -flush status; -select x,id from t force index (x) where x=0 and id=1; -x id -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select y,id from t force index (x)where x=0 and id=1; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 -flush status; -select y,id from t force index(x) where x=0 and id=1; -y id -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select x,id from t force index (x) where x=2 and id=3; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 Using index -flush status; -select x,id from t force index (x) where x=2 and id=3; -x id -2 3 -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select x,id from t force index (x) where x=2 and id=0; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 Using index -flush status; -select x,id from t force index (x) where x=2 and id=0; -x id -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/ext_key_1_tokudb.result 
mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/ext_key_1_tokudb.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/ext_key_1_tokudb.result 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/ext_key_1_tokudb.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,107 +0,0 @@ -drop table if exists t; -set session optimizer_switch='extended_keys=on'; -select @@optimizer_switch; -@@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=on -create table t (id int not null, x int not null, y int not null, primary key(id), key(x)) engine=tokudb; -insert into t values (0,0,0),(1,1,1),(2,2,2),(3,2,3),(4,2,4); -explain select x,id from t force index (x) where x=0 and id=0; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 Using index -flush status; -select x,id from t force index (x) where x=0 and id=0; -x id -0 0 -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select y,id from t force index (x) where x=0 and id=0; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 -flush status; -select y,id from t force index (x) where x=0 and id=0; -y id -0 0 -show status like 
'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select x,id from t force index (x) where x=0 and id=1; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 Using index -flush status; -select x,id from t force index (x) where x=0 and id=1; -x id -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select y,id from t force index (x)where x=0 and id=1; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 -flush status; -select y,id from t force index(x) where x=0 and id=1; -y id -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select x,id from t force index (x) where x=2 and id=3; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 Using index -flush status; -select x,id from t force index (x) where x=2 and id=3; -x id -2 3 -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select x,id from t force index (x) where x=2 and id=0; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 Using index -flush status; -select x,id from t force index (x) where x=2 and id=0; -x id -show status like 'handler_read%'; -Variable_name 
Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/ext_key_2_innodb.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/ext_key_2_innodb.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/ext_key_2_innodb.result 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/ext_key_2_innodb.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,42 +0,0 @@ -drop table if exists t; -set session optimizer_switch='extended_keys=on'; -select @@optimizer_switch; -@@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=on -create table t (a int not null, b int not null, c int not null, d int not null, primary key(a,b), key(c,a)) engine=innodb; -insert into t values (0,0,0,0),(0,1,0,1); -explain select c,a,b from t where c=0 and a=0 and b=1; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const PRIMARY,c PRIMARY 8 const,const 1 -flush status; -select c,a,b from t where c=0 and a=0 and b=1; -c a b -0 0 1 -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain 
select c,a,b from t force index (c) where c=0 and a=0 and b=1; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const c c 12 const,const,const 1 Using index -flush status; -select c,a,b from t force index (c) where c=0 and a=0 and b=1; -c a b -0 0 1 -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/ext_key_2_tokudb.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/ext_key_2_tokudb.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/ext_key_2_tokudb.result 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/ext_key_2_tokudb.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,42 +0,0 @@ -drop table if exists t; -set session optimizer_switch='extended_keys=on'; -select @@optimizer_switch; -@@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=on -create table t (a int not null, b int not null, c int not null, d int not null, primary key(a,b), key(c,a)) engine=tokudb; -insert into t values (0,0,0,0),(0,1,0,1); -explain select c,a,b from t where c=0 and a=0 and b=1; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const PRIMARY,c PRIMARY 8 
const,const 1 -flush status; -select c,a,b from t where c=0 and a=0 and b=1; -c a b -0 0 1 -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select c,a,b from t force index (c) where c=0 and a=0 and b=1; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const c c 12 const,const,const 1 Using index -flush status; -select c,a,b from t force index (c) where c=0 and a=0 and b=1; -c a b -0 0 1 -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/information-schema-global-status.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/information-schema-global-status.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/information-schema-global-status.result 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/information-schema-global-status.result 2014-10-08 13:19:52.000000000 +0000 @@ -45,6 +45,7 @@ TOKUDB_CACHETABLE_MISS_TIME TOKUDB_CACHETABLE_PREFETCHES TOKUDB_CACHETABLE_SIZE_CACHEPRESSURE +TOKUDB_CACHETABLE_SIZE_CLONED TOKUDB_CACHETABLE_SIZE_CURRENT TOKUDB_CACHETABLE_SIZE_LEAF TOKUDB_CACHETABLE_SIZE_LIMIT diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks_released.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks_released.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks_released.result 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks_released.result 2014-10-08 13:19:52.000000000 +0000 
@@ -4,22 +4,21 @@ create table t (id int primary key); set autocommit=0; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name set autocommit=0; set tokudb_prelock_empty=OFF; insert into t values (1); set autocommit=0; insert into t values (1); select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main commit; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main ERROR 23000: Duplicate entry '1' for key 'PRIMARY' commit; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks.result 2014-08-03 12:00:38.000000000 +0000 +++ 
mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks.result 2014-10-08 13:19:52.000000000 +0000 @@ -4,7 +4,7 @@ create table t (id int primary key); set autocommit=0; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name insert into t values (1); insert into t values (3); insert into t values (5); @@ -13,16 +13,16 @@ insert into t values (4); insert into t values (6); select * from information_schema.tokudb_locks order by locks_trx_id,locks_key_left; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 -TRX_ID MYSQL_ID ./test/t-main 0003000000 0003000000 -TRX_ID MYSQL_ID ./test/t-main 0005000000 0005000000 -TRX_ID MYSQL_ID ./test/t-main 0002000000 0002000000 -TRX_ID MYSQL_ID ./test/t-main 0004000000 0004000000 -TRX_ID MYSQL_ID ./test/t-main 0006000000 0006000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main +TRX_ID MYSQL_ID ./test/t-main 0003000000 0003000000 test t main +TRX_ID MYSQL_ID ./test/t-main 0005000000 0005000000 test t main +TRX_ID MYSQL_ID ./test/t-main 0002000000 0002000000 test t main +TRX_ID MYSQL_ID ./test/t-main 0004000000 0004000000 test t main +TRX_ID MYSQL_ID ./test/t-main 0006000000 0006000000 test t main commit; commit; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name commit; drop table t; diff -Nru 
mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result 2014-10-08 13:19:52.000000000 +0000 @@ -5,65 +5,64 @@ select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name set autocommit=0; set tokudb_prelock_empty=OFF; insert into t values (1); set autocommit=0; insert into t values (1); select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time -REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left 
lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name +REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME test t main select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id TRX_ID MYSQL_ID TRX_ID MYSQL_ID commit; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name ERROR 23000: Duplicate entry '1' for key 'PRIMARY' commit; select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name set autocommit=0; set tokudb_prelock_empty=OFF; replace into t values (1); set autocommit=0; replace 
into t values (1); select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time -REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name +REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME test t main select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id TRX_ID MYSQL_ID TRX_ID MYSQL_ID commit; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name commit; select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname 
locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_timeout.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_timeout.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_timeout.result 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_timeout.result 2014-10-08 13:19:52.000000000 +0000 @@ -5,35 +5,35 @@ select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name set autocommit=0; set tokudb_prelock_empty=OFF; insert into t values (1); set autocommit=0; insert into t values (1); select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 
0001000000 0001000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time -REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name +REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME test t main select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id TRX_ID MYSQL_ID TRX_ID MYSQL_ID commit; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name ERROR HY000: Lock wait timeout exceeded; try restarting transaction commit; select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name select * from information_schema.tokudb_lock_waits; 
-requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/tokudb_support_xa.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/tokudb_support_xa.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/r/tokudb_support_xa.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/r/tokudb_support_xa.result 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,126 @@ +'#--------------------begin------------------------#' +SET @session_start_value = @@session.tokudb_support_xa; +SELECT @session_start_value; +@session_start_value +1 +SET @global_start_value = @@global.tokudb_support_xa; +SELECT @global_start_value; +@global_start_value +1 +SET @@session.tokudb_support_xa = 0; +SET @@session.tokudb_support_xa = DEFAULT; +SELECT @@session.tokudb_support_xa; +@@session.tokudb_support_xa +1 +SET @@global.tokudb_support_xa = 0; +SET @@global.tokudb_support_xa = DEFAULT; +SELECT @@global.tokudb_support_xa; +@@global.tokudb_support_xa +1 +'#--------------------case#1 valid set support_xa------------------------#' +SET @@session.tokudb_support_xa = 0; +SELECT @@session.tokudb_support_xa; +@@session.tokudb_support_xa +0 +SET @@session.tokudb_support_xa = 1; +SELECT @@session.tokudb_support_xa; +@@session.tokudb_support_xa +1 +SET @@global.tokudb_support_xa = 0; +SELECT @@global.tokudb_support_xa; +@@global.tokudb_support_xa +0 +SET @@global.tokudb_support_xa = 1; +SELECT @@global.tokudb_support_xa; +@@global.tokudb_support_xa +1 +'#--------------------case#2 invalid set support_xa------------------------#' +SET @@session.tokudb_support_xa = -0.6; +ERROR 42000: Incorrect argument type to variable 
'tokudb_support_xa' +SET @@session.tokudb_support_xa = 1.6; +ERROR 42000: Incorrect argument type to variable 'tokudb_support_xa' +SET @@session.tokudb_support_xa = "T"; +ERROR 42000: Variable 'tokudb_support_xa' can't be set to the value of 'T' +SET @@session.tokudb_support_xa = "Y"; +ERROR 42000: Variable 'tokudb_support_xa' can't be set to the value of 'Y' +SET @@session.tokudb_support_xa = OF; +SELECT @@session.tokudb_support_xa; +@@session.tokudb_support_xa +0 +SET @@global.tokudb_support_xa = 2; +ERROR 42000: Variable 'tokudb_support_xa' can't be set to the value of '2' +SET @@global.tokudb_support_xa = "T"; +ERROR 42000: Variable 'tokudb_support_xa' can't be set to the value of 'T' +SET @@global.tokudb_support_xa = "Y"; +ERROR 42000: Variable 'tokudb_support_xa' can't be set to the value of 'Y' +'#--------------------case#3 xa.test port from tokudb_mariadb/xa.test ------------------------#' +'#--------------------xa.test with tokudb_support_xa OFF ------------------------#' +SET @@global.tokudb_support_xa = OFF; +SELECT @@global.tokudb_support_xa; +@@global.tokudb_support_xa +0 +create table t1 (a int) engine=tokudb; +xa start 'test1'; +insert t1 values (10); +xa end 'test1'; +xa prepare 'test1'; +xa rollback 'test1'; +select * from t1; +a +xa start 'test2'; +xa start 'test-bad'; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the ACTIVE state +insert t1 values (20); +xa prepare 'test2'; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the ACTIVE state +xa end 'test2'; +xa prepare 'test2'; +xa commit 'test2'; +select * from t1; +a +20 +xa start 'testa','testb'; +insert t1 values (30); +commit; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the ACTIVE state +xa end 'testa','testb'; +begin; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the IDLE state +create table t2 (a int); +ERROR XAE07: 
XAER_RMFAIL: The command cannot be executed when global transaction is in the IDLE state +xa start 'testa','testb'; +ERROR XAE08: XAER_DUPID: The XID already exists +xa start 'testa','testb', 123; +ERROR XAE08: XAER_DUPID: The XID already exists +xa start 0x7465737462, 0x2030405060, 0xb; +insert t1 values (40); +xa end 'testb',' 0@P`',11; +xa prepare 'testb',0x2030405060,11; +start transaction; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the PREPARED state +xa recover; +formatID gtrid_length bqual_length data +11 5 5 testb 0@P` +xa prepare 'testa','testb'; +xa recover; +formatID gtrid_length bqual_length data +11 5 5 testb 0@P` +1 5 5 testatestb +xa commit 'testb',0x2030405060,11; +ERROR XAE04: XAER_NOTA: Unknown XID +xa rollback 'testa','testb'; +xa start 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your XYZ server version for the right syntax to use near '' at line 1 +select * from t1; +a +20 +drop table t1; +'#--------------------end------------------------#' +SET @@session.tokudb_support_xa = @session_start_value; +SELECT @@session.tokudb_support_xa; +@@session.tokudb_support_xa +1 +SET @@global.tokudb_support_xa = @global_start_value; +SELECT @@global.tokudb_support_xa; +@@global.tokudb_support_xa +1 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/suite.opt mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/suite.opt --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/suite.opt 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/suite.opt 2014-10-08 13:19:52.000000000 +0000 @@ -1 +1 @@ ---tokudb --plugin-load=$HA_TOKUDB_SO +--tokudb --plugin-load=$HA_TOKUDB_SO --loose-tokudb-check-jemalloc=0 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_create_select_hash_part.test 
mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_create_select_hash_part.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_create_select_hash_part.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_create_select_hash_part.test 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,143 @@ +# Verify that index and range scans are not slow +# on tables during create select statements +# due to tokudb bulk fetch not being used + +source include/have_tokudb.inc; +source include/have_partition.inc; +source include/big_test.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t,t1,t2,t3; +enable_warnings; + +let $maxq = 10; + +CREATE TABLE `t` ( + `num` int(10) unsigned auto_increment NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +); + +# put 8M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +# Create base table (control table) from source table t +CREATE TABLE `t1` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY 
(`num`) +) as select * from t; + +# Create source hash partitioned table from source table t +CREATE TABLE `t2` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) PARTITION BY HASH (num) +PARTITIONS 8 as select * from t; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + CREATE TABLE `t3` (`x` bigint); + SELECT count(*) from t1; + DROP TABLE t3; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_select seconds.; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + CREATE TABLE t3 AS SELECT count(*) from t2; + DROP TABLE t3; + inc $i; +} + +let $time_elapsed_create_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_create_select seconds.; + +# This check evaluates whether the time elapsed during the create select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +# Additionally, it is important to note that 1.5 is the multiplier applied to the time_elapsed_select +# value because it appears that MySQL 5.5.39 uses a sorted index scan during the create select statement +# while Percona Server 5.6 uses an unsorted index scan. +# The issue has been resolved in MySQL 5.6 but still persists in Maria 10.0.12 +# in the defect found at https://mariadb.atlassian.net/browse/MDEV-6547. 
+let $verdict = `select abs($time_elapsed_create_select - $time_elapsed_select) <= 1.5 * $time_elapsed_select`; +echo $verdict; +if (!$verdict) { echo index scan t2 $time_elapsed_create_select $time_elapsed_select; } + +let $maxrq = 30; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + CREATE TABLE `t3` (`x` bigint); + SELECT count(*) from t1 where num > 7000000; + DROP TABLE t3; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_select seconds.; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + CREATE TABLE t3 AS SELECT count(*) from t2 where num > 7000000; + DROP TABLE t3; + inc $i; +} + +let $time_elapsed_create_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_create_select seconds.; + +# This check evaluates whether the time elapsed during the create select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +# Additionally, it is important to note that 1.5 is the multiplier applied to the time_elapsed_select +# value because it appears that MySQL 5.5.39 uses a sorted index scan during the create select statement +# while Percona Server 5.6 uses an unsorted index scan. +# The issue has been resolved in MySQL 5.6 but still persists in Maria 10.0.12 +# in the defect found at https://mariadb.atlassian.net/browse/MDEV-6547. 
+let $verdict = `select abs($time_elapsed_create_select - $time_elapsed_select) <= 1.5 * $time_elapsed_select`; +echo $verdict; +if (!$verdict) { echo range scan t2 $time_elapsed_create_select $time_elapsed_select; } + +drop table t,t1,t2; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_create_select_range_part.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_create_select_range_part.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_create_select_range_part.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_create_select_range_part.test 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,138 @@ +# Verify that index and range scans are not slow +# on tables during create select statements +# due to tokudb bulk fetch not being used + +source include/have_tokudb.inc; +source include/have_partition.inc; +source include/big_test.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t,t1,t2; +enable_warnings; + +let $maxq = 10; + +CREATE TABLE `t` ( + `num` int(10) unsigned auto_increment NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +); + +# put 8M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT 
INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +# Create base table (control table) from source table t +CREATE TABLE `t1` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) as select * from t; + +# Create source range partitioned table from source table t +CREATE TABLE `t2` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) PARTITION BY RANGE (num) +(PARTITION p0 VALUES LESS THAN (1000000), + PARTITION p1 VALUES LESS THAN (2000000), + PARTITION p2 VALUES LESS THAN (3000000), + PARTITION p3 VALUES LESS THAN (4000000), + PARTITION p4 VALUES LESS THAN (5000000), + PARTITION p5 VALUES LESS THAN (6000000), + PARTITION p6 VALUES LESS THAN (7000000), + PARTITION p7 VALUES LESS THAN MAXVALUE) as select * from t; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + CREATE TABLE `t3` (`x` bigint); + SELECT count(*) from t1; + DROP TABLE t3; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_select seconds.; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + CREATE TABLE t4 AS SELECT count(*) from t2; + DROP TABLE t4; + inc $i; +} + +let $time_elapsed_create_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_create_select seconds.; + +# This check evaluates whether the time elapsed during the create select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. 
+let $verdict = `select abs($time_elapsed_create_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +let $maxrq = 30; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + CREATE TABLE `t3` (`x` bigint); + SELECT count(*) from t1 where num > 7000000; + DROP TABLE t3; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_select seconds.; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + CREATE TABLE t4 AS SELECT count(*) from t2 where num > 7000000; + DROP TABLE t4; + inc $i; +} + +let $time_elapsed_create_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_create_select seconds.; + +# This check evaluates whether the time elapsed during the create select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. 
+let $verdict = `select abs($time_elapsed_create_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +drop table t,t1,t2; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_create_select.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_create_select.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_create_select.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_create_select.test 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,118 @@ +# Verify that index and range scans are not slow +# on tables during create select statements +# due to tokudb bulk fetch not being used + +source include/have_tokudb.inc; +source include/big_test.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t,t1,t2; +enable_warnings; + +let $maxq = 10; + +CREATE TABLE `t` ( + `num` int(10) unsigned auto_increment NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +); + +# put 8M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + 
+# Create first table from source table t +CREATE TABLE `t1` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) as select * from t; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t1; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_select seconds.; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + CREATE TABLE t2 AS SELECT count(*) from t1; + DROP TABLE t2; + inc $i; +} + +let $time_elapsed_create_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_create_select seconds.; + +# This check evaluates whether the time elapsed during the create select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +let $verdict = `select abs($time_elapsed_create_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +let $maxrq = 30; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + SELECT count(*) from t1 where num > 7000000; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_select seconds.; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + CREATE TABLE t2 AS SELECT count(*) from t1 where num > 7000000; + DROP TABLE t2; + inc $i; +} + +let $time_elapsed_create_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. 
+#echo Index scans took $time_elapsed_create_select seconds.; + +# This check evaluates whether the time elapsed during the create select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +let $verdict = `select abs($time_elapsed_create_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +drop table t,t1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_create_temp_select.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_create_temp_select.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_create_temp_select.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_create_temp_select.test 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,118 @@ +# Verify that index and range scans are not slow +# on temporary tables during create select statements +# due to tokudb bulk fetch not being used + +source include/have_tokudb.inc; +source include/big_test.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t,t1,t2; +enable_warnings; + +let $maxq = 10; + +CREATE TABLE `t` ( + `num` int(10) unsigned auto_increment NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +); + +# put 8M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT 
null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +# Create first table from source table t +CREATE TABLE `t1` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) as select * from t; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t1; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_select seconds.; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1; + DROP TEMPORARY TABLE t2; + inc $i; +} + +let $time_elapsed_create_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_create_select seconds.; + +# This check evaluates whether the time elapsed during the create select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +let $verdict = `select abs($time_elapsed_create_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +let $maxrq = 30; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + SELECT count(*) from t1 where num > 7000000; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. 
+#echo Range scans took $time_elapsed_select seconds.; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + CREATE TEMPORARY TABLE t2 AS SELECT count(*) from t1 where num > 7000000; + DROP TEMPORARY TABLE t2; + inc $i; +} + +let $time_elapsed_create_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Range scans took $time_elapsed_create_select seconds.; + +# This check evaluates whether the time elapsed during the create select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +let $verdict = `select abs($time_elapsed_create_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +drop table t,t1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_delete.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_delete.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_delete.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_delete.test 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,68 @@ +# Verify that index scans for delete statements use bulk fetch and are +# at least twice as fast + +source include/have_tokudb.inc; +source include/big_test.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t; +enable_warnings; + +CREATE TABLE `t` (id bigint not null auto_increment primary key, val bigint not null default 0); + +# put 8M rows into t +INSERT INTO t (id) values (null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT 
null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +# run $maxq measurements +let $maxq = 10; + +# measure the time to do $maxq deletes from t that affect no rows with bulk fetch ON +set tokudb_bulk_fetch = ON; +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + delete from t where val > 0; + inc $i; +} +let $time_elapsed_bf_on = `select to_seconds(now()) - $s`; + +# measure the time to do $maxq deletes from t that affect no rows with bulk fetch OFF +set tokudb_bulk_fetch = OFF; +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + delete from t where val > 0; + inc $i; +} +let $time_elapsed_bf_off = `select to_seconds(now()) - $s`; + +# verify that a delete scan with bulk fetch ON is at least 2 times faster than with bulk fetch OFF +let $verdict = `select $time_elapsed_bf_off > $time_elapsed_bf_on && ($time_elapsed_bf_off - $time_elapsed_bf_on) / $time_elapsed_bf_on >= 2`; +echo $verdict; +if (!$verdict) { echo $time_elapsed_bf_on $time_elapsed_bf_off; } + +drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_delete_trigger.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_delete_trigger.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_delete_trigger.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_delete_trigger.test 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,70 @@ +# verify that delete triggers can not insert, 
delete, or update rows in the target table + +source include/have_tokudb.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t; +enable_warnings; + +create table t (id bigint not null primary key, x bigint not null); +insert into t values (1,0),(2,0),(3,0),(4,0); + +# verify that a before delete trigger can not insert into the target table +create trigger t_delete before delete on t for each row insert into t values (1000000,0); +begin; +error 1442; +delete from t where x=0; +rollback; +drop trigger t_delete; + +# verify that an after delete trigger can not insert into the target table +create trigger t_delete after delete on t for each row insert into t values (1000000,0); +begin; +error 1442; +delete from t where x=0; +rollback; +drop trigger t_delete; + +# verify that a before delete trigger can not delete from the target table +create trigger t_delete before delete on t for each row delete from t where id=1000000; +begin; +error 1442; +delete from t where x=0; +rollback; +drop trigger t_delete; + +# verify that an after delete trigger can not delete from the target table +create trigger t_delete after delete on t for each row delete from t where id=1000000; +begin; +error 1442; +delete from t where x=0; +rollback; +drop trigger t_delete; + +# verify that a before delete trigger can not update the target table +create trigger t_delete before delete on t for each row update t set x=x+1 where id=1000000; +begin; +error 1442; +delete from t where x=0; +rollback; +drop trigger t_delete; + +# verify that an after delete trigger can not update the target table +create trigger t_delete after delete on t for each row update t set x=x+1 where id=10000000; +begin; +error 1442; +delete from t where x=0; +rollback; +drop trigger t_delete; + +# can execute select on the target table in a delete trigger. it better use a different handler. 
+create table count (count bigint not null); +create trigger t_delete before delete on t for each row insert into count select count(*) from t; +begin; +delete from t where x=0; +select * from count; +rollback; +drop trigger t_delete; +drop table count; + +drop table t; \ No newline at end of file diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_insert_select_dup_key.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_insert_select_dup_key.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_insert_select_dup_key.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_insert_select_dup_key.test 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,127 @@ +# Verify that index and range scans are not slow +# on tables during insert select on duplicate key statements +# due to tokudb bulk fetch not being used. +# In this test case, the on duplicate key condition does not need to fire +# since the performance of the embedded select statement is all we are measuring. 
+ +source include/have_tokudb.inc; +source include/big_test.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t,t1,t2; +enable_warnings; + +let $maxq = 10; + +CREATE TABLE `t` ( + `num` int(10) unsigned auto_increment NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +); + +# put 8M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +# Create first table from source table t +CREATE TABLE `t1` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) as select * from t; + +# Create second table t2 that will serve as the target for the insert select statment +CREATE TABLE `t2` ( + `num` int(10) unsigned auto_increment NOT NULL, + `count` bigint(20) NOT NULL, + UNIQUE (num) + ) ENGINE=TokuDB DEFAULT CHARSET=latin1; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t1; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be 
useful for debugging. +#echo Index scans took $time_elapsed_select seconds.; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + INSERT into t2 (num,count) SELECT NULL,count(*) from t1 on DUPLICATE KEY UPDATE count=count+1; + inc $i; +} +let $time_elapsed_insert_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_insert_select seconds.; + +# This check evaluates whether the time elapsed during the insert select on duplicate key statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +let $verdict = `select abs($time_elapsed_insert_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +let $maxrq = 30; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + SELECT count(*) from t1 where num > 7000000; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Range scans took $time_elapsed_select seconds.; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + INSERT into t2 (num,count) SELECT NULL,count(*) from t1 where num > 7000000 on DUPLICATE KEY UPDATE count=count+1; + inc $i; +} +let $time_elapsed_insert_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Range scans took $time_elapsed_insert_select seconds.; + +# This check evaluates whether the time elapsed during the insert select on duplicate key statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. 
+let $verdict = `select abs($time_elapsed_insert_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +enable_warnings; +drop table t,t1,t2; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_insert_select.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_insert_select.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_insert_select.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_insert_select.test 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,122 @@ +# Verify that index and range scans are not slow +# on tables during insert select statements +# due to tokudb bulk fetch not being used + +source include/have_tokudb.inc; +source include/big_test.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t,t1,t2; +enable_warnings; + +let $maxq = 10; + +CREATE TABLE `t` ( + `num` int(10) unsigned auto_increment NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +); + +# put 8M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT 
count(*) FROM t; + +# Create first table from source table t +CREATE TABLE `t1` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) as select * from t; + +# Create second table t2 that will serve as the target for the insert select statment +CREATE TABLE `t2` ( + `count` bigint(20) NOT NULL + ) ENGINE=TokuDB DEFAULT CHARSET=latin1; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t1; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_select seconds.; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + INSERT into t2 SELECT count(*) from t1; + inc $i; +} +let $time_elapsed_insert_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_insert_select seconds.; + +# This check evaluates whether the time elapsed during the insert select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +let $verdict = `select abs($time_elapsed_insert_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +let $maxrq = 30; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + SELECT count(*) from t1 where num > 7000000; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# This check evaluates whether the time elapsed during the insert select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. 
+#echo Range scans took $time_elapsed_select seconds.; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + INSERT into t2 SELECT count(*) from t1 where num > 7000000; + inc $i; +} +let $time_elapsed_insert_select = `select to_seconds(now()) - $s`; + +# This check evaluates whether the time elapsed during the insert select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +#echo Range scans took $time_elapsed_insert_select seconds.; + +# This check evaluates whether the time elapsed during the insert select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +let $verdict = `select abs($time_elapsed_insert_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +drop table t,t1,t2; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_insert_select_trigger.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_insert_select_trigger.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_insert_select_trigger.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_insert_select_trigger.test 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,65 @@ +# verify that various insert triggers can not execute on the source table for an insert select statement + +source include/have_tokudb.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists s,t; +enable_warnings; + +create table s (id bigint not null primary key, x bigint); +insert into s values (1,0),(2,0),(3,0),(4,0); + +create table t like s; +begin; +insert into t select * from s; +rollback; + +# verify that before insert triggers can not insert into the source table +create trigger t_trigger before insert on t for each row insert into s values (1000000,0); +begin; +error 1442; +insert into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that 
after insert triggers can not insert into the source table +create trigger t_trigger after insert on t for each row insert into s values (1000000,0); +begin; +error 1442; +insert into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that before insert triggers can not delete from the source table +create trigger t_trigger before insert on t for each row delete from s where id=1000000; +begin; +error 1442; +insert into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not delete from the source table +create trigger t_trigger after insert on t for each row delete from s where id=1000000; +begin; +error 1442; +insert into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that before insert triggers can not update the source table +create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +insert into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not update the source table +create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +insert into t select * from s; +rollback; +drop trigger t_trigger; + +drop table s,t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_insert_select_update_trigger.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_insert_select_update_trigger.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_insert_select_update_trigger.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_insert_select_update_trigger.test 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,170 @@ +# verify that various insert triggers can not execute on the source table for an insert select statement + +source include/have_tokudb.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists s,t; +enable_warnings; + 
+create table s (id bigint not null primary key, x bigint); +insert into s values (1,0),(2,0),(3,0),(4,0); + +create table t like s; +begin; +insert into t select * from s; +rollback; +# insert into t values (1,0); + +# verify that before insert triggers can not insert into the source table +create trigger t_trigger before insert on t for each row insert into s values (1000000,0); +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not insert into the source table +create trigger t_trigger after insert on t for each row insert into s values (1000000,0); +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that before insert triggers can not delete from the source table +create trigger t_trigger before insert on t for each row delete from s where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not delete from the source table +create trigger t_trigger after insert on t for each row delete from s where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that before insert triggers can not update the source table +create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not update the source table +create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# force duplicate keys +truncate table t; +insert into 
t values (1,0); + +# verify that before insert triggers can not insert into the source table +create trigger t_trigger before insert on t for each row insert into s values (1000000,0); +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not insert into the source table +create trigger t_trigger after insert on t for each row insert into s values (1000000,0); +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that before insert triggers can not delete from the source table +create trigger t_trigger before insert on t for each row delete from s where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not delete from the source table +create trigger t_trigger after insert on t for each row delete from s where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that before insert triggers can not update the source table +create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not update the source table +create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# force duplicate keys +truncate table t; +insert into t values (1,0); + +# verify that before insert triggers can not insert into the source table +create trigger t_trigger before update on t for each row insert into s values (1000000,0); +begin; +error 
1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not insert into the source table +create trigger t_trigger after update on t for each row insert into s values (1000000,0); +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that before update triggers can not delete from the source table +create trigger t_trigger before update on t for each row delete from s where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not delete from the source table +create trigger t_trigger after update on t for each row delete from s where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that before update triggers can not update the source table +create trigger t_trigger before update on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not update the source table +create trigger t_trigger after update on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +insert into t select * from s on duplicate key update x=t.x+1; +rollback; +drop trigger t_trigger; + +drop table s,t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_replace_select.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_replace_select.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_replace_select.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_replace_select.test 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,190 @@ +# Verify that index 
and range scans are not slow +# on tables during replace select and insert ignore statements +# due to tokudb bulk fetch not being used + +source include/have_tokudb.inc; +source include/big_test.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t,t1,t2; +enable_warnings; + +let $maxq = 10; + +CREATE TABLE `t` ( + `num` int(10) unsigned auto_increment NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +); + +# put 8M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +# Create first table from source table t +CREATE TABLE `t1` ( + `num` int(10) unsigned NOT NULL, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) as select * from t; + +# Create second table t2 that will serve as the target for the replace select statment +CREATE TABLE `t2` ( + `count` bigint(20) NOT NULL + ) ENGINE=TokuDB DEFAULT CHARSET=latin1; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t1; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following 
line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_select seconds.; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + REPLACE into t2 SELECT count(*) from t1; + inc $i; +} +let $time_elapsed_replace_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_replace_select seconds.; + +# This check evaluates whether the time elapsed during the replace select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +let $verdict = `select abs($time_elapsed_replace_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +############################################################## + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t1; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_select seconds.; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxq) { + INSERT IGNORE into t2 SELECT count(*) from t1; + inc $i; +} +let $time_elapsed_insert_ignore_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Index scans took $time_elapsed_insert_ignore_select seconds.; + +# This check evaluates whether the time elapsed during the insert ignore select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. 
+let $verdict = `select abs($time_elapsed_insert_ignore_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +################################################################## + +let $maxrq = 30; + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + SELECT count(*) from t1 where num > 7000000; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Range scans took $time_elapsed_select seconds.; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + REPLACE into t2 SELECT count(*) from t1 where num > 7000000; + inc $i; +} +let $time_elapsed_replace_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Range scans took $time_elapsed_replace_select seconds.; + +# This check evaluates whether the time elapsed during the replace select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +let $verdict = `select abs($time_elapsed_replace_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +#################################################################### + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + SELECT count(*) from t1 where num > 7000000; + inc $i; +} +let $time_elapsed_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. 
+#echo Range scans took $time_elapsed_select seconds.; + + +let $s = `select to_seconds(now())`; +let $i = 0; +while ($i < $maxrq) { + INSERT IGNORE into t2 SELECT count(*) from t1 where num > 7000000; + inc $i; +} +let $time_elapsed_insert_ignore_select = `select to_seconds(now()) - $s`; + +# The following line can be used to display the time elapsed data +# which could be useful for debugging. +#echo Range scans took $time_elapsed_insert_ignore_select seconds.; + +# This check evaluates whether the time elapsed during the insert ignore select statement is on par +# with the select statement, which will confirm that bulk fetch is in fact being used. +let $verdict = `select abs($time_elapsed_insert_ignore_select - $time_elapsed_select) <= $time_elapsed_select`; +echo $verdict; + +######################################################################### + +drop table t,t1,t2; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_replace_select_trigger.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_replace_select_trigger.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_replace_select_trigger.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_replace_select_trigger.test 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,169 @@ +# verify that various insert and update triggers can not execute on the source table +# for a replace select statement + +source include/have_tokudb.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists s,t; +enable_warnings; + +create table s (id bigint not null primary key, x bigint); +insert into s values (1,0),(2,0),(3,0),(4,0); + +create table t like s; +begin; +replace into t select * from s; +rollback; + +# verify that before insert triggers can not replace into the source table +create trigger t_trigger before insert on t for each row replace into s values (1000000,0); +begin; +error 1442; +replace into t select * 
from s; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not replace into the source table +create trigger t_trigger after insert on t for each row replace into s values (1000000,0); +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that before insert triggers can not delete from the source table +create trigger t_trigger before insert on t for each row delete from s where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not delete from the source table +create trigger t_trigger after insert on t for each row delete from s where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that before insert triggers can not update the source table +create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not update the source table +create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +truncate table t; +insert into t values (1,1); + +# verify that before insert triggers can not replace into the source table +create trigger t_trigger before insert on t for each row replace into s values (1000000,0); +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not replace into the source table +create trigger t_trigger after insert on t for each row replace into s values (1000000,0); +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that before insert triggers can not delete from the source table +create trigger t_trigger 
before insert on t for each row delete from s where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not delete from the source table +create trigger t_trigger after insert on t for each row delete from s where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that before insert triggers can not update the source table +create trigger t_trigger before insert on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after insert triggers can not update the source table +create trigger t_trigger after insert on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +truncate table t; +insert into t values (1,1); + +# verify that before delete triggers can not replace into the source table +create trigger t_trigger before delete on t for each row replace into s values (1000000,0); +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after delete triggers can not replace into the source table +create trigger t_trigger after delete on t for each row replace into s values (1000000,0); +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that before delete triggers can not delete from the source table +create trigger t_trigger before delete on t for each row delete from s where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after delete triggers can not delete from the source table +create trigger t_trigger after delete on t for each row delete from s where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger 
t_trigger; + +# verify that before delete triggers can not update the source table +create trigger t_trigger before delete on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + +# verify that after delete triggers can not update the source table +create trigger t_trigger after delete on t for each row update s set x=x+1 where id=1000000; +begin; +error 1442; +replace into t select * from s; +rollback; +drop trigger t_trigger; + + +drop table s,t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_select_hash_part.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_select_hash_part.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_select_hash_part.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_select_hash_part.test 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,100 @@ +# Verify that index and range scans on a hash partitioned tokudb table are not slow on tables +# due to tokudb bulk fetch not being used. 
+ +source include/have_tokudb.inc; +source include/have_partition.inc; +source include/big_test.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t; +enable_warnings; + +let $maxq = 20; +let $debug = 0; + +# create the hash partition table +CREATE TABLE `t` ( + `num` int(10) unsigned NOT NULL auto_increment, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) PARTITION BY HASH (num) PARTITIONS 8; + +# put 1M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +set tokudb_bulk_fetch=ON; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t; + inc $i; +} +let $time_bf_on = `select unix_timestamp() - $s`; + +if ($debug) { echo index scans took $time_bf_on; } + +set tokudb_bulk_fetch=OFF; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t; + inc $i; +} +let $time_bf_off = `select unix_timestamp() - $s`; + +if ($debug) { echo index scans took $time_bf_off.; } + +# check that the scan time with bulk fetch off is at least 1.5 times as long as with bulk fetch on +let $verdict = `select $time_bf_off > $time_bf_on && $time_bf_off >= 1.5 * $time_bf_on`; +echo 
$verdict; +if (!$verdict) { echo index scan $time_bf_on $time_bf_off; } + +set tokudb_bulk_fetch=ON; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t where num > 500000; + inc $i; +} +let $time_bf_on = `select unix_timestamp() - $s`; + +if ($debug) { echo range scans took $time_bf_on; } + +set tokudb_bulk_fetch=OFF; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t where num > 500000; + inc $i; +} +let $time_bf_off = `select unix_timestamp() - $s`; + +if ($debug) { echo range scans took $time_bf_off.; } + +# check that the scan time with bulk fetch off is at least 1.5 times as long as with bulk fetch on +let $verdict = `select $time_bf_off > $time_bf_on && $time_bf_off >= 1.5 * $time_bf_on`; +echo $verdict; +if (!$verdict) { echo range scan $time_bf_on $time_bf_off; } + +drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_select_range_part.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_select_range_part.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/bf_select_range_part.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/bf_select_range_part.test 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,108 @@ +# Verify that index and range scans on a range partitioned tokudb table are not slow on tables +# due to tokudb bulk fetch not being used. 
+ +source include/have_tokudb.inc; +source include/have_partition.inc; +source include/big_test.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t; +enable_warnings; + +let $maxq = 20; +let $debug = 0; + +# create the range partition table +CREATE TABLE `t` ( + `num` int(10) unsigned NOT NULL auto_increment, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) PARTITION BY RANGE (num) +(PARTITION p0 VALUES LESS THAN (100000), + PARTITION p1 VALUES LESS THAN (200000), + PARTITION p2 VALUES LESS THAN (300000), + PARTITION p3 VALUES LESS THAN (400000), + PARTITION p4 VALUES LESS THAN (500000), + PARTITION p5 VALUES LESS THAN (600000), + PARTITION p6 VALUES LESS THAN (700000), + PARTITION p7 VALUES LESS THAN MAXVALUE); + +# put 1M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +set tokudb_bulk_fetch=ON; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t; + inc $i; +} +let $time_bf_on = `select unix_timestamp() - $s`; + +if ($debug) { echo index scans took $time_bf_on; } + +set tokudb_bulk_fetch=OFF; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from 
t; + inc $i; +} +let $time_bf_off = `select unix_timestamp() - $s`; + +if ($debug) { echo index scans took $time_bf_off.; } + +# check that the scan time with bulk fetch off is at least 1.5 times as long as with bulk fetch on +let $verdict = `select $time_bf_off > $time_bf_on && $time_bf_off >= 1.5 * $time_bf_on`; +echo $verdict; +if (!$verdict) { echo index scan $time_bf_on $time_bf_off; } + +set tokudb_bulk_fetch=ON; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t where num > 700000; + inc $i; +} +let $time_bf_on = `select unix_timestamp() - $s`; + +if ($debug) { echo range scans took $time_bf_on; } + +set tokudb_bulk_fetch=OFF; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t where num > 700000; + inc $i; +} +let $time_bf_off = `select unix_timestamp() - $s`; + +if ($debug) { echo range scans took $time_bf_off.; } + +# check that the scan time with bulk fetch off is at least 1.5 times as long as with bulk fetch on +let $verdict = `select $time_bf_off > $time_bf_on && $time_bf_off >= 1.5 * $time_bf_on`; +echo $verdict; +if (!$verdict) { echo range scan $time_bf_on $time_bf_off; } + +drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/cluster_key_part.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/cluster_key_part.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/cluster_key_part.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/cluster_key_part.test 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,23 @@ +# Test that clustering keys can be created on partitioned tokudb tables + +source include/have_tokudb.inc; +source include/have_partition.inc; +set default_storage_engine='tokudb'; + +disable_warnings; +drop table if exists t; +enable_warnings; + +create table t ( + x int not null, + y int not null, + primary key(x)) +partition by hash(x) partitions 2; + +show create 
table t; + +alter table t add key(y) clustering=yes; + +show create table t; + +drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/ext_key_1_innodb.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/ext_key_1_innodb.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/ext_key_1_innodb.test 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/ext_key_1_innodb.test 1970-01-01 00:00:00.000000000 +0000 @@ -1,44 +0,0 @@ -source include/have_innodb.inc; - -disable_warnings; -drop table if exists t; -enable_warnings; - -set session optimizer_switch='extended_keys=on'; -select @@optimizer_switch; - -create table t (id int not null, x int not null, y int not null, primary key(id), key(x)) engine=innodb; - -insert into t values (0,0,0),(1,1,1),(2,2,2),(3,2,3),(4,2,4); - -explain select x,id from t force index (x) where x=0 and id=0; -flush status; -select x,id from t force index (x) where x=0 and id=0; -show status like 'handler_read%'; - -explain select y,id from t force index (x) where x=0 and id=0; -flush status; -select y,id from t force index (x) where x=0 and id=0; -show status like 'handler_read%'; - -explain select x,id from t force index (x) where x=0 and id=1; -flush status; -select x,id from t force index (x) where x=0 and id=1; -show status like 'handler_read%'; - -explain select y,id from t force index (x)where x=0 and id=1; -flush status; -select y,id from t force index(x) where x=0 and id=1; -show status like 'handler_read%'; - -explain select x,id from t force index (x) where x=2 and id=3; -flush status; -select x,id from t force index (x) where x=2 and id=3; -show status like 'handler_read%'; - -explain select x,id from t force index (x) where x=2 and id=0; -flush status; -select x,id from t force index (x) where x=2 and id=0; -show status like 'handler_read%'; - -drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/ext_key_1_tokudb.test 
mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/ext_key_1_tokudb.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/ext_key_1_tokudb.test 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/ext_key_1_tokudb.test 1970-01-01 00:00:00.000000000 +0000 @@ -1,44 +0,0 @@ -#source include/have_tokudb.inc; - -disable_warnings; -drop table if exists t; -enable_warnings; - -set session optimizer_switch='extended_keys=on'; -select @@optimizer_switch; - -create table t (id int not null, x int not null, y int not null, primary key(id), key(x)) engine=tokudb; - -insert into t values (0,0,0),(1,1,1),(2,2,2),(3,2,3),(4,2,4); - -explain select x,id from t force index (x) where x=0 and id=0; -flush status; -select x,id from t force index (x) where x=0 and id=0; -show status like 'handler_read%'; - -explain select y,id from t force index (x) where x=0 and id=0; -flush status; -select y,id from t force index (x) where x=0 and id=0; -show status like 'handler_read%'; - -explain select x,id from t force index (x) where x=0 and id=1; -flush status; -select x,id from t force index (x) where x=0 and id=1; -show status like 'handler_read%'; - -explain select y,id from t force index (x)where x=0 and id=1; -flush status; -select y,id from t force index(x) where x=0 and id=1; -show status like 'handler_read%'; - -explain select x,id from t force index (x) where x=2 and id=3; -flush status; -select x,id from t force index (x) where x=2 and id=3; -show status like 'handler_read%'; - -explain select x,id from t force index (x) where x=2 and id=0; -flush status; -select x,id from t force index (x) where x=2 and id=0; -show status like 'handler_read%'; - -drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/ext_key_2_innodb.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/ext_key_2_innodb.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/ext_key_2_innodb.test 2014-08-03 12:00:39.000000000 
+0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/ext_key_2_innodb.test 1970-01-01 00:00:00.000000000 +0000 @@ -1,24 +0,0 @@ -source include/have_innodb.inc; - -disable_warnings; -drop table if exists t; -enable_warnings; - -set session optimizer_switch='extended_keys=on'; -select @@optimizer_switch; - -create table t (a int not null, b int not null, c int not null, d int not null, primary key(a,b), key(c,a)) engine=innodb; - -insert into t values (0,0,0,0),(0,1,0,1); - -explain select c,a,b from t where c=0 and a=0 and b=1; -flush status; -select c,a,b from t where c=0 and a=0 and b=1; -show status like 'handler_read%'; - -explain select c,a,b from t force index (c) where c=0 and a=0 and b=1; -flush status; -select c,a,b from t force index (c) where c=0 and a=0 and b=1; -show status like 'handler_read%'; - -drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/ext_key_2_tokudb.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/ext_key_2_tokudb.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/ext_key_2_tokudb.test 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/ext_key_2_tokudb.test 1970-01-01 00:00:00.000000000 +0000 @@ -1,24 +0,0 @@ -#source include/have_tokudb.inc; - -disable_warnings; -drop table if exists t; -enable_warnings; - -set session optimizer_switch='extended_keys=on'; -select @@optimizer_switch; - -create table t (a int not null, b int not null, c int not null, d int not null, primary key(a,b), key(c,a)) engine=tokudb; - -insert into t values (0,0,0,0),(0,1,0,1); - -explain select c,a,b from t where c=0 and a=0 and b=1; -flush status; -select c,a,b from t where c=0 and a=0 and b=1; -show status like 'handler_read%'; - -explain select c,a,b from t force index (c) where c=0 and a=0 and b=1; -flush status; -select c,a,b from t force index (c) where c=0 and a=0 and b=1; -show status like 'handler_read%'; - -drop table t; diff -Nru 
mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/fast_update_binlog_mixed.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/fast_update_binlog_mixed.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/fast_update_binlog_mixed.test 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/fast_update_binlog_mixed.test 2014-10-08 13:19:52.000000000 +0000 @@ -1,6 +1,6 @@ -source include/master-slave.inc; source include/have_binlog_format_mixed.inc; source include/have_tokudb.inc; +source include/master-slave.inc; set default_storage_engine='tokudb'; create table tt (id int primary key, x int); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/fast_update_binlog_statement.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/fast_update_binlog_statement.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/fast_update_binlog_statement.test 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/fast_update_binlog_statement.test 2014-10-08 13:19:52.000000000 +0000 @@ -1,6 +1,6 @@ -source include/master-slave.inc; source include/have_binlog_format_statement.inc; source include/have_tokudb.inc; +source include/master-slave.inc; set default_storage_engine='tokudb'; create table tt (id int primary key, x int); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/hotindex-insert-bigchar.opt mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/hotindex-insert-bigchar.opt --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/hotindex-insert-bigchar.opt 2014-08-03 12:00:43.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/hotindex-insert-bigchar.opt 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ ---loose-tokudb-max-lock-memory=320M diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_locks_released.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_locks_released.test 
--- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_locks_released.test 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_locks_released.test 2014-10-08 13:19:51.000000000 +0000 @@ -1,7 +1,3 @@ -if (`select @@tokudb_version <= "7.1.6"`) -{ - --skip Race condition in the test in TokuDB 7.1.6 or earlier -} # verify that information_schema.tokudb_locks gets populated with locks for 2 clients source include/have_tokudb.inc; @@ -36,12 +32,14 @@ real_sleep 1; # delay a little to shorten the update -> write row -> lock wait race replace_column 1 TRX_ID 2 MYSQL_ID; +replace_result $datadir ./; eval select * from information_schema.tokudb_locks; connection conn_a; commit; # verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction replace_column 1 TRX_ID 2 MYSQL_ID; +replace_result $datadir ./; select * from information_schema.tokudb_locks; connection conn_b; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_locks.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_locks.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_locks.test 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_locks.test 2014-10-08 13:19:52.000000000 +0000 @@ -29,6 +29,7 @@ # should find 3 locks for 2 transactions connection default; replace_column 1 TRX_ID 2 MYSQL_ID; +replace_result $datadir ./; eval select * from information_schema.tokudb_locks order by locks_trx_id,locks_key_left; connection conn_a; @@ -42,4 +43,4 @@ disconnect conn_a; -drop table t; \ No newline at end of file +drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test --- 
mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test 2014-10-08 13:19:51.000000000 +0000 @@ -1,8 +1,4 @@ # verify that information_schema.tokudb_locks gets populated with locks, information_schema.tokudb_lock_waits gets -if (`select @@tokudb_version <= "7.1.6"`) -{ - --skip Race condition in the test in TokuDB 7.1.6 or earlier -} # populated with 1 lock_wait and all transactions are present in information_schema.tokudb_trx for 2 clients source include/have_tokudb.inc; @@ -37,10 +33,12 @@ real_sleep 1; # delay a little to shorten the update -> write row -> lock wait race replace_column 1 TRX_ID 2 MYSQL_ID; +replace_result $datadir ./; select * from information_schema.tokudb_locks; # should find the presence of a lock_wait on the 2nd transaction replace_column 1 REQUEST_TRX_ID 2 BLOCK_TRX_ID 6 LOCK_WAITS_START_TIME; +replace_result $datadir ./; select * from information_schema.tokudb_lock_waits; # should find the presence of two transactions @@ -51,6 +49,7 @@ commit; # verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction replace_column 1 TRX_ID 2 MYSQL_ID; +replace_result $datadir ./; select * from information_schema.tokudb_locks; select * from information_schema.tokudb_lock_waits; @@ -87,10 +86,12 @@ real_sleep 1; # delay a little to shorten the update -> write row -> lock wait race replace_column 1 TRX_ID 2 MYSQL_ID; +replace_result $datadir ./; select * from information_schema.tokudb_locks; # should find the presence of a lock_wait on the 2nd transaction replace_column 1 REQUEST_TRX_ID 2 BLOCK_TRX_ID 6 LOCK_WAITS_START_TIME; +replace_result $datadir ./; select * from information_schema.tokudb_lock_waits; # should find the presence of two transactions @@ -101,6 +102,7 @@ commit; # verify that the lock on the 1st transaction is released and replaced 
by the lock for the 2nd transaction replace_column 1 TRX_ID 2 MYSQL_ID; +replace_result $datadir ./; select * from information_schema.tokudb_locks; select * from information_schema.tokudb_lock_waits; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_timeout.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_timeout.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_timeout.test 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_timeout.test 2014-10-08 13:19:52.000000000 +0000 @@ -30,10 +30,12 @@ real_sleep 1; # delay a little to shorten the update -> write row -> lock wait race replace_column 1 TRX_ID 2 MYSQL_ID; +replace_result $datadir ./; select * from information_schema.tokudb_locks; # should find the presence of a lock_wait on the 2nd transaction replace_column 1 REQUEST_TRX_ID 2 BLOCK_TRX_ID 6 LOCK_WAITS_START_TIME; +replace_result $datadir ./; select * from information_schema.tokudb_lock_waits; # should find the presence of two transactions diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/tokudb_support_xa.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/tokudb_support_xa.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb/t/tokudb_support_xa.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb/t/tokudb_support_xa.test 2014-10-08 13:19:52.000000000 +0000 @@ -0,0 +1,133 @@ +--source include/load_sysvars.inc +let $engine=TokuDB; + +--echo '#--------------------begin------------------------#' +SET @session_start_value = @@session.tokudb_support_xa; +SELECT @session_start_value; + +SET @global_start_value = @@global.tokudb_support_xa; +SELECT @global_start_value; + +SET @@session.tokudb_support_xa = 0; +SET @@session.tokudb_support_xa = DEFAULT; +SELECT @@session.tokudb_support_xa; +SET @@global.tokudb_support_xa = 0; +SET 
@@global.tokudb_support_xa = DEFAULT; +SELECT @@global.tokudb_support_xa; + +--echo '#--------------------case#1 valid set support_xa------------------------#' +# for session +SET @@session.tokudb_support_xa = 0; +SELECT @@session.tokudb_support_xa; +SET @@session.tokudb_support_xa = 1; +SELECT @@session.tokudb_support_xa; + +# for global +SET @@global.tokudb_support_xa = 0; +SELECT @@global.tokudb_support_xa; +SET @@global.tokudb_support_xa = 1; +SELECT @@global.tokudb_support_xa; + +--echo '#--------------------case#2 invalid set support_xa------------------------#' +# for session +--Error ER_WRONG_TYPE_FOR_VAR +SET @@session.tokudb_support_xa = -0.6; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@session.tokudb_support_xa = 1.6; +--Error ER_WRONG_VALUE_FOR_VAR +SET @@session.tokudb_support_xa = "T"; +--Error ER_WRONG_VALUE_FOR_VAR +SET @@session.tokudb_support_xa = "Y"; +SET @@session.tokudb_support_xa = OF; +SELECT @@session.tokudb_support_xa; + +# for global +--Error ER_WRONG_VALUE_FOR_VAR +SET @@global.tokudb_support_xa = 2; +--Error ER_WRONG_VALUE_FOR_VAR +SET @@global.tokudb_support_xa = "T"; +--Error ER_WRONG_VALUE_FOR_VAR +SET @@global.tokudb_support_xa = "Y"; + + +--echo '#--------------------case#3 xa.test port from tokudb_mariadb/xa.test ------------------------#' +--echo '#--------------------xa.test with tokudb_support_xa OFF ------------------------#' +SET @@global.tokudb_support_xa = OFF; +SELECT @@global.tokudb_support_xa; +create table t1 (a int) engine=tokudb; +xa start 'test1'; +insert t1 values (10); +xa end 'test1'; +xa prepare 'test1'; +xa rollback 'test1'; +select * from t1; + +xa start 'test2'; +--error ER_XAER_RMFAIL +xa start 'test-bad'; +insert t1 values (20); +--error ER_XAER_RMFAIL +xa prepare 'test2'; +xa end 'test2'; +xa prepare 'test2'; +xa commit 'test2'; +select * from t1; + +xa start 'testa','testb'; +insert t1 values (30); + +--error ER_XAER_RMFAIL +commit; + +xa end 'testa','testb'; + +--error ER_XAER_RMFAIL +begin; +--error 
ER_XAER_RMFAIL +create table t2 (a int); + +connect (con1,localhost,root,,); +connection con1; + +--error ER_XAER_DUPID +xa start 'testa','testb'; +--error ER_XAER_DUPID +xa start 'testa','testb', 123; + +# gtrid [ , bqual [ , formatID ] ] +xa start 0x7465737462, 0x2030405060, 0xb; +insert t1 values (40); +xa end 'testb',' 0@P`',11; +xa prepare 'testb',0x2030405060,11; + +--error ER_XAER_RMFAIL +start transaction; + +xa recover; + +connection default; + +xa prepare 'testa','testb'; + +xa recover; + +--error ER_XAER_NOTA +xa commit 'testb',0x2030405060,11; +xa rollback 'testa','testb'; + +--replace_regex /MariaDB/XYZ/ /MySQL/XYZ/ +--error ER_PARSE_ERROR +xa start 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'; + +select * from t1; + +disconnect con1; +connection default; +drop table t1; + +--echo '#--------------------end------------------------#' +SET @@session.tokudb_support_xa = @session_start_value; +SELECT @@session.tokudb_support_xa; + +SET @@global.tokudb_support_xa = @global_start_value; +SELECT @@global.tokudb_support_xa; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_add_index/suite.opt mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_add_index/suite.opt --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_add_index/suite.opt 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_add_index/suite.opt 2014-10-08 13:19:52.000000000 +0000 @@ -1 +1 @@ ---tokudb --plugin-load=$HA_TOKUDB_SO +--tokudb --plugin-load=$HA_TOKUDB_SO --loose-tokudb-check-jemalloc=0 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_alter_table/r/fractional_time_alter_table.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_alter_table/r/fractional_time_alter_table.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_alter_table/r/fractional_time_alter_table.result 2014-08-03 12:00:39.000000000 +0000 +++ 
mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_alter_table/r/fractional_time_alter_table.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,174 +0,0 @@ -SET DEFAULT_STORAGE_ENGINE = 'tokudb'; -DROP TABLE IF EXISTS foo; -set tokudb_disable_slow_alter=on; -create table foo ( -a timestamp, -b timestamp(1), -c timestamp(2), -d timestamp(3), -e timestamp(4), -f timestamp(5), -g timestamp(6) -) engine=TokuDB; -alter table foo change a a timestamp(1); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a timestamp(2); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a timestamp(3); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a timestamp(4); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a timestamp(5); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a timestamp(6); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b timestamp(2); -show create table foo; -Table Create Table -foo CREATE TABLE `foo` ( - `a` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, - `b` timestamp(2) NOT NULL DEFAULT '0000-00-00 00:00:00.00', - `c` timestamp(2) NOT NULL DEFAULT '0000-00-00 00:00:00.00', - `d` timestamp(3) NOT NULL DEFAULT '0000-00-00 00:00:00.000', - `e` timestamp(4) NOT NULL DEFAULT '0000-00-00 00:00:00.0000', - `f` timestamp(5) NOT NULL DEFAULT '0000-00-00 00:00:00.00000', - `g` timestamp(6) NOT NULL DEFAULT '0000-00-00 00:00:00.000000' -) ENGINE=TokuDB DEFAULT CHARSET=latin1 -alter table foo change b b timestamp(1); -alter table foo change b b timestamp(3); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b 
timestamp(4); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b timestamp(5); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b timestamp(6); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change d d timestamp(2); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change d d timestamp(5); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change d d timestamp(6); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change f f timestamp(6); -alter table foo change f f timestamp(5); -alter table foo change f f timestamp(4); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -drop table foo; -create table foo ( -a datetime, -b datetime(1), -c datetime(2), -d datetime(3), -e datetime(4), -f datetime(5), -g datetime(6) -) engine=TokuDB; -alter table foo change a a datetime(1); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a datetime(2); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a datetime(3); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a datetime(4); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a datetime(5); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a datetime(6); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b datetime(2); -show create table foo; -Table Create Table -foo CREATE TABLE `foo` 
( - `a` datetime DEFAULT NULL, - `b` datetime(2) DEFAULT NULL, - `c` datetime(2) DEFAULT NULL, - `d` datetime(3) DEFAULT NULL, - `e` datetime(4) DEFAULT NULL, - `f` datetime(5) DEFAULT NULL, - `g` datetime(6) DEFAULT NULL -) ENGINE=TokuDB DEFAULT CHARSET=latin1 -alter table foo change b b datetime(1); -alter table foo change b b datetime(3); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b datetime(4); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b datetime(5); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b datetime(6); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change d d datetime(2); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change d d datetime(5); -alter table foo change d d datetime(6); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change g g datetime(5); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -drop table foo; -create table foo ( -a time, -b time(1), -c time(2), -d time(3), -e time(4), -f time(5), -g time(6) -) engine=TokuDB; -alter table foo change a a time(1); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a time(2); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a time(3); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a time(4); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a time(5); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version 
-alter table foo change a a time(6); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b time(2); -show create table foo; -Table Create Table -foo CREATE TABLE `foo` ( - `a` time DEFAULT NULL, - `b` time(2) DEFAULT NULL, - `c` time(2) DEFAULT NULL, - `d` time(3) DEFAULT NULL, - `e` time(4) DEFAULT NULL, - `f` time(5) DEFAULT NULL, - `g` time(6) DEFAULT NULL -) ENGINE=TokuDB DEFAULT CHARSET=latin1 -alter table foo change b b time(1); -alter table foo change b b time(3); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b time(4); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b time(5); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b time(6); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change d d time(2); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change d d time(5); -alter table foo change d d time(6); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change g g time(5); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -drop table foo; -create table foo (a int, b int) engine=TokuDB; -insert into foo values (1,2); -alter table foo add column tt timestamp(3) NOT NULL Default '1981-11-01 10:11:13.123' after a; -select * from foo; -a tt b -1 1981-11-01 10:11:13.123 2 -alter table foo drop column tt; -alter table foo add column tt datetime(3) NOT NULL Default '1981-11-01 10:11:13.123' after a; -select * from foo; -a tt b -1 1981-11-01 10:11:13.123 2 -alter table foo drop column tt; -alter table foo add column tt time(3) NOT NULL Default '10:11:13.123' after a; -select * from foo; -a tt b -1 10:11:13.123 2 
-alter table foo drop column tt; -drop table foo; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_alter_table/suite.opt mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_alter_table/suite.opt --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_alter_table/suite.opt 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_alter_table/suite.opt 2014-10-08 13:19:52.000000000 +0000 @@ -1 +1 @@ ---tokudb --plugin-load=$HA_TOKUDB_SO +--tokudb --plugin-load=$HA_TOKUDB_SO --loose-tokudb-check-jemalloc=0 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_alter_table/t/fractional_time_alter_table.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_alter_table/t/fractional_time_alter_table.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_alter_table/t/fractional_time_alter_table.test 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_alter_table/t/fractional_time_alter_table.test 1970-01-01 00:00:00.000000000 +0000 @@ -1,164 +0,0 @@ -SET DEFAULT_STORAGE_ENGINE = 'tokudb'; - ---disable_warnings -DROP TABLE IF EXISTS foo; ---enable_warnings -set tokudb_disable_slow_alter=on; - -create table foo ( - a timestamp, - b timestamp(1), - c timestamp(2), - d timestamp(3), - e timestamp(4), - f timestamp(5), - g timestamp(6) -) engine=TokuDB; - ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a timestamp(1); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a timestamp(2); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a timestamp(3); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a timestamp(4); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a timestamp(5); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a timestamp(6); - -alter table foo change b b timestamp(2); -show create table foo; -alter table foo change b b timestamp(1); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b 
timestamp(3); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b timestamp(4); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b timestamp(5); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b timestamp(6); - ---error ER_UNSUPPORTED_EXTENSION -alter table foo change d d timestamp(2); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change d d timestamp(5); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change d d timestamp(6); - -alter table foo change f f timestamp(6); -alter table foo change f f timestamp(5); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change f f timestamp(4); -drop table foo; - - -create table foo ( - a datetime, - b datetime(1), - c datetime(2), - d datetime(3), - e datetime(4), - f datetime(5), - g datetime(6) -) engine=TokuDB; - ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a datetime(1); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a datetime(2); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a datetime(3); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a datetime(4); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a datetime(5); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a datetime(6); - -alter table foo change b b datetime(2); -show create table foo; -alter table foo change b b datetime(1); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b datetime(3); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b datetime(4); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b datetime(5); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b datetime(6); - ---error ER_UNSUPPORTED_EXTENSION -alter table foo change d d datetime(2); -alter table foo change d d datetime(5); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change d d datetime(6); - ---error ER_UNSUPPORTED_EXTENSION -alter table foo change g g datetime(5); -drop table foo; - - -create table foo ( - a time, - b 
time(1), - c time(2), - d time(3), - e time(4), - f time(5), - g time(6) -) engine=TokuDB; - ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a time(1); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a time(2); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a time(3); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a time(4); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a time(5); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a time(6); - -alter table foo change b b time(2); -show create table foo; -alter table foo change b b time(1); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b time(3); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b time(4); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b time(5); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b time(6); - ---error ER_UNSUPPORTED_EXTENSION -alter table foo change d d time(2); -alter table foo change d d time(5); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change d d time(6); - ---error ER_UNSUPPORTED_EXTENSION -alter table foo change g g time(5); -drop table foo; - - -create table foo (a int, b int) engine=TokuDB; -insert into foo values (1,2); -alter table foo add column tt timestamp(3) NOT NULL Default '1981-11-01 10:11:13.123' after a; -select * from foo; -alter table foo drop column tt; - -alter table foo add column tt datetime(3) NOT NULL Default '1981-11-01 10:11:13.123' after a; -select * from foo; -alter table foo drop column tt; - - -alter table foo add column tt time(3) NOT NULL Default '10:11:13.123' after a; -select * from foo; -alter table foo drop column tt; - -drop table foo; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/r/4676.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/r/4676.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/r/4676.result 2014-08-03 12:00:39.000000000 +0000 +++ 
mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/r/4676.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,13 +0,0 @@ -DROP TABLE IF EXISTS t; -CREATE TABLE t (a INT) ENGINE='tokudb' PARTITION BY KEY (a) (PARTITION part0, PARTITION part1); -SHOW CREATE TABLE t; -Table Create Table -t CREATE TABLE `t` ( - `a` int(11) DEFAULT NULL -) ENGINE=TokuDB DEFAULT CHARSET=latin1 -/*!50100 PARTITION BY KEY (a) -(PARTITION part0 ENGINE = TokuDB, - PARTITION part1 ENGINE = TokuDB) */ -ALTER TABLE t TRUNCATE PARTITION part0; -ALTER TABLE t TRUNCATE PARTITION part1; -DROP TABLE IF EXISTS t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/r/4677.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/r/4677.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/r/4677.result 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/r/4677.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,11 +0,0 @@ -drop table if exists t; -create table t (a int primary key) engine='tokudb'; -begin; -insert into t values (1); -insert into t values (3); -begin; -insert into t values (2); -insert into t values (4); -commit; -commit; -drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/r/fractional_time.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/r/fractional_time.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/r/fractional_time.result 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/r/fractional_time.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,62 +0,0 @@ -SET DEFAULT_STORAGE_ENGINE = 'tokudb'; -DROP TABLE IF EXISTS foo; -create table foo (a timestamp(6), b timestamp(4), c timestamp(5), primary key (a))engine=tokudb; -insert into foo values ('2010-12-10 14:12:09.123452', '2010-12-10 14:12:09.123416', '2010-12-10 14:12:09.123451'); -insert into foo values ('2010-12-10 14:12:09.123454', '2010-12-10 
14:12:09.123416', '2010-12-10 14:12:09.123451'); -insert into foo values ('2010-12-10 14:12:09.123451', '2010-12-10 14:12:09.123416', '2010-12-10 14:12:09.123451'); -insert into foo values ('2010-12-10 14:12:09.123453', '2010-12-10 14:12:09.123416', '2010-12-10 14:12:09.123451'); -select * from foo; -a b c -2010-12-10 14:12:09.123451 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -2010-12-10 14:12:09.123452 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -2010-12-10 14:12:09.123453 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -2010-12-10 14:12:09.123454 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -explain select * from foo where a > '2010-12-10 14:12:09.123452'; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE foo range PRIMARY PRIMARY 7 NULL 2 Using where -select * from foo where a > '2010-12-10 14:12:09.123452'; -a b c -2010-12-10 14:12:09.123453 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -2010-12-10 14:12:09.123454 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -alter table foo change a a datetime(6), change b b datetime(4), change c c datetime(5); -show create table foo; -Table Create Table -foo CREATE TABLE `foo` ( - `a` datetime(6) NOT NULL DEFAULT '0000-00-00 00:00:00.000000', - `b` datetime(4) DEFAULT NULL, - `c` datetime(5) DEFAULT NULL, - PRIMARY KEY (`a`) -) ENGINE=TokuDB DEFAULT CHARSET=latin1 -select * from foo; -a b c -2010-12-10 14:12:09.123451 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -2010-12-10 14:12:09.123452 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -2010-12-10 14:12:09.123453 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -2010-12-10 14:12:09.123454 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -explain select * from foo where a > '2010-12-10 14:12:09.123452'; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE foo range PRIMARY PRIMARY 8 NULL 2 Using where -select * from foo where a > '2010-12-10 14:12:09.123452'; -a b c 
-2010-12-10 14:12:09.123453 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -2010-12-10 14:12:09.123454 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -drop table foo; -create table foo (a time(6), b time(4), c time(5), primary key (a))engine=TokuDB; -insert into foo values ('14:12:09.123452', '14:12:09.123416', '14:12:09.123451'); -insert into foo values ('14:12:09.123454', '14:12:09.123416', '14:12:09.123451'); -insert into foo values ('14:12:09.123451', '14:12:09.123416', '14:12:09.123451'); -insert into foo values ('14:12:09.123453', '14:12:09.123416', '14:12:09.123451'); -select * from foo; -a b c -14:12:09.123451 14:12:09.1234 14:12:09.12345 -14:12:09.123452 14:12:09.1234 14:12:09.12345 -14:12:09.123453 14:12:09.1234 14:12:09.12345 -14:12:09.123454 14:12:09.1234 14:12:09.12345 -explain select * from foo where a > '14:12:09.123452'; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE foo range PRIMARY PRIMARY 6 NULL 2 Using where -select * from foo where a > '14:12:09.123452'; -a b c -14:12:09.123453 14:12:09.1234 14:12:09.12345 -14:12:09.123454 14:12:09.1234 14:12:09.12345 -DROP TABLE foo; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/r/mdev5932.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/r/mdev5932.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/r/mdev5932.result 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/r/mdev5932.result 2014-10-08 13:19:51.000000000 +0000 @@ -1,5 +1,6 @@ drop table if exists t1,t2; drop table if exists t1i,t2i; +drop table if exists tsub,t3; CREATE TABLE t1 (a CHAR(3), INDEX(a)) ENGINE=TokuDB; INSERT INTO t1 VALUES ('foo'),( NULL); SELECT * FROM t1 WHERE 'bar' NOT IN ( SELECT t1_1.a FROM t1 AS t1_1, t1 AS t1_2 ); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/r/simple_icp.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/r/simple_icp.result --- 
mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/r/simple_icp.result 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/r/simple_icp.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,178 +0,0 @@ -drop table if exists a,b,c,foo; -create table a (a int auto_increment, primary key (a)) engine=TokuDB; -create table b (a int auto_increment, primary key (a)) engine=TokuDB; -create table c (a int auto_increment, primary key (a)) engine=TokuDB; -insert into a values (),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(); -insert into b values (),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(); -insert into c values (),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(); -create table foo (a int, b int, c int, d int, e int, key(a,b,c)) engine=TokuDB; -insert into foo (a,b,c) select * from a,b,c; -flush status; -show status like '%Handler_read_next%'; -Variable_name Value -Handler_read_next 0 -explain select * from foo where a between 5 and 6 and c=10; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE foo range a a 5 NULL 800 Using where -select * from foo where a between 5 and 6 and c=10; -a b c d e -5 1 10 NULL NULL -5 2 10 NULL NULL -5 3 10 NULL NULL -5 4 10 NULL NULL -5 5 10 NULL NULL -5 6 10 NULL NULL -5 7 10 NULL NULL -5 8 10 NULL NULL -5 9 10 NULL NULL -5 10 10 NULL NULL -5 11 10 NULL NULL -5 12 10 NULL NULL -5 13 10 NULL NULL -5 14 10 NULL NULL -5 15 10 NULL NULL -5 16 10 NULL NULL -5 17 10 NULL NULL -5 18 10 NULL NULL -5 19 10 NULL NULL -5 20 10 NULL NULL -6 1 10 NULL NULL -6 2 10 NULL NULL -6 3 10 NULL NULL -6 4 10 NULL NULL -6 5 10 NULL NULL -6 6 10 NULL NULL -6 7 10 NULL NULL -6 8 10 NULL NULL -6 9 10 NULL NULL -6 10 10 NULL NULL -6 11 10 NULL NULL -6 12 10 NULL NULL -6 13 10 NULL NULL -6 14 10 NULL NULL -6 15 10 NULL NULL -6 16 10 NULL NULL -6 17 10 NULL NULL -6 18 10 NULL NULL -6 19 10 NULL NULL -6 20 10 NULL NULL -show status like '%Handler_read_next%'; 
-Variable_name Value -Handler_read_next 41 -flush status; -show status like '%Handler_read_prev%'; -Variable_name Value -Handler_read_prev 0 -explain select * from foo where a between 5 and 6 and c=10; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE foo range a a 5 NULL 800 Using where -select * from foo where a between 5 and 6 and c=10 order by a desc; -a b c d e -6 20 10 NULL NULL -6 19 10 NULL NULL -6 18 10 NULL NULL -6 17 10 NULL NULL -6 16 10 NULL NULL -6 15 10 NULL NULL -6 14 10 NULL NULL -6 13 10 NULL NULL -6 12 10 NULL NULL -6 11 10 NULL NULL -6 10 10 NULL NULL -6 9 10 NULL NULL -6 8 10 NULL NULL -6 7 10 NULL NULL -6 6 10 NULL NULL -6 5 10 NULL NULL -6 4 10 NULL NULL -6 3 10 NULL NULL -6 2 10 NULL NULL -6 1 10 NULL NULL -5 20 10 NULL NULL -5 19 10 NULL NULL -5 18 10 NULL NULL -5 17 10 NULL NULL -5 16 10 NULL NULL -5 15 10 NULL NULL -5 14 10 NULL NULL -5 13 10 NULL NULL -5 12 10 NULL NULL -5 11 10 NULL NULL -5 10 10 NULL NULL -5 9 10 NULL NULL -5 8 10 NULL NULL -5 7 10 NULL NULL -5 6 10 NULL NULL -5 5 10 NULL NULL -5 4 10 NULL NULL -5 3 10 NULL NULL -5 2 10 NULL NULL -5 1 10 NULL NULL -show status like '%Handler_read_prev%'; -Variable_name Value -Handler_read_prev 41 -flush status; -show status like '%Handler_read_prev%'; -Variable_name Value -Handler_read_prev 0 -explain select * from foo where a > 19 and c=10; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE foo range a a 5 NULL 1713 Using where -select * from foo where a > 19 and c=10 order by a desc; -a b c d e -20 20 10 NULL NULL -20 19 10 NULL NULL -20 18 10 NULL NULL -20 17 10 NULL NULL -20 16 10 NULL NULL -20 15 10 NULL NULL -20 14 10 NULL NULL -20 13 10 NULL NULL -20 12 10 NULL NULL -20 11 10 NULL NULL -20 10 10 NULL NULL -20 9 10 NULL NULL -20 8 10 NULL NULL -20 7 10 NULL NULL -20 6 10 NULL NULL -20 5 10 NULL NULL -20 4 10 NULL NULL -20 3 10 NULL NULL -20 2 10 NULL NULL -20 1 10 NULL NULL -show status like '%Handler_read_prev%'; 
-Variable_name Value -Handler_read_prev 21 -flush status; -show status like '%Handler_read_next%'; -Variable_name Value -Handler_read_next 0 -explain select * from foo where a > 19 and c=10; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE foo range a a 5 NULL 1402 Using where -select * from foo where a > 19 and c=10; -a b c d e -20 1 10 NULL NULL -20 2 10 NULL NULL -20 3 10 NULL NULL -20 4 10 NULL NULL -20 5 10 NULL NULL -20 6 10 NULL NULL -20 7 10 NULL NULL -20 8 10 NULL NULL -20 9 10 NULL NULL -20 10 10 NULL NULL -20 11 10 NULL NULL -20 12 10 NULL NULL -20 13 10 NULL NULL -20 14 10 NULL NULL -20 15 10 NULL NULL -20 16 10 NULL NULL -20 17 10 NULL NULL -20 18 10 NULL NULL -20 19 10 NULL NULL -20 20 10 NULL NULL -show status like '%Handler_read_next%'; -Variable_name Value -Handler_read_next 21 -drop table foo,a,b,c; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb718.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb718.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb718.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb718.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,9 @@ +set default_storage_engine='tokudb'; +drop table if exists t; +create table t (id int primary key); +begin; +insert into t values (1),(2); +select * from information_schema.tokudb_fractal_tree_info; +ERROR HY000: Unknown error -30994 +commit; +drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb_mrr2.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb_mrr2.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb_mrr2.result 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb_mrr2.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,441 +0,0 @@ -drop table if exists t1,t2,t3,t4; -set 
@maria_mrr_tmp=@@optimizer_switch; -set optimizer_switch='mrr=on,mrr_sort_keys=on,index_condition_pushdown=on'; -set @mrr_buffer_size_save= @@mrr_buffer_size; -set @save_storage_engine= @@storage_engine; -set storage_engine=TokuDB; -create table t1(a int); -show create table t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL -) ENGINE=TokuDB DEFAULT CHARSET=latin1 -insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); -create table t2(a int); -insert into t2 select A.a + 10*(B.a + 10*C.a) from t1 A, t1 B, t1 C; -create table t3 ( -a char(8) not null, b char(8) not null, filler char(200), -key(a) -); -insert into t3 select @a:=concat('c-', 1000+ A.a, '=w'), @a, 'filler' from t2 A; -insert into t3 select concat('c-', 1000+A.a, '=w'), concat('c-', 2000+A.a, '=w'), -'filler-1' from t2 A; -insert into t3 select concat('c-', 1000+A.a, '=w'), concat('c-', 3000+A.a, '=w'), -'filler-2' from t2 A; -select a,filler from t3 where a >= 'c-9011=w'; -a filler -select a,filler from t3 where a >= 'c-1011=w' and a <= 'c-1015=w'; -a filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1011=w filler-1 -c-1012=w filler-1 -c-1013=w filler-1 -c-1014=w filler-1 -c-1015=w filler-1 -c-1011=w filler-2 -c-1012=w filler-2 -c-1013=w filler-2 -c-1014=w filler-2 -c-1015=w filler-2 -select a,filler from t3 where (a>='c-1011=w' and a <= 'c-1013=w') or -(a>='c-1014=w' and a <= 'c-1015=w'); -a filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1011=w filler-1 -c-1012=w filler-1 -c-1013=w filler-1 -c-1014=w filler-1 -c-1015=w filler-1 -c-1011=w filler-2 -c-1012=w filler-2 -c-1013=w filler-2 -c-1014=w filler-2 -c-1015=w filler-2 -insert into t3 values ('c-1013=z', 'c-1013=z', 'err'); -insert into t3 values ('a-1014=w', 'a-1014=w', 'err'); -select a,filler from t3 where (a>='c-1011=w' and a <= 'c-1013=w') or -(a>='c-1014=w' and a <= 'c-1015=w'); -a filler -c-1011=w filler -c-1012=w 
filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1011=w filler-1 -c-1012=w filler-1 -c-1013=w filler-1 -c-1014=w filler-1 -c-1015=w filler-1 -c-1011=w filler-2 -c-1012=w filler-2 -c-1013=w filler-2 -c-1014=w filler-2 -c-1015=w filler-2 -delete from t3 where b in ('c-1013=z', 'a-1014=w'); -select a,filler from t3 where a='c-1011=w' or a='c-1012=w' or a='c-1013=w' or -a='c-1014=w' or a='c-1015=w'; -a filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1011=w filler-1 -c-1012=w filler-1 -c-1013=w filler-1 -c-1014=w filler-1 -c-1015=w filler-1 -c-1011=w filler-2 -c-1012=w filler-2 -c-1013=w filler-2 -c-1014=w filler-2 -c-1015=w filler-2 -insert into t3 values ('c-1013=w', 'del-me', 'inserted'); -select a,filler from t3 where a='c-1011=w' or a='c-1012=w' or a='c-1013=w' or -a='c-1014=w' or a='c-1015=w'; -a filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1011=w filler-1 -c-1012=w filler-1 -c-1013=w filler-1 -c-1014=w filler-1 -c-1015=w filler-1 -c-1011=w filler-2 -c-1012=w filler-2 -c-1013=w filler-2 -c-1014=w filler-2 -c-1015=w filler-2 -c-1013=w inserted -delete from t3 where b='del-me'; -alter table t3 add primary key(b); -select b,filler from t3 where (b>='c-1011=w' and b<= 'c-1018=w') or -b IN ('c-1019=w', 'c-1020=w', 'c-1021=w', -'c-1022=w', 'c-1023=w', 'c-1024=w'); -b filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1016=w filler -c-1017=w filler -c-1018=w filler -c-1019=w filler -c-1020=w filler -c-1021=w filler -c-1022=w filler -c-1023=w filler -c-1024=w filler -select b,filler from t3 where (b>='c-1011=w' and b<= 'c-1020=w') or -b IN ('c-1021=w', 'c-1022=w', 'c-1023=w'); -b filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1016=w filler -c-1017=w filler -c-1018=w filler -c-1019=w filler -c-1020=w filler -c-1021=w filler -c-1022=w filler -c-1023=w filler -select b,filler 
from t3 where (b>='c-1011=w' and b<= 'c-1018=w') or -b IN ('c-1019=w', 'c-1020=w') or -(b>='c-1021=w' and b<= 'c-1023=w'); -b filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1016=w filler -c-1017=w filler -c-1018=w filler -c-1019=w filler -c-1020=w filler -c-1021=w filler -c-1022=w filler -c-1023=w filler -drop table if exists t4; -create table t4 (a varchar(10), b int, c char(10), filler char(200), -key idx1 (a, b, c)); -insert into t4 (filler) select concat('NULL-', 15-a) from t2 order by a limit 15; -insert into t4 (a,b,c,filler) -select 'b-1',NULL,'c-1', concat('NULL-', 15-a) from t2 order by a limit 15; -insert into t4 (a,b,c,filler) -select 'b-1',NULL,'c-222', concat('NULL-', 15-a) from t2 order by a limit 15; -insert into t4 (a,b,c,filler) -select 'bb-1',NULL,'cc-2', concat('NULL-', 15-a) from t2 order by a limit 15; -insert into t4 (a,b,c,filler) -select 'zz-1',NULL,'cc-2', 'filler-data' from t2 order by a limit 500; -explain -select * from t4 where a IS NULL and b IS NULL and (c IS NULL or c='no-such-row1' - or c='no-such-row2'); -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t4 range idx1 idx1 29 NULL 16 Using where; Rowid-ordered scan -select * from t4 where a IS NULL and b IS NULL and (c IS NULL or c='no-such-row1' - or c='no-such-row2'); -a b c filler -NULL NULL NULL NULL-15 -NULL NULL NULL NULL-14 -NULL NULL NULL NULL-13 -NULL NULL NULL NULL-12 -NULL NULL NULL NULL-11 -NULL NULL NULL NULL-10 -NULL NULL NULL NULL-9 -NULL NULL NULL NULL-8 -NULL NULL NULL NULL-7 -NULL NULL NULL NULL-6 -NULL NULL NULL NULL-5 -NULL NULL NULL NULL-4 -NULL NULL NULL NULL-3 -NULL NULL NULL NULL-2 -NULL NULL NULL NULL-1 -explain -select * from t4 where (a ='b-1' or a='bb-1') and b IS NULL and (c='c-1' or c='cc-2'); -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t4 range idx1 idx1 29 NULL 32 Using where; Rowid-ordered scan -select * from t4 where (a ='b-1' or a='bb-1') 
and b IS NULL and (c='c-1' or c='cc-2'); -a b c filler -b-1 NULL c-1 NULL-15 -b-1 NULL c-1 NULL-14 -b-1 NULL c-1 NULL-13 -b-1 NULL c-1 NULL-12 -b-1 NULL c-1 NULL-11 -b-1 NULL c-1 NULL-10 -b-1 NULL c-1 NULL-9 -b-1 NULL c-1 NULL-8 -b-1 NULL c-1 NULL-7 -b-1 NULL c-1 NULL-6 -b-1 NULL c-1 NULL-5 -b-1 NULL c-1 NULL-4 -b-1 NULL c-1 NULL-3 -b-1 NULL c-1 NULL-2 -b-1 NULL c-1 NULL-1 -bb-1 NULL cc-2 NULL-15 -bb-1 NULL cc-2 NULL-14 -bb-1 NULL cc-2 NULL-13 -bb-1 NULL cc-2 NULL-12 -bb-1 NULL cc-2 NULL-11 -bb-1 NULL cc-2 NULL-10 -bb-1 NULL cc-2 NULL-9 -bb-1 NULL cc-2 NULL-8 -bb-1 NULL cc-2 NULL-7 -bb-1 NULL cc-2 NULL-6 -bb-1 NULL cc-2 NULL-5 -bb-1 NULL cc-2 NULL-4 -bb-1 NULL cc-2 NULL-3 -bb-1 NULL cc-2 NULL-2 -bb-1 NULL cc-2 NULL-1 -select * from t4 ignore index(idx1) where (a ='b-1' or a='bb-1') and b IS NULL and (c='c-1' or c='cc-2'); -a b c filler -b-1 NULL c-1 NULL-15 -b-1 NULL c-1 NULL-14 -b-1 NULL c-1 NULL-13 -b-1 NULL c-1 NULL-12 -b-1 NULL c-1 NULL-11 -b-1 NULL c-1 NULL-10 -b-1 NULL c-1 NULL-9 -b-1 NULL c-1 NULL-8 -b-1 NULL c-1 NULL-7 -b-1 NULL c-1 NULL-6 -b-1 NULL c-1 NULL-5 -b-1 NULL c-1 NULL-4 -b-1 NULL c-1 NULL-3 -b-1 NULL c-1 NULL-2 -b-1 NULL c-1 NULL-1 -bb-1 NULL cc-2 NULL-15 -bb-1 NULL cc-2 NULL-14 -bb-1 NULL cc-2 NULL-13 -bb-1 NULL cc-2 NULL-12 -bb-1 NULL cc-2 NULL-11 -bb-1 NULL cc-2 NULL-10 -bb-1 NULL cc-2 NULL-9 -bb-1 NULL cc-2 NULL-8 -bb-1 NULL cc-2 NULL-7 -bb-1 NULL cc-2 NULL-6 -bb-1 NULL cc-2 NULL-5 -bb-1 NULL cc-2 NULL-4 -bb-1 NULL cc-2 NULL-3 -bb-1 NULL cc-2 NULL-2 -bb-1 NULL cc-2 NULL-1 -drop table t1, t2, t3, t4; -create table t1 (a int, b int not null,unique key (a,b),index(b)); -insert ignore into t1 values (1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(null,7),(9,9),(8,8),(7,7),(null,9),(null,9),(6,6); -Warnings: -Warning 1062 Duplicate entry '6-6' for key 'a' -create table t2 like t1; -insert into t2 select * from t1; -alter table t1 modify b blob not null, add c int not null, drop key a, add unique key (a,b(20),c), drop key b, add key (b(10)); -select * from t1 
where a is null; -a b c -NULL 7 0 -NULL 9 0 -NULL 9 0 -select * from t1 where (a is null or a > 0 and a < 3) and b > 7 limit 3; -a b c -NULL 9 0 -NULL 9 0 -select * from t1 where a is null and b=9 or a is null and b=7 limit 3; -a b c -NULL 7 0 -NULL 9 0 -NULL 9 0 -drop table t1, t2; -set storage_engine= @save_storage_engine; -set @@mrr_buffer_size= @mrr_buffer_size_save; -# -# Crash in quick_range_seq_next() in maria-5.3-dsmrr-cpk with join_cache_level = {8,1} -# -set @save_join_cache_level= @@join_cache_level; -SET SESSION join_cache_level = 8; -CREATE TABLE `t1` ( -`col_int_key` int(11) DEFAULT NULL, -`col_datetime_key` datetime DEFAULT NULL, -`col_varchar_key` varchar(1) DEFAULT NULL, -`col_varchar_nokey` varchar(1) DEFAULT NULL, -KEY `col_varchar_key` (`col_varchar_key`,`col_int_key`) -) ENGINE=TokuDB DEFAULT CHARSET=latin1; -INSERT INTO `t1` VALUES (6,'2005-10-07 00:00:00','e','e'); -INSERT INTO `t1` VALUES (51,'2000-07-15 05:00:34','f','f'); -CREATE TABLE `t2` ( -`col_int_key` int(11) DEFAULT NULL, -`col_datetime_key` datetime DEFAULT NULL, -`col_varchar_key` varchar(1) DEFAULT NULL, -`col_varchar_nokey` varchar(1) DEFAULT NULL, -KEY `col_varchar_key` (`col_varchar_key`,`col_int_key`) -) ENGINE=TokuDB DEFAULT CHARSET=latin1 PAGE_CHECKSUM=1; -INSERT INTO `t2` VALUES (2,'2004-10-11 18:13:16','w','w'); -INSERT INTO `t2` VALUES (2,'1900-01-01 00:00:00','d','d'); -SELECT table2 .`col_datetime_key` -FROM t2 JOIN ( t1 table2 JOIN t2 table3 ON table3 .`col_varchar_key` < table2 .`col_varchar_key` ) ON table3 .`col_varchar_nokey` ; -col_datetime_key -Warnings: -Warning 1292 Truncated incorrect DOUBLE value: 'd' -Warning 1292 Truncated incorrect DOUBLE value: 'd' -Warning 1292 Truncated incorrect DOUBLE value: 'd' -Warning 1292 Truncated incorrect DOUBLE value: 'd' -drop table t1, t2; -set join_cache_level=@save_join_cache_level; -CREATE TABLE t1( -pk int NOT NULL, i int NOT NULL, v varchar(1) NOT NULL, -PRIMARY KEY (pk), INDEX idx (v, i) -) ENGINE=TokuDB; -INSERT INTO 
t1 VALUES -(1,9,'x'), (2,5,'g'), (3,1,'o'), (4,0,'g'), (5,1,'v'), -(6,190,'m'), (7,6,'x'), (8,3,'c'), (9,4,'z'), (10,3,'i'), -(11,186,'x'), (12,1,'g'), (13,8,'q'), (14,226,'m'), (15,133,'p'); -CREATE TABLE t2( -pk int NOT NULL, i int NOT NULL, v varchar(1) NOT NULL, -PRIMARY KEY (pk), INDEX idx (v, i) -) ENGINE=TokuDB; -INSERT INTO t2 SELECT * FROM t1; -INSERT INTO t2 VALUES (77, 333, 'z'); -CREATE TABLE t3( -pk int NOT NULL, i int NOT NULL, v varchar(1) NOT NULL, -PRIMARY KEY (pk), INDEX idx (v, i) -) ENGINE=TokuDB; -INSERT INTO t3 SELECT * FROM t1; -INSERT INTO t3 VALUES -(88, 442, 'y'), (99, 445, 'w'), (87, 442, 'z'), (98, 445, 'v'), (86, 442, 'x'), -(97, 445, 't'), (85, 442, 'b'), (96, 445, 'l'), (84, 442, 'a'), (95, 445, 'k'); -set @save_join_cache_level=@@join_cache_level; -set join_cache_level=1; -SELECT COUNT(t1.v) FROM t1, t2 IGNORE INDEX (idx), t3 IGNORE INDEX (idx) -WHERE t3.v = t2.v AND t3.i < t2.i AND t3.pk > 0 AND t2.pk > 0; -COUNT(t1.v) -120 -EXPLAIN -SELECT COUNT(t1.v) FROM t1, t2 IGNORE INDEX (idx), t3 IGNORE INDEX (idx) -WHERE t3.v = t2.v AND t3.i < t2.i AND t3.pk > 0 AND t2.pk > 0; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL idx 7 NULL 15 Using index -1 SIMPLE t2 range PRIMARY PRIMARY 4 NULL 16 Using where; Using join buffer (flat, BNL join) -1 SIMPLE t3 range PRIMARY PRIMARY 4 NULL 25 Using where; Using join buffer (flat, BNL join) -SELECT COUNT(t1.v) FROM t1, t2, t3 -WHERE t3.v = t2.v AND t3.i < t2.i AND t3.pk > 0 AND t2.pk > 0; -COUNT(t1.v) -120 -EXPLAIN -SELECT COUNT(t1.v) FROM t1, t2, t3 -WHERE t3.v = t2.v AND t3.i < t2.i AND t3.pk > 0 AND t2.pk > 0; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL idx 7 NULL 15 Using index -1 SIMPLE t2 index PRIMARY,idx idx 7 NULL 16 Using where; Using index; Using join buffer (flat, BNL join) -1 SIMPLE t3 ref PRIMARY,idx idx 3 test.t2.v 3 Using where; Using index -set join_cache_level=@save_join_cache_level; -DROP 
TABLE t1,t2,t3; -# -# BUG#671361: virtual int Mrr_ordered_index_reader::refill_buffer(): Assertion `!know_key_tuple_params -# (works only on Maria because we need 1024-byte long key) -# -SET SESSION join_cache_level = 6; -SET SESSION join_buffer_size = 1024; -CREATE TABLE t1 ( -pk int(11) NOT NULL AUTO_INCREMENT, -col_varchar_1024_latin1_key varchar(1024) DEFAULT NULL, -PRIMARY KEY (pk), -KEY col_varchar_1024_latin1_key (col_varchar_1024_latin1_key) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES -(1,'z'), (2,'abcdefjhjkl'), (3,'in'), (4,'abcdefjhjkl'), (6,'abcdefjhjkl'), -(11,'zx'), (12,'abcdefjhjm'), (13,'jn'), (14,'abcdefjhjp'), (16,'abcdefjhjr'); -CREATE TABLE t2 ( -col_varchar_10_latin1 varchar(10) DEFAULT NULL -) ENGINE=TokuDB; -INSERT INTO t2 VALUES ('foo'), ('foo'); -EXPLAIN SELECT count(*) -FROM t1 AS table1, t2 AS table2 -WHERE -table1.col_varchar_1024_latin1_key = table2.col_varchar_10_latin1 AND table1.pk<>0 ; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE table2 ALL NULL NULL NULL NULL 2 Using where -1 SIMPLE table1 ref PRIMARY,col_varchar_1024_latin1_key col_varchar_1024_latin1_key 1027 test.table2.col_varchar_10_latin1 2 Using where; Using index -SELECT count(*) -FROM t1 AS table1, t2 AS table2 -WHERE -table1.col_varchar_1024_latin1_key = table2.col_varchar_10_latin1 AND table1.pk<>0 ; -count(*) -0 -drop table t1, t2; -# -# BUG#693747: Assertion multi_range_read.cc:908: int DsMrr_impl::dsmrr_init( -# -set @_save_join_cache_level= @@join_cache_level; -set @_save_join_buffer_size= @@join_buffer_size; -set join_cache_level=8; -set join_buffer_size=10240; -CREATE TABLE t1 ( -f2 varchar(32) COLLATE latin1_swedish_ci, -f3 int(11), -f4 varchar(1024) COLLATE utf8_bin, -f5 varchar(1024) COLLATE latin1_bin, -KEY (f5) -) ENGINE=TokuDB; -# Fill the table with some data -SELECT alias2.* , alias1.f2 -FROM -t1 AS alias1 -LEFT JOIN t1 AS alias2 ON alias1.f2 = alias2.f5 -WHERE -alias2.f3 < 0; -f2 f3 f4 f5 f2 -set 
join_cache_level=@_save_join_cache_level; -set join_buffer_size=@_save_join_buffer_size; -set optimizer_switch=@maria_mrr_tmp; -drop table t1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb_mrr.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb_mrr.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb_mrr.result 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb_mrr.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,851 +0,0 @@ -drop table if exists t1,t2,t3,t4; -set @save_storage_engine= @@storage_engine; -set storage_engine=TokuDB; -set @innodb_mrr_tmp=@@optimizer_switch; -set optimizer_switch='mrr=on,mrr_sort_keys=on,index_condition_pushdown=on'; -create table t1(a int); -show create table t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL -) ENGINE=TokuDB DEFAULT CHARSET=latin1 -insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); -create table t2(a int); -insert into t2 select A.a + 10*(B.a + 10*C.a) from t1 A, t1 B, t1 C; -create table t3 ( -a char(8) not null, b char(8) not null, filler char(200), -key(a) -); -insert into t3 select @a:=concat('c-', 1000+ A.a, '=w'), @a, 'filler' from t2 A; -insert into t3 select concat('c-', 1000+A.a, '=w'), concat('c-', 2000+A.a, '=w'), -'filler-1' from t2 A; -insert into t3 select concat('c-', 1000+A.a, '=w'), concat('c-', 3000+A.a, '=w'), -'filler-2' from t2 A; -select a,filler from t3 where a >= 'c-9011=w'; -a filler -select a,filler from t3 where a >= 'c-1011=w' and a <= 'c-1015=w'; -a filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1011=w filler-1 -c-1012=w filler-1 -c-1013=w filler-1 -c-1014=w filler-1 -c-1015=w filler-1 -c-1011=w filler-2 -c-1012=w filler-2 -c-1013=w filler-2 -c-1014=w filler-2 -c-1015=w filler-2 -select a,filler from t3 where (a>='c-1011=w' and a <= 'c-1013=w') or -(a>='c-1014=w' and a <= 
'c-1015=w'); -a filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1011=w filler-1 -c-1012=w filler-1 -c-1013=w filler-1 -c-1014=w filler-1 -c-1015=w filler-1 -c-1011=w filler-2 -c-1012=w filler-2 -c-1013=w filler-2 -c-1014=w filler-2 -c-1015=w filler-2 -insert into t3 values ('c-1013=z', 'c-1013=z', 'err'); -insert into t3 values ('a-1014=w', 'a-1014=w', 'err'); -select a,filler from t3 where (a>='c-1011=w' and a <= 'c-1013=w') or -(a>='c-1014=w' and a <= 'c-1015=w'); -a filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1011=w filler-1 -c-1012=w filler-1 -c-1013=w filler-1 -c-1014=w filler-1 -c-1015=w filler-1 -c-1011=w filler-2 -c-1012=w filler-2 -c-1013=w filler-2 -c-1014=w filler-2 -c-1015=w filler-2 -delete from t3 where b in ('c-1013=z', 'a-1014=w'); -select a,filler from t3 where a='c-1011=w' or a='c-1012=w' or a='c-1013=w' or -a='c-1014=w' or a='c-1015=w'; -a filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1011=w filler-1 -c-1012=w filler-1 -c-1013=w filler-1 -c-1014=w filler-1 -c-1015=w filler-1 -c-1011=w filler-2 -c-1012=w filler-2 -c-1013=w filler-2 -c-1014=w filler-2 -c-1015=w filler-2 -insert into t3 values ('c-1013=w', 'del-me', 'inserted'); -select a,filler from t3 where a='c-1011=w' or a='c-1012=w' or a='c-1013=w' or -a='c-1014=w' or a='c-1015=w'; -a filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1011=w filler-1 -c-1012=w filler-1 -c-1013=w filler-1 -c-1014=w filler-1 -c-1015=w filler-1 -c-1011=w filler-2 -c-1012=w filler-2 -c-1013=w filler-2 -c-1014=w filler-2 -c-1015=w filler-2 -c-1013=w inserted -delete from t3 where b='del-me'; -alter table t3 add primary key(b); -select b,filler from t3 where (b>='c-1011=w' and b<= 'c-1018=w') or -b IN ('c-1019=w', 'c-1020=w', 'c-1021=w', -'c-1022=w', 'c-1023=w', 'c-1024=w'); -b filler -c-1011=w filler -c-1012=w filler -c-1013=w 
filler -c-1014=w filler -c-1015=w filler -c-1016=w filler -c-1017=w filler -c-1018=w filler -c-1019=w filler -c-1020=w filler -c-1021=w filler -c-1022=w filler -c-1023=w filler -c-1024=w filler -select b,filler from t3 where (b>='c-1011=w' and b<= 'c-1020=w') or -b IN ('c-1021=w', 'c-1022=w', 'c-1023=w'); -b filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1016=w filler -c-1017=w filler -c-1018=w filler -c-1019=w filler -c-1020=w filler -c-1021=w filler -c-1022=w filler -c-1023=w filler -select b,filler from t3 where (b>='c-1011=w' and b<= 'c-1018=w') or -b IN ('c-1019=w', 'c-1020=w') or -(b>='c-1021=w' and b<= 'c-1023=w'); -b filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1016=w filler -c-1017=w filler -c-1018=w filler -c-1019=w filler -c-1020=w filler -c-1021=w filler -c-1022=w filler -c-1023=w filler -drop table if exists t4; -create table t4 (a varchar(10), b int, c char(10), filler char(200), -key idx1 (a, b, c)); -insert into t4 (filler) select concat('NULL-', 15-a) from t2 order by a limit 15; -insert into t4 (a,b,c,filler) -select 'b-1',NULL,'c-1', concat('NULL-', 15-a) from t2 order by a limit 15; -insert into t4 (a,b,c,filler) -select 'b-1',NULL,'c-222', concat('NULL-', 15-a) from t2 order by a limit 15; -insert into t4 (a,b,c,filler) -select 'bb-1',NULL,'cc-2', concat('NULL-', 15-a) from t2 order by a limit 15; -insert into t4 (a,b,c,filler) -select 'zz-1',NULL,'cc-2', 'filler-data' from t2 order by a limit 500; -explain -select * from t4 where a IS NULL and b IS NULL and (c IS NULL or c='no-such-row1' - or c='no-such-row2'); -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t4 range idx1 idx1 29 NULL 16 Using where; Rowid-ordered scan -select * from t4 where a IS NULL and b IS NULL and (c IS NULL or c='no-such-row1' - or c='no-such-row2'); -a b c filler -NULL NULL NULL NULL-15 -NULL NULL NULL NULL-14 -NULL NULL NULL NULL-13 -NULL 
NULL NULL NULL-12 -NULL NULL NULL NULL-11 -NULL NULL NULL NULL-10 -NULL NULL NULL NULL-9 -NULL NULL NULL NULL-8 -NULL NULL NULL NULL-7 -NULL NULL NULL NULL-6 -NULL NULL NULL NULL-5 -NULL NULL NULL NULL-4 -NULL NULL NULL NULL-3 -NULL NULL NULL NULL-2 -NULL NULL NULL NULL-1 -explain -select * from t4 where (a ='b-1' or a='bb-1') and b IS NULL and (c='c-1' or c='cc-2'); -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t4 range idx1 idx1 29 NULL 32 Using where; Rowid-ordered scan -select * from t4 where (a ='b-1' or a='bb-1') and b IS NULL and (c='c-1' or c='cc-2'); -a b c filler -b-1 NULL c-1 NULL-15 -b-1 NULL c-1 NULL-14 -b-1 NULL c-1 NULL-13 -b-1 NULL c-1 NULL-12 -b-1 NULL c-1 NULL-11 -b-1 NULL c-1 NULL-10 -b-1 NULL c-1 NULL-9 -b-1 NULL c-1 NULL-8 -b-1 NULL c-1 NULL-7 -b-1 NULL c-1 NULL-6 -b-1 NULL c-1 NULL-5 -b-1 NULL c-1 NULL-4 -b-1 NULL c-1 NULL-3 -b-1 NULL c-1 NULL-2 -b-1 NULL c-1 NULL-1 -bb-1 NULL cc-2 NULL-15 -bb-1 NULL cc-2 NULL-14 -bb-1 NULL cc-2 NULL-13 -bb-1 NULL cc-2 NULL-12 -bb-1 NULL cc-2 NULL-11 -bb-1 NULL cc-2 NULL-10 -bb-1 NULL cc-2 NULL-9 -bb-1 NULL cc-2 NULL-8 -bb-1 NULL cc-2 NULL-7 -bb-1 NULL cc-2 NULL-6 -bb-1 NULL cc-2 NULL-5 -bb-1 NULL cc-2 NULL-4 -bb-1 NULL cc-2 NULL-3 -bb-1 NULL cc-2 NULL-2 -bb-1 NULL cc-2 NULL-1 -select * from t4 ignore index(idx1) where (a ='b-1' or a='bb-1') and b IS NULL and (c='c-1' or c='cc-2'); -a b c filler -b-1 NULL c-1 NULL-15 -b-1 NULL c-1 NULL-14 -b-1 NULL c-1 NULL-13 -b-1 NULL c-1 NULL-12 -b-1 NULL c-1 NULL-11 -b-1 NULL c-1 NULL-10 -b-1 NULL c-1 NULL-9 -b-1 NULL c-1 NULL-8 -b-1 NULL c-1 NULL-7 -b-1 NULL c-1 NULL-6 -b-1 NULL c-1 NULL-5 -b-1 NULL c-1 NULL-4 -b-1 NULL c-1 NULL-3 -b-1 NULL c-1 NULL-2 -b-1 NULL c-1 NULL-1 -bb-1 NULL cc-2 NULL-15 -bb-1 NULL cc-2 NULL-14 -bb-1 NULL cc-2 NULL-13 -bb-1 NULL cc-2 NULL-12 -bb-1 NULL cc-2 NULL-11 -bb-1 NULL cc-2 NULL-10 -bb-1 NULL cc-2 NULL-9 -bb-1 NULL cc-2 NULL-8 -bb-1 NULL cc-2 NULL-7 -bb-1 NULL cc-2 NULL-6 -bb-1 NULL cc-2 NULL-5 -bb-1 NULL 
cc-2 NULL-4 -bb-1 NULL cc-2 NULL-3 -bb-1 NULL cc-2 NULL-2 -bb-1 NULL cc-2 NULL-1 -drop table t1, t2, t3, t4; -create table t1 (a int, b int not null,unique key (a,b),index(b)); -insert ignore into t1 values (1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(null,7),(9,9),(8,8),(7,7),(null,9),(null,9),(6,6); -Warnings: -Warning 1062 Duplicate entry '6-6' for key 'a' -create table t2 like t1; -insert into t2 select * from t1; -alter table t1 modify b blob not null, add c int not null, drop key a, add unique key (a,b(20),c), drop key b, add key (b(10)); -select * from t1 where a is null; -a b c -NULL 7 0 -NULL 9 0 -NULL 9 0 -select * from t1 where (a is null or a > 0 and a < 3) and b > 7 limit 3; -a b c -NULL 9 0 -NULL 9 0 -select * from t1 where a is null and b=9 or a is null and b=7 limit 3; -a b c -NULL 7 0 -NULL 9 0 -NULL 9 0 -drop table t1, t2; -set storage_engine= @save_storage_engine; -set @mrr_buffer_size_save= @@mrr_buffer_size; -set mrr_buffer_size=64; -Warnings: -Warning 1292 Truncated incorrect mrr_buffer_size value: '64' -create table t1(a int); -insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); -create table t2(a char(8), b char(8), c char(8), filler char(100), key(a,b,c) ) engine=TokuDB; -insert into t2 select -concat('a-', 1000 + A.a, '-a'), -concat('b-', 1000 + B.a, '-b'), -concat('c-', 1000 + C.a, '-c'), -'filler' -from t1 A, t1 B, t1 C; -explain -select count(length(a) + length(filler)) from t2 where a>='a-1000-a' and a <'a-1001-a'; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 range a a 9 NULL 100 Using where; Rowid-ordered scan -select count(length(a) + length(filler)) from t2 where a>='a-1000-a' and a <'a-1001-a'; -count(length(a) + length(filler)) -100 -drop table t2; -create table t2 (a char(100), b char(100), c char(100), d int, -filler char(10), key(d), primary key (a,b,c)) engine= tokudb; -insert into t2 select A.a, B.a, B.a, A.a, 'filler' from t1 A, t1 B; -explain select * from t2 force index (d) where d < 
10; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 range d d 5 NULL # Using where; Rowid-ordered scan -drop table t2; -drop table t1; -set @@mrr_buffer_size= @mrr_buffer_size_save; -create table t1 (f1 int not null, f2 int not null,f3 int not null, f4 char(1), primary key (f1,f2), key ix(f3))Engine=tokuDB; -select * from t1 where (f3>=5 and f3<=10) or (f3>=1 and f3<=4); -f1 f2 f3 f4 -1 1 1 A -2 2 2 A -3 3 3 A -4 4 4 A -5 5 5 A -6 6 6 A -7 7 7 A -8 8 8 A -9 9 9 A -10 10 10 A -drop table t1; - -BUG#37977: Wrong result returned on GROUP BY + OR + innodb - -CREATE TABLE t1 ( -`pk` int(11) NOT NULL AUTO_INCREMENT, -`int_nokey` int(11) NOT NULL, -`int_key` int(11) NOT NULL, -`date_key` date NOT NULL, -`date_nokey` date NOT NULL, -`time_key` time NOT NULL, -`time_nokey` time NOT NULL, -`datetime_key` datetime NOT NULL, -`datetime_nokey` datetime NOT NULL, -`varchar_key` varchar(5) DEFAULT NULL, -`varchar_nokey` varchar(5) DEFAULT NULL, -PRIMARY KEY (`pk`), -KEY `int_key` (`int_key`), -KEY `date_key` (`date_key`), -KEY `time_key` (`time_key`), -KEY `datetime_key` (`datetime_key`), -KEY `varchar_key` (`varchar_key`) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES -(1,5,5,'2009-10-16','2009-10-16','09:28:15','09:28:15','2007-09-14 05:34:08','2007-09-14 05:34:08','qk','qk'), -(2,6,6,'0000-00-00','0000-00-00','23:06:39','23:06:39','0000-00-00 00:00:00','0000-00-00 00:00:00','j','j'), -(3,10,10,'2000-12-18','2000-12-18','22:16:19','22:16:19','2006-11-04 15:42:50','2006-11-04 15:42:50','aew','aew'), -(4,0,0,'2001-09-18','2001-09-18','00:00:00','00:00:00','2004-03-23 13:23:35','2004-03-23 13:23:35',NULL,NULL), -(5,6,6,'2007-08-16','2007-08-16','22:13:38','22:13:38','2004-08-19 11:01:28','2004-08-19 11:01:28','qu','qu'); -select pk from t1 WHERE `varchar_key` > 'kr' group by pk; -pk -1 -5 -select pk from t1 WHERE `int_nokey` IS NULL OR `varchar_key` > 'kr' group by pk; -pk -1 -5 -drop table t1; -# -# BUG#39447: Error with NOT NULL condition and LIMIT 1 
-# -CREATE TABLE t1 ( -id int(11) NOT NULL, -parent_id int(11) DEFAULT NULL, -name varchar(10) DEFAULT NULL, -PRIMARY KEY (id), -KEY ind_parent_id (parent_id) -) ENGINE=TokuDB; -insert into t1 (id, parent_id, name) values -(10,NULL,'A'), -(20,10,'B'), -(30,10,'C'), -(40,NULL,'D'), -(50,40,'E'), -(60,40,'F'), -(70,NULL,'J'); -SELECT id FROM t1 WHERE parent_id IS NOT NULL ORDER BY id DESC LIMIT 1; -id -60 -This must show type=index, extra=Using where -explain SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE parent_id IS NOT NULL ORDER BY id DESC LIMIT 1; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL PRIMARY 4 NULL 1 Using where -SELECT * FROM t1 WHERE parent_id IS NOT NULL ORDER BY id DESC LIMIT 1; -id parent_id name -60 40 F -drop table t1; -# -# BUG#628785: multi_range_read.cc:430: int DsMrr_impl::dsmrr_init(): Assertion `do_sort_keys || do_rowid_fetch' failed -# -set @save_join_cache_level= @@join_cache_level; -set @save_optimizer_switch= @@optimizer_switch; -SET SESSION join_cache_level=9; -Warnings: -Warning 1292 Truncated incorrect join_cache_level value: '9' -SET SESSION optimizer_switch='mrr_sort_keys=off'; -CREATE TABLE `t1` ( -`pk` int(11) NOT NULL AUTO_INCREMENT, -`col_int_nokey` int(11) DEFAULT NULL, -`col_int_key` int(11) DEFAULT NULL, -`col_varchar_key` varchar(1) DEFAULT NULL, -`col_varchar_nokey` varchar(1) DEFAULT NULL, -PRIMARY KEY (`pk`), -KEY `col_varchar_key` (`col_varchar_key`,`col_int_key`) -) ENGINE=TokuDB AUTO_INCREMENT=101 DEFAULT CHARSET=latin1; -INSERT INTO `t1` VALUES (1,6,NULL,'r','r'); -INSERT INTO `t1` VALUES (2,8,0,'c','c'); -INSERT INTO `t1` VALUES (97,7,0,'z','z'); -INSERT INTO `t1` VALUES (98,1,1,'j','j'); -INSERT INTO `t1` VALUES (99,7,8,'c','c'); -INSERT INTO `t1` VALUES (100,2,5,'f','f'); -SELECT table1 .`col_varchar_key` -FROM t1 table1 STRAIGHT_JOIN ( t1 table3 JOIN t1 table4 ON table4 .`pk` = table3 .`col_int_nokey` ) ON table4 .`col_varchar_nokey` ; -col_varchar_key -Warnings: 
-Warning 1292 Truncated incorrect DOUBLE value: 'r' -Warning 1292 Truncated incorrect DOUBLE value: 'r' -Warning 1292 Truncated incorrect DOUBLE value: 'r' -Warning 1292 Truncated incorrect DOUBLE value: 'r' -Warning 1292 Truncated incorrect DOUBLE value: 'r' -Warning 1292 Truncated incorrect DOUBLE value: 'r' -Warning 1292 Truncated incorrect DOUBLE value: 'c' -Warning 1292 Truncated incorrect DOUBLE value: 'c' -Warning 1292 Truncated incorrect DOUBLE value: 'c' -Warning 1292 Truncated incorrect DOUBLE value: 'c' -Warning 1292 Truncated incorrect DOUBLE value: 'c' -Warning 1292 Truncated incorrect DOUBLE value: 'c' -DROP TABLE t1; -set join_cache_level=@save_join_cache_level; -set optimizer_switch=@save_optimizer_switch; -# -# BUG#623300: Query with join_cache_level = 6 returns extra rows in maria-5.3-dsmrr-cpk -# -CREATE TABLE t1 ( -pk int(11) NOT NULL AUTO_INCREMENT, -col_int_nokey int(11) DEFAULT NULL, -PRIMARY KEY (pk) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES (10,7); -INSERT INTO t1 VALUES (11,1); -INSERT INTO t1 VALUES (12,5); -INSERT INTO t1 VALUES (13,3); -INSERT INTO t1 VALUES (14,6); -INSERT INTO t1 VALUES (15,92); -INSERT INTO t1 VALUES (16,7); -INSERT INTO t1 VALUES (17,NULL); -INSERT INTO t1 VALUES (18,3); -INSERT INTO t1 VALUES (19,5); -INSERT INTO t1 VALUES (20,1); -INSERT INTO t1 VALUES (21,2); -INSERT INTO t1 VALUES (22,NULL); -INSERT INTO t1 VALUES (23,1); -INSERT INTO t1 VALUES (24,0); -INSERT INTO t1 VALUES (25,210); -INSERT INTO t1 VALUES (26,8); -INSERT INTO t1 VALUES (27,7); -INSERT INTO t1 VALUES (28,5); -INSERT INTO t1 VALUES (29,NULL); -CREATE TABLE t2 ( -pk int(11) NOT NULL AUTO_INCREMENT, -col_int_nokey int(11) DEFAULT NULL, -PRIMARY KEY (pk) -) ENGINE=TokuDB; -INSERT INTO t2 VALUES (1,NULL); -INSERT INTO t2 VALUES (2,7); -INSERT INTO t2 VALUES (3,9); -INSERT INTO t2 VALUES (4,7); -INSERT INTO t2 VALUES (5,4); -INSERT INTO t2 VALUES (6,2); -INSERT INTO t2 VALUES (7,6); -INSERT INTO t2 VALUES (8,8); -INSERT INTO t2 VALUES (9,NULL); 
-INSERT INTO t2 VALUES (10,5); -INSERT INTO t2 VALUES (11,NULL); -INSERT INTO t2 VALUES (12,6); -INSERT INTO t2 VALUES (13,188); -INSERT INTO t2 VALUES (14,2); -INSERT INTO t2 VALUES (15,1); -INSERT INTO t2 VALUES (16,1); -INSERT INTO t2 VALUES (17,0); -INSERT INTO t2 VALUES (18,9); -INSERT INTO t2 VALUES (19,NULL); -INSERT INTO t2 VALUES (20,4); -set @my_save_join_cache_level= @@join_cache_level; -SET join_cache_level = 0; -SELECT table2.col_int_nokey -FROM t1 table1 JOIN t2 table2 ON table2.pk = table1.col_int_nokey -WHERE table1.pk ; -col_int_nokey -2 -4 -4 -4 -6 -6 -6 -7 -8 -9 -9 -NULL -NULL -NULL -SET join_cache_level = 6; -SELECT table2.col_int_nokey -FROM t1 table1 JOIN t2 table2 ON table2.pk = table1.col_int_nokey -WHERE table1.pk ; -col_int_nokey -2 -4 -4 -4 -6 -6 -6 -7 -8 -9 -9 -NULL -NULL -NULL -set join_cache_level= @my_save_join_cache_level; -drop table t1, t2; -# -# BUG#623315: Query returns less rows when run with join_cache_level=6 on maria-5.3-dsmrr-cpk -# -CREATE TABLE t1 ( -pk int(11) NOT NULL AUTO_INCREMENT, -col_int_nokey int(11) DEFAULT NULL, -col_int_key int(11) DEFAULT NULL, -col_varchar_key varchar(1) DEFAULT NULL, -PRIMARY KEY (pk), -KEY col_int_key (col_int_key), -KEY col_varchar_key (col_varchar_key,col_int_key) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES (10,7,8,'v'); -INSERT INTO t1 VALUES (11,1,9,'r'); -INSERT INTO t1 VALUES (12,5,9,'a'); -INSERT INTO t1 VALUES (13,3,186,'m'); -INSERT INTO t1 VALUES (14,6,NULL,'y'); -INSERT INTO t1 VALUES (15,92,2,'j'); -INSERT INTO t1 VALUES (16,7,3,'d'); -INSERT INTO t1 VALUES (17,NULL,0,'z'); -INSERT INTO t1 VALUES (18,3,133,'e'); -INSERT INTO t1 VALUES (19,5,1,'h'); -INSERT INTO t1 VALUES (20,1,8,'b'); -INSERT INTO t1 VALUES (21,2,5,'s'); -INSERT INTO t1 VALUES (22,NULL,5,'e'); -INSERT INTO t1 VALUES (23,1,8,'j'); -INSERT INTO t1 VALUES (24,0,6,'e'); -INSERT INTO t1 VALUES (25,210,51,'f'); -INSERT INTO t1 VALUES (26,8,4,'v'); -INSERT INTO t1 VALUES (27,7,7,'x'); -INSERT INTO t1 VALUES (28,5,6,'m'); 
-INSERT INTO t1 VALUES (29,NULL,4,'c'); -set @my_save_join_cache_level= @@join_cache_level; -SET join_cache_level=6; -select count(*) from -(SELECT table2.pk FROM -t1 LEFT JOIN t1 table2 JOIN t1 table3 ON table3.col_varchar_key = table2.col_varchar_key -ON table3.col_int_nokey) foo; -count(*) -480 -SET join_cache_level=0; -select count(*) from -(SELECT table2.pk FROM -t1 LEFT JOIN t1 table2 JOIN t1 table3 ON table3.col_varchar_key = table2.col_varchar_key -ON table3.col_int_nokey) foo; -count(*) -480 -set join_cache_level= @my_save_join_cache_level; -drop table t1; -# -# BUG#671340: Diverging results in with mrr_sort_keys=ON|OFF and join_cache_level=5 -# -CREATE TABLE t1 ( -pk int(11) NOT NULL AUTO_INCREMENT, -col_int_key int(11) NOT NULL, -col_varchar_key varchar(1) NOT NULL, -col_varchar_nokey varchar(1) NOT NULL, -PRIMARY KEY (pk), -KEY col_int_key (col_int_key), -KEY col_varchar_key (col_varchar_key,col_int_key) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES -(10,8,'v','v'), -(11,8,'f','f'), -(12,5,'v','v'), -(13,8,'s','s'), -(14,8,'a','a'), -(15,6,'p','p'), -(16,7,'z','z'), -(17,2,'a','a'), -(18,5,'h','h'), -(19,7,'h','h'), -(20,2,'v','v'), -(21,9,'v','v'), -(22,142,'b','b'), -(23,3,'y','y'), -(24,0,'v','v'), -(25,3,'m','m'), -(26,5,'z','z'), -(27,9,'n','n'), -(28,1,'d','d'), -(29,107,'a','a'); -CREATE TABLE t2 ( -pk int(11) NOT NULL AUTO_INCREMENT, -col_int_key int(11) NOT NULL, -col_varchar_key varchar(1) NOT NULL, -col_varchar_nokey varchar(1) NOT NULL, -PRIMARY KEY (pk), -KEY col_int_key (col_int_key), -KEY col_varchar_key (col_varchar_key,col_int_key) -) ENGINE=TokuDB; -INSERT INTO t2 VALUES -(1,9,'x','x'), -(2,5,'g','g'), -(3,1,'o','o'), -(4,0,'g','g'), -(5,1,'v','v'), -(6,190,'m','m'), -(7,6,'x','x'), -(8,3,'c','c'), -(9,4,'z','z'), -(10,3,'i','i'), -(11,186,'x','x'), -(12,1,'g','g'), -(13,8,'q','q'), -(14,226,'m','m'), -(15,133,'p','p'), -(16,6,'e','e'), -(17,3,'t','t'), -(18,8,'j','j'), -(19,5,'h','h'), -(20,7,'w','w'); -SELECT count(*), 
sum(table1.col_int_key*table2.pk) -FROM -t2 AS table1, t1 AS table2, t2 AS table3 -WHERE -table3.col_varchar_nokey = table2.col_varchar_key AND table3.pk > table2.col_varchar_nokey ; -count(*) sum(table1.col_int_key*table2.pk) -240 185955 -Warnings: -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect 
DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'z' -Warning 1292 Truncated incorrect DOUBLE value: 'z' -Warning 1292 Truncated incorrect DOUBLE value: 'z' -Warning 1292 Truncated incorrect DOUBLE value: 'z' -set @my_save_join_cache_level= @@join_cache_level; -set @my_save_join_buffer_size= @@join_buffer_size; -set join_cache_level=6; -set join_buffer_size=1536; -SELECT count(*), sum(table1.col_int_key*table2.pk) -FROM -t2 AS table1, t1 AS table2, t2 AS table3 -WHERE -table3.col_varchar_nokey = table2.col_varchar_key AND table3.pk > table2.col_varchar_nokey ; -count(*) sum(table1.col_int_key*table2.pk) -240 185955 -drop 
table t1,t2; -set join_cache_level=@my_save_join_cache_level; -set join_buffer_size=@my_save_join_buffer_size; -# -# BUG#665669: Result differences on query re-execution -# -create table t1 (pk int primary key, b int, c int default 0, index idx(b)) engine=Tokudb; -insert into t1(pk,b) values (3, 30), (2, 20), (9, 90), (7, 70), (4, 40), (5, 50), (10, 100), (12, 120); -set @bug665669_tmp=@@optimizer_switch; -set optimizer_switch='mrr=off'; -explain select * from t1 where b > 1000; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range idx idx 5 NULL 1 Using where -# The following two must produce indentical results: -select * from t1 where pk < 2 or pk between 3 and 4; -pk b c -3 30 0 -4 40 0 -select * from t1 where pk < 2 or pk between 3 and 4; -pk b c -3 30 0 -4 40 0 -drop table t1; -set optimizer_switch = @bug665669_tmp; -# -# Bug#43360 - Server crash with a simple multi-table update -# -CREATE TABLE t1 ( -a CHAR(2) NOT NULL PRIMARY KEY, -b VARCHAR(20) NOT NULL, -KEY (b) -) ENGINE=TokuDB; -CREATE TABLE t2 ( -a CHAR(2) NOT NULL PRIMARY KEY, -b VARCHAR(20) NOT NULL, -KEY (b) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES -('AB','MySQLAB'), -('JA','Sun Microsystems'), -('MS','Microsoft'), -('IB','IBM- Inc.'), -('GO','Google Inc.'); -INSERT INTO t2 VALUES -('AB','Sweden'), -('JA','USA'), -('MS','United States of America'), -('IB','North America'), -('GO','South America'); -Warnings: -Warning 1265 Data truncated for column 'b' at row 3 -UPDATE t1,t2 SET t1.b=UPPER(t1.b) WHERE t1.b LIKE 'United%'; -SELECT * FROM t1; -a b -GO Google Inc. -IB IBM- Inc. 
-MS Microsoft -AB MySQLAB -JA Sun Microsystems -SELECT * FROM t2; -a b -IB North America -GO South America -AB Sweden -MS United States of Ame -JA USA -DROP TABLE t1,t2; -# -# Testcase backport: Bug#43249 -# (Note: Fixed by patch for BUG#42580) -# -CREATE TABLE t1(c1 TIME NOT NULL, c2 TIME NULL, c3 DATE, PRIMARY KEY(c1), UNIQUE INDEX(c2)) engine=Tokudb; -INSERT INTO t1 VALUES('8:29:45',NULL,'2009-02-01'); -SELECT * FROM t1 WHERE c2 <=> NULL ORDER BY c2 LIMIT 2; -c1 c2 c3 -08:29:45 NULL 2009-02-01 -SELECT * FROM t1 WHERE c2 <=> NULL ORDER BY c2 LIMIT 2; -c1 c2 c3 -08:29:45 NULL 2009-02-01 -drop table `t1`; -# -# BUG#707925: Wrong result with join_cache_level=6 optimizer_use_mrr = -# force (incremental, BKA join) -# -set @_save_join_cache_level= @@join_cache_level; -set join_cache_level = 6; -CREATE TABLE t1 ( -f1 int(11), f2 int(11), f3 varchar(1), f4 varchar(1), -PRIMARY KEY (f1), -KEY (f3), -KEY (f2) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES ('11','8','f','f'),('12','5','v','v'),('13','8','s','s'), -('14','8','a','a'),('15','6','p','p'),('16','7','z','z'),('17','2','a','a'), -('18','5','h','h'),('19','7','h','h'),('20','2','v','v'),('21','9','v','v'), -('22','142','b','b'),('23','3','y','y'),('24','0','v','v'),('25','3','m','m'), -('26','5','z','z'),('27','9','n','n'),('28','1','d','d'),('29','107','a','a'); -select count(*) from ( -SELECT alias1.f2 -FROM -t1 AS alias1 JOIN ( -t1 AS alias2 FORCE KEY (f3) JOIN -t1 AS alias3 FORCE KEY (f2) ON alias3.f2 = alias2.f2 AND alias3.f4 = alias2.f3 -) ON alias3.f1 <= alias2.f1 -) X; -count(*) -361 -set join_cache_level=@_save_join_cache_level; -set optimizer_switch= @innodb_mrr_tmp; -drop table t1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/suite.opt mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/suite.opt --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/suite.opt 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/suite.opt 2014-10-08 
13:19:52.000000000 +0000 @@ -1 +1 @@ ---tokudb --plugin-load=$HA_TOKUDB_SO +--tokudb --plugin-load=$HA_TOKUDB_SO --loose-tokudb-check-jemalloc=0 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/4676.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/4676.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/4676.test 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/4676.test 1970-01-01 00:00:00.000000000 +0000 @@ -1,15 +0,0 @@ ---source include/have_partition.inc - -let $engine='tokudb'; - ---disable_warnings -DROP TABLE IF EXISTS t; ---enable_warnings - -eval CREATE TABLE t (a INT) ENGINE=$engine PARTITION BY KEY (a) (PARTITION part0, PARTITION part1); -SHOW CREATE TABLE t; - -ALTER TABLE t TRUNCATE PARTITION part0; -ALTER TABLE t TRUNCATE PARTITION part1; - -DROP TABLE IF EXISTS t; \ No newline at end of file diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/4677.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/4677.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/4677.test 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/4677.test 1970-01-01 00:00:00.000000000 +0000 @@ -1,30 +0,0 @@ -let $engine='tokudb'; - ---disable_warnings -drop table if exists t; ---enable_warnings - -eval create table t (a int primary key) engine=$engine; - -connect (conn1,localhost,root,,); - -connection default; -begin; -insert into t values (1); -insert into t values (3); - -connection conn1; -begin; -insert into t values (2); -insert into t values (4); - -connection default; -commit; - -connection conn1; -commit; - -connection default; -disconnect conn1; - -drop table t; \ No newline at end of file diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/fractional_time.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/fractional_time.test --- 
mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/fractional_time.test 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/fractional_time.test 1970-01-01 00:00:00.000000000 +0000 @@ -1,36 +0,0 @@ -SET DEFAULT_STORAGE_ENGINE = 'tokudb'; - ---disable_warnings -DROP TABLE IF EXISTS foo; ---enable_warnings -create table foo (a timestamp(6), b timestamp(4), c timestamp(5), primary key (a))engine=tokudb; - -insert into foo values ('2010-12-10 14:12:09.123452', '2010-12-10 14:12:09.123416', '2010-12-10 14:12:09.123451'); -insert into foo values ('2010-12-10 14:12:09.123454', '2010-12-10 14:12:09.123416', '2010-12-10 14:12:09.123451'); -insert into foo values ('2010-12-10 14:12:09.123451', '2010-12-10 14:12:09.123416', '2010-12-10 14:12:09.123451'); -insert into foo values ('2010-12-10 14:12:09.123453', '2010-12-10 14:12:09.123416', '2010-12-10 14:12:09.123451'); - -select * from foo; -explain select * from foo where a > '2010-12-10 14:12:09.123452'; -select * from foo where a > '2010-12-10 14:12:09.123452'; - -alter table foo change a a datetime(6), change b b datetime(4), change c c datetime(5); -show create table foo; -select * from foo; -explain select * from foo where a > '2010-12-10 14:12:09.123452'; -select * from foo where a > '2010-12-10 14:12:09.123452'; -drop table foo; - -create table foo (a time(6), b time(4), c time(5), primary key (a))engine=TokuDB; -insert into foo values ('14:12:09.123452', '14:12:09.123416', '14:12:09.123451'); -insert into foo values ('14:12:09.123454', '14:12:09.123416', '14:12:09.123451'); -insert into foo values ('14:12:09.123451', '14:12:09.123416', '14:12:09.123451'); -insert into foo values ('14:12:09.123453', '14:12:09.123416', '14:12:09.123451'); - -select * from foo; -explain select * from foo where a > '14:12:09.123452'; -select * from foo where a > '14:12:09.123452'; - - -# Final cleanup. 
-DROP TABLE foo; \ No newline at end of file diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/mdev5932.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/mdev5932.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/mdev5932.test 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/mdev5932.test 2014-10-08 13:19:51.000000000 +0000 @@ -4,6 +4,7 @@ disable_warnings; drop table if exists t1,t2; drop table if exists t1i,t2i; +drop table if exists tsub,t3; enable_warnings; CREATE TABLE t1 (a CHAR(3), INDEX(a)) ENGINE=TokuDB; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/rpl_mixed_replace_into.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/rpl_mixed_replace_into.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/rpl_mixed_replace_into.test 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/rpl_mixed_replace_into.test 2014-10-08 13:19:51.000000000 +0000 @@ -1,6 +1,6 @@ source include/have_tokudb.inc; -source include/master-slave.inc; source include/have_binlog_format_mixed.inc; +source include/master-slave.inc; set default_storage_engine='tokudb'; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/rpl_row_replace_into.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/rpl_row_replace_into.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/rpl_row_replace_into.test 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/rpl_row_replace_into.test 2014-10-08 13:19:51.000000000 +0000 @@ -1,6 +1,6 @@ source include/have_tokudb.inc; -source include/master-slave.inc; source include/have_binlog_format_row.inc; +source include/master-slave.inc; set default_storage_engine='tokudb'; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/rpl_stmt_replace_into.test 
mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/rpl_stmt_replace_into.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/rpl_stmt_replace_into.test 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/rpl_stmt_replace_into.test 2014-10-08 13:19:51.000000000 +0000 @@ -1,6 +1,6 @@ source include/have_tokudb.inc; -source include/master-slave.inc; source include/have_binlog_format_statement.inc; +source include/master-slave.inc; set default_storage_engine='tokudb'; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/simple_icp.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/simple_icp.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/simple_icp.test 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/simple_icp.test 1970-01-01 00:00:00.000000000 +0000 @@ -1,43 +0,0 @@ -#-- source include/have_tokudb.inc - ---disable_warnings -drop table if exists a,b,c,foo; ---enable_warnings - -create table a (a int auto_increment, primary key (a)) engine=TokuDB; -create table b (a int auto_increment, primary key (a)) engine=TokuDB; -create table c (a int auto_increment, primary key (a)) engine=TokuDB; - -insert into a values (),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(); -insert into b values (),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(); -insert into c values (),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(); - -create table foo (a int, b int, c int, d int, e int, key(a,b,c)) engine=TokuDB; - -insert into foo (a,b,c) select * from a,b,c; - -flush status; -show status like '%Handler_read_next%'; -explain select * from foo where a between 5 and 6 and c=10; -select * from foo where a between 5 and 6 and c=10; -show status like '%Handler_read_next%'; - -flush status; -show status like '%Handler_read_prev%'; -explain select * from foo where a between 5 and 6 and c=10; -select * 
from foo where a between 5 and 6 and c=10 order by a desc; -show status like '%Handler_read_prev%'; - -flush status; -show status like '%Handler_read_prev%'; -explain select * from foo where a > 19 and c=10; -select * from foo where a > 19 and c=10 order by a desc; -show status like '%Handler_read_prev%'; - -flush status; -show status like '%Handler_read_next%'; -explain select * from foo where a > 19 and c=10; -select * from foo where a > 19 and c=10; -show status like '%Handler_read_next%'; - -drop table foo,a,b,c; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb718.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb718.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb718.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb718.test 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,13 @@ +# test DB-718, a crash caused by broken error handling in tokudb's fractal_tree_info information schema +source include/have_tokudb.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t; +enable_warnings; +create table t (id int primary key); +begin; +insert into t values (1),(2); +--error 34542 +select * from information_schema.tokudb_fractal_tree_info; +commit; +drop table t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_mrr2.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_mrr2.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_mrr2.test 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_mrr2.test 1970-01-01 00:00:00.000000000 +0000 @@ -1,213 +0,0 @@ --- source include/have_maria.inc -# -# MRR/Maria tests. 
-# - ---disable_warnings -drop table if exists t1,t2,t3,t4; ---enable_warnings - -set @maria_mrr_tmp=@@optimizer_switch; -set optimizer_switch='mrr=on,mrr_sort_keys=on,index_condition_pushdown=on'; - -set @mrr_buffer_size_save= @@mrr_buffer_size; - -set @save_storage_engine= @@storage_engine; -set storage_engine=TokuDB; - ---source include/mrr_tests.inc -set storage_engine= @save_storage_engine; - -set @@mrr_buffer_size= @mrr_buffer_size_save; - ---echo # ---echo # Crash in quick_range_seq_next() in maria-5.3-dsmrr-cpk with join_cache_level = {8,1} ---echo # -set @save_join_cache_level= @@join_cache_level; -SET SESSION join_cache_level = 8; -CREATE TABLE `t1` ( - `col_int_key` int(11) DEFAULT NULL, - `col_datetime_key` datetime DEFAULT NULL, - `col_varchar_key` varchar(1) DEFAULT NULL, - `col_varchar_nokey` varchar(1) DEFAULT NULL, - KEY `col_varchar_key` (`col_varchar_key`,`col_int_key`) -) ENGINE=TokuDB DEFAULT CHARSET=latin1; -INSERT INTO `t1` VALUES (6,'2005-10-07 00:00:00','e','e'); -INSERT INTO `t1` VALUES (51,'2000-07-15 05:00:34','f','f'); -CREATE TABLE `t2` ( - `col_int_key` int(11) DEFAULT NULL, - `col_datetime_key` datetime DEFAULT NULL, - `col_varchar_key` varchar(1) DEFAULT NULL, - `col_varchar_nokey` varchar(1) DEFAULT NULL, - KEY `col_varchar_key` (`col_varchar_key`,`col_int_key`) -) ENGINE=TokuDB DEFAULT CHARSET=latin1 PAGE_CHECKSUM=1; -INSERT INTO `t2` VALUES (2,'2004-10-11 18:13:16','w','w'); -INSERT INTO `t2` VALUES (2,'1900-01-01 00:00:00','d','d'); -SELECT table2 .`col_datetime_key` -FROM t2 JOIN ( t1 table2 JOIN t2 table3 ON table3 .`col_varchar_key` < table2 .`col_varchar_key` ) ON table3 .`col_varchar_nokey` ; - -drop table t1, t2; -set join_cache_level=@save_join_cache_level; - -# -# Bug #665049: index condition pushdown with Maria -# - -CREATE TABLE t1( - pk int NOT NULL, i int NOT NULL, v varchar(1) NOT NULL, - PRIMARY KEY (pk), INDEX idx (v, i) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES - (1,9,'x'), (2,5,'g'), (3,1,'o'), (4,0,'g'), 
(5,1,'v'), - (6,190,'m'), (7,6,'x'), (8,3,'c'), (9,4,'z'), (10,3,'i'), - (11,186,'x'), (12,1,'g'), (13,8,'q'), (14,226,'m'), (15,133,'p'); - -CREATE TABLE t2( - pk int NOT NULL, i int NOT NULL, v varchar(1) NOT NULL, - PRIMARY KEY (pk), INDEX idx (v, i) -) ENGINE=TokuDB; -INSERT INTO t2 SELECT * FROM t1; -INSERT INTO t2 VALUES (77, 333, 'z'); - -CREATE TABLE t3( - pk int NOT NULL, i int NOT NULL, v varchar(1) NOT NULL, - PRIMARY KEY (pk), INDEX idx (v, i) -) ENGINE=TokuDB; -INSERT INTO t3 SELECT * FROM t1; -INSERT INTO t3 VALUES - (88, 442, 'y'), (99, 445, 'w'), (87, 442, 'z'), (98, 445, 'v'), (86, 442, 'x'), - (97, 445, 't'), (85, 442, 'b'), (96, 445, 'l'), (84, 442, 'a'), (95, 445, 'k'); - -set @save_join_cache_level=@@join_cache_level; -set join_cache_level=1; - -SELECT COUNT(t1.v) FROM t1, t2 IGNORE INDEX (idx), t3 IGNORE INDEX (idx) - WHERE t3.v = t2.v AND t3.i < t2.i AND t3.pk > 0 AND t2.pk > 0; -EXPLAIN -SELECT COUNT(t1.v) FROM t1, t2 IGNORE INDEX (idx), t3 IGNORE INDEX (idx) - WHERE t3.v = t2.v AND t3.i < t2.i AND t3.pk > 0 AND t2.pk > 0; - -SELECT COUNT(t1.v) FROM t1, t2, t3 - WHERE t3.v = t2.v AND t3.i < t2.i AND t3.pk > 0 AND t2.pk > 0; -EXPLAIN - SELECT COUNT(t1.v) FROM t1, t2, t3 - WHERE t3.v = t2.v AND t3.i < t2.i AND t3.pk > 0 AND t2.pk > 0; - -set join_cache_level=@save_join_cache_level; - -DROP TABLE t1,t2,t3; - ---echo # ---echo # BUG#671361: virtual int Mrr_ordered_index_reader::refill_buffer(): Assertion `!know_key_tuple_params ---echo # (works only on Maria because we need 1024-byte long key) ---echo # - -SET SESSION join_cache_level = 6; -SET SESSION join_buffer_size = 1024; -CREATE TABLE t1 ( - pk int(11) NOT NULL AUTO_INCREMENT, - col_varchar_1024_latin1_key varchar(1024) DEFAULT NULL, - PRIMARY KEY (pk), - KEY col_varchar_1024_latin1_key (col_varchar_1024_latin1_key) -) ENGINE=TokuDB; - -INSERT INTO t1 VALUES - (1,'z'), (2,'abcdefjhjkl'), (3,'in'), (4,'abcdefjhjkl'), (6,'abcdefjhjkl'), - (11,'zx'), (12,'abcdefjhjm'), (13,'jn'), 
(14,'abcdefjhjp'), (16,'abcdefjhjr'); - -CREATE TABLE t2 ( - col_varchar_10_latin1 varchar(10) DEFAULT NULL -) ENGINE=TokuDB; -INSERT INTO t2 VALUES ('foo'), ('foo'); - -EXPLAIN SELECT count(*) -FROM t1 AS table1, t2 AS table2 -WHERE - table1.col_varchar_1024_latin1_key = table2.col_varchar_10_latin1 AND table1.pk<>0 ; - -SELECT count(*) -FROM t1 AS table1, t2 AS table2 -WHERE - table1.col_varchar_1024_latin1_key = table2.col_varchar_10_latin1 AND table1.pk<>0 ; - -drop table t1, t2; - ---echo # ---echo # BUG#693747: Assertion multi_range_read.cc:908: int DsMrr_impl::dsmrr_init( ---echo # -set @_save_join_cache_level= @@join_cache_level; -set @_save_join_buffer_size= @@join_buffer_size; - -set join_cache_level=8; -set join_buffer_size=10240; - -CREATE TABLE t1 ( - f2 varchar(32) COLLATE latin1_swedish_ci, - f3 int(11), - f4 varchar(1024) COLLATE utf8_bin, - f5 varchar(1024) COLLATE latin1_bin, - KEY (f5) -) ENGINE=TokuDB; - ---echo # Fill the table with some data ---disable_query_log -INSERT IGNORE INTO t1 VALUES -('cueikuirqr','0','f4-data','hcueikuirqrzflno'),('her','0','f4-data','ehcueikuirqrzfln'), -('YKAOE','0','f4-data','qieehcueikuirqrz'),('youre','0','f4-data','nkqieehcueikuirq'), -('b','0','f4-data','the'),('MGUDG','0','f4-data','m'), -('UXAGU','0','f4-data','HZXVA'),('bwbgsnkqie','0','f4-data','something'), -('s','0','f4-data','slelfhjawbwbgsnk'),('the','0','f4-data','if'), -('TDLKE','0','f4-data','MGWNJ'),('do','0','f4-data','see'), -('why','0','f4-data','mean'),('THKCG','0','f4-data','YFLDY'), -('x','0','f4-data','e'),('yncitaeysb','0','f4-data','tgyncitaeysbgucs'), -('ZEOXX','0','f4-data','jawbwbgsnkqieehc'),('hjawbwbgsn','0','f4-data','fhjawbwbgsnkqiee'), -('all','0','f4-data','sbgucsgqslelfhja'),('the','0','f4-data','would'), -('mtgyncitae','0','f4-data','ISNQQ'),('KNCUI','0','f4-data','want'), -('is','0','f4-data','i'),('out','0','f4-data','jvcmjlmtgyncitae'), -('it','0','f4-data','you'),('LHDIH','0','f4-data','txmtxyjvcmjlmtgy'), 
-('z','0','f4-data','ntxmtxyjvcmjlmtg'),('vyhnmvgmcn','0','f4-data','AIGQK'), -('ytvyhnmvgm','0','f4-data','z'),('t','0','f4-data','on'), -('xqegbytvyh','0','f4-data','ixqegbytvyhnmvgm'),('WGVRU','0','f4-data','h'), -('b','0','f4-data','z'),('who','0','f4-data','gddixqegbytvy'), -('PMLFL','0','f4-data','vgmcntxmtxyjvcmj'),('back','0','f4-data','n'), -('i','0','f4-data','PZGUB'),('f','0','f4-data','the'), -('PNXVP','0','f4-data','v'),('MAKKL','0','f4-data','CGCWF'), -('RMDAV','0','f4-data','v'),('l','0','f4-data','n'), -('rhnoypgddi','0','f4-data','VIZNE'),('t','0','f4-data','a'), -('like','0','f4-data','JSHPZ'),('pskeywslmk','0','f4-data','q'), -('QZZJJ','0','f4-data','c'),('atlxepskey','0','f4-data','YJRMA'), -('YUVOU','0','f4-data','eywslmkdrhnoypgd'),('some','0','f4-data','r'), -('c','0','f4-data','her'),('o','0','f4-data','EMURT'), -('if','0','f4-data','had'),('when','0','f4-data','CLVWT'), -('blfufrcdjm','0','f4-data','IZCZN'),('vutblfufrc','0','f4-data','how'), -('why','0','f4-data','I'),('IXLYQ','0','f4-data','weuwuvutblfufrcd'), -('here','0','f4-data','m'),('ZOCTJ','0','f4-data','IDSFD'), -('kqsweuwuvu','0','f4-data','oh'),('ykqsweuwuv','0','f4-data','zykqsweuwuvutblf'), -('zezykqsweu','0','f4-data','t'),('q','0','f4-data','o'), -('IBKAU','0','f4-data','oh'),('ivjisuzezy','0','f4-data','XHXKE'), -('xsivjisuze','0','f4-data','plxsivjisuzezykq'),('have','0','f4-data','uvplxsivjisuzezy'), -('on','0','f4-data','me'),('ijkfuvplxs','0','f4-data','OGEHV'), -('u','0','f4-data','okay'),('i','0','f4-data','pajzbbojshnijkfu'), -('of','0','f4-data','g'),('for','0','f4-data','Im'), -('or','0','f4-data','ZOJHX'),('n','0','f4-data','you'), -('that','0','f4-data','just'),('bbojshnijk','0','f4-data','JYGSJ'), -('k','0','f4-data','y'),('k','0','f4-data','y'), -('be','0','f4-data','m'),('fnbmxwicrk','0','f4-data','t'), -('yaffpegvav','0','f4-data','have'),('crkdymahya','0','f4-data','QQWQI'), 
-('t','0','f4-data','hnijkfuvplxsivji'),('dgxpajzbbo','0','f4-data','vavdgxpajzbbojsh'), -('g','0','f4-data','pegvavdgxpajzbbo'),('Im','0','f4-data','ffpegvavdgxpajzb'); ---enable_query_log - - -SELECT alias2.* , alias1.f2 -FROM - t1 AS alias1 - LEFT JOIN t1 AS alias2 ON alias1.f2 = alias2.f5 -WHERE - alias2.f3 < 0; - -set join_cache_level=@_save_join_cache_level; -set join_buffer_size=@_save_join_buffer_size; -set optimizer_switch=@maria_mrr_tmp; - -drop table t1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_mrr.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_mrr.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_mrr.test 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_mrr.test 1970-01-01 00:00:00.000000000 +0000 @@ -1,462 +0,0 @@ -#-- source include/have_tokudb.inc - ---disable_warnings -drop table if exists t1,t2,t3,t4; ---enable_warnings - -set @save_storage_engine= @@storage_engine; -set storage_engine=TokuDB; - -set @innodb_mrr_tmp=@@optimizer_switch; -set optimizer_switch='mrr=on,mrr_sort_keys=on,index_condition_pushdown=on'; - ---source include/mrr_tests.inc - -set storage_engine= @save_storage_engine; - -# Try big rowid sizes -set @mrr_buffer_size_save= @@mrr_buffer_size; -set mrr_buffer_size=64; - -# By default InnoDB will fill values only for key parts used by the query, -# which will cause DS-MRR to supply an invalid tuple on scan restoration. 
-# Verify that DS-MRR's code extra(HA_EXTRA_RETRIEVE_ALL_COLS) call has effect: -create table t1(a int); -insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); -create table t2(a char(8), b char(8), c char(8), filler char(100), key(a,b,c) ) engine=TokuDB; - -insert into t2 select - concat('a-', 1000 + A.a, '-a'), - concat('b-', 1000 + B.a, '-b'), - concat('c-', 1000 + C.a, '-c'), - 'filler' -from t1 A, t1 B, t1 C; - -explain -select count(length(a) + length(filler)) from t2 where a>='a-1000-a' and a <'a-1001-a'; -select count(length(a) + length(filler)) from t2 where a>='a-1000-a' and a <'a-1001-a'; -drop table t2; - -# Try a very big rowid -create table t2 (a char(100), b char(100), c char(100), d int, - filler char(10), key(d), primary key (a,b,c)) engine= tokudb; -insert into t2 select A.a, B.a, B.a, A.a, 'filler' from t1 A, t1 B; ---replace_column 9 # -explain select * from t2 force index (d) where d < 10; -drop table t2; - -drop table t1; -set @@mrr_buffer_size= @mrr_buffer_size_save; - -# -# BUG#33033 "MySQL/InnoDB crashes with simple select range query" -# -create table t1 (f1 int not null, f2 int not null,f3 int not null, f4 char(1), primary key (f1,f2), key ix(f3))Engine=tokuDB; - ---disable_query_log -let $1=55; - -while ($1) -{ - eval insert into t1(f1,f2,f3,f4) values ($1,$1,$1,'A'); - dec $1; -} ---enable_query_log - -# The following must not crash: -select * from t1 where (f3>=5 and f3<=10) or (f3>=1 and f3<=4); - -drop table t1; - ---echo ---echo BUG#37977: Wrong result returned on GROUP BY + OR + innodb ---echo -CREATE TABLE t1 ( - `pk` int(11) NOT NULL AUTO_INCREMENT, - `int_nokey` int(11) NOT NULL, - `int_key` int(11) NOT NULL, - `date_key` date NOT NULL, - `date_nokey` date NOT NULL, - `time_key` time NOT NULL, - `time_nokey` time NOT NULL, - `datetime_key` datetime NOT NULL, - `datetime_nokey` datetime NOT NULL, - `varchar_key` varchar(5) DEFAULT NULL, - `varchar_nokey` varchar(5) DEFAULT NULL, - PRIMARY KEY (`pk`), - KEY `int_key` 
(`int_key`), - KEY `date_key` (`date_key`), - KEY `time_key` (`time_key`), - KEY `datetime_key` (`datetime_key`), - KEY `varchar_key` (`varchar_key`) -) ENGINE=TokuDB; - -INSERT INTO t1 VALUES -(1,5,5,'2009-10-16','2009-10-16','09:28:15','09:28:15','2007-09-14 05:34:08','2007-09-14 05:34:08','qk','qk'), -(2,6,6,'0000-00-00','0000-00-00','23:06:39','23:06:39','0000-00-00 00:00:00','0000-00-00 00:00:00','j','j'), -(3,10,10,'2000-12-18','2000-12-18','22:16:19','22:16:19','2006-11-04 15:42:50','2006-11-04 15:42:50','aew','aew'), -(4,0,0,'2001-09-18','2001-09-18','00:00:00','00:00:00','2004-03-23 13:23:35','2004-03-23 13:23:35',NULL,NULL), -(5,6,6,'2007-08-16','2007-08-16','22:13:38','22:13:38','2004-08-19 11:01:28','2004-08-19 11:01:28','qu','qu'); -select pk from t1 WHERE `varchar_key` > 'kr' group by pk; -select pk from t1 WHERE `int_nokey` IS NULL OR `varchar_key` > 'kr' group by pk; -drop table t1; - ---echo # ---echo # BUG#39447: Error with NOT NULL condition and LIMIT 1 ---echo # -CREATE TABLE t1 ( - id int(11) NOT NULL, - parent_id int(11) DEFAULT NULL, - name varchar(10) DEFAULT NULL, - PRIMARY KEY (id), - KEY ind_parent_id (parent_id) -) ENGINE=TokuDB; - -insert into t1 (id, parent_id, name) values -(10,NULL,'A'), -(20,10,'B'), -(30,10,'C'), -(40,NULL,'D'), -(50,40,'E'), -(60,40,'F'), -(70,NULL,'J'); - -SELECT id FROM t1 WHERE parent_id IS NOT NULL ORDER BY id DESC LIMIT 1; ---echo This must show type=index, extra=Using where -explain SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE parent_id IS NOT NULL ORDER BY id DESC LIMIT 1; -SELECT * FROM t1 WHERE parent_id IS NOT NULL ORDER BY id DESC LIMIT 1; -drop table t1; - - --- echo # --- echo # BUG#628785: multi_range_read.cc:430: int DsMrr_impl::dsmrr_init(): Assertion `do_sort_keys || do_rowid_fetch' failed --- echo # -set @save_join_cache_level= @@join_cache_level; -set @save_optimizer_switch= @@optimizer_switch; -SET SESSION join_cache_level=9; -SET SESSION optimizer_switch='mrr_sort_keys=off'; - -CREATE TABLE 
`t1` ( - `pk` int(11) NOT NULL AUTO_INCREMENT, - `col_int_nokey` int(11) DEFAULT NULL, - `col_int_key` int(11) DEFAULT NULL, - `col_varchar_key` varchar(1) DEFAULT NULL, - `col_varchar_nokey` varchar(1) DEFAULT NULL, - PRIMARY KEY (`pk`), - KEY `col_varchar_key` (`col_varchar_key`,`col_int_key`) -) ENGINE=TokuDB AUTO_INCREMENT=101 DEFAULT CHARSET=latin1; -INSERT INTO `t1` VALUES (1,6,NULL,'r','r'); -INSERT INTO `t1` VALUES (2,8,0,'c','c'); -INSERT INTO `t1` VALUES (97,7,0,'z','z'); -INSERT INTO `t1` VALUES (98,1,1,'j','j'); -INSERT INTO `t1` VALUES (99,7,8,'c','c'); -INSERT INTO `t1` VALUES (100,2,5,'f','f'); -SELECT table1 .`col_varchar_key` -FROM t1 table1 STRAIGHT_JOIN ( t1 table3 JOIN t1 table4 ON table4 .`pk` = table3 .`col_int_nokey` ) ON table4 .`col_varchar_nokey` ; -DROP TABLE t1; -set join_cache_level=@save_join_cache_level; -set optimizer_switch=@save_optimizer_switch; - ---echo # ---echo # BUG#623300: Query with join_cache_level = 6 returns extra rows in maria-5.3-dsmrr-cpk ---echo # -CREATE TABLE t1 ( - pk int(11) NOT NULL AUTO_INCREMENT, - col_int_nokey int(11) DEFAULT NULL, - PRIMARY KEY (pk) -) ENGINE=TokuDB; - -INSERT INTO t1 VALUES (10,7); -INSERT INTO t1 VALUES (11,1); -INSERT INTO t1 VALUES (12,5); -INSERT INTO t1 VALUES (13,3); -INSERT INTO t1 VALUES (14,6); -INSERT INTO t1 VALUES (15,92); -INSERT INTO t1 VALUES (16,7); -INSERT INTO t1 VALUES (17,NULL); -INSERT INTO t1 VALUES (18,3); -INSERT INTO t1 VALUES (19,5); -INSERT INTO t1 VALUES (20,1); -INSERT INTO t1 VALUES (21,2); -INSERT INTO t1 VALUES (22,NULL); -INSERT INTO t1 VALUES (23,1); -INSERT INTO t1 VALUES (24,0); -INSERT INTO t1 VALUES (25,210); -INSERT INTO t1 VALUES (26,8); -INSERT INTO t1 VALUES (27,7); -INSERT INTO t1 VALUES (28,5); -INSERT INTO t1 VALUES (29,NULL); - -CREATE TABLE t2 ( - pk int(11) NOT NULL AUTO_INCREMENT, - col_int_nokey int(11) DEFAULT NULL, - PRIMARY KEY (pk) -) ENGINE=TokuDB; -INSERT INTO t2 VALUES (1,NULL); -INSERT INTO t2 VALUES (2,7); -INSERT INTO t2 VALUES 
(3,9); -INSERT INTO t2 VALUES (4,7); -INSERT INTO t2 VALUES (5,4); -INSERT INTO t2 VALUES (6,2); -INSERT INTO t2 VALUES (7,6); -INSERT INTO t2 VALUES (8,8); -INSERT INTO t2 VALUES (9,NULL); -INSERT INTO t2 VALUES (10,5); -INSERT INTO t2 VALUES (11,NULL); -INSERT INTO t2 VALUES (12,6); -INSERT INTO t2 VALUES (13,188); -INSERT INTO t2 VALUES (14,2); -INSERT INTO t2 VALUES (15,1); -INSERT INTO t2 VALUES (16,1); -INSERT INTO t2 VALUES (17,0); -INSERT INTO t2 VALUES (18,9); -INSERT INTO t2 VALUES (19,NULL); -INSERT INTO t2 VALUES (20,4); - -set @my_save_join_cache_level= @@join_cache_level; -SET join_cache_level = 0; - ---sorted_result -SELECT table2.col_int_nokey -FROM t1 table1 JOIN t2 table2 ON table2.pk = table1.col_int_nokey -WHERE table1.pk ; - -SET join_cache_level = 6; - ---sorted_result -SELECT table2.col_int_nokey -FROM t1 table1 JOIN t2 table2 ON table2.pk = table1.col_int_nokey -WHERE table1.pk ; - -set join_cache_level= @my_save_join_cache_level; -drop table t1, t2; - ---echo # ---echo # BUG#623315: Query returns less rows when run with join_cache_level=6 on maria-5.3-dsmrr-cpk ---echo # -CREATE TABLE t1 ( - pk int(11) NOT NULL AUTO_INCREMENT, - col_int_nokey int(11) DEFAULT NULL, - col_int_key int(11) DEFAULT NULL, - col_varchar_key varchar(1) DEFAULT NULL, - PRIMARY KEY (pk), - KEY col_int_key (col_int_key), - KEY col_varchar_key (col_varchar_key,col_int_key) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES (10,7,8,'v'); -INSERT INTO t1 VALUES (11,1,9,'r'); -INSERT INTO t1 VALUES (12,5,9,'a'); -INSERT INTO t1 VALUES (13,3,186,'m'); -INSERT INTO t1 VALUES (14,6,NULL,'y'); -INSERT INTO t1 VALUES (15,92,2,'j'); -INSERT INTO t1 VALUES (16,7,3,'d'); -INSERT INTO t1 VALUES (17,NULL,0,'z'); -INSERT INTO t1 VALUES (18,3,133,'e'); -INSERT INTO t1 VALUES (19,5,1,'h'); -INSERT INTO t1 VALUES (20,1,8,'b'); -INSERT INTO t1 VALUES (21,2,5,'s'); -INSERT INTO t1 VALUES (22,NULL,5,'e'); -INSERT INTO t1 VALUES (23,1,8,'j'); -INSERT INTO t1 VALUES (24,0,6,'e'); -INSERT INTO t1 
VALUES (25,210,51,'f'); -INSERT INTO t1 VALUES (26,8,4,'v'); -INSERT INTO t1 VALUES (27,7,7,'x'); -INSERT INTO t1 VALUES (28,5,6,'m'); -INSERT INTO t1 VALUES (29,NULL,4,'c'); - -set @my_save_join_cache_level= @@join_cache_level; -SET join_cache_level=6; -select count(*) from -(SELECT table2.pk FROM - t1 LEFT JOIN t1 table2 JOIN t1 table3 ON table3.col_varchar_key = table2.col_varchar_key - ON table3.col_int_nokey) foo; - -SET join_cache_level=0; -select count(*) from -(SELECT table2.pk FROM - t1 LEFT JOIN t1 table2 JOIN t1 table3 ON table3.col_varchar_key = table2.col_varchar_key - ON table3.col_int_nokey) foo; - -set join_cache_level= @my_save_join_cache_level; -drop table t1; - - ---echo # ---echo # BUG#671340: Diverging results in with mrr_sort_keys=ON|OFF and join_cache_level=5 ---echo # -CREATE TABLE t1 ( - pk int(11) NOT NULL AUTO_INCREMENT, - col_int_key int(11) NOT NULL, - col_varchar_key varchar(1) NOT NULL, - col_varchar_nokey varchar(1) NOT NULL, - PRIMARY KEY (pk), - KEY col_int_key (col_int_key), - KEY col_varchar_key (col_varchar_key,col_int_key) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES - (10,8,'v','v'), - (11,8,'f','f'), - (12,5,'v','v'), - (13,8,'s','s'), - (14,8,'a','a'), - (15,6,'p','p'), - (16,7,'z','z'), - (17,2,'a','a'), - (18,5,'h','h'), - (19,7,'h','h'), - (20,2,'v','v'), - (21,9,'v','v'), - (22,142,'b','b'), - (23,3,'y','y'), - (24,0,'v','v'), - (25,3,'m','m'), - (26,5,'z','z'), - (27,9,'n','n'), - (28,1,'d','d'), - (29,107,'a','a'); - -CREATE TABLE t2 ( - pk int(11) NOT NULL AUTO_INCREMENT, - col_int_key int(11) NOT NULL, - col_varchar_key varchar(1) NOT NULL, - col_varchar_nokey varchar(1) NOT NULL, - PRIMARY KEY (pk), - KEY col_int_key (col_int_key), - KEY col_varchar_key (col_varchar_key,col_int_key) -) ENGINE=TokuDB; -INSERT INTO t2 VALUES - (1,9,'x','x'), - (2,5,'g','g'), - (3,1,'o','o'), - (4,0,'g','g'), - (5,1,'v','v'), - (6,190,'m','m'), - (7,6,'x','x'), - (8,3,'c','c'), - (9,4,'z','z'), - (10,3,'i','i'), - (11,186,'x','x'), - 
(12,1,'g','g'), - (13,8,'q','q'), - (14,226,'m','m'), - (15,133,'p','p'), - (16,6,'e','e'), - (17,3,'t','t'), - (18,8,'j','j'), - (19,5,'h','h'), - (20,7,'w','w'); - -SELECT count(*), sum(table1.col_int_key*table2.pk) -FROM - t2 AS table1, t1 AS table2, t2 AS table3 -WHERE - table3.col_varchar_nokey = table2.col_varchar_key AND table3.pk > table2.col_varchar_nokey ; - -set @my_save_join_cache_level= @@join_cache_level; -set @my_save_join_buffer_size= @@join_buffer_size; -set join_cache_level=6; -set join_buffer_size=1536; ---disable_warnings -SELECT count(*), sum(table1.col_int_key*table2.pk) -FROM - t2 AS table1, t1 AS table2, t2 AS table3 -WHERE - table3.col_varchar_nokey = table2.col_varchar_key AND table3.pk > table2.col_varchar_nokey ; ---enable_warnings -drop table t1,t2; -set join_cache_level=@my_save_join_cache_level; -set join_buffer_size=@my_save_join_buffer_size; - - ---echo # ---echo # BUG#665669: Result differences on query re-execution ---echo # -create table t1 (pk int primary key, b int, c int default 0, index idx(b)) engine=Tokudb; -insert into t1(pk,b) values (3, 30), (2, 20), (9, 90), (7, 70), (4, 40), (5, 50), (10, 100), (12, 120); -set @bug665669_tmp=@@optimizer_switch; -set optimizer_switch='mrr=off'; -explain select * from t1 where b > 1000; ---echo # The following two must produce indentical results: -select * from t1 where pk < 2 or pk between 3 and 4; -select * from t1 where pk < 2 or pk between 3 and 4; -drop table t1; -set optimizer_switch = @bug665669_tmp; ---echo # ---echo # Bug#43360 - Server crash with a simple multi-table update ---echo # -CREATE TABLE t1 ( - a CHAR(2) NOT NULL PRIMARY KEY, - b VARCHAR(20) NOT NULL, - KEY (b) -) ENGINE=TokuDB; - -CREATE TABLE t2 ( - a CHAR(2) NOT NULL PRIMARY KEY, - b VARCHAR(20) NOT NULL, - KEY (b) -) ENGINE=TokuDB; - -INSERT INTO t1 VALUES -('AB','MySQLAB'), -('JA','Sun Microsystems'), -('MS','Microsoft'), -('IB','IBM- Inc.'), -('GO','Google Inc.'); - -INSERT INTO t2 VALUES -('AB','Sweden'), 
-('JA','USA'), -('MS','United States of America'), -('IB','North America'), -('GO','South America'); - -UPDATE t1,t2 SET t1.b=UPPER(t1.b) WHERE t1.b LIKE 'United%'; - -SELECT * FROM t1; - -SELECT * FROM t2; - -DROP TABLE t1,t2; - ---echo # ---echo # Testcase backport: Bug#43249 ---echo # (Note: Fixed by patch for BUG#42580) ---echo # -CREATE TABLE t1(c1 TIME NOT NULL, c2 TIME NULL, c3 DATE, PRIMARY KEY(c1), UNIQUE INDEX(c2)) engine=Tokudb; -INSERT INTO t1 VALUES('8:29:45',NULL,'2009-02-01'); -# first time, good results: -SELECT * FROM t1 WHERE c2 <=> NULL ORDER BY c2 LIMIT 2; -# second time, bad results: -SELECT * FROM t1 WHERE c2 <=> NULL ORDER BY c2 LIMIT 2; -drop table `t1`; - ---echo # ---echo # BUG#707925: Wrong result with join_cache_level=6 optimizer_use_mrr = ---echo # force (incremental, BKA join) ---echo # -set @_save_join_cache_level= @@join_cache_level; -set join_cache_level = 6; -CREATE TABLE t1 ( - f1 int(11), f2 int(11), f3 varchar(1), f4 varchar(1), - PRIMARY KEY (f1), - KEY (f3), - KEY (f2) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES ('11','8','f','f'),('12','5','v','v'),('13','8','s','s'), -('14','8','a','a'),('15','6','p','p'),('16','7','z','z'),('17','2','a','a'), -('18','5','h','h'),('19','7','h','h'),('20','2','v','v'),('21','9','v','v'), -('22','142','b','b'),('23','3','y','y'),('24','0','v','v'),('25','3','m','m'), -('26','5','z','z'),('27','9','n','n'),('28','1','d','d'),('29','107','a','a'); - -select count(*) from ( - SELECT alias1.f2 - FROM - t1 AS alias1 JOIN ( - t1 AS alias2 FORCE KEY (f3) JOIN - t1 AS alias3 FORCE KEY (f2) ON alias3.f2 = alias2.f2 AND alias3.f4 = alias2.f3 - ) ON alias3.f1 <= alias2.f1 -) X; - -set join_cache_level=@_save_join_cache_level; -set optimizer_switch= @innodb_mrr_tmp; -drop table t1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/xa-3.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/xa-3.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/xa-3.test 
2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/xa-3.test 2014-10-08 13:19:51.000000000 +0000 @@ -1,6 +1,7 @@ -- source include/have_innodb.inc -- source include/have_tokudb.inc -- source include/have_debug.inc +-- source include/not_embedded.inc --disable_warnings drop table if exists t1, t2; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/xa-4.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/xa-4.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_bugs/t/xa-4.test 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_bugs/t/xa-4.test 2014-10-08 13:19:51.000000000 +0000 @@ -1,6 +1,7 @@ -- source include/have_innodb.inc -- source include/have_tokudb.inc -- source include/have_debug.inc +-- source include/not_embedded.inc --disable_warnings drop table if exists t1, t2; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_mariadb/r/autoinc.result mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_mariadb/r/autoinc.result --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_mariadb/r/autoinc.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_mariadb/r/autoinc.result 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,36 @@ +create table t1 (a int auto_increment, b bigint(20), primary key (b,a)) engine=tokudb; +start transaction; +insert t1 (b) values (1); +set tokudb_lock_timeout=1; +insert t1 (b) values (1); +ERROR HY000: Lock wait timeout exceeded; try restarting transaction +set tokudb_lock_timeout=default; +insert t1 (b) values (1); +insert t1 (b) values (1); +commit; +commit; +select * from t1; +a b +1 1 +2 1 +3 1 +alter table t1 partition by range (b) (partition p0 values less than (9)); +start transaction; +insert t1 (b) values (2); +set tokudb_lock_timeout=1; +insert t1 (b) values (2); +ERROR HY000: Lock wait timeout exceeded; try restarting transaction +set 
tokudb_lock_timeout=default; +insert t1 (b) values (2); +insert t1 (b) values (2); +commit; +commit; +select * from t1; +a b +1 1 +2 1 +3 1 +1 2 +2 2 +3 2 +drop table t1; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_mariadb/suite.opt mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_mariadb/suite.opt --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_mariadb/suite.opt 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_mariadb/suite.opt 2014-10-08 13:19:52.000000000 +0000 @@ -1 +1 @@ ---tokudb --plugin-load=$HA_TOKUDB_SO +--tokudb --plugin-load=$HA_TOKUDB_SO --loose-tokudb-check-jemalloc=0 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_mariadb/t/autoinc.test mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_mariadb/t/autoinc.test --- mariadb-5.5-5.5.39/storage/tokudb/mysql-test/tokudb_mariadb/t/autoinc.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/mysql-test/tokudb_mariadb/t/autoinc.test 2014-10-08 13:19:51.000000000 +0000 @@ -0,0 +1,51 @@ +# +# MDEV-6605 Multiple Clients Inserting Causing Error: Failed to read auto-increment value from storage engine +# + +--source include/have_partition.inc +create table t1 (a int auto_increment, b bigint(20), primary key (b,a)) engine=tokudb; + +# first, without partitions +start transaction; +insert t1 (b) values (1); + +--connect(con2,localhost,root) +set tokudb_lock_timeout=1; +# auto-inc value is locked +--error ER_LOCK_WAIT_TIMEOUT +insert t1 (b) values (1); +# but no deadlock +set tokudb_lock_timeout=default; +--send insert t1 (b) values (1) +--connection default +insert t1 (b) values (1); +commit; +--connection con2 +--reap +commit; +select * from t1; + +# now with partitions +--connection default +alter table t1 partition by range (b) (partition p0 values less than (9)); +start transaction; +insert t1 (b) values (2); + +--connection con2 +set tokudb_lock_timeout=1; +# auto-inc value is locked +--error 
ER_LOCK_WAIT_TIMEOUT +insert t1 (b) values (2); +# but no deadlock +set tokudb_lock_timeout=default; +--send insert t1 (b) values (2) +--connection default +insert t1 (b) values (2); +commit; +--connection con2 +--reap +commit; +select * from t1; + +drop table t1; + diff -Nru mariadb-5.5-5.5.39/storage/tokudb/README.md mariadb-5.5-5.5.40/storage/tokudb/README.md --- mariadb-5.5-5.5.39/storage/tokudb/README.md 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/README.md 2014-10-08 13:19:51.000000000 +0000 @@ -1,17 +1,17 @@ TokuDB ====== -TokuDB is a high-performance, transactional storage engine for MySQL and +TokuDB is a high-performance, write optimized, transactional storage engine for MySQL and MariaDB. For more details, see our [product page][products]. -This repository contains the MySQL plugin that uses the [TokuKV][tokukv] +This repository contains the MySQL plugin that uses the [TokuFT][tokuft] core. There are also patches to the MySQL and MariaDB kernels, available in our forks of [mysql][mysql] and [mariadb][mariadb]. [products]: http://www.tokutek.com/products/tokudb-for-mysql/ -[tokukv]: http://github.com/Tokutek/ft-index +[tokuft]: http://github.com/Tokutek/ft-index [mysql]: http://github.com/Tokutek/mysql [mariadb]: http://github.com/Tokutek/mariadb @@ -24,14 +24,14 @@ engine, called `make.mysql.bash`. This script will download copies of the needed source code from github and build everything. 
-To build MySQL 5.5.37 with TokuDB 7.1.6: +To build MySQL 5.5.38 with TokuDB 7.1.7: ```sh -scripts/make.mysql.bash --mysqlbuild=mysql-5.5.37-tokudb-7.1.6-linux-x86_64 +scripts/make.mysql.bash --mysqlbuild=mysql-5.5.38-tokudb-7.1.7-linux-x86_64 ``` -To build MariaDB 5.5.37 with TokuDB 7.1.6: +To build MariaDB 5.5.38 with TokuDB 7.1.7: ```sh -scripts/make.mysql.bash --mysqlbuild=mariadb-5.5.37-tokudb-7.1.6-linux-x86_64 +scripts/make.mysql.bash --mysqlbuild=mariadb-5.5.38-tokudb-7.1.7-linux-x86_64 ``` Before you start, make sure you have a C++11-compatible compiler (GCC >= diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/atc_ontime_create_covered.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/atc_ontime_create_covered.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/atc_ontime_create_covered.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/atc_ontime_create_covered.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1,103 +0,0 @@ -CREATE TABLE `ontime` ( - `Year` year(4) DEFAULT NULL, - `Quarter` tinyint(4) DEFAULT NULL, - `Month` tinyint(4) DEFAULT NULL, - `DayofMonth` tinyint(4) DEFAULT NULL, - `DayOfWeek` tinyint(4) DEFAULT NULL, - `FlightDate` date DEFAULT NULL, - `UniqueCarrier` char(7) DEFAULT NULL, - `AirlineID` int(11) DEFAULT NULL, - `Carrier` char(2) DEFAULT NULL, - `TailNum` varchar(50) DEFAULT NULL, - `FlightNum` varchar(10) DEFAULT NULL, - `Origin` char(5) DEFAULT NULL, - `OriginCityName` varchar(100) DEFAULT NULL, - `OriginState` char(2) DEFAULT NULL, - `OriginStateFips` varchar(10) DEFAULT NULL, - `OriginStateName` varchar(100) DEFAULT NULL, - `OriginWac` int(11) DEFAULT NULL, - `Dest` char(5) DEFAULT NULL, - `DestCityName` varchar(100) DEFAULT NULL, - `DestState` char(2) DEFAULT NULL, - `DestStateFips` varchar(10) DEFAULT NULL, - `DestStateName` varchar(100) DEFAULT NULL, - `DestWac` int(11) DEFAULT NULL, - `CRSDepTime` int(11) DEFAULT NULL, - `DepTime` int(11) DEFAULT NULL, - 
`DepDelay` int(11) DEFAULT NULL, - `DepDelayMinutes` int(11) DEFAULT NULL, - `DepDel15` int(11) DEFAULT NULL, - `DepartureDelayGroups` int(11) DEFAULT NULL, - `DepTimeBlk` varchar(20) DEFAULT NULL, - `TaxiOut` int(11) DEFAULT NULL, - `WheelsOff` int(11) DEFAULT NULL, - `WheelsOn` int(11) DEFAULT NULL, - `TaxiIn` int(11) DEFAULT NULL, - `CRSArrTime` int(11) DEFAULT NULL, - `ArrTime` int(11) DEFAULT NULL, - `ArrDelay` int(11) DEFAULT NULL, - `ArrDelayMinutes` int(11) DEFAULT NULL, - `ArrDel15` int(11) DEFAULT NULL, - `ArrivalDelayGroups` int(11) DEFAULT NULL, - `ArrTimeBlk` varchar(20) DEFAULT NULL, - `Cancelled` tinyint(4) DEFAULT NULL, - `CancellationCode` char(1) DEFAULT NULL, - `Diverted` tinyint(4) DEFAULT NULL, - `CRSElapsedTime` INT(11) DEFAULT NULL, - `ActualElapsedTime` INT(11) DEFAULT NULL, - `AirTime` INT(11) DEFAULT NULL, - `Flights` INT(11) DEFAULT NULL, - `Distance` INT(11) DEFAULT NULL, - `DistanceGroup` TINYINT(4) DEFAULT NULL, - `CarrierDelay` INT(11) DEFAULT NULL, - `WeatherDelay` INT(11) DEFAULT NULL, - `NASDelay` INT(11) DEFAULT NULL, - `SecurityDelay` INT(11) DEFAULT NULL, - `LateAircraftDelay` INT(11) DEFAULT NULL, - `FirstDepTime` varchar(10) DEFAULT NULL, - `TotalAddGTime` varchar(10) DEFAULT NULL, - `LongestAddGTime` varchar(10) DEFAULT NULL, - `DivAirportLandings` varchar(10) DEFAULT NULL, - `DivReachedDest` varchar(10) DEFAULT NULL, - `DivActualElapsedTime` varchar(10) DEFAULT NULL, - `DivArrDelay` varchar(10) DEFAULT NULL, - `DivDistance` varchar(10) DEFAULT NULL, - `Div1Airport` varchar(10) DEFAULT NULL, - `Div1WheelsOn` varchar(10) DEFAULT NULL, - `Div1TotalGTime` varchar(10) DEFAULT NULL, - `Div1LongestGTime` varchar(10) DEFAULT NULL, - `Div1WheelsOff` varchar(10) DEFAULT NULL, - `Div1TailNum` varchar(10) DEFAULT NULL, - `Div2Airport` varchar(10) DEFAULT NULL, - `Div2WheelsOn` varchar(10) DEFAULT NULL, - `Div2TotalGTime` varchar(10) DEFAULT NULL, - `Div2LongestGTime` varchar(10) DEFAULT NULL, - `Div2WheelsOff` varchar(10) DEFAULT NULL, 
- `Div2TailNum` varchar(10) DEFAULT NULL, - `Div3Airport` varchar(10) DEFAULT NULL, - `Div3WheelsOn` varchar(10) DEFAULT NULL, - `Div3TotalGTime` varchar(10) DEFAULT NULL, - `Div3LongestGTime` varchar(10) DEFAULT NULL, - `Div3WheelsOff` varchar(10) DEFAULT NULL, - `Div3TailNum` varchar(10) DEFAULT NULL, - `Div4Airport` varchar(10) DEFAULT NULL, - `Div4WheelsOn` varchar(10) DEFAULT NULL, - `Div4TotalGTime` varchar(10) DEFAULT NULL, - `Div4LongestGTime` varchar(10) DEFAULT NULL, - `Div4WheelsOff` varchar(10) DEFAULT NULL, - `Div4TailNum` varchar(10) DEFAULT NULL, - `Div5Airport` varchar(10) DEFAULT NULL, - `Div5WheelsOn` varchar(10) DEFAULT NULL, - `Div5TotalGTime` varchar(10) DEFAULT NULL, - `Div5LongestGTime` varchar(10) DEFAULT NULL, - `Div5WheelsOff` varchar(10) DEFAULT NULL, - `Div5TailNum` varchar(10) DEFAULT NULL, - KEY `Year` (`Year`,`Month`), - KEY `Year_2` (`Year`,`DayOfWeek`), - KEY `Year_3` (`Year`,`DepDelay`,`DayOfWeek`), - KEY `DayOfWeek` (`DayOfWeek`,`Year`,`DepDelay`), - KEY `Year_4` (`Year`,`DepDelay`,`Origin`,`Carrier`), - KEY `DepDelay` (`DepDelay`,`Year`), - KEY `Year_5` (`Year`,`DestCityName`,`OriginCityName`), - KEY `DestCityName` (`DestCityName`,`OriginCityName`,`Year`) -) ENGINE=TOKUDB; \ No newline at end of file diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/atc_ontime_create.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/atc_ontime_create.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/atc_ontime_create.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/atc_ontime_create.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1,95 +0,0 @@ -CREATE TABLE `ontime` ( - `Year` year(4) DEFAULT NULL, - `Quarter` tinyint(4) DEFAULT NULL, - `Month` tinyint(4) DEFAULT NULL, - `DayofMonth` tinyint(4) DEFAULT NULL, - `DayOfWeek` tinyint(4) DEFAULT NULL, - `FlightDate` date DEFAULT NULL, - `UniqueCarrier` char(7) DEFAULT NULL, - `AirlineID` int(11) DEFAULT NULL, - `Carrier` char(2) 
DEFAULT NULL, - `TailNum` varchar(50) DEFAULT NULL, - `FlightNum` varchar(10) DEFAULT NULL, - `Origin` char(5) DEFAULT NULL, - `OriginCityName` varchar(100) DEFAULT NULL, - `OriginState` char(2) DEFAULT NULL, - `OriginStateFips` varchar(10) DEFAULT NULL, - `OriginStateName` varchar(100) DEFAULT NULL, - `OriginWac` int(11) DEFAULT NULL, - `Dest` char(5) DEFAULT NULL, - `DestCityName` varchar(100) DEFAULT NULL, - `DestState` char(2) DEFAULT NULL, - `DestStateFips` varchar(10) DEFAULT NULL, - `DestStateName` varchar(100) DEFAULT NULL, - `DestWac` int(11) DEFAULT NULL, - `CRSDepTime` int(11) DEFAULT NULL, - `DepTime` int(11) DEFAULT NULL, - `DepDelay` int(11) DEFAULT NULL, - `DepDelayMinutes` int(11) DEFAULT NULL, - `DepDel15` int(11) DEFAULT NULL, - `DepartureDelayGroups` int(11) DEFAULT NULL, - `DepTimeBlk` varchar(20) DEFAULT NULL, - `TaxiOut` int(11) DEFAULT NULL, - `WheelsOff` int(11) DEFAULT NULL, - `WheelsOn` int(11) DEFAULT NULL, - `TaxiIn` int(11) DEFAULT NULL, - `CRSArrTime` int(11) DEFAULT NULL, - `ArrTime` int(11) DEFAULT NULL, - `ArrDelay` int(11) DEFAULT NULL, - `ArrDelayMinutes` int(11) DEFAULT NULL, - `ArrDel15` int(11) DEFAULT NULL, - `ArrivalDelayGroups` int(11) DEFAULT NULL, - `ArrTimeBlk` varchar(20) DEFAULT NULL, - `Cancelled` tinyint(4) DEFAULT NULL, - `CancellationCode` char(1) DEFAULT NULL, - `Diverted` tinyint(4) DEFAULT NULL, - `CRSElapsedTime` INT(11) DEFAULT NULL, - `ActualElapsedTime` INT(11) DEFAULT NULL, - `AirTime` INT(11) DEFAULT NULL, - `Flights` INT(11) DEFAULT NULL, - `Distance` INT(11) DEFAULT NULL, - `DistanceGroup` TINYINT(4) DEFAULT NULL, - `CarrierDelay` INT(11) DEFAULT NULL, - `WeatherDelay` INT(11) DEFAULT NULL, - `NASDelay` INT(11) DEFAULT NULL, - `SecurityDelay` INT(11) DEFAULT NULL, - `LateAircraftDelay` INT(11) DEFAULT NULL, - `FirstDepTime` varchar(10) DEFAULT NULL, - `TotalAddGTime` varchar(10) DEFAULT NULL, - `LongestAddGTime` varchar(10) DEFAULT NULL, - `DivAirportLandings` varchar(10) DEFAULT NULL, - `DivReachedDest` 
varchar(10) DEFAULT NULL, - `DivActualElapsedTime` varchar(10) DEFAULT NULL, - `DivArrDelay` varchar(10) DEFAULT NULL, - `DivDistance` varchar(10) DEFAULT NULL, - `Div1Airport` varchar(10) DEFAULT NULL, - `Div1WheelsOn` varchar(10) DEFAULT NULL, - `Div1TotalGTime` varchar(10) DEFAULT NULL, - `Div1LongestGTime` varchar(10) DEFAULT NULL, - `Div1WheelsOff` varchar(10) DEFAULT NULL, - `Div1TailNum` varchar(10) DEFAULT NULL, - `Div2Airport` varchar(10) DEFAULT NULL, - `Div2WheelsOn` varchar(10) DEFAULT NULL, - `Div2TotalGTime` varchar(10) DEFAULT NULL, - `Div2LongestGTime` varchar(10) DEFAULT NULL, - `Div2WheelsOff` varchar(10) DEFAULT NULL, - `Div2TailNum` varchar(10) DEFAULT NULL, - `Div3Airport` varchar(10) DEFAULT NULL, - `Div3WheelsOn` varchar(10) DEFAULT NULL, - `Div3TotalGTime` varchar(10) DEFAULT NULL, - `Div3LongestGTime` varchar(10) DEFAULT NULL, - `Div3WheelsOff` varchar(10) DEFAULT NULL, - `Div3TailNum` varchar(10) DEFAULT NULL, - `Div4Airport` varchar(10) DEFAULT NULL, - `Div4WheelsOn` varchar(10) DEFAULT NULL, - `Div4TotalGTime` varchar(10) DEFAULT NULL, - `Div4LongestGTime` varchar(10) DEFAULT NULL, - `Div4WheelsOff` varchar(10) DEFAULT NULL, - `Div4TailNum` varchar(10) DEFAULT NULL, - `Div5Airport` varchar(10) DEFAULT NULL, - `Div5WheelsOn` varchar(10) DEFAULT NULL, - `Div5TotalGTime` varchar(10) DEFAULT NULL, - `Div5LongestGTime` varchar(10) DEFAULT NULL, - `Div5WheelsOff` varchar(10) DEFAULT NULL, - `Div5TailNum` varchar(10) DEFAULT NULL -) ENGINE=TOKUDB; \ No newline at end of file diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/nodistinct.q8.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/nodistinct.q8.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/nodistinct.q8.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/nodistinct.q8.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -# Q8: As final I tested most popular destination in sense count of direct connected 
cities for different diapason of years. -SELECT DestCityName, COUNT( OriginCityName) FROM ontime WHERE Year BETWEEN 2006 and 2007 GROUP BY DestCityName ORDER BY 2 DESC LIMIT 10; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q0.result mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q0.result --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q0.result 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q0.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -avg(c1) -485021.3730 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q0.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q0.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q0.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q0.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -select avg(c1) from (select year,month,count(*) as c1 from ontime group by YEAR,month) t; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q1.result mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q1.result --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q1.result 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q1.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,8 +0,0 @@ -DayOfWeek c -5 8732424 -1 8730614 -4 8710843 -3 8685626 -2 8639632 -7 8274367 -6 7514194 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q1.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q1.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q1.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q1.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -SELECT DayOfWeek, count(*) AS c FROM ontime WHERE Year BETWEEN 2000 AND 2008 GROUP BY DayOfWeek ORDER BY c DESC; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q2.result 
mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q2.result --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q2.result 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q2.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,8 +0,0 @@ -DayOfWeek c -5 2088300 -4 1918325 -1 1795120 -7 1782292 -3 1640798 -2 1538291 -6 1391984 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q2.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q2.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q2.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q2.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -SELECT DayOfWeek, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year BETWEEN 2000 AND 2008 GROUP BY DayOfWeek ORDER BY c DESC; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q3.result mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q3.result --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q3.result 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q3.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,11 +0,0 @@ -Origin c -ORD 846692 -ATL 822955 -DFW 601318 -LAX 391247 -PHX 391191 -LAS 351713 -DEN 345108 -EWR 292916 -DTW 289233 -IAH 283861 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q3.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q3.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q3.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q3.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -SELECT Origin, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year BETWEEN 2000 AND 2008 GROUP BY Origin ORDER BY c DESC LIMIT 10; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q4.result mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q4.result --- 
mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q4.result 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q4.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -carrier count(*) -WN 296293 -AA 176203 -MQ 145630 -US 135987 -UA 128174 -OO 127426 -EV 101796 -XE 99915 -DL 93675 -NW 90429 -CO 76662 -YV 67905 -FL 59460 -OH 59034 -B6 50740 -9E 46948 -AS 42830 -F9 23035 -AQ 4299 -HA 2746 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q4.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q4.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q4.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q4.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -SELECT carrier, count(*) FROM ontime WHERE DepDelay>10 AND Year=2007 GROUP BY carrier ORDER BY 2 DESC; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q5.result mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q5.result --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q5.result 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q5.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -carrier c c2 c3 -EV 101796 286234 355.6391 -US 135987 485447 280.1274 -AA 176203 633857 277.9854 -MQ 145630 540494 269.4387 -AS 42830 160185 267.3783 -B6 50740 191450 265.0300 -UA 128174 490002 261.5785 -WN 296293 1168871 253.4865 -OH 59034 236032 250.1102 -CO 76662 323151 237.2327 -F9 23035 97760 235.6281 -YV 67905 294362 230.6853 -XE 99915 434773 229.8096 -FL 59460 263159 225.9471 -NW 90429 414526 218.1504 -OO 127426 597880 213.1297 -DL 93675 475889 196.8421 -9E 46948 258851 181.3707 -AQ 4299 46360 92.7308 -HA 2746 56175 48.8830 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q5.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q5.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q5.sql 2014-08-03 
12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q5.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -SELECT t.carrier, c, c2, c*1000/c2 as c3 FROM (SELECT carrier, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year=2007 GROUP BY carrier) t JOIN (SELECT carrier, count(*) AS c2 FROM ontime WHERE Year=2007 GROUP BY carrier) t2 ON (t.Carrier=t2.Carrier) ORDER BY c3 DESC; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q6.result mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q6.result --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q6.result 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q6.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -carrier c c2 c3 -UA 1096646 490002 2238.0439 -AS 354145 160185 2210.8500 -DL 1050448 475889 2207.3383 -AA 1276555 633857 2013.9479 -US 909154 485447 1872.8182 -WN 2165483 1168871 1852.6279 -NW 725076 414526 1749.1689 -MQ 876799 540494 1622.2178 -CO 522219 323151 1616.0216 -EV 461050 286234 1610.7451 -OH 301681 236032 1278.1360 -FL 298916 263159 1135.8760 -B6 197249 191450 1030.2899 -OO 556247 597880 930.3656 -F9 72150 97760 738.0319 -YV 198787 294362 675.3147 -XE 233488 434773 537.0343 -AQ 17239 46360 371.8507 -9E 89391 258851 345.3377 -HA 15968 56175 284.2546 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q6.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q6.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q6.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q6.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -SELECT t.carrier, c, c2, c*1000/c2 as c3 FROM (SELECT carrier, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year BETWEEN 2000 and 2008 GROUP BY carrier) t JOIN (SELECT carrier, count(*) AS c2 FROM ontime WHERE Year=2007 GROUP BY carrier) t2 ON (t.Carrier=t2.Carrier) ORDER BY c3 DESC; diff -Nru 
mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q7.result mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q7.result --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q7.result 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q7.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,22 +0,0 @@ -Year c1/c2 -1988 166.1709 -1989 199.5009 -1990 166.4513 -1991 147.2163 -1992 146.7543 -1993 154.2498 -1994 165.6803 -1995 193.9344 -1996 221.8281 -1997 191.6513 -1998 193.5638 -1999 200.8742 -2000 231.7167 -2001 189.0581 -2002 162.3769 -2003 150.2455 -2004 192.4838 -2005 207.5929 -2006 231.5599 -2007 245.3487 -2008 219.9228 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q7.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q7.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q7.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q7.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -SELECT t.Year, c1/c2 FROM (select Year,count(*)*1000 as c1 from ontime WHERE DepDelay>10 GROUP BY Year) t JOIN (select Year,count(*) as c2 from ontime GROUP BY Year) t2 ON (t.Year=t2.Year); diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.10y.destcityname.result mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.10y.destcityname.result --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.10y.destcityname.result 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.10y.destcityname.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,11 +0,0 @@ -DestCityName COUNT( DISTINCT OriginCityName) -Atlanta, GA 190 -Chicago, IL 159 -Dallas/Ft.Worth, TX 151 -Cincinnati, OH 139 -Minneapolis, MN 131 -Houston, TX 127 -Detroit, MI 121 -Denver, CO 120 -Salt Lake City, UT 116 -New York, NY 111 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.10y.destcityname.sql 
mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.10y.destcityname.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.10y.destcityname.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.10y.destcityname.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -SELECT DestCityName, COUNT( DISTINCT OriginCityName) FROM ontime FORCE INDEX(DestCityName) WHERE Year BETWEEN 1999 and 2009 GROUP BY DestCityName ORDER BY 2 DESC LIMIT 10; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.10y.result mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.10y.result --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.10y.result 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.10y.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,11 +0,0 @@ -DestCityName COUNT( DISTINCT OriginCityName) -Atlanta, GA 190 -Chicago, IL 159 -Dallas/Ft.Worth, TX 151 -Cincinnati, OH 139 -Minneapolis, MN 131 -Houston, TX 127 -Detroit, MI 121 -Denver, CO 120 -Salt Lake City, UT 116 -New York, NY 111 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.10y.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.10y.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.10y.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.10y.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -SELECT DestCityName, COUNT( DISTINCT OriginCityName) FROM ontime WHERE Year BETWEEN 1999 and 2009 GROUP BY DestCityName ORDER BY 2 DESC LIMIT 10; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.1y.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.1y.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.1y.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.1y.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -SELECT DestCityName, 
COUNT( DISTINCT OriginCityName) FROM ontime WHERE Year BETWEEN 1999 and 1999 GROUP BY DestCityName ORDER BY 2 DESC LIMIT 10; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.1y.year5.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.1y.year5.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.1y.year5.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.1y.year5.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -SELECT DestCityName, COUNT( DISTINCT OriginCityName) FROM ontime USE INDEX(year_5) WHERE Year BETWEEN 1999 and 1999 GROUP BY DestCityName ORDER BY 2 DESC LIMIT 10; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.2y.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.2y.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.2y.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.2y.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -SELECT DestCityName, COUNT( DISTINCT OriginCityName) FROM ontime WHERE Year BETWEEN 1999 and 2000 GROUP BY DestCityName ORDER BY 2 DESC LIMIT 10; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.3y.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.3y.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.3y.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.3y.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -SELECT DestCityName, COUNT( DISTINCT OriginCityName) FROM ontime WHERE Year BETWEEN 1999 and 2001 GROUP BY DestCityName ORDER BY 2 DESC LIMIT 10; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.4y.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.4y.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.4y.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.4y.sql 1970-01-01 
00:00:00.000000000 +0000 @@ -1 +0,0 @@ -SELECT DestCityName, COUNT( DISTINCT OriginCityName) FROM ontime WHERE Year BETWEEN 1999 and 2002 GROUP BY DestCityName ORDER BY 2 DESC LIMIT 10; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.result mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.result --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.result 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,11 +0,0 @@ -DestCityName COUNT( DISTINCT OriginCityName) -Atlanta, GA 183 -Chicago, IL 147 -Dallas/Ft.Worth, TX 133 -Cincinnati, OH 129 -Minneapolis, MN 128 -Houston, TX 114 -Detroit, MI 112 -Denver, CO 111 -Salt Lake City, UT 108 -New York, NY 101 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -SELECT DestCityName, COUNT( DISTINCT OriginCityName) FROM ontime WHERE Year BETWEEN 2006 and 2007 GROUP BY DestCityName ORDER BY 2 DESC LIMIT 10; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.year5.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.year5.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q8.year5.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q8.year5.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -SELECT DestCityName, COUNT( DISTINCT OriginCityName) FROM ontime use index(year_5) WHERE Year BETWEEN 2006 and 2007 GROUP BY DestCityName ORDER BY 2 DESC LIMIT 10; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q9.result mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q9.result --- 
mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q9.result 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q9.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,22 +0,0 @@ -year c1 -1988 5202096 -1989 5041200 -1990 5270893 -1991 5076925 -1992 5092157 -1993 5070501 -1994 5180048 -1995 5327435 -1996 5351983 -1997 5411843 -1998 5384721 -1999 5527884 -2000 5683047 -2001 5967780 -2002 5271359 -2003 6488540 -2004 7129270 -2005 7140596 -2006 7141922 -2007 7455458 -2008 7009728 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q9.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q9.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/q9.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/q9.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -select year,count(*) as c1 from ontime group by YEAR; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/qcount.main.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/qcount.main.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/qcount.main.sql 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/qcount.main.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -select count(*) from ontime use index(); - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/qcount.result mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/qcount.result --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/qcount.result 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/qcount.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -count(*) -122225386 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/qcount.sql mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/qcount.sql --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.ontime/qcount.sql 2014-08-03 12:00:39.000000000 +0000 +++ 
mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.ontime/qcount.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -select count(*) from ontime; diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.readme mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.readme --- mariadb-5.5-5.5.39/storage/tokudb/scripts/atc.readme 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/atc.readme 1970-01-01 00:00:00.000000000 +0000 @@ -1,19 +0,0 @@ -The script to run the load the air traffic ontime database and run queries against it -is called run.atc.ontime.bas. - -The queries are in the tokudb-engine/scripts/atc.ontime directory. - -The data for the ontime database is in the amazon s3 bucket called tokutek-mysql-data. - -$ s3ls -l tokutek-mysql-data --prefix=atc_On_Time_Performance -2010-06-15T13:07:09.000Z 1073741824 atc_On_Time_Performance.mysql.csv.gz.aa -2010-06-15T13:08:19.000Z 1073741824 atc_On_Time_Performance.mysql.csv.gz.ab -2010-06-15T13:09:38.000Z 1073741824 atc_On_Time_Performance.mysql.csv.gz.ac -2010-06-15T13:10:54.000Z 446709742 atc_On_Time_Performance.mysql.csv.gz.ad -2010-06-15T13:11:26.000Z 503 atc_On_Time_Performance.mysql.csv.gz.xml - -The raw data is also stored in the amazon s3 bucket called tokutek-mysql-data. 
- -$ s3ls -l tokutek-mysql-data --prefix=atc - - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/make.mysql.bash mariadb-5.5-5.5.40/storage/tokudb/scripts/make.mysql.bash --- mariadb-5.5-5.5.39/storage/tokudb/scripts/make.mysql.bash 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/make.mysql.bash 2014-10-08 13:19:52.000000000 +0000 @@ -52,7 +52,7 @@ mysql_tree= tokudbengine_tree= ftindex_tree= -jemalloc_version=3.3.0 +jemalloc_version=3.6.0 jemalloc_tree= backup_tree= diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/make.mysql.debug.env.bash mariadb-5.5-5.5.40/storage/tokudb/scripts/make.mysql.debug.env.bash --- mariadb-5.5-5.5.39/storage/tokudb/scripts/make.mysql.debug.env.bash 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/make.mysql.debug.env.bash 2014-10-08 13:19:52.000000000 +0000 @@ -57,7 +57,7 @@ mysql=mysql-5.5 mysql_tree=mysql-5.5.35 jemalloc=jemalloc -jemalloc_tree=3.3.1 +jemalloc_tree=3.6.0 tokudbengine=tokudb-engine tokudbengine_tree=master ftindex=ft-index diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/nightly.mysql.build.and.test.bash mariadb-5.5-5.5.40/storage/tokudb/scripts/nightly.mysql.build.and.test.bash --- mariadb-5.5-5.5.39/storage/tokudb/scripts/nightly.mysql.build.and.test.bash 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/nightly.mysql.build.and.test.bash 1970-01-01 00:00:00.000000000 +0000 @@ -1,58 +0,0 @@ -#!/usr/bin/env bash - -function usage() { - echo "run nightly mysql and fractal tree regressions" - echo "uses gearman to schedule jobs onto test machines" -} - -# generate a script that makes a mysql release and run tests on it -function make_and_test_mysql() { - echo $(date) $* >>$nightlytrace 2>&1 - echo "bash -x \$HOME/github/tokudb-engine/scripts/tokutek.make.mysql.bash $* >>$mysqltrace 2>&1; \ - buildexitcode=\$?; \ - echo \$(date) \$HOME/github/tokudb-engine/scripts/tokutek.make.mysql.bash -$* \$buildexitcode 
>>$mysqltrace; \ - if [ \$buildexitcode -eq 0 ] ; then \$HOME/bin/test.mysql.bash $* >>/tmp/mysql.test.trace 2>&1; fi" \ - | $gearmandir/bin/gearman -b -f mysql-build-$system-$arch -h $gearmandhost -p 4730 >>$nightlytrace 2>&1 -} - -# make a mysql release -function make_mysql() { - echo $(date) $* >>$nightlytrace 2>&1 - echo "\$HOME/github/tokudb-engine/scripts/tokutek.make.mysql.bash $* >>$mysqltrace 2>&1" | $gearmandir/bin/gearman -b -f mysql-build-$system-$arch -h $gearmandhost -p 4730 >>$nightlytrace 2>&1 -} - -# setup the PATH since cron gives us a minimal PATH -PATH=$HOME/bin:$HOME/usr/local/bin:/usr/local/bin:$PATH -source /etc/profile - -github_token= -gearmandhost=localhost -gearmandir=/usr/local/gearmand-1.1.6 -system=$(uname -s | tr '[:upper:]' '[:lower:]') -arch=$(uname -m | tr '[:upper:]' '[:lower:]') -now_ts=$(date +%s) -cc=gcc -cxx=g++ - -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1; - fi -done - -nightlytrace=/tmp/$(whoami).nightly.trace -mysqltrace=/tmp/$(whoami).mysql.build.trace.$now_ts - -make_and_test_mysql --mysqlbuild=mysql-5.6.16-tokudb-${now_ts}-debug-e-${system}-${arch} --cc=$cc --cxx=$cxx --github_token=$github_token -make_and_test_mysql --mysqlbuild=mysql-5.6.16-tokudb-${now_ts}-e-${system}-${arch} --cc=$cc --cxx=$cxx --github_token=$github_token --tests=run.mysql.tests.bash:run.sql.bench.bash - -make_and_test_mysql --mysqlbuild=mysql-5.5.36-tokudb-${now_ts}-debug-e-${system}-${arch} --cc=$cc --cxx=$cxx --github_token=$github_token -make_and_test_mysql --mysqlbuild=mysql-5.5.36-tokudb-${now_ts}-e-${system}-${arch} --cc=$cc --cxx=$cxx --github_token=$github_token --tests=run.mysql.tests.bash:run.sql.bench.bash - -make_and_test_mysql --mysqlbuild=mariadb-5.5.35-tokudb-${now_ts}-debug-e-${system}-${arch} --cc=$cc --cxx=$cxx --github_token=$github_token -make_and_test_mysql --mysqlbuild=mariadb-5.5.35-tokudb-${now_ts}-e-${system}-${arch} 
--cc=$cc --cxx=$cxx --github_token=$github_token --tests=run.mysql.tests.bash:run.sql.bench.bash - -exit 0 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/nightly.mysql.build.and.test.my.cnf mariadb-5.5-5.5.40/storage/tokudb/scripts/nightly.mysql.build.and.test.my.cnf --- mariadb-5.5-5.5.39/storage/tokudb/scripts/nightly.mysql.build.and.test.my.cnf 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/nightly.mysql.build.and.test.my.cnf 1970-01-01 00:00:00.000000000 +0000 @@ -1,7 +0,0 @@ -[mysqld] -tmpdir=/data/mysql/tmp -max_connections=1024 -table_open_cache=1024 -loose_tokudb_cache_size=8G -loose_tokudb_directio=1 - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/run.atc.ontime.bash mariadb-5.5-5.5.40/storage/tokudb/scripts/run.atc.ontime.bash --- mariadb-5.5-5.5.39/storage/tokudb/scripts/run.atc.ontime.bash 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/run.atc.ontime.bash 1970-01-01 00:00:00.000000000 +0000 @@ -1,267 +0,0 @@ -#!/usr/bin/env bash - -function usage() { - echo "run the atc ontime load and run" - echo "--mysqlbuild=$mysqlbuild" - echo "[--commit=$commit]" - echo "[--dbname=$dbname]" - echo "[--load=$load] [--check=$check] [--run=$run]" - echo "[--engine=$engine]" - echo "[--tokudb_load_save_space=$tokudb_load_save_space] [--tokudb_row_format=$tokudb_row_format] [--tokudb_loader_memory_size=$tokudb_loader_memory_size]" -} - -function retry() { - local cmd - local retries - local exitcode - cmd=$* - let retries=0 - while [ $retries -le 10 ] ; do - echo `date` $cmd - bash -c "$cmd" - exitcode=$? 
- echo `date` $cmd $exitcode $retries - let retries=retries+1 - if [ $exitcode -eq 0 ] ; then break; fi - sleep 10 - done - test $exitcode = 0 -} - -mysqlbuild= -commit=0 -mysqlserver=`hostname` -mysqluser=`whoami` -mysqlsocket=/tmp/mysql.sock -svnserver=https://svn.tokutek.com/tokudb -basedir=$HOME/svn.build -builddir=$basedir/mysql.build -dbname=atc -tblname=ontime -load=1 -check=1 -run=1 -engine=tokudb -tokudb_load_save_space=0 -tokudb_row_format= -tokudb_loader_memory_size= -verbose=0 -svn_server=https://svn.tokutek.com/tokudb -svn_branch=. -svn_revision=HEAD - -# parse the command line -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1 - fi -done - -if [[ $mysqlbuild =~ (.*)-(tokudb\-.*)-(linux)-(x86_64) ]] ; then - mysql=${BASH_REMATCH[1]} - tokudb=${BASH_REMATCH[2]} - system=${BASH_REMATCH[3]} - arch=${BASH_REMATCH[4]} -else - exit 1 -fi - -if [ -d /usr/local/mysql/bin ] ; then - export PATH=/usr/local/mysql/bin:$PATH -fi - -if [ -d /usr/local/mysql/lib/mysql ] ; then - export LD_LIBRARY_PATH=/usr/local/mysql/lib/mysql:$PATH -fi - -# goto the base directory -if [ ! -d $basedir ] ; then mkdir $basedir; fi - -pushd $basedir - -# update the build directory -if [ ! -d $builddir ] ; then mkdir $builddir; fi - -date=`date +%Y%m%d` -testresultsdir=$builddir/$date -pushd $builddir - while [ ! -d $date ] ; do - svn mkdir $svn_server/mysql.build/$date -m "" - svn checkout $svn_server/mysql.build/$date - if [ $? 
-ne 0 ] ; then rm -rf $date; fi - done -popd - -if [ $dbname = "atc" -a $engine != "tokudb" ] ; then dbname="atc_$engine"; fi - -runfile=$testresultsdir/$dbname-$tblname-$mysqlbuild-$mysqlserver -if [ $tokudb_load_save_space != 0 ] ; then runfile=$runfile-compress; fi -if [ "$tokudb_row_format" != "" ] ; then runfile=$runfile-$tokudb_row_format; fi -if [ "$tokudb_loader_memory_size" != "" ] ; then runfile=$runfile-$tokudb_loader_memory_size; fi -rm -rf $runfile - -testresult="PASS" - -# maybe get the atc data from s3 -if [ $testresult = "PASS" ] ; then - f=atc_On_Time_Performance.mysql.csv - if [ ! -f $f ] ; then - f=$f.gz - if [ ! -f $f ] ; then - echo `date` s3get --bundle tokutek-mysql-data $f >>$runfile 2>&1 - s3get --verbose --bundle tokutek-mysql-data $f >>$runfile 2>&1 - exitcode=$? - echo `date` s3get --bundle tokutek-mysql-data $f $exitcode >>$runfile 2>&1 - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - if [ $testresult = "PASS" ] ; then - echo `date` gunzip $f >>$runfile 2>&1 - gunzip $f - exitcode=$? - echo `date` gunzip $f $exitcode >>$runfile 2>&1 - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - fi - fi - fi -fi - -# checkout the atc test from svn -atc=atc-$mysqlbuild -if [ $testresult = "PASS" ] ; then - if [ -d atc-$mysqlbuild ] ; then rm -rf atc-$mysqlbuild; fi - - retry svn export -r $svn_revision $svn_server/$svn_branch/mysql/tests/atc atc-$mysqlbuild - exitcode=$? - echo `date` svn export -r $svn_revision $svn_server/$svn_branch/mysql/tests/atc $exitcode >>$runfile 2>&1 - if [ $exitcode != 0 ] ; then - retry svn export -r $svn_revision $svn_server/mysql/tests/atc atc-$mysqlbuild - exitcode=$? 
- echo `date` svn export -r $svn_revision $svn_server/mysql/tests/atc $exitcode >>$runfile 2>&1 - fi - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi -fi - -# create the database -if [ $load -ne 0 -a $testresult = "PASS" ] ; then - echo `date` drop database if exists $dbname >>$runfile - mysql -S $mysqlsocket -u $mysqluser -e "drop database if exists $dbname" >>$runfile 2>&1 - exitcode=$? - echo `date` drop database if exists $dbname $exitcode>>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - echo `date` create database $dbname >>$runfile - mysql -S $mysqlsocket -u $mysqluser -e "create database $dbname" >>$runfile 2>&1 - exitcode=$? - echo `date` create database $dbname $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi -fi - -# create the table -if [ $load -ne 0 -a $testresult = "PASS" ] ; then - echo `date` create table $dbname.$tblname >>$runfile - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "source $atc/atc_ontime_create_covered.sql" >>$runfile 2>&1 - exitcode=$? - echo `date` create table $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi -fi - -if [ $load -ne 0 -a $testresult = "PASS" -a "$tokudb_row_format" != "" ] ; then - echo `date` create table $dbname.$tblname >>$runfile - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "alter table $tblname row_format=$tokudb_row_format" >>$runfile 2>&1 - exitcode=$? - echo `date` create table $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi -fi - -if [ $load -ne 0 -a $testresult = "PASS" -a $engine != "tokudb" ] ; then - echo `date` alter table $engine >>$runfile - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "alter table $tblname engine=$engine" >>$runfile 2>&1 - exitcode=$? 
- echo `date` alter table $engine $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi -fi - -if [ $testresult = "PASS" ] ; then - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "show create table $tblname" >>$runfile 2>&1 -fi - -if [ $testresult = "PASS" ] ; then - let default_loader_memory_size="$(mysql -S $mysqlsocket -u $mysqluser -e'select @@tokudb_loader_memory_size' --silent --skip-column-names)" - exitcode=$? - echo `date` get tokudb_loader_memory_size $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - if [ "$tokudb_loader_memory_size" = "" ] ; then tokudb_loader_memory_size=$default_loader_memory_size; fi -fi - -# load the data -if [ $load -ne 0 -a $testresult = "PASS" ] ; then - echo `date` load data >>$runfile - start=$(date +%s) - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "set tokudb_loader_memory_size=$tokudb_loader_memory_size;\ - set tokudb_load_save_space=$tokudb_load_save_space; load data infile '$basedir/atc_On_Time_Performance.mysql.csv' into table $tblname" >>$runfile 2>&1 - exitcode=$? - let loadtime=$(date +%s)-$start - echo `date` load data loadtime=$loadtime $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi -fi - -# check the tables -if [ $check -ne 0 -a $testresult = "PASS" ] ; then - echo `date` check table $tblname >> $runfile - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "check table $tblname" >>$runfile 2>&1 - exitcode=$? - echo `date` check table $tblname $exitcode >> $runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi -fi - -# run the queries -if [ $run -ne 0 -a $testresult = "PASS" ] ; then - pushd $atc - for qfile in q*.sql ; do - if [[ $qfile =~ q(.*)\.sql ]] ; then - qname=${BASH_REMATCH[1]} - q=`cat $qfile` - qrun=q${qname}.run - - echo `date` explain $qfile >>$runfile - if [ $verbose -ne 0 ] ; then echo explain $q >>$runfile; fi - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "explain $q" >$qrun - exitcode=$? 
- echo `date` explain $qfile $exitcode >>$runfile - if [ $verbose -ne 0 ] ; then cat $qrun >>$runfile; fi - - echo `date` $qfile >>$runfile - start=$(date +%s) - if [ $verbose -ne 0 ] ; then echo $q >>$runfile; fi - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "$q" >$qrun - exitcode=$? - let qtime=$(date +%s)-$start - echo `date` $qfile qtime=$qtime $exitcode >>$runfile - if [ $verbose -ne 0 ] ; then cat $qrun >>$runfile; fi - if [ $exitcode -ne 0 ] ; then - testresult="FAIL" - else - if [ -f q${qname}.result ] ; then - diff $qrun q${qname}.result >>$runfile - exitcode=$? - if [ $exitcode -ne 0 ] ; then - testresult="FAIL" - fi - fi - fi - fi - done - popd -fi - -# commit results -if [ $commit != 0 ] ; then - svn add $runfile - retry svn commit -m \"$testresult $dbname $tblname $mysqlbuild $mysqlserver\" $runfile -fi - -popd - -if [ $testresult = "PASS" ] ; then exitcode=0; else exitcode=1; fi -exit $exitcode diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/run.iibench.bash mariadb-5.5-5.5.40/storage/tokudb/scripts/run.iibench.bash --- mariadb-5.5-5.5.39/storage/tokudb/scripts/run.iibench.bash 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/run.iibench.bash 1970-01-01 00:00:00.000000000 +0000 @@ -1,172 +0,0 @@ -#!/usr/bin/env bash - -function usage() { - echo "run iibench" - echo "--mysqlbuild=$mysqlbuild" - echo "[--max_row=$max_rows] [--rows_per_report=$rows_per_report] [--insert_only=$insert_only] [ --check=$check]" - echo "[--commit=$commit]" -} - -function retry() { - local cmd=$* - local retries - local exitcode - let retries=0 - while [ $retries -le 10 ] ; do - echo `date` $cmd - bash -c "$cmd" - exitcode=$? - echo `date` $cmd $exitcode $retries - let retries=retries+1 - if [ $exitcode -eq 0 ] ; then break; fi - sleep 10 - done - test $exitcode = 0 -} - -mysqlbuild= -commit=0 -check=1 -mysqlserver=`hostname` -mysqluser=`whoami` -mysqlsocket=/tmp/mysql.sock -svn_server=https://svn.tokutek.com/tokudb -svn_branch=. 
-svn_revision=HEAD -basedir=$HOME/svn.build -builddir=$basedir/mysql.build -system=`uname -s | tr [:upper:] [:lower:]` -instancetype= -testinstance= -arch=`uname -m | tr [:upper:] [:lower:]` -tracefile=/tmp/run.iibench.trace -cmd=iibench -dbname=$cmd -engine=tokudb -tblname=testit -max_rows=50000000 -rows_per_report=1000000 -insert_only=1 - -# parse the command line -while [ $# -gt 0 ] ; do - arg=$1; shift - if [ $arg = "--replace_into" ] ; then - cmd=replace_into - elif [ $arg = "--insert_ignore" ] ; then - cmd=insert_ignore - elif [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1 - fi -done - -if [[ $mysqlbuild =~ (.*)-(tokudb-.*)-(linux)-(x86_64) ]] ; then - mysql=${BASH_REMATCH[1]} - tokudb=${BASH_REMATCH[2]} - system=${BASH_REMATCH[3]} - arch=${BASH_REMATCH[4]} -else - exit 1 -fi - -# setup the dbname -if [ $dbname = "iibench" ] ; then dbname=${cmd}_${engine}; fi -if [ "$testinstance" != "" ] ; then dbname=${dbname}_${testinstance}; fi - -if [ -d /usr/local/mysql ] ; then - export PATH=/usr/local/mysql/bin:$PATH -fi - -if [ -d /usr/local/mysql/lib/mysql ] ; then - export LD_LIBRARY_PATH=/usr/local/mysql/lib/mysql:$PATH -fi - -# goto the base directory -if [ ! -d $basedir ] ; then mkdir $basedir; fi -pushd $basedir - -# update the build directory -if [ $commit != 0 ] ; then - if [ ! -d $builddir ] ; then mkdir $builddir; fi - - date=`date +%Y%m%d` - testresultsdir=$builddir/$date - pushd $builddir - while [ ! -d $date ] ; do - svn mkdir $svn_server/mysql.build/$date -m "" - svn checkout -q $svn_server/mysql.build/$date - if [ $? -ne 0 ] ; then rm -rf $date; fi - done - popd -else - testresultsdir=$PWD -fi - -# checkout the code -testdir=iibench-$mysqlbuild-$mysqlserver -if [ "$testinstance" != "" ] ; then testdir=$testdir-$testinstance; fi -rm -rf $testdir -retry svn export -q -r $svn_revision $svn_server/$svn_branch/iibench $testdir -exitcode=$? 
-if [ $exitcode != 0 ] ; then - retry svn export -q -r $svn_revision $svn_server/iibench $testdir - exitcode=$? -fi -if [ $exitcode != 0 ] ; then exit 1; fi - -# create the iibench database -mysql -S $mysqlsocket -u root -e "grant all on *.* to '$mysqluser'@'$mysqlserver'" -exitcode=$? -if [ $exitcode != 0 ] ; then exit 1; fi - -mysql -S $mysqlsocket -u $mysqluser -e "drop database if exists $dbname" -exitcode=$? -if [ $exitcode != 0 ] ; then exit 1; fi - -mysql -S $mysqlsocket -u $mysqluser -e "create database $dbname" -exitcode=$? -if [ $exitcode != 0 ] ; then exit 1; fi - -# run -if [ $cmd = "iibench" -a $insert_only != 0 ] ; then - runfile=$testresultsdir/$dbname-insert_only-$max_rows-$mysqlbuild-$mysqlserver -else - runfile=$testresultsdir/$dbname-$max_rows-$mysqlbuild-$mysqlserver -fi -if [ "$instancetype" != "" ] ; then runfile=$runfile-$instancetype; fi -testresult="PASS" - -pushd $testdir/py - echo `date` $cmd start $mysql $svn_branch $svn_revision $max_rows $rows_per_report >>$runfile - runcmd=$cmd.py - args="--db_user=$mysqluser --db_name=$dbname --db_socket=$mysqlsocket --engine=$engine --setup --max_rows=$max_rows --rows_per_report=$rows_per_report --table_name=$tblname" - if [ $cmd = "iibench" -a $insert_only != 0 ] ; then runcmd="$runcmd --insert_only"; fi - if [ $cmd = "replace_into" ] ; then runcmd="replace_into.py --use_replace_into"; fi - if [ $cmd = "insert_ignore" ] ; then runcmd="replace_into.py"; fi - ./$runcmd $args >>$runfile 2>&1 - exitcode=$? - echo `date` $cmd complete $exitcode >>$runfile - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi -popd - -if [ $check != 0 -a $testresult = "PASS" ] ; then - echo `date` check table $tblname >>$runfile - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "check table $tblname" >>$runfile 2>&1 - exitcode=$? 
- echo `date` check table $tblname $exitcode >>$runfile - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi -fi - -# commit results -if [ $commit != 0 ] ; then - if [ $cmd = "iibench" -a $insert_only != 0 ] ; then cmd="$cmd insert_only"; fi - svn add $runfile - retry svn commit -m \"$testresult $cmd $max_rows $dbname $mysqlbuild $mysqlserver `hostname`\" $runfile -fi - -popd - -if [ $testresult = "PASS" ] ; then exitcode=0; else exitcode=1; fi -exit $exitcode diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/run.mysql.tests.bash mariadb-5.5-5.5.40/storage/tokudb/scripts/run.mysql.tests.bash --- mariadb-5.5-5.5.39/storage/tokudb/scripts/run.mysql.tests.bash 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/run.mysql.tests.bash 1970-01-01 00:00:00.000000000 +0000 @@ -1,196 +0,0 @@ -#!/usr/bin/env bash -# ident 4, no tabs - -function usage() { - echo "run the tokudb mysql tests" - echo "--mysqlbuild=$mysqlbuild" - echo "--commit=$commit" - echo "--tests=$tests --engine=$engine" -} - -function retry() { - local cmd - local retries - local exitcode - cmd=$* - let retries=0 - while [ $retries -le 10 ] ; do - echo `date` $cmd - bash -c "$cmd" - exitcode=$? 
- echo `date` $cmd $exitcode $retries - let retries=retries+1 - if [ $exitcode -eq 0 ] ; then break; fi - sleep 10 - done - test $exitcode = 0 -} - -svnserver=https://svn.tokutek.com/tokudb -basedir=$HOME/svn.build -builddir=$basedir/mysql.build -mysqlbuild= -mysql_basedir=/usr/local/mysql -mysqlserver=`hostname` -commit=0 -tests="*" -engine="" -parallel=auto - -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1 - fi -done - -if [[ $mysqlbuild =~ (.*)-(tokudb\-.*)-(linux)-(x86_64) ]] ; then - mysql=${BASH_REMATCH[1]} - tokudb=${BASH_REMATCH[2]} - system=${BASH_REMATCH[3]} - arch=${BASH_REMATCH[4]} -else - echo $mysqlbuild is not a tokudb build -fi - -if [ -d $mysql_basedir/lib/mysql ] ; then - export LD_LIBRARY_PATH=$mysql_basedir/lib/mysql -fi - -# update the build directory -if [ ! -d $basedir ] ; then mkdir $basedir ; fi - -pushd $basedir -if [ $? != 0 ] ; then exit 1; fi - -if [ ! -d $builddir ] ; then mkdir $builddir; fi - -# make the subversion directory that will hold the test results -date=`date +%Y%m%d` -testresultsdir=$builddir/$date -pushd $builddir -if [ $? = 0 ] ; then - while [ ! -d $date ] ; do - svn mkdir $svnserver/mysql.build/$date -m "" - svn checkout -q $svnserver/mysql.build/$date - if [ $? -ne 0 ] ; then rm -rf $date; fi - done - popd -fi - -# generate a trace file name -if [ -z $engine ] ; then - tracefile=mysql-test-$mysqlbuild-$mysqlserver -else - tracefile=mysql-engine-$engine-$mysqlbuild-$mysqlserver -fi -echo >$testresultsdir/$tracefile - -if [ -z $engine ] ; then - - # run all test suites including main - teststorun_original="main" - teststorun_tokudb="" - pushd $mysql_basedir/mysql-test/suite - if [ $? 
= 0 ] ; then - for t in $tests ; do - if [[ $t =~ .*\.xfail$ ]] ; then continue; fi - if [ $t = "perfschema_stress" ] ; then continue; fi - if [ $t = "large_tests" ] ; then continue; fi - if [ $t = "pbxt" ] ; then continue; fi - if [ -d $t/t ] ; then - if [[ $t =~ tokudb* ]] ; then - if [ -z $teststorun_tokudb ] ; then teststorun_tokudb="$t" ; else teststorun_tokudb="$teststorun_tokudb,$t"; fi - else - teststorun_original="$teststorun_original,$t"; - fi - fi - done - popd - fi - - # run the tests - pushd $mysql_basedir/mysql-test - if [ $? = 0 ] ; then - if [[ $mysqlbuild =~ tokudb ]] ; then - # run standard tests - if [[ $mysqlbuild =~ 5\\.5 ]] ; then - ./mysql-test-run.pl --suite=$teststorun_original --big-test --max-test-fail=0 --force --retry=1 --testcase-timeout=60 \ - --mysqld=--default-storage-engine=myisam --mysqld=--sql-mode="" \ - --mysqld=--loose-tokudb_debug=3072 \ - --parallel=$parallel >>$testresultsdir/$tracefile 2>&1 - else - ./mysql-test-run.pl --suite=$teststorun_original --big-test --max-test-fail=0 --force --retry=1 --testcase-timeout=60 \ - --mysqld=--loose-tokudb_debug=3072 \ - --parallel=$parallel >>$testresultsdir/$tracefile 2>&1 - fi - - # run tokudb tests - ./mysql-test-run.pl --suite=$teststorun_tokudb --big-test --max-test-fail=0 --force --retry=1 --testcase-timeout=60 \ - --mysqld=--loose-tokudb_debug=3072 \ - --parallel=$parallel >>$testresultsdir/$tracefile 2>&1 - # setup for engines tests - engine="tokudb" - else - ./mysql-test-run.pl --suite=$teststorun_original --big-test --max-test-fail=0 --force --retry=1 --testcase-timeout=60 \ - --parallel=$parallel >>$testresultsdir/$tracefile 2>&1 - fi - popd - fi -fi - -if [ ! -z $engine ] ; then - teststorun="engines/funcs,engines/iuds" - pushd $mysql_basedir/mysql-test - if [ $? 
= 0 ] ; then - if [[ $mysqlbuild =~ 5\\.6 ]] ; then - ./mysql-test-run.pl --suite=$teststorun --force --retry-failure=0 --max-test-fail=0 --nowarnings --testcase-timeout=60 \ - --mysqld=--default-storage-engine=$engine --mysqld=--default-tmp-storage-engine=$engine \ - --parallel=$parallel >>$testresultsdir/$tracefile 2>&1 - else - ./mysql-test-run.pl --suite=$teststorun --force --retry-failure=0 --max-test-fail=0 --nowarnings --testcase-timeout=60 \ - --mysqld=--default-storage-engine=$engine \ - --parallel=$parallel >>$testresultsdir/$tracefile 2>&1 - fi - popd - fi -fi - -# summarize the results -let tests_failed=0 -let tests_passed=0 -while read line ; do - if [[ "$line" =~ (Completed|Timeout):\ Failed\ ([0-9]+)\/([0-9]+) ]] ; then - # failed[2]/total[3] - let tests_failed=tests_failed+${BASH_REMATCH[2]} - let tests_passed=tests_passed+${BASH_REMATCH[3]}-${BASH_REMATCH[2]} - elif [[ "$line" =~ Completed:\ All\ ([0-9]+)\ tests ]] ; then - # passed[1] - let tests_passed=tests_passed+${BASH_REMATCH[1]} - fi -done <$testresultsdir/$tracefile - -# commit the results -if [ $tests_failed = 0 ] ; then - testresult="PASS=$tests_passed" -else - testresult="FAIL=$tests_failed PASS=$tests_passed" -fi -pushd $testresultsdir -if [ $? 
= 0 ] ; then - if [ $commit != 0 ] ; then - svn add $tracefile - if [[ $tracefile =~ "mysql-test" ]] ; then test=mysql-test; else test=mysql-engine-$engine; fi - retry svn commit -m \"$testresult $test $mysqlbuild $mysqlserver\" $tracefile - fi - popd -fi - -popd # $basedir - -if [[ $testresult =~ "PASS" ]] ; then exitcode=0; else exitcode=1; fi -exit $exitcode - - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/run.sql.bench.bash mariadb-5.5-5.5.40/storage/tokudb/scripts/run.sql.bench.bash --- mariadb-5.5-5.5.39/storage/tokudb/scripts/run.sql.bench.bash 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/run.sql.bench.bash 1970-01-01 00:00:00.000000000 +0000 @@ -1,153 +0,0 @@ -#!/usr/bin/env bash - -function usage() { - echo "run the sql bench tests" - echo "--mysqlbuild=$mysqlbuild" - echo "--commit=$commit" -} - -function retry() { - local cmd - local retries - local exitcode - cmd=$* - let retries=0 - while [ $retries -le 10 ] ; do - echo `date` $cmd - bash -c "$cmd" - exitcode=$? - echo `date` $cmd $exitcode $retries - let retries=retries+1 - if [ $exitcode -eq 0 ] ; then break; fi - sleep 10 - done - test $exitcode = 0 -} - -svnserver=https://svn.tokutek.com/tokudb -basedir=$HOME/svn.build -builddir=$basedir/mysql.build -mysqlbuild= -mysqlserver=`hostname` -commit=0 -engine=tokudb -socket=/tmp/mysql.sock -system=`uname -s | tr [:upper:] [:lower:]` -arch=`uname -m | tr [:upper:] [:lower:]` - -# parse the command line -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1 - fi -done - -if [[ $mysqlbuild =~ (.*)-(tokudb-.*)-(linux)-(x86_64) ]] ; then - mysql=${BASH_REMATCH[1]} - tokudb=${BASH_REMATCH[2]} - system=${BASH_REMATCH[3]} - arch=${BASH_REMATCH[4]} -else - echo $mysqlbuild is not a tokudb build -fi - -# goto the base directory -if [ ! -d $basedir ] ; then mkdir $basedir; fi -pushd $basedir - -# update the build directory -if [ ! 
-d $builddir ] ; then mkdir $builddir; fi - -date=`date +%Y%m%d` -testresultsdir=$builddir/$date -pushd $builddir -while [ ! -d $date ] ; do - svn mkdir $svnserver/mysql.build/$date -m "" - svn checkout -q $svnserver/mysql.build/$date - if [ $? -ne 0 ] ; then rm -rf $date; fi -done -popd - -# run the tests -pushd /usr/local/mysql/sql-bench - -tracefile=sql-bench-$engine-$mysqlbuild-$mysqlserver.trace -summaryfile=sql-bench-$engine-$mysqlbuild-$mysqlserver.summary - -function mydate() { - date +"%Y%m%d %H:%M:%S" -} - -function runtests() { - testargs=$* - for testname in test-* ; do - chmod +x ./$testname - echo `mydate` $testname $testargs - ./$testname $testargs - exitcode=$? - echo `mydate` - if [ $exitcode != 0 ] ; then - # assume that the test failure due to a crash. allow mysqld to restart. - sleep 60 - fi - done -} - ->$testresultsdir/$tracefile - -runtests --create-options=engine=$engine --socket=$socket --verbose --small-test >>$testresultsdir/$tracefile 2>&1 -runtests --create-options=engine=$engine --socket=$socket --verbose --small-test --fast >>$testresultsdir/$tracefile 2>&1 -runtests --create-options=engine=$engine --socket=$socket --verbose >>$testresultsdir/$tracefile 2>&1 -runtests --create-options=engine=$engine --socket=$socket --verbose --fast >>$testresultsdir/$tracefile 2>&1 -runtests --create-options=engine=$engine --socket=$socket --verbose --fast --fast-insert >>$testresultsdir/$tracefile 2>&1 -runtests --create-options=engine=$engine --socket=$socket --verbose --fast --lock-tables >>$testresultsdir/$tracefile 2>&1 - -popd - -# summarize the results -while read l ; do - if [[ $l =~ ^([0-9]{8}\ [0-9]{2}:[0-9]{2}:[0-9]{2})(.*)$ ]] ; then - t=${BASH_REMATCH[1]} - cmd=${BASH_REMATCH[2]} - if [ -z "$cmd" ] ; then - let duration=$(date -d "$t" +%s)-$(date -d "$tlast" +%s) - printf "%4s %s %8d %s\n" "$status" "$tlast" "$duration" "$cmdlast" - else - cmdlast=$cmd - tlast=$t - status=PASS - fi - else - if [[ $l =~ Got\ error|Died ]] ; then - 
status=FAIL - fi - fi -done <$testresultsdir/$tracefile >$testresultsdir/$summaryfile - -testresult="" -pf=`mktemp` -egrep "^PASS" $testresultsdir/$summaryfile >$pf 2>&1 -if [ $? -eq 0 ] ; then testresult="PASS=`cat $pf | wc -l` $testresult"; fi -egrep "^FAIL" $testresultsdir/$summaryfile >$pf 2>&1 -if [ $? -eq 0 ] ; then testresult="FAIL=`cat $pf | wc -l` $testresult"; fi -rm $pf -if [ "$testresult" = "" ] ; then testresult="?"; fi - -# commit the results -pushd $testresultsdir -if [ $commit != 0 ] ; then - svn add $tracefile $summaryfile - retry svn commit -m \"$testresult sql-bench $mysqlbuild $mysqlserver\" $tracefile $summaryfile -fi -popd - -popd - -if [[ $testresult =~ "PASS" ]] ; then exitcode=0; else exitcode=1; fi -exit $exitcode - - - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/run.tpch.bash mariadb-5.5-5.5.40/storage/tokudb/scripts/run.tpch.bash --- mariadb-5.5-5.5.39/storage/tokudb/scripts/run.tpch.bash 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/run.tpch.bash 1970-01-01 00:00:00.000000000 +0000 @@ -1,342 +0,0 @@ -#!/usr/bin/env bash - -function usage() { - echo "run the TPCH load and compare test" - echo "[--SCALE=$SCALE] [--ENGINE=$ENGINE]" - echo "[--dbgen=$dbgen] [--load=$load] [--check=$check] [--compare=$compare] [--query=$query]" - echo "[--mysqlbuild=$mysqlbuild] [--commit=$commit]" - echo "[--testinstance=$testinstance]" - echo "[--tokudb_load_save_space=$tokudb_load_save_space] [--tokudb_row_format=$tokudb_row_format] [--tokudb_loader_memory_size=$tokudb_loader_memory_size]" -} - -function retry() { - local cmd - local retries - local exitcode - cmd=$* - let retries=0 - while [ $retries -le 10 ] ; do - echo `date` $cmd - bash -c "$cmd" - exitcode=$? 
- echo `date` $cmd $exitcode $retries - let retries=retries+1 - if [ $exitcode -eq 0 ] ; then break; fi - sleep 1 - done - test $exitcode = 0 -} - -SCALE=1 -ENGINE=tokudb -TABLES="part partsupp customer lineitem nation orders region supplier" -dbgen=1 -load=1 -compare=1 -query=0 -check=1 -datadir=/usr/local/mysql/data -mysqlbuild= -commit=0 -mysqlserver=`hostname` -mysqluser=`whoami` -mysqlsocket=/tmp/mysql.sock -basedir=$HOME/svn.build -builddir=$basedir/mysql.build -system=`uname -s | tr [:upper:] [:lower:]` -arch=`uname -m | tr [:upper:] [:lower:]` -testinstance= -tokudb_load_save_space=0 -tokudb_row_format= -tokudb_loader_memory_size= -svn_server=https://svn.tokutek.com/tokudb -svn_branch=. -svn_revision=HEAD - -# parse the command line -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1 - fi -done - -if [[ $mysqlbuild =~ (.*)-(tokudb\-.*)-(linux)-(x86_64) ]] ; then - mysql=${BASH_REMATCH[1]} - tokudb=${BASH_REMATCH[2]} - system=${BASH_REMATCH[3]} - arch=${BASH_REMATCH[4]} -else - exit 1 -fi - -dbname=tpch${SCALE}G_${ENGINE} -if [ "$testinstance" != "" ] ; then dbname=${dbname}_${testinstance}; fi -tpchdir=$basedir/tpch${SCALE}G - -if [ -d /usr/local/mysql ] ; then - export PATH=/usr/local/mysql/bin:$PATH -fi - -if [ -d /usr/local/mysql/lib/mysql ] ; then - export LD_LIBRARY_PATH=/usr/local/mysql/lib/mysql:$PATH -fi - -# goto the base directory -if [ ! -d $basedir ] ; then mkdir $basedir; fi - -pushd $basedir - -# update the build directory -if [ $commit != 0 ] ; then - if [ ! -d $builddir ] ; then mkdir $builddir; fi - - date=`date +%Y%m%d` - testresultsdir=$builddir/$date - pushd $builddir - while [ ! -d $date ] ; do - svn mkdir $svn_server/mysql.build/$date -m "" - svn checkout -q $svn_server/mysql.build/$date - if [ $? 
-ne 0 ] ; then rm -rf $date; fi - done - popd -else - testresultsdir=$PWD -fi - -runfile=$testresultsdir/$dbname -if [ $tokudb_load_save_space != 0 ] ; then runfile=$runfile-compress; fi -if [ "$tokudb_row_format" != "" ] ; then runfile=$runfile-$tokudb_row_format; fi -if [ "$tokudb_loader_memory_size" != "" ] ; then runfile=$runfile-$tokudb_loader_memory_size; fi -runfile=$runfile-$mysqlbuild-$mysqlserver -rm -rf $runfile - -testresult="PASS" - -# maybe get the tpch data from AWS S3 -if [ $compare != 0 ] && [ ! -d $tpchdir ] ; then - tpchtarball=tpch${SCALE}G_data_dump.tar - if [ ! -f $tpchtarball ] ; then - echo `date` s3get --bundle tokutek-mysql-data $tpchtarball >>$runfile 2>&1 - s3get --verbose --bundle tokutek-mysql-data $tpchtarball >>$runfile 2>&1 - exitcode=$? - echo `date` s3get --bundle tokutek-mysql-data $tpchtarball $exitcode >>$runfile 2>&1 - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - fi - if [ $testresult = "PASS" ] ; then - tar xf $tpchtarball - exitcode=$? - echo `date` tar xf $tpchtarball $exitcode >>$runfile 2>&1 - if [ $exitcode -ne 0 ] ; then - testresult="FAIL" - else - # gunzip the data files - pushd tpch${SCALE}G/data/tpch${SCALE}G - for f in *.gz ; do - echo `date` gunzip $f >>$runfile 2>&1 - gunzip $f - done - ls -l >>$runfile 2>&1 - popd - fi - fi -fi - -# checkout the tpch scripts -tpchtestdir=tpch-$mysqlbuild -if [ "$testinstance" != "" ] ; then tpchtestdir=${tpchtestdir}_${testinstance}; fi -if [ $testresult = "PASS" ] ; then - rm -rf $tpchtestdir - retry svn export -q -r $svn_revision $svn_server/$svn_branch/tpch $tpchtestdir - exitcode=$? - echo `date` export $svn_server/$svn_branch/tpch $exitcode >>$runfile 2>&1 - if [ $exitcode != 0 ] ; then - retry svn export -q -r $svn_revision $svn_server/tpch $tpchtestdir - exitcode=$? 
- echo `date` export $svn_server/tpch $exitcode >>$runfile 2>&1 - fi - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi -fi - -# generate the tpch data -if [ $dbgen != 0 -a $testresult = "PASS" ] ; then - pushd $tpchtestdir/dbgen - make - exitcode=$? - echo `date` make dbgen $exitcode >>$runfile 2>&1 - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd - if [ $testresult = "PASS" ] ; then - dbgen=0 - mkdir -p tpch${SCALE}G/data/tpch${SCALE}G - pushd tpch${SCALE}G/data/tpch${SCALE}G - if [ ! -f lineitem.tbl ] ; then dbgen=1; fi - popd - if [ $dbgen != 0 ] ; then - pushd $tpchtestdir/dbgen - ./dbgen -fF -s $SCALE - exitcode=$? - echo `date` dbgen -fF -s $SCALE $exitcode >>$runfile 2>&1 - if [ $exitcode != 0 ] ; then - testresult="FAIL" - else - ls -l *.tbl >>$runfile - chmod 0644 *.tbl - ls -l *.tbl >>$runfile - mv *.tbl $basedir/tpch${SCALE}G/data/tpch${SCALE}G - fi - popd - fi - fi -fi - -# create the tpch database -if [ $load != 0 -a $testresult = "PASS" ] ; then - echo `date` drop database if exists $dbname >>$runfile - mysql -S $mysqlsocket -u $mysqluser -e "drop database if exists $dbname" >>$runfile 2>&1 - exitcode=$? - echo `date` drop database if exists $dbname $exitcode>>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - echo `date` create database $dbname >>$runfile - mysql -S $mysqlsocket -u $mysqluser -e "create database $dbname" >>$runfile 2>&1 - exitcode=$? - echo `date` create database $dbname $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi -fi - -# create the tpch tables -if [ $load != 0 -a $testresult = "PASS" ] ; then - echo `date` create table >>$runfile - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "source $basedir/tpch-$mysqlbuild/scripts/${ENGINE}_tpch_create_table.sql" >>$runfile 2>&1 - exitcode=$? 
- echo `date` create table $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi -fi - -# get the current loader memory size -if [ $load != 0 -a $testresult = "PASS" ] ; then - let default_loader_memory_size="$(mysql -S $mysqlsocket -u $mysqluser -e'select @@tokudb_loader_memory_size' --silent --skip-column-names)" - exitcode=$? - echo `date` get tokudb_loader_memory_size $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - if [ "$tokudb_loader_memory_size" = "" ] ; then tokudb_loader_memory_size=$default_loader_memory_size; fi -fi - -# load the data -if [ $load != 0 -a $testresult = "PASS" ] ; then - for tblname in $TABLES ; do - echo `date` load table $tblname >>$runfile - ls -l $tpchdir/data/tpch${SCALE}G/$tblname.tbl >>$runfile - start=$(date +%s) - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "set tokudb_loader_memory_size=$tokudb_loader_memory_size;\ - set tokudb_load_save_space=$tokudb_load_save_space;\ - load data infile '$tpchdir/data/tpch${SCALE}G/$tblname.tbl' into table $tblname fields terminated by '|';" >>$runfile 2>&1 - exitcode=$? - let loadtime=$(date +%s)-$start - echo `date` load table $tblname $exitcode loadtime=$loadtime>>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - done -fi - -if [ $check != 0 -a $testresult = "PASS" ] ; then - for tblname in lineitem ; do - echo `date` add clustering index $tblname >>$runfile - start=$(date +%s) - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "set tokudb_loader_memory_size=$tokudb_loader_memory_size;\ - set tokudb_load_save_space=$tokudb_load_save_space;\ - set tokudb_create_index_online=0;\ - create clustering index i_shipdate on lineitem (l_shipdate);" >>$runfile 2>&1 - exitcode=$? 
- let loadtime=$(date +%s)-$start - echo `date` add clustering index $tblname $exitcode loadtime=$loadtime >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - done -fi - -# check the tables -if [ $check != 0 -a $testresult = "PASS" ] ; then - for tblname in $TABLES ; do - echo `date` check table $tblname >>$runfile - start=$(date +%s) - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "check table $tblname" >>$runfile 2>&1 - exitcode=$? - let checktime=$(date +%s)-$start - echo `date` check table $tblname $exitcode checktime=$checktime >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - done -fi - -if [ $check != 0 -a $testresult = "PASS" ] ; then - for tblname in lineitem ; do - echo `date` drop index $tblname >>$runfile - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "drop index i_shipdate on lineitem" >>$runfile 2>&1 - exitcode=$? - echo `date` drop index $tblname $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi - done -fi - -# compare the data -if [ $compare != 0 -a $testresult = "PASS" ] ; then - if [ -d $tpchdir/dump/tpch${SCALE}G ] ; then - mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "source $basedir/tpch-$mysqlbuild/scripts/dumptpch.sql" >>$runfile 2>&1 - exitcode=$? - echo `date` dump data $exitcode >>$runfile - if [ $exitcode -ne 0 ] ; then - testresult="FAIL" - else - # force the permissions on the dumpdir open - pushd $datadir/$dbname - exitcode=$? - if [ $exitcode != 0 ] ; then - sudo chmod g+rwx $datadir - sudo chmod g+rwx $datadir/$dbname - pushd $datadir/$dbname - exitcode=$? - fi - if [ $exitcode = 0 ] ; then - popd - fi - - # compare the dump files - dumpdir=$datadir/$dbname - comparedir=$tpchdir/dump/tpch${SCALE}G - for f in $dumpdir/dump* ; do - d=`basename $f` - if [ ! -f $comparedir/$d ] && [ -f $comparedir/$d.gz ] ; then - pushd $comparedir; gunzip $d.gz; popd - fi - if [ -f $comparedir/$d ] ; then - diff -q $dumpdir/$d $comparedir/$d - if [ $? 
= 0 ] ; then - result="PASS" - else - result="FAIL" - testresult="FAIL" - fi - else - result="MISSING" - testresult="FAIL" - fi - echo `date` $d $result >>$runfile - done - if [ $testresult = "PASS" ] ; then - # remove the dump files - rm -f $datadir/$dbname/dump* - fi - fi - fi -fi - -# commit results -if [ $commit != 0 ] ; then - svn add $runfile - retry svn commit -m \"$testresult $dbname $mysqlbuild $mysqlserver\" $runfile -fi - -popd - -if [ $testresult = "PASS" ] ; then exitcode=0; else exitcode=1; fi -exit $exitcode diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/setup.mysql.bash mariadb-5.5-5.5.40/storage/tokudb/scripts/setup.mysql.bash --- mariadb-5.5-5.5.39/storage/tokudb/scripts/setup.mysql.bash 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/setup.mysql.bash 1970-01-01 00:00:00.000000000 +0000 @@ -1,231 +0,0 @@ -#!/usr/bin/env bash - -function usage() { - echo "setup.mysql.bash" - echo "--mysqlbuild=$mysqlbuild --shutdown=$shutdown --install=$install --startup=$startup" -} - -function download_file() { - local file=$1 - s3get $s3bucket $file $file -} - -function download_tarball() { - local tarball=$1 - if [ ! -f $tarball ] ; then - download_file $tarball - if [ $? -ne 0 ] ; then test 0 = 1; return; fi - fi - if [ ! -f $tarball.md5 ] ; then - download_file $tarball.md5 - if [ $? -ne 0 ] ; then test 0 = 1; return; fi - fi -} - -function install_tarball() { - local basedir=$1; local tarball=$2 - tar -x -z -f $basedir/$tarball - if [ $? -ne 0 ] ; then test 0 = 1; return; fi -} - -function check_md5() { - local tarball=$1 - md5sum --check $tarball.md5 - if [ $? -ne 0 ] ; then - # support jacksum md5 output which is almost the same as md5sum - diff -b <(cat $tarball.md5) <(md5sum $tarball) - if [ $? 
-ne 0 ] ; then test 0 = 1; return; fi - fi -} - -mysqlbuild= -shutdown=1 -install=1 -startup=1 -s3bucket=tokutek-mysql-build -sleeptime=60 -builtins="mysqlbuild shutdown install startup s3bucket sleeptime" -mysqld_args="--user=mysql --core-file --core-file-size=unlimited" -sudo=/usr/bin/sudo -defaultsfile="" -if [ -f /etc/$(whoami).my.cnf ] ; then - defaultsfile=/etc/$(whoami).my.cnf -fi - -function is_builtin() { - local v=$1; shift - local x - for x in $* ; do - if [ $v = $x ] ; then echo 1; return; fi - done - echo 0 -} - -while [ $# -gt 0 ] ; do - arg=$1; shift - if [ $arg = "--help" -o $arg = "-h" -o $arg = "-?" ] ; then - usage; exit 1 - elif [[ $arg =~ --(.*)=(.*) ]] ; then - r=$(is_builtin ${BASH_REMATCH[1]} $builtins) - if [ $r = 1 ] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - mysqld_args="$mysqld_args $arg" - fi - else - mysqld_args="$mysqld_args $arg" - fi -done - -if [ -d /data/mysql/tmp ] ; then mysqld_args="$mysqld_args --tmpdir=/data/mysql/tmp"; fi - -if [[ $mysqlbuild =~ (.*)-(tokudb\-.*)-(linux)-(x86_64) ]] ; then - mysql=${BASH_REMATCH[1]} - tokudb=${BASH_REMATCH[2]} - system=${BASH_REMATCH[3]} - arch=${BASH_REMATCH[4]} -else - echo $mysqlbuild is not a tokudb build -fi - -if [ ! -d downloads ] ; then mkdir downloads; fi - -pushd downloads -if [ $? != 0 ] ; then exit 1; fi - -basedir=$PWD - -mysqltarball=$mysqlbuild.tar.gz - -# get the tarball -download_tarball $mysqltarball -if [ $? -ne 0 ] ; then exit 1; fi - -# check the md5 sum -check_md5 $mysqltarball -if [ $? -ne 0 ] ; then exit 1; fi - -tokudbtarball="" -if [[ $mysqltarball =~ ^(Percona-Server.*)\.(Linux\.x86_64.*)$ ]] ; then - tar tzf $mysqltarball | egrep ha_tokudb.so >/dev/null 2>&1 - if [ $? -ne 0 ] ; then - tokudbtarball=${BASH_REMATCH[1]}.TokuDB.${BASH_REMATCH[2]} - download_tarball $tokudbtarball - if [ $? -ne 0 ] ; then exit 1; fi - check_md5 $tokudbtarball - if [ $? 
-ne 0 ] ; then exit 1; fi - fi -fi - -# set ldpath -ldpath="" -if [ -d /usr/local/gcc-4.7/lib64 ] ; then - echo skip ldpath="export LD_LIBRARY_PATH=/usr/local/gcc-4.7/lib64:\$LD_LIBRARY_PATH;" -fi - -# shutdown mysql -if [ $shutdown -ne 0 ] ; then - if [ -x /etc/init.d/mysql ] ; then - $sudo setsid /etc/init.d/mysql stop - else - /usr/local/mysql/bin/mysqladmin shutdown - fi - sleep $sleeptime -fi - -pushd /usr/local -if [ $? = 0 ] ; then - rm mysql - popd -fi - -# install the release -pushd /usr/local/mysqls 2>/dev/null -if [ $? = 0 ] ; then - mysqldir=mysqls/$mysqlbuild -else - pushd /usr/local - if [ $? -ne 0 ] ; then exit 1; fi - mysqldir=$mysqlbuild -fi - -if [ ! -d $mysqlbuild ] || [ $install -ne 0 ] ; then - rm mysql - if [ -d $mysqlbuild ] ; then $sudo rm -rf $mysqlbuild; fi - - install_tarball $basedir $mysqltarball - if [ $? -ne 0 ] ; then exit 1; fi - - if [ $tokudbtarball ] ; then - install_tarball $basedir $tokudbtarball - if [ $? -ne 0 ] ; then exit 1; fi - fi - - ln -s $mysqldir /usr/local/mysql - if [ $? -ne 0 ] ; then exit 1; fi - ln -s $mysqldir /usr/local/$mysqlbuild - if [ $? -ne 0 ] ; then exit 1; fi - - installdb=$mysqlbuild/bin/mysql_install_db - if [ ! -f $installdb ] ; then - installdb=$mysqlbuild/scripts/mysql_install_db - fi - - $sudo chown -R mysql $mysqlbuild/data - $sudo chgrp -R mysql $mysqlbuild/data - - # 5.6 debug build needs this - if [ ! -f $mysqlbuild/bin/mysqld ] && [ -f $mysqlbuild/bin/mysqld-debug ] ; then - ln $mysqlbuild/bin/mysqld-debug $mysqlbuild/bin/mysqld - fi - - if [ -z "$defaultsfile" ] ; then - default_arg="" - else - default_arg="--defaults-file=$defaultsfile" - fi - $sudo bash -c "$ldpath $installdb $default_arg --user=mysql --basedir=$PWD/$mysqlbuild --datadir=$PWD/$mysqlbuild/data" - if [ $? -ne 0 ] ; then exit 1; fi -else - # create link - rm /usr/local/mysql - ln -s $mysqldir /usr/local/mysql - if [ $? -ne 0 ] ; then exit 1; fi - rm /usr/local/$mysqlbuild - ln -s $mysqldir /usr/local/$mysqlbuild - if [ $? 
-ne 0 ] ; then exit 1; fi -fi -popd - -# start mysql -if [ $startup -ne 0 ] ; then - ulimit -a - # increase the open file limit - ulimit -n 10240 - exitcode=$? - echo ulimit -n 10240 exitcode $exitcode - - if [ -x /etc/init.d/mysql ] ; then - $sudo setsid /etc/init.d/mysql start - else - if [ -z "$defaultsfile" ] ; then - default_arg="" - else - default_arg="--defaults-file=$defaultsfile" - fi - j=/usr/local/mysql/lib/mysql/libjemalloc.so - if [ -f $j ] ; then - default_arg="$default_arg --malloc-lib=$j" - fi - $sudo -b bash -c "$ldpath /usr/local/mysql/bin/mysqld_safe $default_arg $mysqld_args" >/dev/null 2>&1 & - fi - sleep $sleeptime - - # add mysql grants - /usr/local/mysql/bin/mysql -u root -e "grant all on *.* to tokubuild@localhost" - /usr/local/mysql/bin/mysql -u root -e "grant all on *.* to 'ec2-user'@localhost" -fi - -popd - -exit 0 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/testbuildfromsrc.bash mariadb-5.5-5.5.40/storage/tokudb/scripts/testbuildfromsrc.bash --- mariadb-5.5-5.5.39/storage/tokudb/scripts/testbuildfromsrc.bash 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/testbuildfromsrc.bash 1970-01-01 00:00:00.000000000 +0000 @@ -1,32 +0,0 @@ -#!/usr/bin/env bash - -# for all source tarballs and their coresponding md5 files, build a binary release tarball - -system=$(uname -s|tr [:upper:] [:lower:]) -arch=$(uname -m) - -function expand() { - echo $* | tr ,: " " -} - -for f in *.md5; do - if [[ $f =~ (.*).tar.gz.md5 ]] ; then - mysqlsrc=${BASH_REMATCH[1]} - else - exit 1 - fi - if [ -d $mysqlsrc ] ; then continue; fi - md5sum --check $mysqlsrc.tar.gz.md5 - if [ $? != 0 ] ; then exit 1; fi - tar xzf $mysqlsrc.tar.gz - if [ $? != 0 ] ; then exit 1; fi - mkdir $mysqlsrc/build.RelWithDebInfo - pushd $mysqlsrc/build.RelWithDebInfo - if [ $? != 0 ] ; then exit 1; fi - cmake -D BUILD_CONFIG=mysql_release -D CMAKE_BUILD_TYPE=RelWithDebInfo -D BUILD_TESTING=OFF .. - if [ $? 
!= 0 ] ; then exit 1; fi - make -j4 package - if [ $? != 0 ] ; then exit 1; fi - if [ ! -f $mysqlsrc-$system-$arch.tar.gz ] ; then exit 1; fi - popd -done diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/test.mysql.bash mariadb-5.5-5.5.40/storage/tokudb/scripts/test.mysql.bash --- mariadb-5.5-5.5.39/storage/tokudb/scripts/test.mysql.bash 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/test.mysql.bash 1970-01-01 00:00:00.000000000 +0000 @@ -1,51 +0,0 @@ -#!/usr/bin/env bash - -function usage() { - echo "run the mysql tests" - echo "--mysqlbuild=$mysqlbuild --tests=$tests" -} - -function expand() { - echo $* | tr ,: " " -} - -mysqlbuild= -mysqlsocket=/tmp/mysql.sock -gearmandir=/usr/local/gearmand-1.1.6 -gearmandhost=localhost -system=$(uname -s | tr [:upper:] [:lower:]) -arch=$(uname -m | tr [:upper:] [:lower:]) -tests=run.mysql.tests.bash - -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1; - fi -done - -if [ -z $mysqlbuild ] ; then exit 1; fi - -for testname in $(expand $tests) ; do - if [ $testname = "run.mysql.tests.bash" ] ; then - run_mysqld=0 - else - run_mysqld=1 - fi - if [ $run_mysqld = 0 ] ; then - setupextra="--shutdown=1 --install=1 --startup=0" - else - setupextra="--shutdown=1 --install=1 --startup=1" - fi - echo "echo \$(date) $mysqlbuild >>/tmp/$(whoami).$testname.trace 2>&1; \ - \$HOME/bin/setup.mysql.bash --mysqlbuild=$mysqlbuild $setupextra >>/tmp/$(whoami).$testname.trace 2>&1; \ - testexitcode=\$?; \ - echo \$(date) $mysqlbuild \$testexitcode >>/tmp/$(whoami).$testname.trace 2>&1; \ - if [ \$testexitcode -ne 0 ] ; then exit 1; fi; \ - \$HOME/bin/$testname --mysqlbuild=$mysqlbuild --commit=1 >>/tmp/$(whoami).$testname.trace 2>&1; \ - if [ $run_mysqld != 0 ] ; then mysqladmin -S$mysqlsocket shutdown; fi" | $gearmandir/bin/gearman -b -f mysql-test-$system-$arch -h $gearmandhost -p 4730 -done - -exit 0 diff -Nru 
mariadb-5.5-5.5.39/storage/tokudb/scripts/testsandbox.bash mariadb-5.5-5.5.40/storage/tokudb/scripts/testsandbox.bash --- mariadb-5.5-5.5.39/storage/tokudb/scripts/testsandbox.bash 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/testsandbox.bash 1970-01-01 00:00:00.000000000 +0000 @@ -1,32 +0,0 @@ -#!/usr/bin/env bash - -# for all tokudb binary tarballs, verify that we can create and run the tarball using the MySQL sandbox. - -function expand() { - echo $* | tr ,: " " -} - -let n=0 -for f in *.md5; do - if [[ $f =~ (.*).tar.gz.md5 ]] ; then - mysqlbuild=${BASH_REMATCH[1]} - else - exit 1 - fi - - md5sum --check $f - if [ $? != 0 ] ; then exit 1; fi - make_sandbox --add_prefix=test$n- $mysqlbuild.tar.gz -- --sandbox_directory=test$n - if [ $? != 0 ] ; then exit 1; fi - pushd $HOME/sandboxes - if [ $? = 0 ] ; then - ./use_all 'show engines' - ./use_all 'create table test.t (a int primary key, b bigint, c varchar(256), d blob(500000), clustering key(b))' - ./use_all 'show create table test.t' - ./stop_all - popd - fi - let n=n+1 -done - - diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/tokustat.py mariadb-5.5-5.5.40/storage/tokudb/scripts/tokustat.py --- mariadb-5.5-5.5.39/storage/tokudb/scripts/tokustat.py 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/tokustat.py 2014-10-08 13:19:52.000000000 +0000 @@ -9,10 +9,10 @@ print "diff the tokudb engine status" print "--host=HOSTNAME (default: localhost)" print "--port=PORT" - print "--sleeptime=SLEEPTIME (default: 10 seconds)" + print "--iterations=MAX_ITERATIONS (default: forever)" + print "--interval=TIME_BETWEEN_SAMPLES (default: 10 seconds)" print "--q='show engine tokudb status'" print "--q='select * from information_schema.global_status'" - return 1 def convert(v): @@ -23,14 +23,11 @@ v = float(v) return v -def printit(stats, rs, sleeptime): - # print rs +def printit(stats, rs, interval): for t in rs: l = len(t) # grab the last 2 fields in t k = 
t[l-2] v = t[l-1] - # print k, v # debug - # try to convert v try: v = convert(v) except: @@ -41,11 +38,11 @@ print k, "|", oldv, "|", v, try: d = v - oldv - if sleeptime != 1: - if d >= sleeptime: - e = d / sleeptime + if interval != 1: + if d >= interval: + e = d / interval else: - e = float(d) / sleeptime + e = float(d) / interval print "|", d, "|", e else: print "|", d @@ -59,7 +56,9 @@ port = None user = None passwd = None - sleeptime = 10 + interval = 10 + iterations = 0 + q = 'show engine tokudb status' for a in sys.argv[1:]: @@ -71,6 +70,9 @@ continue return usage() + iterations = int(iterations) + interval = int(interval) + connect_parameters = {} if host is not None: if host[0] == '/': @@ -93,7 +95,9 @@ print "connected" stats = {} - while 1: + i = 0 + while iterations == 0 or i <= iterations: + i += 1 try: c = db.cursor() n = c.execute(q) @@ -105,8 +109,8 @@ return 2 try: - printit(stats, rs, int(sleeptime)) - time.sleep(int(sleeptime)) + printit(stats, rs, interval) + time.sleep(interval) except: print "printit", sys.exc_info() return 3 diff -Nru mariadb-5.5-5.5.39/storage/tokudb/scripts/tpch.readme mariadb-5.5-5.5.40/storage/tokudb/scripts/tpch.readme --- mariadb-5.5-5.5.39/storage/tokudb/scripts/tpch.readme 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/tokudb/scripts/tpch.readme 1970-01-01 00:00:00.000000000 +0000 @@ -1,34 +0,0 @@ -TPCH is an industry standard data warehouse benchmark. We use TPCH databases to test the TokuDB loader. - -The run.tpch.bash script loads a TPCH database at a given scale factor into TokuDB. It then uses the SQL -CHECK TABLE statement to verify the fractal tree invariants. Finally, it dumps the database and compares -with a precomputed dump of the database from InnoDB. - -Here are some TPCH databases dumped from InnoDB. These dumps are used to compare with TPCH data loaded -into TokuDB. 
- -$ s3ls tokutek-mysql-data -l --prefix=tpch -2010-08-16T21:21:10.000Z 1073741824 tpch10G_data_dump.tar.0 -2010-08-16T21:38:45.000Z 1073741824 tpch10G_data_dump.tar.1 -2010-08-16T21:56:43.000Z 1073741824 tpch10G_data_dump.tar.2 -2010-08-16T22:14:49.000Z 1073741824 tpch10G_data_dump.tar.3 -2010-08-16T22:32:38.000Z 1073741824 tpch10G_data_dump.tar.4 -2010-08-16T22:51:04.000Z 1073741824 tpch10G_data_dump.tar.5 -2010-08-16T23:08:51.000Z 91262976 tpch10G_data_dump.tar.6 -2010-08-16T23:10:21.000Z 654 tpch10G_data_dump.tar.xml -2010-08-12T17:45:09.000Z 633579520 tpch1G_data_dump.tar -2010-08-12T17:56:30.000Z 160 tpch1G_data_dump.tar.xml -2010-08-06T13:57:51.000Z 633610240 tpch1G_data_dump_innodb.tar -2010-08-06T14:07:09.000Z 174 tpch1G_data_dump_innodb.tar.xml -2010-11-28T12:20:58.000Z 886 tpch30G_data_dump.tar.xml -2010-09-14T19:16:30.000Z 1073741824 tpch30G_dump_data.tar.0 -2010-09-14T19:40:02.000Z 1073741824 tpch30G_dump_data.tar.1 -2010-09-14T20:12:22.000Z 1073741824 tpch30G_dump_data.tar.2 -2010-09-14T20:45:23.000Z 1073741824 tpch30G_dump_data.tar.3 -2010-09-14T21:14:07.000Z 1073741824 tpch30G_dump_data.tar.4 -2010-09-14T21:37:54.000Z 1073741824 tpch30G_dump_data.tar.5 -2010-09-14T21:57:02.000Z 1073741824 tpch30G_dump_data.tar.6 -2010-09-14T22:16:59.000Z 1073741824 tpch30G_dump_data.tar.7 -2010-09-14T22:36:22.000Z 1073741824 tpch30G_dump_data.tar.8 -2010-09-14T22:55:25.000Z 382511104 tpch30G_dump_data.tar.9 -2010-09-14T23:02:04.000Z 886 tpch30G_dump_data.tar.xml diff -Nru mariadb-5.5-5.5.39/storage/xtradb/buf/buf0buf.c mariadb-5.5-5.5.40/storage/xtradb/buf/buf0buf.c --- mariadb-5.5-5.5.39/storage/xtradb/buf/buf0buf.c 2014-08-03 12:00:42.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/buf/buf0buf.c 2014-10-08 13:19:51.000000000 +0000 @@ -4028,6 +4028,7 @@ " because of" " a corrupt database page.\n", stderr); + ut_error; } } diff -Nru mariadb-5.5-5.5.39/storage/xtradb/CMakeLists.txt mariadb-5.5-5.5.40/storage/xtradb/CMakeLists.txt --- 
mariadb-5.5-5.5.39/storage/xtradb/CMakeLists.txt 2014-08-03 12:00:33.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/CMakeLists.txt 2014-10-08 13:19:51.000000000 +0000 @@ -117,6 +117,25 @@ }" HAVE_IB_GCC_ATOMIC_BUILTINS_64 ) + CHECK_C_SOURCE_RUNS( + "#include + int main() + { + __sync_synchronize(); + return(0); + }" + HAVE_IB_GCC_SYNC_SYNCHRONISE + ) + CHECK_C_SOURCE_RUNS( + "#include + int main() + { + __atomic_thread_fence(__ATOMIC_ACQUIRE); + __atomic_thread_fence(__ATOMIC_RELEASE); + return(0); + }" + HAVE_IB_GCC_ATOMIC_THREAD_FENCE + ) ENDIF() IF(HAVE_IB_GCC_ATOMIC_BUILTINS) @@ -127,6 +146,14 @@ ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS_64=1) ENDIF() +IF(HAVE_IB_GCC_SYNC_SYNCHRONISE) + ADD_DEFINITIONS(-DHAVE_IB_GCC_SYNC_SYNCHRONISE=1) +ENDIF() + +IF(HAVE_IB_GCC_ATOMIC_THREAD_FENCE) + ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_THREAD_FENCE=1) +ENDIF() + # either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not IF(NOT CMAKE_CROSSCOMPILING) CHECK_C_SOURCE_RUNS( @@ -205,10 +232,21 @@ return(0); } " HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS) + CHECK_C_SOURCE_COMPILES( + "#include + int main() { + __machine_r_barrier(); + __machine_w_barrier(); + return(0); + }" + HAVE_IB_MACHINE_BARRIER_SOLARIS) ENDIF() IF(HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS) ADD_DEFINITIONS(-DHAVE_IB_ATOMIC_PTHREAD_T_SOLARIS=1) ENDIF() + IF(HAVE_IB_MACHINE_BARRIER_SOLARIS) + ADD_DEFINITIONS(-DHAVE_IB_MACHINE_BARRIER_SOLARIS=1) + ENDIF() ENDIF() @@ -226,6 +264,7 @@ IF(MSVC) ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS) + ADD_DEFINITIONS(-DHAVE_WINDOWS_MM_FENCE) # Avoid "unreferenced label" warning in generated file GET_FILENAME_COMPONENT(_SRC_DIR ${CMAKE_CURRENT_LIST_FILE} PATH) diff -Nru mariadb-5.5-5.5.39/storage/xtradb/dict/dict0crea.c mariadb-5.5-5.5.40/storage/xtradb/dict/dict0crea.c --- mariadb-5.5-5.5.39/storage/xtradb/dict/dict0crea.c 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/dict/dict0crea.c 2014-10-08 13:19:52.000000000 +0000 @@ -1,6 +1,6 @@ 
/***************************************************************************** -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1648,6 +1648,8 @@ ulint i; pars_info_t* info; + ut_ad(mutex_own(&(dict_sys->mutex))); + if (foreign->id == NULL) { /* Generate a new constraint id */ ulint namelen = strlen(table->name); @@ -1726,6 +1728,37 @@ "END;\n" , table, foreign, trx); + if (error == DB_SUCCESS) { + + + if (foreign->foreign_table != NULL) { + ib_rbt_t* rbt + = foreign->foreign_table->foreign_rbt; + + if (rbt == NULL) { + rbt = dict_table_init_foreign_rbt( + foreign->foreign_table); + } else { + rbt_delete(rbt, foreign->id); + } + + rbt_insert(rbt, foreign->id, &foreign); + } + + if (foreign->referenced_table != NULL) { + ib_rbt_t* rbt + = foreign->referenced_table->referenced_rbt; + + if (rbt == NULL) { + rbt = dict_table_init_referenced_rbt( + foreign->referenced_table); + } else { + rbt_delete(rbt, foreign->id); + } + rbt_insert(rbt, foreign->id, &foreign); + } + } + return(error); } @@ -1750,6 +1783,7 @@ dict_foreign_t* foreign; ulint number = start_id + 1; ulint error; + DBUG_ENTER("dict_create_add_foreigns_to_dictionary"); ut_ad(mutex_own(&(dict_sys->mutex))); @@ -1758,7 +1792,7 @@ "InnoDB: table SYS_FOREIGN not found" " in internal data dictionary\n"); - return(DB_ERROR); + DBUG_RETURN(DB_ERROR); } for (foreign = UT_LIST_GET_FIRST(table->foreign_list); @@ -1770,9 +1804,9 @@ if (error != DB_SUCCESS) { - return(error); + DBUG_RETURN(error); } } - return(DB_SUCCESS); + DBUG_RETURN(DB_SUCCESS); } diff -Nru mariadb-5.5-5.5.39/storage/xtradb/dict/dict0dict.c mariadb-5.5-5.5.40/storage/xtradb/dict/dict0dict.c --- mariadb-5.5-5.5.39/storage/xtradb/dict/dict0dict.c 2014-08-03 12:00:34.000000000 +0000 +++ 
mariadb-5.5-5.5.40/storage/xtradb/dict/dict0dict.c 2014-10-08 13:19:51.000000000 +0000 @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -26,6 +26,7 @@ #include #include "dict0dict.h" +#include "ut0rbt.h" #ifdef UNIV_NONINL #include "dict0dict.ic" @@ -193,6 +194,7 @@ /* mutex protecting the foreign and unique error buffers */ UNIV_INTERN mutex_t dict_foreign_err_mutex; #endif /* !UNIV_HOTBACKUP */ + /******************************************************************//** Makes all characters in a NUL-terminated UTF-8 string lower case. */ UNIV_INTERN @@ -1110,6 +1112,10 @@ UT_LIST_INIT(table->referenced_list); + if (table->referenced_rbt != NULL) { + rbt_clear(table->referenced_rbt); + } + return(TRUE); } @@ -1120,6 +1126,15 @@ foreign = UT_LIST_GET_FIRST(table->foreign_list); while (foreign != NULL) { + + /* The id will be changed. 
So remove old one */ + rbt_delete(foreign->foreign_table->foreign_rbt, foreign->id); + + if (foreign->referenced_table) { + rbt_delete(foreign->referenced_table->referenced_rbt, + foreign->id); + } + if (ut_strlen(foreign->foreign_table_name) < ut_strlen(table->name)) { /* Allocate a longer name buffer; @@ -1267,6 +1282,14 @@ mem_free(old_id); } + rbt_insert(foreign->foreign_table->foreign_rbt, + foreign->id, &foreign); + + if (foreign->referenced_table) { + rbt_insert(foreign->referenced_table->referenced_rbt, + foreign->id, &foreign); + } + foreign = UT_LIST_GET_NEXT(foreign_list, foreign); } @@ -2614,22 +2637,40 @@ /*===========================*/ dict_foreign_t* foreign) /*!< in, own: foreign constraint */ { + DBUG_ENTER("dict_foreign_remove_from_cache"); + ut_ad(mutex_own(&(dict_sys->mutex))); ut_a(foreign); if (foreign->referenced_table) { + ib_rbt_t* rbt; + UT_LIST_REMOVE(referenced_list, foreign->referenced_table->referenced_list, foreign); + + rbt = foreign->referenced_table->referenced_rbt; + if (rbt != NULL) { + rbt_delete(rbt, foreign->id); + } } if (foreign->foreign_table) { + ib_rbt_t* rbt; + UT_LIST_REMOVE(foreign_list, foreign->foreign_table->foreign_list, foreign); + rbt = foreign->foreign_table->foreign_rbt; + + if (rbt != NULL) { + rbt_delete(rbt, foreign->id); + } } dict_foreign_free(foreign); + + DBUG_VOID_RETURN; } /**********************************************************************//** @@ -2643,33 +2684,36 @@ dict_table_t* table, /*!< in: table object */ const char* id) /*!< in: foreign constraint id */ { - dict_foreign_t* foreign; + const ib_rbt_node_t* node; - ut_ad(mutex_own(&(dict_sys->mutex))); - - foreign = UT_LIST_GET_FIRST(table->foreign_list); + DBUG_ENTER("dict_foreign_find"); - while (foreign) { - if (ut_strcmp(id, foreign->id) == 0) { + ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(dict_table_check_foreign_keys(table)); - return(foreign); + if (table->foreign_rbt != NULL) { + ut_a(UT_LIST_GET_LEN(table->foreign_list) + == 
rbt_size(table->foreign_rbt)); + node = rbt_lookup(table->foreign_rbt, id); + if (node != NULL) { + DBUG_RETURN(*(dict_foreign_t**) node->value); } - - foreign = UT_LIST_GET_NEXT(foreign_list, foreign); + } else { + ut_a(UT_LIST_GET_LEN(table->foreign_list) == 0); } - foreign = UT_LIST_GET_FIRST(table->referenced_list); - - while (foreign) { - if (ut_strcmp(id, foreign->id) == 0) { - - return(foreign); + if (table->referenced_rbt != NULL) { + ut_a(UT_LIST_GET_LEN(table->referenced_list) + == rbt_size(table->referenced_rbt)); + node = rbt_lookup(table->referenced_rbt, id); + if (node != NULL) { + DBUG_RETURN(*(dict_foreign_t**) node->value); } - - foreign = UT_LIST_GET_NEXT(referenced_list, foreign); + } else { + ut_a(UT_LIST_GET_LEN(table->referenced_list) == 0); } - return(NULL); + DBUG_RETURN(NULL); } /*********************************************************************//** @@ -2907,6 +2951,8 @@ ibool added_to_referenced_list= FALSE; FILE* ef = dict_foreign_err_file; + DBUG_ENTER("dict_foreign_add_to_cache"); + ut_ad(mutex_own(&(dict_sys->mutex))); for_table = dict_table_check_if_in_cache_low( @@ -2916,7 +2962,14 @@ foreign->referenced_table_name_lookup); ut_a(for_table || ref_table); + if (ref_table != NULL && ref_table->referenced_rbt == NULL) { + dict_table_init_referenced_rbt(ref_table); + } + if (for_table) { + if (for_table->foreign_rbt == NULL) { + dict_table_init_foreign_rbt(for_table); + } for_in_cache = dict_foreign_find(for_table, foreign->id); } @@ -2953,18 +3006,22 @@ mem_heap_free(foreign->heap); } - return(DB_CANNOT_ADD_CONSTRAINT); + DBUG_RETURN(DB_CANNOT_ADD_CONSTRAINT); } for_in_cache->referenced_table = ref_table; for_in_cache->referenced_index = index; + UT_LIST_ADD_LAST(referenced_list, - ref_table->referenced_list, - for_in_cache); + ref_table->referenced_list, for_in_cache); added_to_referenced_list = TRUE; + + rbt_insert(ref_table->referenced_rbt, + for_in_cache->id, &for_in_cache); } if (for_in_cache->foreign_table == NULL && for_table) 
{ + index = dict_foreign_find_index( for_table, for_in_cache->foreign_col_names, @@ -2993,22 +3050,28 @@ referenced_list, ref_table->referenced_list, for_in_cache); + rbt_delete(ref_table->referenced_rbt, + for_in_cache->id); } mem_heap_free(foreign->heap); } - return(DB_CANNOT_ADD_CONSTRAINT); + DBUG_RETURN(DB_CANNOT_ADD_CONSTRAINT); } for_in_cache->foreign_table = for_table; for_in_cache->foreign_index = index; + UT_LIST_ADD_LAST(foreign_list, for_table->foreign_list, for_in_cache); + + rbt_insert(for_table->foreign_rbt, for_in_cache->id, + &for_in_cache); } - return(DB_SUCCESS); + DBUG_RETURN(DB_SUCCESS); } #endif /* !UNIV_HOTBACKUP */ @@ -5857,6 +5920,11 @@ ut_ad(space_id > 0); + if (dict_sys == NULL) { + /* This could happen when it's in redo processing. */ + return(NULL); + } + table = UT_LIST_GET_FIRST(dict_sys->table_LRU); num_item = UT_LIST_GET_LEN(dict_sys->table_LRU); diff -Nru mariadb-5.5-5.5.39/storage/xtradb/dict/dict0load.c mariadb-5.5-5.5.40/storage/xtradb/dict/dict0load.c --- mariadb-5.5-5.5.39/storage/xtradb/dict/dict0load.c 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/dict/dict0load.c 2014-10-08 13:19:52.000000000 +0000 @@ -1833,6 +1833,8 @@ const char* err_msg; mtr_t mtr; + DBUG_ENTER("dict_load_table"); + ut_ad(mutex_own(&(dict_sys->mutex))); heap = mem_heap_create(32000); @@ -1866,7 +1868,7 @@ mtr_commit(&mtr); mem_heap_free(heap); - return(NULL); + DBUG_RETURN(NULL); } field = rec_get_nth_field_old(rec, 0, &len); @@ -2028,8 +2030,8 @@ #endif /* 0 */ func_exit: mem_heap_free(heap); - - return(table); + ut_ad(table == NULL || dict_table_check_foreign_keys(table)); + DBUG_RETURN(table); } /***********************************************************************//** @@ -2271,6 +2273,8 @@ dict_table_t* for_table; dict_table_t* ref_table; + DBUG_ENTER("dict_load_foreign"); + ut_ad(mutex_own(&(dict_sys->mutex))); heap2 = mem_heap_create(1000); @@ -2303,7 +2307,7 @@ mtr_commit(&mtr); mem_heap_free(heap2); - 
return(DB_ERROR); + DBUG_RETURN(DB_ERROR); } field = rec_get_nth_field_old(rec, 0, &len); @@ -2319,7 +2323,7 @@ mtr_commit(&mtr); mem_heap_free(heap2); - return(DB_ERROR); + DBUG_RETURN(DB_ERROR); } /* Read the table names and the number of columns associated @@ -2416,7 +2420,7 @@ a new foreign key constraint but loading one from the data dictionary. */ - return(dict_foreign_add_to_cache(foreign, check_charsets, ignore_err)); + DBUG_RETURN(dict_foreign_add_to_cache(foreign, check_charsets, ignore_err)); } /***********************************************************************//** @@ -2451,6 +2455,8 @@ ulint err; mtr_t mtr; + DBUG_ENTER("dict_load_foreigns"); + ut_ad(mutex_own(&(dict_sys->mutex))); sys_foreign = dict_table_get_low("SYS_FOREIGN", DICT_ERR_IGNORE_NONE); @@ -2462,7 +2468,7 @@ "InnoDB: Error: no foreign key system tables" " in the database\n"); - return(DB_ERROR); + DBUG_RETURN(DB_ERROR); } ut_a(!dict_table_is_comp(sys_foreign)); @@ -2542,7 +2548,7 @@ if (err != DB_SUCCESS) { btr_pcur_close(&pcur); - return(err); + DBUG_RETURN(err); } mtr_start(&mtr); @@ -2571,5 +2577,74 @@ goto start_load; } - return(DB_SUCCESS); + DBUG_RETURN(DB_SUCCESS); +} + +/********************************************************************//** +Check if dict_table_t::foreign_rbt and dict_table::foreign_list +contain the same set of foreign key objects; and check if +dict_table_t::referenced_rbt and dict_table::referenced_list contain +the same set of foreign key objects. +@return TRUE if correct, FALSE otherwise. 
*/ +ibool +dict_table_check_foreign_keys( +/*==========================*/ + const dict_table_t* table) /* in: table object to check */ +{ + dict_foreign_t* foreign; + const ib_rbt_node_t* node; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + if (table->foreign_rbt == NULL) { + + if (UT_LIST_GET_LEN(table->foreign_list) > 0) { + return(FALSE); + } + + } else { + + if (UT_LIST_GET_LEN(table->foreign_list) + != rbt_size(table->foreign_rbt)) { + return(FALSE); + } + + foreign = UT_LIST_GET_FIRST(table->foreign_list); + + while (foreign != NULL) { + + node = rbt_lookup(table->foreign_rbt, foreign->id); + if (node == NULL) { + return(FALSE); + } + foreign = UT_LIST_GET_NEXT(foreign_list, foreign); + } + } + + if (table->referenced_rbt == NULL ) { + + if (UT_LIST_GET_LEN(table->referenced_list) > 0) { + return(FALSE); + } + + } else { + + if (UT_LIST_GET_LEN(table->referenced_list) + != rbt_size(table->referenced_rbt)) { + return(FALSE); + } + + foreign = UT_LIST_GET_FIRST(table->referenced_list); + + while (foreign != NULL) { + + node = rbt_lookup(table->referenced_rbt, foreign->id); + if (node == NULL) { + return(FALSE); + } + foreign = UT_LIST_GET_NEXT(referenced_list, foreign); + } + } + + return(TRUE); } diff -Nru mariadb-5.5-5.5.39/storage/xtradb/dict/dict0mem.c mariadb-5.5-5.5.40/storage/xtradb/dict/dict0mem.c --- mariadb-5.5-5.5.39/storage/xtradb/dict/dict0mem.c 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/dict/dict0mem.c 2014-10-08 13:19:52.000000000 +0000 @@ -66,6 +66,7 @@ { dict_table_t* table; mem_heap_t* heap; + DBUG_ENTER("dict_mem_table_create"); ut_ad(name); ut_a(!(flags & (~0 << DICT_TF2_BITS))); @@ -100,8 +101,11 @@ table->is_corrupt = FALSE; #endif /* !UNIV_HOTBACKUP */ + table->foreign_rbt = NULL; + table->referenced_rbt = NULL; + ut_d(table->magic_n = DICT_TABLE_MAGIC_N); - return(table); + DBUG_RETURN(table); } /****************************************************************//** @@ -120,6 +124,15 @@ #ifndef UNIV_HOTBACKUP 
mutex_free(&(table->autoinc_mutex)); #endif /* UNIV_HOTBACKUP */ + + if (table->foreign_rbt != NULL) { + rbt_free(table->foreign_rbt); + } + + if (table->referenced_rbt != NULL) { + rbt_free(table->referenced_rbt); + } + ut_free(table->name); mem_heap_free(table->heap); } diff -Nru mariadb-5.5-5.5.39/storage/xtradb/handler/ha_innodb.cc mariadb-5.5-5.5.40/storage/xtradb/handler/ha_innodb.cc --- mariadb-5.5-5.5.39/storage/xtradb/handler/ha_innodb.cc 2014-08-03 12:00:42.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/handler/ha_innodb.cc 2014-10-08 13:19:52.000000000 +0000 @@ -9166,16 +9166,6 @@ return(ranges + (double) rows / (double) total_rows * time_for_scan); } -UNIV_INTERN -bool -ha_innobase::is_corrupt() const -{ - if (share->ib_table) - return ((bool)share->ib_table->is_corrupt); - else - return (FALSE); -} - /*********************************************************************//** Calculates the key number used inside MySQL for an Innobase index. We will first check the "index translation table" for a match of the index to get diff -Nru mariadb-5.5-5.5.39/storage/xtradb/handler/ha_innodb.h mariadb-5.5-5.5.40/storage/xtradb/handler/ha_innodb.h --- mariadb-5.5-5.5.39/storage/xtradb/handler/ha_innodb.h 2014-08-03 12:00:33.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/handler/ha_innodb.h 2014-10-08 13:19:52.000000000 +0000 @@ -141,7 +141,6 @@ double scan_time(); double read_time(uint index, uint ranges, ha_rows rows); my_bool is_fake_change_enabled(THD *thd); - bool is_corrupt() const; int write_row(uchar * buf); int update_row(const uchar * old_data, uchar * new_data); diff -Nru mariadb-5.5-5.5.39/storage/xtradb/handler/i_s.cc mariadb-5.5-5.5.40/storage/xtradb/handler/i_s.cc --- mariadb-5.5-5.5.39/storage/xtradb/handler/i_s.cc 2014-08-03 12:00:33.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/handler/i_s.cc 2014-10-08 13:19:52.000000000 +0000 @@ -43,6 +43,7 @@ #include "i_s.h" #include #include +#include extern "C" { #include "btr0pcur.h" 
/* for file sys_tables related info. */ @@ -2665,6 +2666,19 @@ table_name = mem_heap_strdup(heap, index->table_name); + DBUG_EXECUTE_IF("mysql_test_print_index_type", + { + char idx_type[3]; + + ut_snprintf(idx_type, + sizeof(idx_type), + "%d", + index->type); + + index_name=mem_heap_strcat(heap, + index_name, + idx_type); + };); } mutex_exit(&dict_sys->mutex); @@ -7384,12 +7398,23 @@ limit_lsn_range_from_condition(table, cond, &min_lsn, &max_lsn); } + + /* If the log tracker is running and our max_lsn > current tracked LSN, + cap the max lsn so that we don't try to read any partial runs as the + tracked LSN advances. */ + if (srv_track_changed_pages) { + ib_uint64_t tracked_lsn = log_get_tracked_lsn(); + if (max_lsn > tracked_lsn) + max_lsn = tracked_lsn; + } if (!log_online_bitmap_iterator_init(&i, min_lsn, max_lsn)) { my_error(ER_CANT_FIND_SYSTEM_REC, MYF(0)); DBUG_RETURN(1); } + DEBUG_SYNC(thd, "i_s_innodb_changed_pages_range_ready"); + while(log_online_bitmap_iterator_next(&i) && (!srv_max_changed_pages || output_rows_num < srv_max_changed_pages) && diff -Nru mariadb-5.5-5.5.39/storage/xtradb/include/dict0dict.h mariadb-5.5-5.5.40/storage/xtradb/include/dict0dict.h --- mariadb-5.5-5.5.39/storage/xtradb/include/dict0dict.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/include/dict0dict.h 2014-10-08 13:19:52.000000000 +0000 @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -39,6 +39,7 @@ #include "ut0rnd.h" #include "ut0byte.h" #include "trx0types.h" +#include "ut0rbt.h" #ifndef UNIV_HOTBACKUP # include "sync0sync.h" @@ -1355,6 +1356,42 @@ ulint space_id, ibool need_mutex); +/**********************************************************************//** +Compares the given foreign key identifier (the key in rb-tree) and the +foreign key identifier in the given fk object (value in rb-tree). +@return negative, 0, or positive if foreign_id is smaller, equal, +or greater than foreign_obj->id, respectively. */ +UNIV_INLINE +int +dict_foreign_rbt_cmp( +/*=================*/ + const void* foreign_id, /*!< in: the foreign key identifier + which is used as a key in rb-tree. */ + const void* foreign_obj); /*!< in: the foreign object itself + which is used as value in rb-tree. */ + +/**********************************************************************//** +Allocate the table->foreign_rbt, which stores all the foreign objects +that is available in table->foreign_list. +@return the allocated rbt object */ +UNIV_INLINE +ib_rbt_t* +dict_table_init_foreign_rbt( +/*========================*/ + dict_table_t* table); /*!< in: the table object whose + table->foreign_rbt will be initialized */ + +/**********************************************************************//** +Allocate the table->referened_rbt, which stores all the foreign objects +that is available in table->referenced_list. 
+@return the allocated rbt object */ +UNIV_INLINE +ib_rbt_t* +dict_table_init_referenced_rbt( +/*===========================*/ + dict_table_t* table); /*!< in: the table object whose + table->referenced_rbt will be initialized */ + #ifndef UNIV_NONINL #include "dict0dict.ic" #endif diff -Nru mariadb-5.5-5.5.39/storage/xtradb/include/dict0dict.ic mariadb-5.5-5.5.40/storage/xtradb/include/dict0dict.ic --- mariadb-5.5-5.5.39/storage/xtradb/include/dict0dict.ic 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/include/dict0dict.ic 2014-10-08 13:19:52.000000000 +0000 @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -958,3 +958,62 @@ } #endif /* !UNIV_HOTBACKUP */ +/**********************************************************************//** +Compares the given foreign key identifier (the key in rb-tree) and the +foreign key identifier in the given fk object (value in rb-tree). +@return negative, 0, or positive if foreign_id is smaller, equal, +or greater than foreign_obj->id, respectively. */ +UNIV_INLINE +int +dict_foreign_rbt_cmp( +/*=================*/ + const void* foreign_id, /*!< in: the foreign key identifier + which is used as a key in rb-tree. */ + const void* foreign_obj) /*!< in: the foreign object itself + which is used as value in rb-tree. */ +{ + return(ut_strcmp((const char*) foreign_id, + (*(dict_foreign_t**) foreign_obj)->id)); +} + +/**********************************************************************//** +Allocate the table->foreign_rbt, which stores all the foreign objects +that is available in table->foreign_list. The caller must hold the +dict_sys->mutex. 
+@return the allocated rbt object */ +UNIV_INLINE +ib_rbt_t* +dict_table_init_foreign_rbt( +/*========================*/ + dict_table_t* table) /*!< in: the table object whose + table->foreign_rbt will be initialized */ +{ + ut_a(table->foreign_rbt == NULL); + ut_ad(mutex_own(&(dict_sys->mutex))); + + table->foreign_rbt = rbt_create(sizeof(dict_foreign_t*), + dict_foreign_rbt_cmp); + ut_a(table->foreign_rbt != NULL); + return(table->foreign_rbt); +} + +/**********************************************************************//** +Allocate the table->referened_rbt, which stores all the foreign objects +that is available in table->referenced_list. The caller must hold the +dict_sys->mutex. +@return the allocated rbt object */ +UNIV_INLINE +ib_rbt_t* +dict_table_init_referenced_rbt( +/*===========================*/ + dict_table_t* table) /*!< in: the table object whose + table->referenced_rbt will be initialized */ +{ + ut_a(table->referenced_rbt == NULL); + ut_ad(mutex_own(&(dict_sys->mutex))); + + table->referenced_rbt = rbt_create(sizeof(dict_foreign_t*), + dict_foreign_rbt_cmp); + ut_a(table->referenced_rbt != NULL); + return(table->referenced_rbt); +} diff -Nru mariadb-5.5-5.5.39/storage/xtradb/include/dict0load.h mariadb-5.5-5.5.40/storage/xtradb/include/dict0load.h --- mariadb-5.5-5.5.39/storage/xtradb/include/dict0load.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/include/dict0load.h 2014-10-08 13:19:52.000000000 +0000 @@ -32,6 +32,7 @@ #include "ut0byte.h" #include "mem0mem.h" #include "btr0types.h" +#include "ut0rbt.h" /** enum that defines all 6 system table IDs */ enum dict_system_table_id { @@ -344,6 +345,17 @@ ulint* key_cols, /*!< out: KEY_COLS */ ib_uint64_t* diff_vals, /*!< out: DIFF_VALS */ ib_uint64_t* non_null_vals); /*!< out: NON_NULL_VALS */ +/********************************************************************//** +Check if dict_table_t::foreign_rbt and dict_table::foreign_list +contains the same set of foreign key 
objects; and check if +dict_table_t::referenced_rbt and dict_table::referenced_list contains +the same set of foreign key objects. +@return TRUE if correct, FALSE otherwise. */ +ibool +dict_table_check_foreign_keys( +/*==========================*/ + const dict_table_t* table); /* in: table object to check */ + #ifndef UNIV_NONINL #include "dict0load.ic" #endif diff -Nru mariadb-5.5-5.5.39/storage/xtradb/include/dict0mem.h mariadb-5.5-5.5.40/storage/xtradb/include/dict0mem.h --- mariadb-5.5-5.5.39/storage/xtradb/include/dict0mem.h 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/include/dict0mem.h 2014-10-08 13:19:52.000000000 +0000 @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -43,6 +43,7 @@ #include "ut0byte.h" #include "hash0hash.h" #include "trx0types.h" +#include "ut0rbt.h" /** Type flags of an index: OR'ing of the flags is allowed to define a combination of types */ @@ -510,7 +511,6 @@ #define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32 /*!< ON UPDATE NO ACTION */ /* @} */ - /** Data structure for a database table. Most fields will be initialized to 0, NULL or FALSE in dict_mem_table_create(). 
*/ struct dict_table_struct{ @@ -562,6 +562,14 @@ UT_LIST_BASE_NODE_T(dict_foreign_t) referenced_list;/*!< list of foreign key constraints which refer to this table */ + + ib_rbt_t* foreign_rbt; /*!< a rb-tree of all foreign keys + listed in foreign_list, sorted by + foreign->id */ + ib_rbt_t* referenced_rbt; /*!< a rb-tree of all foreign keys + listed in referenced_list, sorted by + foreign->id */ + UT_LIST_NODE_T(dict_table_t) table_LRU; /*!< node of the LRU list of tables */ ulint n_mysql_handles_opened; diff -Nru mariadb-5.5-5.5.39/storage/xtradb/include/log0log.h mariadb-5.5-5.5.40/storage/xtradb/include/log0log.h --- mariadb-5.5-5.5.39/storage/xtradb/include/log0log.h 2014-08-03 12:00:43.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/include/log0log.h 2014-10-08 13:19:52.000000000 +0000 @@ -162,6 +162,13 @@ ib_uint64_t log_get_lsn(void); /*=============*/ +/************************************************************//** +Gets the current lsn. +@return current lsn */ +UNIV_INLINE +lsn_t +log_get_lsn_nowait(void); +/*=============*/ /**************************************************************** Gets the log group capacity. It is OK to read the value without holding log_sys->mutex because it is constant. @@ -596,6 +603,18 @@ log_mem_free(void); /*==============*/ +/****************************************************************//** +Safely reads the log_sys->tracked_lsn value. Uses atomic operations +if available, otherwise this field is protected with the log system +mutex. The writer counterpart function is log_set_tracked_lsn() in +log0online.c. + +@return log_sys->tracked_lsn value. 
*/ +UNIV_INLINE +ib_uint64_t +log_get_tracked_lsn(void); +/*=====================*/ + extern log_t* log_sys; /* Values used as flags */ diff -Nru mariadb-5.5-5.5.39/storage/xtradb/include/log0log.ic mariadb-5.5-5.5.40/storage/xtradb/include/log0log.ic --- mariadb-5.5-5.5.39/storage/xtradb/include/log0log.ic 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/include/log0log.ic 2014-10-08 13:19:52.000000000 +0000 @@ -426,6 +426,26 @@ return(lsn); } +/************************************************************//** +Gets the current lsn with a trylock +@return current lsn or 0 if false*/ +UNIV_INLINE +lsn_t +log_get_lsn_nowait(void) +/*=============*/ +{ + lsn_t lsn; + + if (mutex_enter_nowait(&(log_sys->mutex))) + return 0; + + lsn = log_sys->lsn; + + mutex_exit(&(log_sys->mutex)); + + return(lsn); +} + /**************************************************************** Gets the log group capacity. It is OK to read the value without holding log_sys->mutex because it is constant. @@ -459,3 +479,24 @@ } } #endif /* !UNIV_HOTBACKUP */ + +/****************************************************************//** +Safely reads the log_sys->tracked_lsn value. Uses atomic operations +if available, otherwise this field is protected with the log system +mutex. The writer counterpart function is log_set_tracked_lsn() in +log0online.c. + +@return log_sys->tracked_lsn value. 
*/ +UNIV_INLINE +ib_uint64_t +log_get_tracked_lsn(void) +/*=====================*/ +{ +#ifdef HAVE_ATOMIC_BUILTINS_64 + return os_atomic_increment_uint64(&log_sys->tracked_lsn, 0); +#else + ut_ad(mutex_own(&(log_sys->mutex))); + return log_sys->tracked_lsn; +#endif +} + diff -Nru mariadb-5.5-5.5.39/storage/xtradb/include/os0sync.h mariadb-5.5-5.5.40/storage/xtradb/include/os0sync.h --- mariadb-5.5-5.5.39/storage/xtradb/include/os0sync.h 2014-08-03 12:00:39.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/include/os0sync.h 2014-10-08 13:19:52.000000000 +0000 @@ -317,6 +317,9 @@ # define os_atomic_test_and_set_byte(ptr, new_val) \ __sync_lock_test_and_set(ptr, (byte) new_val) +# define os_atomic_lock_release_byte(ptr) \ + __sync_lock_release(ptr) + #elif defined(HAVE_IB_SOLARIS_ATOMICS) # define HAVE_ATOMIC_BUILTINS @@ -374,6 +377,9 @@ # define os_atomic_test_and_set_byte(ptr, new_val) \ atomic_swap_uchar(ptr, new_val) +# define os_atomic_lock_release_byte(ptr) \ + (void) atomic_swap_uchar(ptr, 0) + #elif defined(HAVE_WINDOWS_ATOMICS) # define HAVE_ATOMIC_BUILTINS @@ -436,6 +442,57 @@ "Mutexes and rw_locks use InnoDB's own implementation" #endif +/** barrier definitions for memory ordering */ +#ifdef HAVE_IB_GCC_ATOMIC_THREAD_FENCE +# define HAVE_MEMORY_BARRIER +# define os_rmb __atomic_thread_fence(__ATOMIC_ACQUIRE) +# define os_wmb __atomic_thread_fence(__ATOMIC_RELEASE) +#ifdef __powerpc__ +# define os_isync __asm __volatile ("isync":::"memory") +#else +#define os_isync do { } while(0) +#endif + +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "GCC builtin __atomic_thread_fence() is used for memory barrier" + +#elif defined(HAVE_IB_GCC_SYNC_SYNCHRONISE) +# define HAVE_MEMORY_BARRIER +# define os_rmb __sync_synchronize() +# define os_wmb __sync_synchronize() +# define os_isync __sync_synchronize() +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "GCC builtin __sync_synchronize() is used for memory barrier" + +#elif defined(HAVE_IB_MACHINE_BARRIER_SOLARIS) +# define 
HAVE_MEMORY_BARRIER +# include +# define os_rmb __machine_r_barrier() +# define os_wmb __machine_w_barrier() +# define os_isync os_rmb; os_wmb +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "Soralis memory ordering functions are used for memory barrier" + +#elif defined(HAVE_WINDOWS_MM_FENCE) +# define HAVE_MEMORY_BARRIER +# include +# define os_rmb _mm_lfence() +# define os_wmb _mm_sfence() +# define os_isync os_rmb; os_wmb +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "_mm_lfence() and _mm_sfence() are used for memory barrier" + +# define os_atomic_lock_release_byte(ptr) \ + (void) InterlockedExchange(ptr, 0) + +#else +# define os_rmb do { } while(0) +# define os_wmb do { } while(0) +# define os_isync do { } while(0) +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "Memory barrier is not used" +#endif + #ifndef UNIV_NONINL #include "os0sync.ic" #endif diff -Nru mariadb-5.5-5.5.39/storage/xtradb/include/sync0rw.h mariadb-5.5-5.5.40/storage/xtradb/include/sync0rw.h --- mariadb-5.5-5.5.39/storage/xtradb/include/sync0rw.h 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/include/sync0rw.h 2014-10-08 13:19:52.000000000 +0000 @@ -70,14 +70,8 @@ #ifdef UNIV_SYNC_DEBUG /* The global mutex which protects debug info lists of all rw-locks. To modify the debug info list of an rw-lock, this mutex has to be - acquired in addition to the mutex protecting the lock. 
*/ -extern mutex_t rw_lock_debug_mutex; -extern os_event_t rw_lock_debug_event; /*!< If deadlock detection does - not get immediately the mutex it - may wait for this event */ -extern ibool rw_lock_debug_waiters; /*!< This is set to TRUE, if - there may be waiters for the event */ +extern os_fast_mutex_t rw_lock_debug_mutex; #endif /* UNIV_SYNC_DEBUG */ /** number of spin waits on rw-latches, diff -Nru mariadb-5.5-5.5.39/storage/xtradb/include/sync0rw.ic mariadb-5.5-5.5.40/storage/xtradb/include/sync0rw.ic --- mariadb-5.5-5.5.39/storage/xtradb/include/sync0rw.ic 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/include/sync0rw.ic 2014-10-08 13:19:52.000000000 +0000 @@ -200,14 +200,14 @@ ulint amount) /*!< in: amount to decrement */ { #ifdef INNODB_RW_LOCKS_USE_ATOMICS - lint local_lock_word = lock->lock_word; - while (local_lock_word > 0) { + lint local_lock_word; + os_rmb; + while ((local_lock_word= lock->lock_word) > 0) { if (os_compare_and_swap_lint(&lock->lock_word, local_lock_word, local_lock_word - amount)) { return(TRUE); } - local_lock_word = lock->lock_word; } return(FALSE); #else /* INNODB_RW_LOCKS_USE_ATOMICS */ diff -Nru mariadb-5.5-5.5.39/storage/xtradb/include/sync0sync.ic mariadb-5.5-5.5.40/storage/xtradb/include/sync0sync.ic --- mariadb-5.5-5.5.39/storage/xtradb/include/sync0sync.ic 2014-08-03 12:00:40.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/include/sync0sync.ic 2014-10-08 13:19:52.000000000 +0000 @@ -92,6 +92,7 @@ ut_a(mutex->lock_word == 0); mutex->lock_word = 1; + os_wmb; } return((byte)ret); @@ -108,10 +109,7 @@ mutex_t* mutex) /*!< in: mutex */ { #if defined(HAVE_ATOMIC_BUILTINS) - /* In theory __sync_lock_release should be used to release the lock. - Unfortunately, it does not work properly alone. The workaround is - that more conservative __sync_lock_test_and_set is used instead. 
*/ - os_atomic_test_and_set_byte(&mutex->lock_word, 0); + os_atomic_lock_release_byte(&mutex->lock_word); #else mutex->lock_word = 0; @@ -147,6 +145,7 @@ ptr = &(mutex->waiters); + os_rmb; return(*ptr); /* Here we assume that the read of a single word from memory is atomic */ } @@ -181,6 +180,7 @@ to wake up possible hanging threads if they are missed in mutex_signal_object. */ + os_isync; if (mutex_get_waiters(mutex) != 0) { mutex_signal_object(mutex); diff -Nru mariadb-5.5-5.5.39/storage/xtradb/include/univ.i mariadb-5.5-5.5.40/storage/xtradb/include/univ.i --- mariadb-5.5-5.5.39/storage/xtradb/include/univ.i 2014-08-03 12:00:42.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/include/univ.i 2014-10-08 13:19:51.000000000 +0000 @@ -64,10 +64,10 @@ (INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR) #ifndef PERCONA_INNODB_VERSION -#define PERCONA_INNODB_VERSION 35.2 +#define PERCONA_INNODB_VERSION 36.1 #endif -#define INNODB_VERSION_STR "5.5.38-MariaDB-" IB_TO_STR(PERCONA_INNODB_VERSION) +#define INNODB_VERSION_STR "5.5.40-MariaDB-" IB_TO_STR(PERCONA_INNODB_VERSION) #define REFMAN "http://dev.mysql.com/doc/refman/" \ IB_TO_STR(MYSQL_MAJOR_VERSION) "." \ diff -Nru mariadb-5.5-5.5.39/storage/xtradb/log/log0log.c mariadb-5.5-5.5.40/storage/xtradb/log/log0log.c --- mariadb-5.5-5.5.39/storage/xtradb/log/log0log.c 2014-08-03 12:00:43.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/log/log0log.c 2014-10-08 13:19:52.000000000 +0000 @@ -223,25 +223,6 @@ } /****************************************************************//** -Safely reads the log_sys->tracked_lsn value. Uses atomic operations -if available, otherwise this field is protected with the log system -mutex. The writer counterpart function is log_set_tracked_lsn() in -log0online.c. - -@return log_sys->tracked_lsn value. 
*/ -UNIV_INLINE -ib_uint64_t -log_get_tracked_lsn() -{ -#ifdef HAVE_ATOMIC_BUILTINS_64 - return os_atomic_increment_uint64(&log_sys->tracked_lsn, 0); -#else - ut_ad(mutex_own(&(log_sys->mutex))); - return log_sys->tracked_lsn; -#endif -} - -/****************************************************************//** Checks if the log groups have a big enough margin of free space in so that a new log entry can be written without overwriting log data that is not read by the changed page bitmap thread. diff -Nru mariadb-5.5-5.5.39/storage/xtradb/log/log0online.c mariadb-5.5-5.5.40/storage/xtradb/log/log0online.c --- mariadb-5.5-5.5.39/storage/xtradb/log/log0online.c 2014-08-03 12:00:43.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/log/log0online.c 2014-10-08 13:19:52.000000000 +0000 @@ -1202,6 +1202,9 @@ bmp_tree_node = (ib_rbt_node_t*) rbt_next(log_bmp_sys->modified_pages, bmp_tree_node); + + DBUG_EXECUTE_IF("bitmap_page_2_write_error", + DBUG_SET("+d,bitmap_page_write_error");); } rbt_reset(log_bmp_sys->modified_pages); @@ -1265,6 +1268,7 @@ /*********************************************************************//** Diagnose a bitmap file range setup failure and free the partially-initialized bitmap file range. 
*/ +UNIV_COLD static void log_online_diagnose_inconsistent_dir( @@ -1444,26 +1448,30 @@ return FALSE; } -#ifdef UNIV_DEBUG - if (!bitmap_files->files[0].seq_num) { + if (!bitmap_files->files[0].seq_num + || bitmap_files->files[0].seq_num != first_file_seq_num) { log_online_diagnose_inconsistent_dir(bitmap_files); return FALSE; } - ut_ad(bitmap_files->files[0].seq_num == first_file_seq_num); + { size_t i; for (i = 1; i < bitmap_files->count; i++) { if (!bitmap_files->files[i].seq_num) { break; } - ut_ad(bitmap_files->files[i].seq_num - > bitmap_files->files[i - 1].seq_num); - ut_ad(bitmap_files->files[i].start_lsn - >= bitmap_files->files[i - 1].start_lsn); + if ((bitmap_files->files[i].seq_num + <= bitmap_files->files[i - 1].seq_num) + || (bitmap_files->files[i].start_lsn + < bitmap_files->files[i - 1].start_lsn)) { + + log_online_diagnose_inconsistent_dir( + bitmap_files); + return FALSE; + } } } -#endif return TRUE; } @@ -1590,6 +1598,17 @@ { ut_a(i); + if (UNIV_UNLIKELY(min_lsn > max_lsn)) { + + /* Empty range */ + i->in_files.count = 0; + i->in_files.files = NULL; + i->in.file = os_file_invalid; + i->page = NULL; + i->failed = FALSE; + return TRUE; + } + if (!log_online_setup_bitmap_file_range(&i->in_files, min_lsn, max_lsn)) { diff -Nru mariadb-5.5-5.5.39/storage/xtradb/os/os0stacktrace.c mariadb-5.5-5.5.40/storage/xtradb/os/os0stacktrace.c --- mariadb-5.5-5.5.39/storage/xtradb/os/os0stacktrace.c 2014-08-03 12:00:42.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/os/os0stacktrace.c 2014-10-08 13:19:52.000000000 +0000 @@ -85,7 +85,7 @@ caller_address = (void*) uc->uc_mcontext.gregs[REG_RIP] ; #elif defined(__hppa__) ucontext_t* uc = (ucontext_t*) ucontext; - caller_address = (void*) uc->uc_mcontext.sc_iaoq[0] & ~0x3UL ; + caller_address = (void*) (uc->uc_mcontext.sc_iaoq[0] & ~0x3UL) ; #elif (defined (__ppc__)) || (defined (__powerpc__)) ucontext_t* uc = (ucontext_t*) ucontext; caller_address = (void*) uc->uc_mcontext.regs->nip ; diff -Nru 
mariadb-5.5-5.5.39/storage/xtradb/row/row0ins.c mariadb-5.5-5.5.40/storage/xtradb/row/row0ins.c --- mariadb-5.5-5.5.39/storage/xtradb/row/row0ins.c 2014-08-03 12:00:42.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/row/row0ins.c 2014-10-08 13:19:51.000000000 +0000 @@ -1714,7 +1714,7 @@ do { const rec_t* rec = btr_pcur_get_rec(&pcur); const buf_block_t* block = btr_pcur_get_block(&pcur); - ulint lock_type; + const ulint lock_type = LOCK_ORDINARY; if (page_rec_is_infimum(rec)) { @@ -1724,16 +1724,6 @@ offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - /* If the transaction isolation level is no stronger than - READ COMMITTED, then avoid gap locks. */ - if (!page_rec_is_supremum(rec) - && thr_get_trx(thr)->isolation_level - <= TRX_ISO_READ_COMMITTED) { - lock_type = LOCK_REC_NOT_GAP; - } else { - lock_type = LOCK_ORDINARY; - } - if (allow_duplicates) { /* If the SQL-query will update or replace diff -Nru mariadb-5.5-5.5.39/storage/xtradb/srv/srv0srv.c mariadb-5.5-5.5.40/storage/xtradb/srv/srv0srv.c --- mariadb-5.5-5.5.39/storage/xtradb/srv/srv0srv.c 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/srv/srv0srv.c 2014-10-08 13:19:52.000000000 +0000 @@ -440,7 +440,12 @@ UNIV_INTERN ulint srv_dict_size_limit = 0; /*-------------------------------------------*/ +#ifdef HAVE_MEMORY_BARRIER +/* No idea to wait long with memory barriers */ +UNIV_INTERN ulong srv_n_spin_wait_rounds = 15; +#else UNIV_INTERN ulong srv_n_spin_wait_rounds = 30; +#endif UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500; UNIV_INTERN ulong srv_thread_sleep_delay = 10000; UNIV_INTERN ulong srv_spin_wait_delay = 6; @@ -2929,9 +2934,10 @@ /* Try to track a strange bug reported by Harald Fuchs and others, where the lsn seems to decrease at times */ - new_lsn = log_get_lsn(); + /* We have to use nowait to ensure we don't block */ + new_lsn= log_get_lsn_nowait(); - if (new_lsn < old_lsn) { + if (new_lsn && new_lsn < old_lsn) { 
ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Error: old log sequence number %llu" @@ -2943,7 +2949,8 @@ ut_ad(0); } - old_lsn = new_lsn; + if (new_lsn) + old_lsn = new_lsn; if (difftime(time(NULL), srv_last_monitor_time) > 60) { /* We referesh InnoDB Monitor values so that averages are diff -Nru mariadb-5.5-5.5.39/storage/xtradb/sync/sync0arr.c mariadb-5.5-5.5.40/storage/xtradb/sync/sync0arr.c --- mariadb-5.5-5.5.39/storage/xtradb/sync/sync0arr.c 2014-08-03 12:00:42.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/sync/sync0arr.c 2014-10-08 13:19:52.000000000 +0000 @@ -815,6 +815,7 @@ lock = cell->wait_object; + os_rmb; if (lock->lock_word > 0) { /* Either unlocked or only read locked. */ @@ -826,6 +827,7 @@ lock = cell->wait_object; /* lock_word == 0 means all readers have left */ + os_rmb; if (lock->lock_word == 0) { return(TRUE); @@ -834,6 +836,7 @@ lock = cell->wait_object; /* lock_word > 0 means no writer or reserved writer */ + os_rmb; if (lock->lock_word > 0) { return(TRUE); diff -Nru mariadb-5.5-5.5.39/storage/xtradb/sync/sync0rw.c mariadb-5.5-5.5.40/storage/xtradb/sync/sync0rw.c --- mariadb-5.5-5.5.39/storage/xtradb/sync/sync0rw.c 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/sync/sync0rw.c 2014-10-08 13:19:52.000000000 +0000 @@ -40,6 +40,7 @@ #include "srv0srv.h" #include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */ #include "ha_prototypes.h" +#include "my_cpu.h" /* IMPLEMENTATION OF THE RW_LOCK @@ -179,18 +180,12 @@ To modify the debug info list of an rw-lock, this mutex has to be acquired in addition to the mutex protecting the lock. 
*/ -UNIV_INTERN mutex_t rw_lock_debug_mutex; +UNIV_INTERN os_fast_mutex_t rw_lock_debug_mutex; # ifdef UNIV_PFS_MUTEX UNIV_INTERN mysql_pfs_key_t rw_lock_debug_mutex_key; # endif -/* If deadlock detection does not get immediately the mutex, -it may wait for this event */ -UNIV_INTERN os_event_t rw_lock_debug_event; -/* This is set to TRUE, if there may be waiters for the event */ -UNIV_INTERN ibool rw_lock_debug_waiters; - /******************************************************************//** Creates a debug info struct. */ static @@ -390,15 +385,19 @@ lock_loop: /* Spin waiting for the writer field to become free */ + os_rmb; + HMT_low(); while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) { if (srv_spin_wait_delay) { ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); } i++; + os_rmb; } - if (i == SYNC_SPIN_ROUNDS) { + HMT_medium(); + if (lock->lock_word <= 0) { os_thread_yield(); } @@ -495,16 +494,19 @@ ulint index; ulint i = 0; + os_rmb; ut_ad(lock->lock_word <= 0); - + HMT_low(); while (lock->lock_word < 0) { if (srv_spin_wait_delay) { ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); } if(i < SYNC_SPIN_ROUNDS) { i++; + os_rmb; continue; } + HMT_medium(); /* If there is still a reader, then go to sleep.*/ rw_x_spin_round_count += i; @@ -541,7 +543,9 @@ sync_array_free_cell(sync_primary_wait_array, index); } + HMT_low(); } + HMT_medium(); rw_x_spin_round_count += i; } @@ -579,6 +583,8 @@ file_name, line); } else { + if (!pass) + os_rmb; /* Decrement failed: relock or failed lock */ if (!pass && lock->recursive && os_thread_eq(lock->writer_thread, curr_thread)) { @@ -644,6 +650,8 @@ } /* Spin waiting for the lock_word to become free */ + os_rmb; + HMT_low(); while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) { if (srv_spin_wait_delay) { @@ -652,7 +660,9 @@ } i++; + os_rmb; } + HMT_medium(); if (i == SYNC_SPIN_ROUNDS) { os_thread_yield(); } else { @@ -715,22 +725,7 @@ rw_lock_debug_mutex_enter(void) /*===========================*/ { -loop: - if (0 == 
mutex_enter_nowait(&rw_lock_debug_mutex)) { - return; - } - - os_event_reset(rw_lock_debug_event); - - rw_lock_debug_waiters = TRUE; - - if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) { - return; - } - - os_event_wait(rw_lock_debug_event); - - goto loop; + os_fast_mutex_lock(&rw_lock_debug_mutex); } /******************************************************************//** @@ -740,12 +735,7 @@ rw_lock_debug_mutex_exit(void) /*==========================*/ { - mutex_exit(&rw_lock_debug_mutex); - - if (rw_lock_debug_waiters) { - rw_lock_debug_waiters = FALSE; - os_event_set(rw_lock_debug_event); - } + os_fast_mutex_unlock(&rw_lock_debug_mutex); } /******************************************************************//** diff -Nru mariadb-5.5-5.5.39/storage/xtradb/sync/sync0sync.c mariadb-5.5-5.5.40/storage/xtradb/sync/sync0sync.c --- mariadb-5.5-5.5.39/storage/xtradb/sync/sync0sync.c 2014-08-03 12:00:38.000000000 +0000 +++ mariadb-5.5-5.5.40/storage/xtradb/sync/sync0sync.c 2014-10-08 13:19:52.000000000 +0000 @@ -44,6 +44,7 @@ # include "srv0start.h" /* srv_is_being_started */ #endif /* UNIV_SYNC_DEBUG */ #include "ha_prototypes.h" +#include "my_cpu.h" /* REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX @@ -481,6 +482,8 @@ ptr = &(mutex->waiters); + os_wmb; + *ptr = n; /* Here we assume that the write of a single word in memory is atomic */ #endif @@ -523,13 +526,15 @@ spin_loop: ut_d(mutex->count_spin_loop++); + HMT_low(); while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) { if (srv_spin_wait_delay) { ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); } - + os_rmb; // Ensure future reads sees new values i++; } + HMT_medium(); if (i == SYNC_SPIN_ROUNDS) { #ifdef UNIV_DEBUG @@ -1509,11 +1514,7 @@ SYNC_NO_ORDER_CHECK); #ifdef UNIV_SYNC_DEBUG - mutex_create(rw_lock_debug_mutex_key, &rw_lock_debug_mutex, - SYNC_NO_ORDER_CHECK); - - rw_lock_debug_event = os_event_create(NULL); - rw_lock_debug_waiters = FALSE; + os_fast_mutex_init(rw_lock_debug_mutex_key, 
&rw_lock_debug_mutex); #endif /* UNIV_SYNC_DEBUG */ } @@ -1581,6 +1582,7 @@ sync_order_checks_on = FALSE; sync_thread_level_arrays_free(); + os_fast_mutex_free(&rw_lock_debug_mutex); #endif /* UNIV_SYNC_DEBUG */ sync_initialized = FALSE; diff -Nru mariadb-5.5-5.5.39/strings/ctype-mb.c mariadb-5.5-5.5.40/strings/ctype-mb.c --- mariadb-5.5-5.5.39/strings/ctype-mb.c 2014-08-03 12:00:35.000000000 +0000 +++ mariadb-5.5-5.5.40/strings/ctype-mb.c 2014-10-08 13:19:52.000000000 +0000 @@ -1,4 +1,4 @@ -/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. +/* Copyright (c) 2000, 2014, Oracle and/or its affiliates. Copyright (c) 2009, 2014, SkySQL Ab. This program is free software; you can redistribute it and/or modify @@ -1043,7 +1043,7 @@ } if (*wildstr == w_many) { /* Found w_many */ - uchar cmp; + int cmp; const char* mb = wildstr; int mb_len=0; diff -Nru mariadb-5.5-5.5.39/strings/decimal.c mariadb-5.5-5.5.40/strings/decimal.c --- mariadb-5.5-5.5.39/strings/decimal.c 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/strings/decimal.c 2014-10-08 13:19:51.000000000 +0000 @@ -1,5 +1,5 @@ -/* Copyright (c) 2004, 2013, Oracle and/or its affiliates. - Copyright (c) 2009, 2011, Monty Program Ab. +/* Copyright (c) 2004, 2014, Oracle and/or its affiliates. + Copyright (c) 2009, 2014, Monty Program Ab. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -127,7 +127,6 @@ #define DIG_BASE 1000000000 #define DIG_MAX (DIG_BASE-1) #define DIG_BASE2 ((dec2)DIG_BASE * (dec2)DIG_BASE) -#define ROUND_UP(X) (((X)+DIG_PER_DEC1-1)/DIG_PER_DEC1) static const dec1 powers10[DIG_PER_DEC1+1]={ 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000}; static const int dig2bytes[DIG_PER_DEC1+1]={0, 1, 1, 2, 2, 3, 3, 4, 4, 4}; @@ -136,6 +135,11 @@ 999900000, 999990000, 999999000, 999999900, 999999990 }; +static inline int ROUND_UP(int x) +{ + return (x + (x > 0 ? 
DIG_PER_DEC1 - 1 : 0)) / DIG_PER_DEC1; +} + #ifdef HAVE_valgrind #define sanity(d) DBUG_ASSERT((d)->len > 0) #else @@ -2328,7 +2332,7 @@ error=E_DEC_TRUNCATED; goto done; } - stop1=start1+frac0; + stop1= start1 + frac0 + intg0; frac0+=intg0; to->intg=0; while (intg0++ < 0) diff -Nru mariadb-5.5-5.5.39/support-files/mysql.5.5.39.spec mariadb-5.5-5.5.40/support-files/mysql.5.5.39.spec --- mariadb-5.5-5.5.39/support-files/mysql.5.5.39.spec 2014-08-03 12:00:45.000000000 +0000 +++ mariadb-5.5-5.5.40/support-files/mysql.5.5.39.spec 1970-01-01 00:00:00.000000000 +0000 @@ -1,2057 +0,0 @@ -# Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; see the file COPYING. If not, write to the -# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston -# MA 02110-1301 USA. - -############################################################################## -# Some common macro definitions -############################################################################## - -# NOTE: "vendor" is used in upgrade/downgrade check, so you can't -# change these, has to be exactly as is. -%global mysql_old_vendor MySQL AB -%global mysql_vendor_2 Sun Microsystems, Inc. 
-%global mysql_vendor Oracle and/or its affiliates - -%global mysql_version 5.5.39 - -%global mysqld_user mysql -%global mysqld_group mysql -%global mysqldatadir /var/lib/mysql - -%global release 2 - - -# -# Macros we use which are not available in all supported versions of RPM -# -# - defined/undefined are missing on RHEL4 -# -%if %{expand:%{?defined:0}%{!?defined:1}} -%define defined() %{expand:%%{?%{1}:1}%%{!?%{1}:0}} -%endif -%if %{expand:%{?undefined:0}%{!?undefined:1}} -%define undefined() %{expand:%%{?%{1}:0}%%{!?%{1}:1}} -%endif - -# ---------------------------------------------------------------------------- -# RPM build tools now automatically detect Perl module dependencies. This -# detection causes problems as it is broken in some versions, and it also -# provides unwanted dependencies from mandatory scripts in our package. -# It might not be possible to disable this in all versions of RPM, but here we -# try anyway. We keep the "AutoReqProv: no" for the "test" sub package, as -# disabling here might fail, and that package has the most problems. -# See: -# http://fedoraproject.org/wiki/Packaging/Perl#Filtering_Requires:_and_Provides -# http://www.wideopen.com/archives/rpm-list/2002-October/msg00343.html -# ---------------------------------------------------------------------------- -%undefine __perl_provides -%undefine __perl_requires - -############################################################################## -# Command line handling -############################################################################## -# -# To set options: -# -# $ rpmbuild --define="option " ... 
-# - -# ---------------------------------------------------------------------------- -# Commercial builds -# ---------------------------------------------------------------------------- -%if %{undefined commercial} -%define commercial 0 -%endif - -# ---------------------------------------------------------------------------- -# Source name -# ---------------------------------------------------------------------------- -%if %{undefined src_base} -%define src_base mysql -%endif -%define src_dir %{src_base}-%{mysql_version} - -# ---------------------------------------------------------------------------- -# Feature set (storage engines, options). Default to community (everything) -# ---------------------------------------------------------------------------- -%if %{undefined feature_set} -%define feature_set community -%endif - -# ---------------------------------------------------------------------------- -# Server comment strings -# ---------------------------------------------------------------------------- -%if %{undefined compilation_comment_debug} -%define compilation_comment_debug MySQL Community Server - Debug (GPL) -%endif -%if %{undefined compilation_comment_release} -%define compilation_comment_release MySQL Community Server (GPL) -%endif - -# ---------------------------------------------------------------------------- -# Product and server suffixes -# ---------------------------------------------------------------------------- -%if %{undefined product_suffix} - %if %{defined short_product_tag} - %define product_suffix -%{short_product_tag} - %else - %define product_suffix %{nil} - %endif -%endif - -%if %{undefined server_suffix} -%define server_suffix %{nil} -%endif - -# ---------------------------------------------------------------------------- -# Distribution support -# ---------------------------------------------------------------------------- -%if %{undefined distro_specific} -%define distro_specific 0 -%endif -%if %{distro_specific} - %if %(test -f 
/etc/enterprise-release && echo 1 || echo 0) - %define oelver %(rpm -qf --qf '%%{version}\\n' /etc/enterprise-release | sed -e 's/^\\([0-9]*\\).*/\\1/g') - %if "%oelver" == "4" - %define distro_description Oracle Enterprise Linux 4 - %define distro_releasetag oel4 - %define distro_buildreq gcc-c++ gperf ncurses-devel perl readline-devel time zlib-devel cmake libaio-devel - %define distro_requires chkconfig coreutils grep procps shadow-utils net-tools - %else - %if "%oelver" == "5" - %define distro_description Oracle Enterprise Linux 5 - %define distro_releasetag oel5 - %define distro_buildreq gcc-c++ gperf ncurses-devel perl readline-devel time zlib-devel cmake libaio-devel - %define distro_requires chkconfig coreutils grep procps shadow-utils net-tools - %else - %{error:Oracle Enterprise Linux %{oelver} is unsupported} - %endif - %endif - %else - %if %(test -f /etc/oracle-release && echo 1 || echo 0) - %define elver %(rpm -qf --qf '%%{version}\\n' /etc/oracle-release | sed -e 's/^\\([0-9]*\\).*/\\1/g') - %if "%elver" == "6" - %define distro_description Oracle Linux 6 - %define distro_releasetag el6 - %define distro_buildreq gcc-c++ ncurses-devel perl readline-devel time zlib-devel cmake libaio-devel - %define distro_requires chkconfig coreutils grep procps shadow-utils net-tools - %else - %{error:Oracle Linux %{elver} is unsupported} - %endif - %else - %if %(test -f /etc/redhat-release && echo 1 || echo 0) - %define rhelver %(rpm -qf --qf '%%{version}\\n' /etc/redhat-release | sed -e 's/^\\([0-9]*\\).*/\\1/g') - %if "%rhelver" == "4" - %define distro_description Red Hat Enterprise Linux 4 - %define distro_releasetag rhel4 - %define distro_buildreq gcc-c++ gperf ncurses-devel perl readline-devel time zlib-devel cmake libaio-devel - %define distro_requires chkconfig coreutils grep procps shadow-utils net-tools - %else - %if "%rhelver" == "5" - %define distro_description Red Hat Enterprise Linux 5 - %define distro_releasetag rhel5 - %define distro_buildreq gcc-c++ 
gperf ncurses-devel perl readline-devel time zlib-devel cmake libaio-devel - %define distro_requires chkconfig coreutils grep procps shadow-utils net-tools - %else - %if "%rhelver" == "6" - %define distro_description Red Hat Enterprise Linux 6 - %define distro_releasetag rhel6 - %define distro_buildreq gcc-c++ ncurses-devel perl readline-devel time zlib-devel cmake libaio-devel - %define distro_requires chkconfig coreutils grep procps shadow-utils net-tools - %else - %{error:Red Hat Enterprise Linux %{rhelver} is unsupported} - %endif - %endif - %endif - %else - %if %(test -f /etc/SuSE-release && echo 1 || echo 0) - %define susever %(rpm -qf --qf '%%{version}\\n' /etc/SuSE-release | cut -d. -f1) - %if "%susever" == "10" - %define distro_description SUSE Linux Enterprise Server 10 - %define distro_releasetag sles10 - %define distro_buildreq gcc-c++ gdbm-devel gperf ncurses-devel openldap2-client readline-devel zlib-devel cmake libaio-devel - %define distro_requires aaa_base coreutils grep procps pwdutils - %else - %if "%susever" == "11" - %define distro_description SUSE Linux Enterprise Server 11 - %define distro_releasetag sles11 - %define distro_buildreq gcc-c++ gdbm-devel gperf ncurses-devel openldap2-client procps pwdutils readline-devel zlib-devel cmake libaio-devel - %define distro_requires aaa_base coreutils grep procps pwdutils - %else - %{error:SuSE %{susever} is unsupported} - %endif - %endif - %else - %{error:Unsupported distribution} - %endif - %endif - %endif - %endif -%else - %define generic_kernel %(uname -r | cut -d. 
-f1-2) - %define distro_description Generic Linux (kernel %{generic_kernel}) - %define distro_releasetag linux%{generic_kernel} - %define distro_buildreq gcc-c++ gperf ncurses-devel perl readline-devel time zlib-devel - %define distro_requires coreutils grep procps /sbin/chkconfig /usr/sbin/useradd /usr/sbin/groupadd -%endif - -# Avoid debuginfo RPMs, leaves binaries unstripped -%define debug_package %{nil} - -# Hack to work around bug in RHEL5 __os_install_post macro, wrong inverted -# test for __debug_package -%define __strip /bin/true - -# ---------------------------------------------------------------------------- -# Support optional "tcmalloc" library (experimental) -# ---------------------------------------------------------------------------- -%if %{defined malloc_lib_target} -%define WITH_TCMALLOC 1 -%else -%define WITH_TCMALLOC 0 -%endif - -############################################################################## -# Configuration based upon above user input, not to be set directly -############################################################################## - -%if 0%{?commercial} -%define license_files_server %{src_dir}/LICENSE.mysql -%define license_type Commercial -%else -%define license_files_server %{src_dir}/COPYING %{src_dir}/README -%define license_type GPL -%endif - -############################################################################## -# Main spec file section -############################################################################## - -Name: MySQL%{product_suffix} -Summary: MySQL: a very fast and reliable SQL database server -Group: Applications/Databases -Version: 5.5.39 -Release: %{release}%{?distro_releasetag:.%{distro_releasetag}} -Distribution: %{distro_description} -License: Copyright (c) 2000, 2014, %{mysql_vendor}. All rights reserved. Under %{license_type} license as shown in the Description field. 
-Source: http://www.mysql.com/Downloads/MySQL-5.5/%{src_dir}.tar.gz -URL: http://www.mysql.com/ -Packager: MySQL Release Engineering -Vendor: %{mysql_vendor} -BuildRequires: %{distro_buildreq} - -# Regression tests may take a long time, override the default to skip them -%{!?runselftest:%global runselftest 1} - -# Think about what you use here since the first step is to -# run a rm -rf -BuildRoot: %{_tmppath}/%{name}-%{version}-build - -# From the manual -%description -The MySQL(TM) software delivers a very fast, multi-threaded, multi-user, -and robust SQL (Structured Query Language) database server. MySQL Server -is intended for mission-critical, heavy-load production systems as well -as for embedding into mass-deployed software. MySQL is a trademark of -%{mysql_vendor} - -The MySQL software has Dual Licensing, which means you can use the MySQL -software free of charge under the GNU General Public License -(http://www.gnu.org/licenses/). You can also purchase commercial MySQL -licenses from %{mysql_vendor} if you do not wish to be bound by the terms of -the GPL. See the chapter "Licensing and Support" in the manual for -further info. - -The MySQL web site (http://www.mysql.com/) provides the latest -news and information about the MySQL software. Also please see the -documentation and the manual for more information. 
- -############################################################################## -# Sub package definition -############################################################################## - -%package -n MySQL-server%{product_suffix} -Summary: MySQL: a very fast and reliable SQL database server -Group: Applications/Databases -Requires: %{distro_requires} -%if 0%{?commercial} -Obsoletes: MySQL-server -%else -Obsoletes: MySQL-server-advanced -%endif -Obsoletes: mysql-server < %{version}-%{release} -Obsoletes: mysql-server-advanced -Obsoletes: MySQL-server-classic MySQL-server-community MySQL-server-enterprise -Obsoletes: MySQL-server-advanced-gpl MySQL-server-enterprise-gpl -Provides: mysql-server = %{version}-%{release} -Provides: mysql-server%{?_isa} = %{version}-%{release} - -%description -n MySQL-server%{product_suffix} -The MySQL(TM) software delivers a very fast, multi-threaded, multi-user, -and robust SQL (Structured Query Language) database server. MySQL Server -is intended for mission-critical, heavy-load production systems as well -as for embedding into mass-deployed software. MySQL is a trademark of -%{mysql_vendor} - -The MySQL software has Dual Licensing, which means you can use the MySQL -software free of charge under the GNU General Public License -(http://www.gnu.org/licenses/). You can also purchase commercial MySQL -licenses from %{mysql_vendor} if you do not wish to be bound by the terms of -the GPL. See the chapter "Licensing and Support" in the manual for -further info. - -The MySQL web site (http://www.mysql.com/) provides the latest news and -information about the MySQL software. Also please see the documentation -and the manual for more information. - -This package includes the MySQL server binary as well as related utilities -to run and administer a MySQL server. - -If you want to access and work with the database, you have to install -package "MySQL-client%{product_suffix}" as well! 
- -# ---------------------------------------------------------------------------- -%package -n MySQL-client%{product_suffix} -Summary: MySQL - Client -Group: Applications/Databases -%if 0%{?commercial} -Obsoletes: MySQL-client -%else -Obsoletes: MySQL-client-advanced -%endif -Obsoletes: mysql < %{version}-%{release} -Obsoletes: mysql-advanced < %{version}-%{release} -Obsoletes: MySQL-client-classic MySQL-client-community MySQL-client-enterprise -Obsoletes: MySQL-client-advanced-gpl MySQL-client-enterprise-gpl -Provides: mysql = %{version}-%{release} -Provides: mysql%{?_isa} = %{version}-%{release} - -%description -n MySQL-client%{product_suffix} -This package contains the standard MySQL clients and administration tools. - -For a description of MySQL see the base MySQL RPM or http://www.mysql.com/ - -# ---------------------------------------------------------------------------- -%package -n MySQL-test%{product_suffix} -Summary: MySQL - Test suite -Group: Applications/Databases -%if 0%{?commercial} -Requires: MySQL-client-advanced perl -Obsoletes: MySQL-test -%else -Requires: MySQL-client perl -Obsoletes: MySQL-test-advanced -%endif -Obsoletes: mysql-test < %{version}-%{release} -Obsoletes: mysql-test-advanced -Obsoletes: mysql-bench MySQL-bench -Obsoletes: MySQL-test-classic MySQL-test-community MySQL-test-enterprise -Obsoletes: MySQL-test-advanced-gpl MySQL-test-enterprise-gpl -Provides: mysql-test = %{version}-%{release} -Provides: mysql-test%{?_isa} = %{version}-%{release} -AutoReqProv: no - -%description -n MySQL-test%{product_suffix} -This package contains the MySQL regression test suite. 
- -For a description of MySQL see the base MySQL RPM or http://www.mysql.com/ - -# ---------------------------------------------------------------------------- -%package -n MySQL-devel%{product_suffix} -Summary: MySQL - Development header files and libraries -Group: Applications/Databases -%if 0%{?commercial} -Obsoletes: MySQL-devel -%else -Obsoletes: MySQL-devel-advanced -%endif -Obsoletes: mysql-devel < %{version}-%{release} -Obsoletes: mysql-embedded-devel mysql-devel-advanced mysql-embedded-devel-advanced -Obsoletes: MySQL-devel-classic MySQL-devel-community MySQL-devel-enterprise -Obsoletes: MySQL-devel-advanced-gpl MySQL-devel-enterprise-gpl -Provides: mysql-devel = %{version}-%{release} -Provides: mysql-devel%{?_isa} = %{version}-%{release} - -%description -n MySQL-devel%{product_suffix} -This package contains the development header files and libraries necessary -to develop MySQL client applications. - -For a description of MySQL see the base MySQL RPM or http://www.mysql.com/ - -# ---------------------------------------------------------------------------- -%package -n MySQL-shared%{product_suffix} -Summary: MySQL - Shared libraries -Group: Applications/Databases -%if 0%{?commercial} -Obsoletes: MySQL-shared -%else -Obsoletes: MySQL-shared-advanced -%endif -Obsoletes: MySQL-shared-standard MySQL-shared-pro -Obsoletes: MySQL-shared-pro-cert MySQL-shared-pro-gpl -Obsoletes: MySQL-shared-pro-gpl-cert -Obsoletes: MySQL-shared-classic MySQL-shared-community MySQL-shared-enterprise -Obsoletes: MySQL-shared-advanced-gpl MySQL-shared-enterprise-gpl - -%description -n MySQL-shared%{product_suffix} -This package contains the shared libraries (*.so*) which certain languages -and applications need to dynamically load and use MySQL. 
- -# ---------------------------------------------------------------------------- -%package -n MySQL-embedded%{product_suffix} -Summary: MySQL - Embedded library -Group: Applications/Databases -%if 0%{?commercial} -Requires: MySQL-devel-advanced -Obsoletes: MySQL-embedded -%else -Requires: MySQL-devel -Obsoletes: MySQL-embedded-advanced -%endif -Obsoletes: mysql-embedded < %{version}-%{release} -Obsoletes: mysql-embedded-advanced -Obsoletes: MySQL-embedded-pro -Obsoletes: MySQL-embedded-classic MySQL-embedded-community MySQL-embedded-enterprise -Obsoletes: MySQL-embedded-advanced-gpl MySQL-embedded-enterprise-gpl -Provides: mysql-embedded = %{version}-%{release} -Provides: mysql-emdedded%{?_isa} = %{version}-%{release} - -%description -n MySQL-embedded%{product_suffix} -This package contains the MySQL server as an embedded library. - -The embedded MySQL server library makes it possible to run a full-featured -MySQL server inside the client application. The main benefits are increased -speed and more simple management for embedded applications. - -The API is identical for the embedded MySQL version and the -client/server version. - -For a description of MySQL see the base MySQL RPM or http://www.mysql.com/ - -############################################################################## -%prep -%setup -T -a 0 -c -n %{src_dir} - -############################################################################## -%build - -# Fail quickly and obviously if user tries to build as root -%if %runselftest - if [ x"`id -u`" = x0 ]; then - echo "The MySQL regression tests may fail if run as root." - echo "If you really need to build the RPM as root, use" - echo "--define='runselftest 0' to skip the regression tests." - exit 1 - fi -%endif - -# Be strict about variables, bail at earliest opportunity, etc. -set -eu - -# Optional package files -touch optional-files-devel - -# -# Set environment in order of preference, MYSQL_BUILD_* first, then variable -# name, finally a default. 
RPM_OPT_FLAGS is assumed to be a part of the -# default RPM build environment. -# -# We set CXX=gcc by default to support so-called 'generic' binaries, where we -# do not have a dependancy on libgcc/libstdc++. This only works while we do -# not require C++ features such as exceptions, and may need to be removed at -# a later date. -# - -# This is a hack, $RPM_OPT_FLAGS on ia64 hosts contains flags which break -# the compile in cmd-line-utils/readline - needs investigation, but for now -# we simply unset it and use those specified directly in cmake. -%if "%{_arch}" == "ia64" -RPM_OPT_FLAGS= -%endif - -export PATH=${MYSQL_BUILD_PATH:-$PATH} -export CC=${MYSQL_BUILD_CC:-${CC:-gcc}} -export CXX=${MYSQL_BUILD_CXX:-${CXX:-gcc}} -export CFLAGS=${MYSQL_BUILD_CFLAGS:-${CFLAGS:-$RPM_OPT_FLAGS}} -export CXXFLAGS=${MYSQL_BUILD_CXXFLAGS:-${CXXFLAGS:-$RPM_OPT_FLAGS -felide-constructors -fno-exceptions -fno-rtti}} -export LDFLAGS=${MYSQL_BUILD_LDFLAGS:-${LDFLAGS:-}} -export CMAKE=${MYSQL_BUILD_CMAKE:-${CMAKE:-cmake}} -export MAKE_JFLAG=${MYSQL_BUILD_MAKE_JFLAG:-} - -# Build debug mysqld and libmysqld.a -mkdir debug -( - cd debug - # Attempt to remove any optimisation flags from the debug build - CFLAGS=`echo " ${CFLAGS} " | \ - sed -e 's/ -O[0-9]* / /' \ - -e 's/ -unroll2 / /' \ - -e 's/ -ip / /' \ - -e 's/^ //' \ - -e 's/ $//'` - CXXFLAGS=`echo " ${CXXFLAGS} " | \ - sed -e 's/ -O[0-9]* / /' \ - -e 's/ -unroll2 / /' \ - -e 's/ -ip / /' \ - -e 's/^ //' \ - -e 's/ $//'` - # XXX: MYSQL_UNIX_ADDR should be in cmake/* but mysql_version is included before - # XXX: install_layout so we can't just set it based on INSTALL_LAYOUT=RPM - ${CMAKE} ../%{src_dir} -DBUILD_CONFIG=mysql_release -DINSTALL_LAYOUT=RPM \ - -DCMAKE_BUILD_TYPE=Debug \ - -DENABLE_DTRACE=OFF \ - -DMYSQL_UNIX_ADDR="%{mysqldatadir}/mysql.sock" \ - -DFEATURE_SET="%{feature_set}" \ - -DCOMPILATION_COMMENT="%{compilation_comment_debug}" \ - -DMYSQL_SERVER_SUFFIX="%{server_suffix}" - echo BEGIN_DEBUG_CONFIG ; egrep '^#define' 
include/config.h ; echo END_DEBUG_CONFIG - make ${MAKE_JFLAG} VERBOSE=1 -) -# Build full release -mkdir release -( - cd release - # XXX: MYSQL_UNIX_ADDR should be in cmake/* but mysql_version is included before - # XXX: install_layout so we can't just set it based on INSTALL_LAYOUT=RPM - ${CMAKE} ../%{src_dir} -DBUILD_CONFIG=mysql_release -DINSTALL_LAYOUT=RPM \ - -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DENABLE_DTRACE=OFF \ - -DMYSQL_UNIX_ADDR="%{mysqldatadir}/mysql.sock" \ - -DFEATURE_SET="%{feature_set}" \ - -DCOMPILATION_COMMENT="%{compilation_comment_release}" \ - -DMYSQL_SERVER_SUFFIX="%{server_suffix}" - echo BEGIN_NORMAL_CONFIG ; egrep '^#define' include/config.h ; echo END_NORMAL_CONFIG - make ${MAKE_JFLAG} VERBOSE=1 -) - -%if %runselftest - MTR_BUILD_THREAD=auto - export MTR_BUILD_THREAD - - (cd release && make test-bt-fast || true) -%endif - -############################################################################## -%install - -RBR=$RPM_BUILD_ROOT -MBD=$RPM_BUILD_DIR/%{src_dir} - -# Ensure that needed directories exists -install -d $RBR%{_sysconfdir}/{logrotate.d,init.d} -install -d $RBR%{mysqldatadir}/mysql -install -d $RBR%{_datadir}/mysql-test -install -d $RBR%{_datadir}/mysql/SELinux/RHEL4 -install -d $RBR%{_includedir} -install -d $RBR%{_libdir} -install -d $RBR%{_mandir} -install -d $RBR%{_sbindir} - -# Install all binaries -( - cd $MBD/release - make DESTDIR=$RBR install -) - -# For gcc builds, include libgcc.a in the devel subpackage (BUG 4921). Do -# this in a sub-shell to ensure we don't pollute the install environment -# with compiler bits. 
-( - PATH=${MYSQL_BUILD_PATH:-$PATH} - CC=${MYSQL_BUILD_CC:-${CC:-gcc}} - CFLAGS=${MYSQL_BUILD_CFLAGS:-${CFLAGS:-$RPM_OPT_FLAGS}} - if "${CC}" -v 2>&1 | grep '^gcc.version' >/dev/null 2>&1; then - libgcc=`${CC} ${CFLAGS} --print-libgcc-file` - if [ -f ${libgcc} ]; then - mkdir -p $RBR%{_libdir}/mysql - install -m 644 ${libgcc} $RBR%{_libdir}/mysql/libmygcc.a - echo "%{_libdir}/mysql/libmygcc.a" >>optional-files-devel - fi - fi -) - -# FIXME: at some point we should stop doing this and just install everything -# FIXME: directly into %{_libdir}/mysql - perhaps at the same time as renaming -# FIXME: the shared libraries to use libmysql*-$major.$minor.so syntax -mv -v $RBR/%{_libdir}/*.a $RBR/%{_libdir}/mysql/ - -# Install logrotate and autostart -install -m 644 $MBD/release/support-files/mysql-log-rotate $RBR%{_sysconfdir}/logrotate.d/mysql -install -m 755 $MBD/release/support-files/mysql.server $RBR%{_sysconfdir}/init.d/mysql - -# Create a symlink "rcmysql", pointing to the init.script. SuSE users -# will appreciate that, as all services usually offer this. -ln -s %{_sysconfdir}/init.d/mysql $RBR%{_sbindir}/rcmysql - -# Touch the place where the my.cnf config file might be located -# Just to make sure it's in the file list and marked as a config file -touch $RBR%{_sysconfdir}/my.cnf - -# Install SELinux files in datadir -install -m 600 $MBD/%{src_dir}/support-files/RHEL4-SElinux/mysql.{fc,te} \ - $RBR%{_datadir}/mysql/SELinux/RHEL4 - -%if %{WITH_TCMALLOC} -# Even though this is a shared library, put it under /usr/lib*/mysql, so it -# doesn't conflict with possible shared lib by the same name in /usr/lib*. See -# `mysql_config --variable=pkglibdir` and mysqld_safe for how this is used. -install -m 644 "%{malloc_lib_source}" \ - "$RBR%{_libdir}/mysql/%{malloc_lib_target}" -%endif - -# Remove man pages we explicitly do not want to package, avoids 'unpackaged -# files' warning. 
-# This has become obsolete: rm -f $RBR%{_mandir}/man1/make_win_bin_dist.1* - -############################################################################## -# Post processing actions, i.e. when installed -############################################################################## - -%pre -n MySQL-server%{product_suffix} -# This is the code running at the beginning of a RPM upgrade action, -# before replacing the old files with the new ones. - -# ATTENTION: Parts of this are duplicated in the "triggerpostun" ! - -# There are users who deviate from the default file system layout. -# Check local settings to support them. -if [ -x %{_bindir}/my_print_defaults ] -then - mysql_datadir=`%{_bindir}/my_print_defaults server mysqld | grep '^--datadir=' | sed -n 's/--datadir=//p'` - PID_FILE_PATT=`%{_bindir}/my_print_defaults server mysqld | grep '^--pid-file=' | sed -n 's/--pid-file=//p'` -fi -if [ -z "$mysql_datadir" ] -then - mysql_datadir=%{mysqldatadir} -fi -if [ -z "$PID_FILE_PATT" ] -then - PID_FILE_PATT="$mysql_datadir/*.pid" -fi - -# Check if we can safely upgrade. An upgrade is only safe if it's from one -# of our RPMs in the same version family. - -# Handle both ways of spelling the capability. -installed=`rpm -q --whatprovides mysql-server 2> /dev/null` -if [ $? -ne 0 -o -z "$installed" ]; then - installed=`rpm -q --whatprovides MySQL-server 2> /dev/null` -fi -if [ $? 
-eq 0 -a -n "$installed" ]; then - installed=`echo $installed | sed 's/\([^ ]*\) .*/\1/'` # Tests have shown duplicated package names - vendor=`rpm -q --queryformat='%{VENDOR}' "$installed" 2>&1` - version=`rpm -q --queryformat='%{VERSION}' "$installed" 2>&1` - myoldvendor='%{mysql_old_vendor}' - myvendor_2='%{mysql_vendor_2}' - myvendor='%{mysql_vendor}' - myversion='%{mysql_version}' - - old_family=`echo $version \ - | sed -n -e 's,^\([1-9][0-9]*\.[0-9][0-9]*\)\..*$,\1,p'` - new_family=`echo $myversion \ - | sed -n -e 's,^\([1-9][0-9]*\.[0-9][0-9]*\)\..*$,\1,p'` - - [ -z "$vendor" ] && vendor='' - [ -z "$old_family" ] && old_family="" - [ -z "$new_family" ] && new_family="" - - error_text= - if [ "$vendor" != "$myoldvendor" \ - -a "$vendor" != "$myvendor_2" \ - -a "$vendor" != "$myvendor" ]; then - error_text="$error_text -The current MySQL server package is provided by a different -vendor ($vendor) than $myoldvendor, $myvendor_2, or $myvendor. -Some files may be installed to different locations, including log -files and the service startup script in %{_sysconfdir}/init.d/. -" - fi - - if [ "$old_family" != "$new_family" ]; then - error_text="$error_text -Upgrading directly from MySQL $old_family to MySQL $new_family may not -be safe in all cases. A manual dump and restore using mysqldump is -recommended. It is important to review the MySQL manual's Upgrading -section for version-specific incompatibilities. -" - fi - - if [ -n "$error_text" ]; then - cat <&2 - -****************************************************************** -A MySQL server package ($installed) is installed. -$error_text -A manual upgrade is required. - -- Ensure that you have a complete, working backup of your data and my.cnf - files -- Shut down the MySQL server cleanly -- Remove the existing MySQL packages. 
Usually this command will - list the packages you should remove: - rpm -qa | grep -i '^mysql-' - - You may choose to use 'rpm --nodeps -ev ' to remove - the package which contains the mysqlclient shared library. The - library will be reinstalled by the MySQL-shared-compat package. -- Install the new MySQL packages supplied by $myvendor -- Ensure that the MySQL server is started -- Run the 'mysql_upgrade' program - -This is a brief description of the upgrade process. Important details -can be found in the MySQL manual, in the Upgrading section. -****************************************************************** -HERE - exit 1 - fi -fi - -# We assume that if there is exactly one ".pid" file, -# it contains the valid PID of a running MySQL server. -NR_PID_FILES=`ls -1 $PID_FILE_PATT 2>/dev/null | wc -l` -case $NR_PID_FILES in - 0 ) SERVER_TO_START='' ;; # No "*.pid" file == no running server - 1 ) SERVER_TO_START='true' ;; - * ) SERVER_TO_START='' # Situation not clear - SEVERAL_PID_FILES=true ;; -esac -# That logic may be debated: We might check whether it is non-empty, -# contains exactly one number (possibly a PID), and whether "ps" finds it. -# OTOH, if there is no such process, it means a crash without a cleanup - -# is that a reason not to start a new server after upgrade? - -STATUS_FILE=$mysql_datadir/RPM_UPGRADE_MARKER - -if [ -f $STATUS_FILE ]; then - echo "Some previous upgrade was not finished:" - ls -ld $STATUS_FILE - echo "Please check its status, then do" - echo " rm $STATUS_FILE" - echo "before repeating the MySQL upgrade." - exit 1 -elif [ -n "$SEVERAL_PID_FILES" ] ; then - echo "You have more than one PID file:" - ls -ld $PID_FILE_PATT - echo "Please check which one (if any) corresponds to a running server" - echo "and delete all others before repeating the MySQL upgrade." - exit 1 -fi - -NEW_VERSION=%{mysql_version}-%{release} - -# The "pre" section code is also run on a first installation, -# when there is no data directory yet. 
Protect against error messages. -if [ -d $mysql_datadir ] ; then - echo "MySQL RPM upgrade to version $NEW_VERSION" > $STATUS_FILE - echo "'pre' step running at `date`" >> $STATUS_FILE - echo >> $STATUS_FILE - fcount=`ls -ltr $mysql_datadir/*.err 2>/dev/null | wc -l` - if [ $fcount -gt 0 ] ; then - echo "ERR file(s):" >> $STATUS_FILE - ls -ltr $mysql_datadir/*.err >> $STATUS_FILE - echo >> $STATUS_FILE - echo "Latest 'Version' line in latest file:" >> $STATUS_FILE - grep '^Version' `ls -tr $mysql_datadir/*.err | tail -1` | \ - tail -1 >> $STATUS_FILE - echo >> $STATUS_FILE - fi - - if [ -n "$SERVER_TO_START" ] ; then - # There is only one PID file, race possibility ignored - echo "PID file:" >> $STATUS_FILE - ls -l $PID_FILE_PATT >> $STATUS_FILE - cat $PID_FILE_PATT >> $STATUS_FILE - echo >> $STATUS_FILE - echo "Server process:" >> $STATUS_FILE - ps -fp `cat $PID_FILE_PATT` >> $STATUS_FILE - echo >> $STATUS_FILE - echo "SERVER_TO_START=$SERVER_TO_START" >> $STATUS_FILE - else - # Take a note we checked it ... - echo "PID file:" >> $STATUS_FILE - ls -l $PID_FILE_PATT >> $STATUS_FILE 2>&1 - fi -fi - -# Shut down a previously installed server first -# Note we *could* make that depend on $SERVER_TO_START, but we rather don't, -# so a "stop" is attempted even if there is no PID file. -# (Maybe the "stop" doesn't work then, but we might fix that in itself.) -if [ -x %{_sysconfdir}/init.d/mysql ] ; then - %{_sysconfdir}/init.d/mysql stop > /dev/null 2>&1 - echo "Giving mysqld 5 seconds to exit nicely" - sleep 5 -fi - -%post -n MySQL-server%{product_suffix} -# This is the code running at the end of a RPM install or upgrade action, -# after the (new) files have been written. - -# ATTENTION: Parts of this are duplicated in the "triggerpostun" ! - -# There are users who deviate from the default file system layout. -# Check local settings to support them. 
-if [ -x %{_bindir}/my_print_defaults ] -then - mysql_datadir=`%{_bindir}/my_print_defaults server mysqld | grep '^--datadir=' | sed -n 's/--datadir=//p'` -fi -if [ -z "$mysql_datadir" ] -then - mysql_datadir=%{mysqldatadir} -fi - -NEW_VERSION=%{mysql_version}-%{release} -STATUS_FILE=$mysql_datadir/RPM_UPGRADE_MARKER - -# ---------------------------------------------------------------------- -# Create data directory if needed, check whether upgrade or install -# ---------------------------------------------------------------------- -if [ ! -d $mysql_datadir ] ; then mkdir -m 755 $mysql_datadir; fi -if [ -f $STATUS_FILE ] ; then - SERVER_TO_START=`grep '^SERVER_TO_START=' $STATUS_FILE | cut -c17-` -else - SERVER_TO_START='' -fi -# echo "Analyzed: SERVER_TO_START=$SERVER_TO_START" -if [ ! -d $mysql_datadir/mysql ] ; then - mkdir $mysql_datadir/mysql $mysql_datadir/test - echo "MySQL RPM installation of version $NEW_VERSION" >> $STATUS_FILE -else - # If the directory exists, we may assume it is an upgrade. - echo "MySQL RPM upgrade to version $NEW_VERSION" >> $STATUS_FILE -fi - -# ---------------------------------------------------------------------- -# Make MySQL start/shutdown automatically when the machine does it. -# ---------------------------------------------------------------------- -# NOTE: This still needs to be debated. Should we check whether these links -# for the other run levels exist(ed) before the upgrade? -# use chkconfig on Enterprise Linux and newer SuSE releases -if [ -x /sbin/chkconfig ] ; then - /sbin/chkconfig --add mysql -# use insserv for older SuSE Linux versions -elif [ -x /sbin/insserv ] ; then - /sbin/insserv %{_sysconfdir}/init.d/mysql -fi - -# ---------------------------------------------------------------------- -# Create a MySQL user and group. Do not report any problems if it already -# exists. 
-# ---------------------------------------------------------------------- -groupadd -r %{mysqld_group} 2> /dev/null || true -useradd -M -r -d $mysql_datadir -s /bin/bash -c "MySQL server" \ - -g %{mysqld_group} %{mysqld_user} 2> /dev/null || true -# The user may already exist, make sure it has the proper group nevertheless -# (BUG#12823) -usermod -g %{mysqld_group} %{mysqld_user} 2> /dev/null || true - -# ---------------------------------------------------------------------- -# Change permissions so that the user that will run the MySQL daemon -# owns all database files. -# ---------------------------------------------------------------------- -chown -R %{mysqld_user}:%{mysqld_group} $mysql_datadir - -# ---------------------------------------------------------------------- -# Initiate databases if needed -# ---------------------------------------------------------------------- -if ! grep '^MySQL RPM upgrade' $STATUS_FILE >/dev/null 2>&1 ; then - # Fix bug#45415: no "mysql_install_db" on an upgrade - # Do this as a negative to err towards more "install" runs - # rather than to miss one. - %{_bindir}/mysql_install_db --rpm --user=%{mysqld_user} -fi - -# ---------------------------------------------------------------------- -# Upgrade databases if needed would go here - but it cannot be automated yet -# ---------------------------------------------------------------------- - -# ---------------------------------------------------------------------- -# Change permissions again to fix any new files. -# ---------------------------------------------------------------------- -chown -R %{mysqld_user}:%{mysqld_group} $mysql_datadir - -# ---------------------------------------------------------------------- -# Fix permissions for the permission database so that only the user -# can read them. 
-# ---------------------------------------------------------------------- -chmod -R og-rw $mysql_datadir/mysql - -# ---------------------------------------------------------------------- -# install SELinux files - but don't override existing ones -# ---------------------------------------------------------------------- -SETARGETDIR=/etc/selinux/targeted/src/policy -SEDOMPROG=$SETARGETDIR/domains/program -SECONPROG=$SETARGETDIR/file_contexts/program -if [ -f /etc/redhat-release ] \ - && (grep -q "Red Hat Enterprise Linux .. release 4" /etc/redhat-release \ - || grep -q "CentOS release 4" /etc/redhat-release) ; then - echo - echo - echo 'Notes regarding SELinux on this platform:' - echo '=========================================' - echo - echo 'The default policy might cause server startup to fail because it is' - echo 'not allowed to access critical files. In this case, please update' - echo 'your installation.' - echo - echo 'The default policy might also cause inavailability of SSL related' - echo 'features because the server is not allowed to access /dev/random' - echo 'and /dev/urandom. If this is a problem, please do the following:' - echo - echo ' 1) install selinux-policy-targeted-sources from your OS vendor' - echo ' 2) add the following two lines to '$SEDOMPROG/mysqld.te':' - echo ' allow mysqld_t random_device_t:chr_file read;' - echo ' allow mysqld_t urandom_device_t:chr_file read;' - echo ' 3) cd to '$SETARGETDIR' and issue the following command:' - echo ' make load' - echo - echo -fi - -if [ -x sbin/restorecon ] ; then - sbin/restorecon -R var/lib/mysql -fi - -# Was the server running before the upgrade? If so, restart the new one. -if [ "$SERVER_TO_START" = "true" ] ; then - # Restart in the same way that mysqld will be started normally. - if [ -x %{_sysconfdir}/init.d/mysql ] ; then - %{_sysconfdir}/init.d/mysql start - echo "Giving mysqld 5 seconds to start" - sleep 5 - fi -fi - -# Collect an upgrade history ... 
-echo "Upgrade/install finished at `date`" >> $STATUS_FILE -echo >> $STATUS_FILE -echo "=====" >> $STATUS_FILE -STATUS_HISTORY=$mysql_datadir/RPM_UPGRADE_HISTORY -cat $STATUS_FILE >> $STATUS_HISTORY -mv -f $STATUS_FILE ${STATUS_FILE}-LAST # for "triggerpostun" - - -#echo "Thank you for installing the MySQL Community Server! For Production -#systems, we recommend MySQL Enterprise, which contains enterprise-ready -#software, intelligent advisory services, and full production support with -#scheduled service packs and more. Visit www.mysql.com/enterprise for more -#information." - -%preun -n MySQL-server%{product_suffix} - -# Which '$1' does this refer to? Fedora docs have info: -# " ... a count of the number of versions of the package that are installed. -# Action Count -# Install the first time 1 -# Upgrade 2 or higher (depending on the number of versions installed) -# Remove last version of package 0 " -# -# http://docs.fedoraproject.org/en-US/Fedora_Draft_Documentation/0.1/html/RPM_Guide/ch09s04s05.html - -if [ $1 = 0 ] ; then - # Stop MySQL before uninstalling it - if [ -x %{_sysconfdir}/init.d/mysql ] ; then - %{_sysconfdir}/init.d/mysql stop > /dev/null - # Remove autostart of MySQL - # use chkconfig on Enterprise Linux and newer SuSE releases - if [ -x /sbin/chkconfig ] ; then - /sbin/chkconfig --del mysql - # For older SuSE Linux versions - elif [ -x /sbin/insserv ] ; then - /sbin/insserv -r %{_sysconfdir}/init.d/mysql - fi - fi -fi - -# We do not remove the mysql user since it may still own a lot of -# database files. - -%triggerpostun -n MySQL-server%{product_suffix} --MySQL-server-community - -# Setup: We renamed this package, so any existing "server-community" -# package will be removed when this "server" is installed. -# Problem: RPM will first run the "pre" and "post" sections of this script, -# and only then the "preun" of that old community server. 
-# But this "preun" includes stopping the server and uninstalling the service, -# "chkconfig --del mysql" which removes the symlinks to the start script. -# Solution: *After* the community server got removed, restart this server -# and re-install the service. -# -# For information about triggers in spec files, see the Fedora docs: -# http://docs.fedoraproject.org/en-US/Fedora_Draft_Documentation/0.1/html/RPM_Guide/ch10s02.html -# For all details of this code, see the "pre" and "post" sections. - -# There are users who deviate from the default file system layout. -# Check local settings to support them. -if [ -x %{_bindir}/my_print_defaults ] -then - mysql_datadir=`%{_bindir}/my_print_defaults server mysqld | grep '^--datadir=' | sed -n 's/--datadir=//p'` -fi -if [ -z "$mysql_datadir" ] -then - mysql_datadir=%{mysqldatadir} -fi - -NEW_VERSION=%{mysql_version}-%{release} -STATUS_FILE=$mysql_datadir/RPM_UPGRADE_MARKER-LAST # Note the difference! -STATUS_HISTORY=$mysql_datadir/RPM_UPGRADE_HISTORY - -if [ -f $STATUS_FILE ] ; then - SERVER_TO_START=`grep '^SERVER_TO_START=' $STATUS_FILE | cut -c17-` -else - # This should never happen, but let's be prepared - SERVER_TO_START='' -fi -echo "Analyzed: SERVER_TO_START=$SERVER_TO_START" - -if [ -x /sbin/chkconfig ] ; then - /sbin/chkconfig --add mysql -# use insserv for older SuSE Linux versions -elif [ -x /sbin/insserv ] ; then - /sbin/insserv %{_sysconfdir}/init.d/mysql -fi - -# Was the server running before the upgrade? If so, restart the new one. -if [ "$SERVER_TO_START" = "true" ] ; then - # Restart in the same way that mysqld will be started normally. 
- if [ -x %{_sysconfdir}/init.d/mysql ] ; then - %{_sysconfdir}/init.d/mysql start - echo "Giving mysqld 5 seconds to start" - sleep 5 - fi -fi - -echo "Trigger 'postun --community' finished at `date`" >> $STATUS_HISTORY -echo >> $STATUS_HISTORY -echo "=====" >> $STATUS_HISTORY - - -# ---------------------------------------------------------------------- -# Clean up the BuildRoot after build is done -# ---------------------------------------------------------------------- -%clean -[ "$RPM_BUILD_ROOT" != "/" ] && [ -d $RPM_BUILD_ROOT ] \ - && rm -rf $RPM_BUILD_ROOT; - -############################################################################## -# Files section -############################################################################## - -%files -n MySQL-server%{product_suffix} -f release/support-files/plugins.files -%defattr(-,root,root,0755) - -%if %{defined license_files_server} -%doc %{license_files_server} -%endif -%doc %{src_dir}/Docs/ChangeLog -%doc %{src_dir}/Docs/INFO_SRC* -%doc release/Docs/INFO_BIN* -%doc release/support-files/my-*.cnf - -%if 0%{?commercial} -%doc %attr(644, root, root) %{_infodir}/mysql.info* -%endif - -%doc %attr(644, root, man) %{_mandir}/man1/innochecksum.1* -%doc %attr(644, root, man) %{_mandir}/man1/my_print_defaults.1* -%doc %attr(644, root, man) %{_mandir}/man1/myisam_ftdump.1* -%doc %attr(644, root, man) %{_mandir}/man1/myisamchk.1* -%doc %attr(644, root, man) %{_mandir}/man1/myisamlog.1* -%doc %attr(644, root, man) %{_mandir}/man1/myisampack.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysql_convert_table_format.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysql_fix_extensions.1* -%doc %attr(644, root, man) %{_mandir}/man8/mysqld.8* -%doc %attr(644, root, man) %{_mandir}/man1/mysqld_multi.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysqld_safe.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysqldumpslow.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysql_install_db.1* -%doc %attr(644, root, man) 
%{_mandir}/man1/mysql_plugin.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysql_secure_installation.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysql_setpermission.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysql_upgrade.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysqlhotcopy.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysqlman.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysql.server.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysqltest.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysql_tzinfo_to_sql.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysql_zap.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysqlbug.1* -%doc %attr(644, root, man) %{_mandir}/man1/perror.1* -%doc %attr(644, root, man) %{_mandir}/man1/replace.1* -%doc %attr(644, root, man) %{_mandir}/man1/resolve_stack_dump.1* -%doc %attr(644, root, man) %{_mandir}/man1/resolveip.1* - -%ghost %config(noreplace,missingok) %{_sysconfdir}/my.cnf - -%attr(755, root, root) %{_bindir}/innochecksum -%attr(755, root, root) %{_bindir}/my_print_defaults -%attr(755, root, root) %{_bindir}/myisam_ftdump -%attr(755, root, root) %{_bindir}/myisamchk -%attr(755, root, root) %{_bindir}/myisamlog -%attr(755, root, root) %{_bindir}/myisampack -%attr(755, root, root) %{_bindir}/mysql_convert_table_format -%attr(755, root, root) %{_bindir}/mysql_fix_extensions -%attr(755, root, root) %{_bindir}/mysql_install_db -%attr(755, root, root) %{_bindir}/mysql_plugin -%attr(755, root, root) %{_bindir}/mysql_secure_installation -%attr(755, root, root) %{_bindir}/mysql_setpermission -%attr(755, root, root) %{_bindir}/mysql_tzinfo_to_sql -%attr(755, root, root) %{_bindir}/mysql_upgrade -%attr(755, root, root) %{_bindir}/mysql_zap -%attr(755, root, root) %{_bindir}/mysqlbug -%attr(755, root, root) %{_bindir}/mysqld_multi -%attr(755, root, root) %{_bindir}/mysqld_safe -%attr(755, root, root) %{_bindir}/mysqldumpslow -%attr(755, root, root) %{_bindir}/mysqlhotcopy -%attr(755, root, root) %{_bindir}/mysqltest 
-%attr(755, root, root) %{_bindir}/perror -%attr(755, root, root) %{_bindir}/replace -%attr(755, root, root) %{_bindir}/resolve_stack_dump -%attr(755, root, root) %{_bindir}/resolveip - -%attr(755, root, root) %{_sbindir}/mysqld -%attr(755, root, root) %{_sbindir}/mysqld-debug -%attr(755, root, root) %{_sbindir}/rcmysql -%attr(755, root, root) %{_libdir}/mysql/plugin/daemon_example.ini - -%if %{WITH_TCMALLOC} -%attr(755, root, root) %{_libdir}/mysql/%{malloc_lib_target} -%endif - -%attr(644, root, root) %config(noreplace,missingok) %{_sysconfdir}/logrotate.d/mysql -%attr(755, root, root) %{_sysconfdir}/init.d/mysql - -%attr(755, root, root) %{_datadir}/mysql/ - -# ---------------------------------------------------------------------------- -%files -n MySQL-client%{product_suffix} - -%defattr(-, root, root, 0755) -%attr(755, root, root) %{_bindir}/msql2mysql -%attr(755, root, root) %{_bindir}/mysql -%attr(755, root, root) %{_bindir}/mysql_find_rows -%attr(755, root, root) %{_bindir}/mysql_waitpid -%attr(755, root, root) %{_bindir}/mysqlaccess -# XXX: This should be moved to %{_sysconfdir} -%attr(644, root, root) %{_bindir}/mysqlaccess.conf -%attr(755, root, root) %{_bindir}/mysqladmin -%attr(755, root, root) %{_bindir}/mysqlbinlog -%attr(755, root, root) %{_bindir}/mysqlcheck -%attr(755, root, root) %{_bindir}/mysqldump -%attr(755, root, root) %{_bindir}/mysqlimport -%attr(755, root, root) %{_bindir}/mysqlshow -%attr(755, root, root) %{_bindir}/mysqlslap - -%doc %attr(644, root, man) %{_mandir}/man1/msql2mysql.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysql.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysql_find_rows.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysql_waitpid.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysqlaccess.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysqladmin.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysqlbinlog.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysqlcheck.1* -%doc %attr(644, root, man) 
%{_mandir}/man1/mysqldump.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysqlimport.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysqlshow.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysqlslap.1* - -# ---------------------------------------------------------------------------- -%files -n MySQL-devel%{product_suffix} -f optional-files-devel -%defattr(-, root, root, 0755) -%doc %attr(644, root, man) %{_mandir}/man1/comp_err.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysql_config.1* -%attr(755, root, root) %{_bindir}/mysql_config -%dir %attr(755, root, root) %{_includedir}/mysql -%dir %attr(755, root, root) %{_libdir}/mysql -%{_includedir}/mysql/* -%{_datadir}/aclocal/mysql.m4 -%{_libdir}/mysql/libmysqlclient.a -%{_libdir}/mysql/libmysqlclient_r.a -%{_libdir}/mysql/libmysqlservices.a - -# ---------------------------------------------------------------------------- -%files -n MySQL-shared%{product_suffix} -%defattr(-, root, root, 0755) -# Shared libraries (omit for architectures that don't support them) -%{_libdir}/libmysql*.so* - -%post -n MySQL-shared%{product_suffix} -/sbin/ldconfig - -%postun -n MySQL-shared%{product_suffix} -/sbin/ldconfig - -# ---------------------------------------------------------------------------- -%files -n MySQL-test%{product_suffix} -%defattr(-, root, root, 0755) -%attr(-, root, root) %{_datadir}/mysql-test -%attr(755, root, root) %{_bindir}/mysql_client_test -%attr(755, root, root) %{_bindir}/mysql_client_test_embedded -%attr(755, root, root) %{_bindir}/mysqltest_embedded -%doc %attr(644, root, man) %{_mandir}/man1/mysql_client_test.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysql-stress-test.pl.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysql-test-run.pl.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysql_client_test_embedded.1* -%doc %attr(644, root, man) %{_mandir}/man1/mysqltest_embedded.1* - -# ---------------------------------------------------------------------------- -%files -n MySQL-embedded%{product_suffix} 
-%defattr(-, root, root, 0755) -%attr(755, root, root) %{_bindir}/mysql_embedded -%attr(644, root, root) %{_libdir}/mysql/libmysqld.a -%attr(644, root, root) %{_libdir}/mysql/libmysqld-debug.a - -############################################################################## -# The spec file changelog only includes changes made to the spec file -# itself - note that they must be ordered by date (important when -# merging BK trees) -############################################################################## -%changelog -* Wed Jul 02 2014 Bjorn Munch -- Disable dtrace unconditionally, breaks after we install Oracle dtrace - -* Wed Oct 30 2013 Balasubramanian Kandasamy -- Removed non gpl file docs/mysql.info from community packages - -* Mon Sep 09 2013 Balasubramanian Kandasamy -- Updated logic to get the correct count of PID files - -* Fri Aug 16 2013 Balasubramanian Kandasamy -- Added provides lowercase mysql tags - -* Wed Jun 26 2013 Balasubramanian Kandasamy -- Cleaned up spec file to resolve rpm dependencies. - -* Tue Jul 24 2012 Joerg Bruehe - -- Add a macro "runselftest": - if set to 1 (default), the test suite will be run during the RPM build; - this can be oveeridden via the command line by adding - --define "runselftest 0" - Failures of the test suite will NOT make the RPM build fail! - -* Mon Jun 11 2012 Joerg Bruehe - -- Make sure newly added "SPECIFIC-ULN/" directory does not disturb packaging. - -* Wed Sep 28 2011 Joerg Bruehe - -- Fix duplicate mentioning of "mysql_plugin" and its manual page, - it is better to keep alphabetic order in the files list (merging!). - -* Wed Sep 14 2011 Joerg Bruehe - -- Let the RPM capabilities ("obsoletes" etc) ensure that an upgrade may replace - the RPMs of any configuration (of the current or the preceding release series) - by the new ones. 
This is done by not using the implicitly generated capabilities - (which include the configuration name) and relying on more generic ones which - just list the function ("server", "client", ...). - The implicit generation cannot be prevented, so all these capabilities must be - explicitly listed in "Obsoletes:" - -* Tue Sep 13 2011 Jonathan Perkin - -- Add support for Oracle Linux 6 and Red Hat Enterprise Linux 6. Due to - changes in RPM behaviour ($RPM_BUILD_ROOT is removed prior to install) - this necessitated a move of the libmygcc.a installation to the install - phase, which is probably where it belonged in the first place. - -* Tue Sep 13 2011 Joerg Bruehe - -- "make_win_bin_dist" and its manual are dropped, cmake does it different. - -* Thu Sep 08 2011 Daniel Fischer - -- Add mysql_plugin man page. - -* Tue Aug 30 2011 Joerg Bruehe - -- Add the manual page for "mysql_plugin" to the server package. - -* Fri Aug 19 2011 Joerg Bruehe - -- Null-upmerge the fix of bug#37165: This spec file is not affected. -- Replace "/var/lib/mysql" by the spec file variable "%{mysqldatadir}". - -* Fri Aug 12 2011 Daniel Fischer - -- Source plugin library files list from cmake-generated file. - -* Mon Jul 25 2011 Chuck Bell - -- Added the mysql_plugin client - enables or disables plugins. - -* Thu Jul 21 2011 Sunanda Menon - -- Fix bug#12561297: Added the MySQL embedded binary - -* Thu Jul 07 2011 Joerg Bruehe - -- Fix bug#45415: "rpm upgrade recreates test database" - Let the creation of the "test" database happen only during a new installation, - not in an RPM upgrade. - This affects both the "mkdir" and the call of "mysql_install_db". - -* Thu Feb 09 2011 Joerg Bruehe - -- Fix bug#56581: If an installation deviates from the default file locations - ("datadir" and "pid-file"), the mechanism to detect a running server (on upgrade) - should still work, and use these locations. - The problem was that the fix for bug#27072 did not check for local settings. 
- -* Mon Jan 31 2011 Joerg Bruehe - -- Install the new "manifest" files: "INFO_SRC" and "INFO_BIN". - -* Tue Nov 23 2010 Jonathan Perkin - -- EXCEPTIONS-CLIENT has been deleted, remove it from here too -- Support MYSQL_BUILD_MAKE_JFLAG environment variable for passing - a '-j' argument to make. - -* Mon Nov 1 2010 Georgi Kodinov - -- Added test authentication (WL#1054) plugin binaries - -* Wed Oct 6 2010 Georgi Kodinov - -- Added example external authentication (WL#1054) plugin binaries - -* Wed Aug 11 2010 Joerg Bruehe - -- With a recent spec file cleanup, names have changed: A "-community" part was dropped. - Reflect that in the "Obsoletes" specifications. -- Add a "triggerpostun" to handle the uninstall of the "-community" server RPM. -- This fixes bug#55015 "MySQL server is not restarted properly after RPM upgrade". - -* Tue Jun 15 2010 Joerg Bruehe - -- Change the behaviour on installation and upgrade: - On installation, do not autostart the server. - *Iff* the server was stopped before the upgrade is started, this is taken as a - sign the administrator is handling that manually, and so the new server will - not be started automatically at the end of the upgrade. - The start/stop scripts will still be installed, so the server will be started - on the next machine boot. - This is the 5.5 version of fixing bug#27072 (RPM autostarting the server). - -* Tue Jun 1 2010 Jonathan Perkin - -- Implement SELinux checks from distribution-specific spec file. - -* Wed May 12 2010 Jonathan Perkin - -- Large number of changes to build using CMake -- Introduce distribution-specific RPMs -- Drop debuginfo, build all binaries with debug/symbols -- Remove __os_install_post, use native macro -- Remove _unpackaged_files_terminate_build, make it an error to have - unpackaged files -- Remove cluster RPMs - -* Wed Mar 24 2010 Joerg Bruehe - -- Add "--with-perfschema" to the configure options. 
- -* Mon Mar 22 2010 Joerg Bruehe - -- User "usr/lib*" to allow for both "usr/lib" and "usr/lib64", - mask "rmdir" return code 1. -- Remove "ha_example.*" files from the list, they aren't built. - -* Wed Mar 17 2010 Joerg Bruehe - -- Fix a wrong path name in handling the debug plugins. - -* Wed Mar 10 2010 Joerg Bruehe - -- Take the result of the debug plugin build and put it into the optimized tree, - so that it becomes part of the final installation; - include the files in the packlist. Part of the fixes for bug#49022. - -* Mon Mar 01 2010 Joerg Bruehe - -- Set "Oracle and/or its affiliates" as the vendor and copyright owner, - accept upgrading from packages showing MySQL or Sun as vendor. - -* Fri Feb 12 2010 Joerg Bruehe - -- Formatting changes: - Have a consistent structure of separator lines and of indentation - (8 leading blanks => tab). -- Introduce the variable "src_dir". -- Give the environment variables "MYSQL_BUILD_CC(CXX)" precedence - over "CC" ("CXX"). -- Drop the old "with_static" argument analysis, this is not supported - in 5.1 since ages. -- Introduce variables to control the handlers individually, as well - as other options. -- Use the new "--with-plugin" notation for the table handlers. -- Drop handling "/etc/rc.d/init.d/mysql", the switch to "/etc/init.d/mysql" - was done back in 2002 already. -- Make "--with-zlib-dir=bundled" the default, add an option to disable it. -- Add missing manual pages to the file list. -- Improve the runtime check for "libgcc.a", protect it against being tried - with the Intel compiler "icc". - -* Mon Jan 11 2010 Joerg Bruehe - -- Change RPM file naming: - - Suffix like "-m2", "-rc" becomes part of version as "_m2", "_rc". - - Release counts from 1, not 0. - -* Wed Dec 23 2009 Joerg Bruehe - -- The "semisync" plugin file name has lost its introductory "lib", - adapt the file lists for the subpackages. - This is a part missing from the fix for bug#48351. 
-- Remove the "fix_privilege_tables" manual, it does not exist in 5.5 - (and likely, the whole script will go, too). - -* Mon Nov 16 2009 Joerg Bruehe - -- Fix some problems with the directives around "tcmalloc" (experimental), - remove erroneous traces of the InnoDB plugin (that is 5.1 only). - -* Fri Oct 06 2009 Magnus Blaudd - -- Removed mysql_fix_privilege_tables - -* Fri Oct 02 2009 Alexander Nozdrin - -- "mysqlmanager" got removed from version 5.4, all references deleted. - -* Fri Aug 28 2009 Joerg Bruehe - -- Merge up from 5.1 to 5.4: Remove handling for the InnoDB plugin. - -* Thu Aug 27 2009 Joerg Bruehe - -- This version does not contain the "Instance manager", "mysqlmanager": - Remove it from the spec file so that packaging succeeds. - -* Mon Aug 24 2009 Jonathan Perkin - -- Add conditionals for bundled zlib and innodb plugin - -* Fri Aug 21 2009 Jonathan Perkin - -- Install plugin libraries in appropriate packages. -- Disable libdaemon_example and ftexample plugins. - -* Thu Aug 20 2009 Jonathan Perkin - -- Update variable used for mysql-test suite location to match source. - -* Fri Nov 07 2008 Joerg Bruehe - -- Correct yesterday's fix, so that it also works for the last flag, - and fix a wrong quoting: un-quoted quote marks must not be escaped. - -* Thu Nov 06 2008 Kent Boortz - -- Removed "mysql_upgrade_shell" -- Removed some copy/paste between debug and normal build - -* Thu Nov 06 2008 Joerg Bruehe - -- Modify CFLAGS and CXXFLAGS such that a debug build is not optimized. - This should cover both gcc and icc flags. Fixes bug#40546. 
- -* Fri Aug 29 2008 Kent Boortz - -- Removed the "Federated" storage engine option, and enabled in all - -* Tue Aug 26 2008 Joerg Bruehe - -- Get rid of the "warning: Installed (but unpackaged) file(s) found:" - Some generated files aren't needed in RPMs: - - the "sql-bench/" subdirectory - Some files were missing: - - /usr/share/aclocal/mysql.m4 ("devel" subpackage) - - Manual "mysqlbug" ("server" subpackage) - - Program "innochecksum" and its manual ("server" subpackage) - - Manual "mysql_find_rows" ("client" subpackage) - - Script "mysql_upgrade_shell" ("client" subpackage) - - Program "ndb_cpcd" and its manual ("ndb-extra" subpackage) - - Manuals "ndb_mgm" + "ndb_restore" ("ndb-tools" subpackage) - -* Mon Mar 31 2008 Kent Boortz - -- Made the "Federated" storage engine an option -- Made the "Cluster" storage engine and sub packages an option - -* Wed Mar 19 2008 Joerg Bruehe - -- Add the man pages for "ndbd" and "ndb_mgmd". - -* Mon Feb 18 2008 Timothy Smith - -- Require a manual upgrade if the alread-installed mysql-server is - from another vendor, or is of a different major version. - -* Wed May 02 2007 Joerg Bruehe - -- "ndb_size.tmpl" is not needed any more, - "man1/mysql_install_db.1" lacked the trailing '*'. - -* Sat Apr 07 2007 Kent Boortz - -- Removed man page for "mysql_create_system_tables" - -* Wed Mar 21 2007 Daniel Fischer - -- Add debug server. - -* Mon Mar 19 2007 Daniel Fischer - -- Remove Max RPMs; the server RPMs contain a mysqld compiled with all - features that previously only were built into Max. - -* Fri Mar 02 2007 Joerg Bruehe - -- Add several man pages for NDB which are now created. - -* Fri Jan 05 2007 Kent Boortz - -- Put back "libmygcc.a", found no real reason it was removed. - -- Add CFLAGS to gcc call with --print-libgcc-file, to make sure the - correct "libgcc.a" path is returned for the 32/64 bit architecture. - -* Mon Dec 18 2006 Joerg Bruehe - -- Fix the move of "mysqlmanager" to section 8: Directory name was wrong. 
- -* Thu Dec 14 2006 Joerg Bruehe - -- Include the new man pages for "my_print_defaults" and "mysql_tzinfo_to_sql" - in the server RPM. -- The "mysqlmanager" man page got moved from section 1 to 8. - -* Thu Nov 30 2006 Joerg Bruehe - -- Call "make install" using "benchdir_root=%{_datadir}", - because that is affecting the regression test suite as well. - -* Thu Nov 16 2006 Joerg Bruehe - -- Explicitly note that the "MySQL-shared" RPMs (as built by MySQL AB) - replace "mysql-shared" (as distributed by SuSE) to allow easy upgrading - (bug#22081). - -* Mon Nov 13 2006 Joerg Bruehe - -- Add "--with-partition" to all server builds. - -- Use "--report-features" in one test run per server build. - -* Tue Aug 15 2006 Joerg Bruehe - -- The "max" server is removed from packages, effective from 5.1.12-beta. - Delete all steps to build, package, or install it. - -* Mon Jul 10 2006 Joerg Bruehe - -- Fix a typing error in the "make" target for the Perl script to run the tests. - -* Tue Jul 04 2006 Joerg Bruehe - -- Use the Perl script to run the tests, because it will automatically check - whether the server is configured with SSL. - -* Tue Jun 27 2006 Joerg Bruehe - -- move "mysqldumpslow" from the client RPM to the server RPM (bug#20216) - -- Revert all previous attempts to call "mysql_upgrade" during RPM upgrade, - there are some more aspects which need to be solved before this is possible. - For now, just ensure the binary "mysql_upgrade" is delivered and installed. - -* Thu Jun 22 2006 Joerg Bruehe - -- Close a gap of the previous version by explicitly using - a newly created temporary directory for the socket to be used - in the "mysql_upgrade" operation, overriding any local setting. - -* Tue Jun 20 2006 Joerg Bruehe - -- To run "mysql_upgrade", we need a running server; - start it in isolation and skip password checks. - -* Sat May 20 2006 Kent Boortz - -- Always compile for PIC, position independent code. 
- -* Wed May 10 2006 Kent Boortz - -- Use character set "all" when compiling with Cluster, to make Cluster - nodes independent on the character set directory, and the problem - that two RPM sub packages both wants to install this directory. - -* Mon May 01 2006 Kent Boortz - -- Use "./libtool --mode=execute" instead of searching for the - executable in current directory and ".libs". - -* Fri Apr 28 2006 Kent Boortz - -- Install and run "mysql_upgrade" - -* Wed Apr 12 2006 Jim Winstead - -- Remove sql-bench, and MySQL-bench RPM (will be built as an independent - project from the mysql-bench repository) - -* Tue Apr 11 2006 Jim Winstead - -- Remove old mysqltestmanager and related programs -* Sat Apr 01 2006 Kent Boortz - -- Set $LDFLAGS from $MYSQL_BUILD_LDFLAGS - -* Wed Mar 07 2006 Kent Boortz - -- Changed product name from "Community Edition" to "Community Server" - -* Mon Mar 06 2006 Kent Boortz - -- Fast mutexes is now disabled by default, but should be - used in Linux builds. - -* Mon Feb 20 2006 Kent Boortz - -- Reintroduced a max build -- Limited testing of 'debug' and 'max' servers -- Berkeley DB only in 'max' - -* Mon Feb 13 2006 Joerg Bruehe - -- Use "-i" on "make test-force"; - this is essential for later evaluation of this log file. - -* Thu Feb 09 2006 Kent Boortz - -- Pass '-static' to libtool, link static with our own libraries, dynamic - with system libraries. Link with the bundled zlib. - -* Wed Feb 08 2006 Kristian Nielsen - -- Modified RPM spec to match new 5.1 debug+max combined community packaging. 
- -* Sun Dec 18 2005 Kent Boortz - -- Added "client/mysqlslap" - -* Mon Dec 12 2005 Rodrigo Novo - -- Added zlib to the list of (static) libraries installed -- Added check against libtool wierdness (WRT: sql/mysqld || sql/.libs/mysqld) -- Compile MySQL with bundled zlib -- Fixed %packager name to "MySQL Production Engineering Team" - -* Mon Dec 05 2005 Joerg Bruehe - -- Avoid using the "bundled" zlib on "shared" builds: - As it is not installed (on the build system), this gives dependency - problems with "libtool" causing the build to fail. - (Change was done on Nov 11, but left uncommented.) - -* Tue Nov 22 2005 Joerg Bruehe - -- Extend the file existence check for "init.d/mysql" on un-install - to also guard the call to "insserv"/"chkconfig". - -* Thu Oct 27 2005 Lenz Grimmer - -- added more man pages - -* Wed Oct 19 2005 Kent Boortz - -- Made yaSSL support an option (off by default) - -* Wed Oct 19 2005 Kent Boortz - -- Enabled yaSSL support - -* Sat Oct 15 2005 Kent Boortz - -- Give mode arguments the same way in all places -- Moved copy of mysqld.a to "standard" build, but - disabled it as we don't do embedded yet in 5.0 - -* Fri Oct 14 2005 Kent Boortz - -- For 5.x, always compile with --with-big-tables -- Copy the config.log file to location outside - the build tree - -* Fri Oct 14 2005 Kent Boortz - -- Removed unneeded/obsolete configure options -- Added archive engine to standard server -- Removed the embedded server from experimental server -- Changed suffix "-Max" => "-max" -- Changed comment string "Max" => "Experimental" - -* Thu Oct 13 2005 Lenz Grimmer - -- added a usermod call to assign a potential existing mysql user to the - correct user group (BUG#12823) -- Save the perror binary built during Max build so it supports the NDB - error codes (BUG#13740) -- added a separate macro "mysqld_group" to be able to define the - user group of the mysql user seperately, if desired. 
- -* Thu Sep 29 2005 Lenz Grimmer - -- fixed the removing of the RPM_BUILD_ROOT in the %clean section (the - $RBR variable did not get expanded, thus leaving old build roots behind) - -* Thu Aug 04 2005 Lenz Grimmer - -- Fixed the creation of the mysql user group account in the postinstall - section (BUG 12348) -- Fixed enabling the Archive storage engine in the Max binary - -* Tue Aug 02 2005 Lenz Grimmer - -- Fixed the Requires: tag for the server RPM (BUG 12233) - -* Fri Jul 15 2005 Lenz Grimmer - -- create a "mysql" user group and assign the mysql user account to that group - in the server postinstall section. (BUG 10984) - -* Tue Jun 14 2005 Lenz Grimmer - -- Do not build statically on i386 by default, only when adding either "--with - static" or "--define '_with_static 1'" to the RPM build options. Static - linking really only makes sense when linking against the specially patched - glibc 2.2.5. - -* Mon Jun 06 2005 Lenz Grimmer - -- added mysql_client_test to the "bench" subpackage (BUG 10676) -- added the libndbclient static and shared libraries (BUG 10676) - -* Wed Jun 01 2005 Lenz Grimmer - -- use "mysqldatadir" variable instead of hard-coding the path multiple times -- use the "mysqld_user" variable on all occasions a user name is referenced -- removed (incomplete) Brazilian translations -- removed redundant release tags from the subpackage descriptions - -* Wed May 25 2005 Joerg Bruehe - -- Added a "make clean" between separate calls to "BuildMySQL". - -* Thu May 12 2005 Guilhem Bichot - -- Removed the mysql_tableinfo script made obsolete by the information schema - -* Wed Apr 20 2005 Lenz Grimmer - -- Enabled the "blackhole" storage engine for the Max RPM - -* Wed Apr 13 2005 Lenz Grimmer - -- removed the MySQL manual files (html/ps/texi) - they have been removed - from the MySQL sources and are now available seperately. 
- -* Mon Apr 4 2005 Petr Chardin - -- old mysqlmanager, mysqlmanagerc and mysqlmanager-pwger renamed into - mysqltestmanager, mysqltestmanager and mysqltestmanager-pwgen respectively - -* Fri Mar 18 2005 Lenz Grimmer - -- Disabled RAID in the Max binaries once and for all (it has finally been - removed from the source tree) - -* Sun Feb 20 2005 Petr Chardin - -- Install MySQL Instance Manager together with mysqld, touch mysqlmanager - password file - -* Mon Feb 14 2005 Lenz Grimmer - -- Fixed the compilation comments and moved them into the separate build sections - for Max and Standard - -* Mon Feb 7 2005 Tomas Ulin - -- enabled the "Ndbcluster" storage engine for the max binary -- added extra make install in ndb subdir after Max build to get ndb binaries -- added packages for ndbcluster storage engine - -* Fri Jan 14 2005 Lenz Grimmer - -- replaced obsoleted "BuildPrereq" with "BuildRequires" instead - -* Thu Jan 13 2005 Lenz Grimmer - -- enabled the "Federated" storage engine for the max binary - -* Tue Jan 04 2005 Petr Chardin - -- ISAM and merge storage engines were purged. 
As well as appropriate - tools and manpages (isamchk and isamlog) - -* Thu Dec 31 2004 Lenz Grimmer - -- enabled the "Archive" storage engine for the max binary -- enabled the "CSV" storage engine for the max binary -- enabled the "Example" storage engine for the max binary - -* Thu Aug 26 2004 Lenz Grimmer - -- MySQL-Max now requires MySQL-server instead of MySQL (BUG 3860) - -* Fri Aug 20 2004 Lenz Grimmer - -- do not link statically on IA64/AMD64 as these systems do not have - a patched glibc installed - -* Tue Aug 10 2004 Lenz Grimmer - -- Added libmygcc.a to the devel subpackage (required to link applications - against the the embedded server libmysqld.a) (BUG 4921) - -* Mon Aug 09 2004 Lenz Grimmer - -- Added EXCEPTIONS-CLIENT to the "devel" package - -* Thu Jul 29 2004 Lenz Grimmer - -- disabled OpenSSL in the Max binaries again (the RPM packages were the - only exception to this anyway) (BUG 1043) - -* Wed Jun 30 2004 Lenz Grimmer - -- fixed server postinstall (mysql_install_db was called with the wrong - parameter) - -* Thu Jun 24 2004 Lenz Grimmer - -- added mysql_tzinfo_to_sql to the server subpackage -- run "make clean" instead of "make distclean" - -* Mon Apr 05 2004 Lenz Grimmer - -- added ncurses-devel to the build prerequisites (BUG 3377) - -* Thu Feb 12 2004 Lenz Grimmer - -- when using gcc, _always_ use CXX=gcc -- replaced Copyright with License field (Copyright is obsolete) - -* Tue Feb 03 2004 Lenz Grimmer - -- added myisam_ftdump to the Server package - -* Tue Jan 13 2004 Lenz Grimmer - -- link the mysql client against libreadline instead of libedit (BUG 2289) - -* Mon Dec 22 2003 Lenz Grimmer - -- marked /etc/logrotate.d/mysql as a config file (BUG 2156) - -* Fri Dec 13 2003 Lenz Grimmer - -- fixed file permissions (BUG 1672) - -* Thu Dec 11 2003 Lenz Grimmer - -- made testing for gcc3 a bit more robust - -* Fri Dec 05 2003 Lenz Grimmer - -- added missing file mysql_create_system_tables to the server subpackage - -* Fri Nov 21 2003 Lenz 
Grimmer - -- removed dependency on MySQL-client from the MySQL-devel subpackage - as it is not really required. (BUG 1610) - -* Fri Aug 29 2003 Lenz Grimmer - -- Fixed BUG 1162 (removed macro names from the changelog) -- Really fixed BUG 998 (disable the checking for installed but - unpackaged files) - -* Tue Aug 05 2003 Lenz Grimmer - -- Fixed BUG 959 (libmysqld not being compiled properly) -- Fixed BUG 998 (RPM build errors): added missing files to the - distribution (mysql_fix_extensions, mysql_tableinfo, mysqldumpslow, - mysql_fix_privilege_tables.1), removed "-n" from install section. - -* Wed Jul 09 2003 Lenz Grimmer - -- removed the GIF Icon (file was not included in the sources anyway) -- removed unused variable shared_lib_version -- do not run automake before building the standard binary - (should not be necessary) -- add server suffix '-standard' to standard binary (to be in line - with the binary tarball distributions) -- Use more RPM macros (_exec_prefix, _sbindir, _libdir, _sysconfdir, - _datadir, _includedir) throughout the spec file. -- allow overriding CC and CXX (required when building with other compilers) - -* Fri May 16 2003 Lenz Grimmer - -- re-enabled RAID again - -* Wed Apr 30 2003 Lenz Grimmer - -- disabled MyISAM RAID (--with-raid) - it throws an assertion which - needs to be investigated first. 
- -* Mon Mar 10 2003 Lenz Grimmer - -- added missing file mysql_secure_installation to server subpackage - (BUG 141) - -* Tue Feb 11 2003 Lenz Grimmer - -- re-added missing pre- and post(un)install scripts to server subpackage -- added config file /etc/my.cnf to the file list (just for completeness) -- make sure to create the datadir with 755 permissions - -* Mon Jan 27 2003 Lenz Grimmer - -- removed unused CC and CXX variables -- CFLAGS and CXXFLAGS should honor RPM_OPT_FLAGS - -* Fri Jan 24 2003 Lenz Grimmer - -- renamed package "MySQL" to "MySQL-server" -- fixed Copyright tag -- added mysql_waitpid to client subpackage (required for mysql-test-run) - -* Wed Nov 27 2002 Lenz Grimmer - -- moved init script from /etc/rc.d/init.d to /etc/init.d (the majority of - Linux distributions now support this scheme as proposed by the LSB either - directly or via a compatibility symlink) -- Use new "restart" init script action instead of starting and stopping - separately -- Be more flexible in activating the automatic bootup - use insserv (on - older SuSE versions) or chkconfig (Red Hat, newer SuSE versions and - others) to create the respective symlinks - -* Wed Sep 25 2002 Lenz Grimmer - -- MySQL-Max now requires MySQL >= 4.0 to avoid version mismatches - (mixing 3.23 and 4.0 packages) - -* Fri Aug 09 2002 Lenz Grimmer - -- Turn off OpenSSL in MySQL-Max for now until it works properly again -- enable RAID for the Max binary instead -- added compatibility link: safe_mysqld -> mysqld_safe to ease the - transition from 3.23 - -* Thu Jul 18 2002 Lenz Grimmer - -- Reworked the build steps a little bit: the Max binary is supposed - to include OpenSSL, which cannot be linked statically, thus trying - to statically link against a special glibc is futile anyway -- because of this, it is not required to make yet another build run - just to compile the shared libs (saves a lot of time) -- updated package description of the Max subpackage -- clean up the BuildRoot directory afterwards 
- -* Mon Jul 15 2002 Lenz Grimmer - -- Updated Packager information -- Fixed the build options: the regular package is supposed to - include InnoDB and linked statically, while the Max package - should include BDB and SSL support - -* Fri May 03 2002 Lenz Grimmer - -- Use more RPM macros (e.g. infodir, mandir) to make the spec - file more portable -- reorganized the installation of documentation files: let RPM - take care of this -- reorganized the file list: actually install man pages along - with the binaries of the respective subpackage -- do not include libmysqld.a in the devel subpackage as well, if we - have a special "embedded" subpackage -- reworked the package descriptions - -* Mon Oct 8 2001 Monty - -- Added embedded server as a separate RPM - -* Fri Apr 13 2001 Monty - -- Added mysqld-max to the distribution - -* Tue Jan 2 2001 Monty - -- Added mysql-test to the bench package - -* Fri Aug 18 2000 Tim Smith - -- Added separate libmysql_r directory; now both a threaded - and non-threaded library is shipped. - -* Wed Sep 28 1999 David Axmark - -- Added the support-files/my-example.cnf to the docs directory. - -- Removed devel dependency on base since it is about client - development. - -* Wed Sep 8 1999 David Axmark - -- Cleaned up some for 3.23. - -* Thu Jul 1 1999 David Axmark - -- Added support for shared libraries in a separate sub - package. Original fix by David Fox (dsfox@cogsci.ucsd.edu) - -- The --enable-assembler switch is now automatically disables on - platforms there assembler code is unavailable. This should allow - building this RPM on non i386 systems. - -* Mon Feb 22 1999 David Axmark - -- Removed unportable cc switches from the spec file. The defaults can - now be overridden with environment variables. This feature is used - to compile the official RPM with optimal (but compiler version - specific) switches. - -- Removed the repetitive description parts for the sub rpms. Maybe add - again if RPM gets a multiline macro capability. 
- -- Added support for a pt_BR translation. Translation contributed by - Jorge Godoy . - -* Wed Nov 4 1998 David Axmark - -- A lot of changes in all the rpm and install scripts. This may even - be a working RPM :-) - -* Sun Aug 16 1998 David Axmark - -- A developers changelog for MySQL is available in the source RPM. And - there is a history of major user visible changed in the Reference - Manual. Only RPM specific changes will be documented here. diff -Nru mariadb-5.5-5.5.39/support-files/mysql.5.5.40.spec mariadb-5.5-5.5.40/support-files/mysql.5.5.40.spec --- mariadb-5.5-5.5.39/support-files/mysql.5.5.40.spec 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-5.5-5.5.40/support-files/mysql.5.5.40.spec 2014-10-08 13:19:54.000000000 +0000 @@ -0,0 +1,2062 @@ +# Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; see the file COPYING. If not, write to the +# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston +# MA 02110-1301 USA. + +############################################################################## +# Some common macro definitions +############################################################################## + +# NOTE: "vendor" is used in upgrade/downgrade check, so you can't +# change these, has to be exactly as is. +%global mysql_old_vendor MySQL AB +%global mysql_vendor_2 Sun Microsystems, Inc. 
+%global mysql_vendor Oracle and/or its affiliates + +%global mysql_version 5.5.40 + +%global mysqld_user mysql +%global mysqld_group mysql +%global mysqldatadir /var/lib/mysql + +%global release 1 + + +# +# Macros we use which are not available in all supported versions of RPM +# +# - defined/undefined are missing on RHEL4 +# +%if %{expand:%{?defined:0}%{!?defined:1}} +%define defined() %{expand:%%{?%{1}:1}%%{!?%{1}:0}} +%endif +%if %{expand:%{?undefined:0}%{!?undefined:1}} +%define undefined() %{expand:%%{?%{1}:0}%%{!?%{1}:1}} +%endif + +# ---------------------------------------------------------------------------- +# RPM build tools now automatically detect Perl module dependencies. This +# detection causes problems as it is broken in some versions, and it also +# provides unwanted dependencies from mandatory scripts in our package. +# It might not be possible to disable this in all versions of RPM, but here we +# try anyway. We keep the "AutoReqProv: no" for the "test" sub package, as +# disabling here might fail, and that package has the most problems. +# See: +# http://fedoraproject.org/wiki/Packaging/Perl#Filtering_Requires:_and_Provides +# http://www.wideopen.com/archives/rpm-list/2002-October/msg00343.html +# ---------------------------------------------------------------------------- +%undefine __perl_provides +%undefine __perl_requires + +############################################################################## +# Command line handling +############################################################################## +# +# To set options: +# +# $ rpmbuild --define="option <value>" ...
+# + +# ---------------------------------------------------------------------------- +# Commercial builds +# ---------------------------------------------------------------------------- +%if %{undefined commercial} +%define commercial 0 +%endif + +# ---------------------------------------------------------------------------- +# Source name +# ---------------------------------------------------------------------------- +%if %{undefined src_base} +%define src_base mysql +%endif +%define src_dir %{src_base}-%{mysql_version} + +# ---------------------------------------------------------------------------- +# Feature set (storage engines, options). Default to community (everything) +# ---------------------------------------------------------------------------- +%if %{undefined feature_set} +%define feature_set community +%endif + +# ---------------------------------------------------------------------------- +# Server comment strings +# ---------------------------------------------------------------------------- +%if %{undefined compilation_comment_debug} +%define compilation_comment_debug MySQL Community Server - Debug (GPL) +%endif +%if %{undefined compilation_comment_release} +%define compilation_comment_release MySQL Community Server (GPL) +%endif + +# ---------------------------------------------------------------------------- +# Product and server suffixes +# ---------------------------------------------------------------------------- +%if %{undefined product_suffix} + %if %{defined short_product_tag} + %define product_suffix -%{short_product_tag} + %else + %define product_suffix %{nil} + %endif +%endif + +%if %{undefined server_suffix} +%define server_suffix %{nil} +%endif + +# ---------------------------------------------------------------------------- +# Distribution support +# ---------------------------------------------------------------------------- +%if %{undefined distro_specific} +%define distro_specific 0 +%endif +%if %{distro_specific} + %if %(test -f 
/etc/enterprise-release && echo 1 || echo 0) + %define oelver %(rpm -qf --qf '%%{version}\\n' /etc/enterprise-release | sed -e 's/^\\([0-9]*\\).*/\\1/g') + %if "%oelver" == "4" + %define distro_description Oracle Enterprise Linux 4 + %define distro_releasetag oel4 + %define distro_buildreq gcc-c++ gperf ncurses-devel perl readline-devel time zlib-devel cmake libaio-devel + %define distro_requires chkconfig coreutils grep procps shadow-utils net-tools + %else + %if "%oelver" == "5" + %define distro_description Oracle Enterprise Linux 5 + %define distro_releasetag oel5 + %define distro_buildreq gcc-c++ gperf ncurses-devel perl readline-devel time zlib-devel cmake libaio-devel + %define distro_requires chkconfig coreutils grep procps shadow-utils net-tools + %else + %{error:Oracle Enterprise Linux %{oelver} is unsupported} + %endif + %endif + %else + %if %(test -f /etc/oracle-release && echo 1 || echo 0) + %define elver %(rpm -qf --qf '%%{version}\\n' /etc/oracle-release | sed -e 's/^\\([0-9]*\\).*/\\1/g') + %if "%elver" == "6" || "%elver" == "7" + %define distro_description Oracle Linux %elver + %define distro_releasetag el%elver + %define distro_buildreq gcc-c++ ncurses-devel perl readline-devel time zlib-devel cmake libaio-devel + %define distro_requires chkconfig coreutils grep procps shadow-utils net-tools + %else + %{error:Oracle Linux %{elver} is unsupported} + %endif + %else + %if %(test -f /etc/redhat-release && echo 1 || echo 0) + %define rhelver %(rpm -qf --qf '%%{version}\\n' /etc/redhat-release | sed -e 's/^\\([0-9]*\\).*/\\1/g') + %if "%rhelver" == "4" + %define distro_description Red Hat Enterprise Linux 4 + %define distro_releasetag rhel4 + %define distro_buildreq gcc-c++ gperf ncurses-devel perl readline-devel time zlib-devel cmake libaio-devel + %define distro_requires chkconfig coreutils grep procps shadow-utils net-tools + %else + %if "%rhelver" == "5" + %define distro_description Red Hat Enterprise Linux 5 + %define distro_releasetag rhel5 + 
%define distro_buildreq gcc-c++ gperf ncurses-devel perl readline-devel time zlib-devel cmake libaio-devel + %define distro_requires chkconfig coreutils grep procps shadow-utils net-tools + %else + %if "%rhelver" == "6" + %define distro_description Red Hat Enterprise Linux 6 + %define distro_releasetag rhel6 + %define distro_buildreq gcc-c++ ncurses-devel perl readline-devel time zlib-devel cmake libaio-devel + %define distro_requires chkconfig coreutils grep procps shadow-utils net-tools + %else + %{error:Red Hat Enterprise Linux %{rhelver} is unsupported} + %endif + %endif + %endif + %else + %if %(test -f /etc/SuSE-release && echo 1 || echo 0) + %define susever %(rpm -qf --qf '%%{version}\\n' /etc/SuSE-release | cut -d. -f1) + %if "%susever" == "10" + %define distro_description SUSE Linux Enterprise Server 10 + %define distro_releasetag sles10 + %define distro_buildreq gcc-c++ gdbm-devel gperf ncurses-devel openldap2-client readline-devel zlib-devel cmake libaio-devel + %define distro_requires aaa_base coreutils grep procps pwdutils + %else + %if "%susever" == "11" + %define distro_description SUSE Linux Enterprise Server 11 + %define distro_releasetag sles11 + %define distro_buildreq gcc-c++ gdbm-devel gperf ncurses-devel openldap2-client procps pwdutils readline-devel zlib-devel cmake libaio-devel + %define distro_requires aaa_base coreutils grep procps pwdutils + %else + %{error:SuSE %{susever} is unsupported} + %endif + %endif + %else + %{error:Unsupported distribution} + %endif + %endif + %endif + %endif +%else + %define generic_kernel %(uname -r | cut -d. 
-f1-2) + %define distro_description Generic Linux (kernel %{generic_kernel}) + %define distro_releasetag linux%{generic_kernel} + %define distro_buildreq gcc-c++ gperf ncurses-devel perl readline-devel time zlib-devel + %define distro_requires coreutils grep procps /sbin/chkconfig /usr/sbin/useradd /usr/sbin/groupadd +%endif + +# Avoid debuginfo RPMs, leaves binaries unstripped +%define debug_package %{nil} + +# Hack to work around bug in RHEL5 __os_install_post macro, wrong inverted +# test for __debug_package +%define __strip /bin/true + +# ---------------------------------------------------------------------------- +# Support optional "tcmalloc" library (experimental) +# ---------------------------------------------------------------------------- +%if %{defined malloc_lib_target} +%define WITH_TCMALLOC 1 +%else +%define WITH_TCMALLOC 0 +%endif + +############################################################################## +# Configuration based upon above user input, not to be set directly +############################################################################## + +%if 0%{?commercial} +%define license_files_server %{src_dir}/LICENSE.mysql +%define license_type Commercial +%else +%define license_files_server %{src_dir}/COPYING %{src_dir}/README +%define license_type GPL +%endif + +############################################################################## +# Main spec file section +############################################################################## + +Name: MySQL%{product_suffix} +Summary: MySQL: a very fast and reliable SQL database server +Group: Applications/Databases +Version: 5.5.40 +Release: %{release}%{?distro_releasetag:.%{distro_releasetag}} +Distribution: %{distro_description} +License: Copyright (c) 2000, 2014, %{mysql_vendor}. All rights reserved. Under %{license_type} license as shown in the Description field. 
+Source: http://www.mysql.com/Downloads/MySQL-5.5/%{src_dir}.tar.gz +URL: http://www.mysql.com/ +Packager: MySQL Release Engineering +Vendor: %{mysql_vendor} +BuildRequires: %{distro_buildreq} +%{?el7:Patch0: mysql-5.5-libmysqlclient-symbols.patch} + +# Regression tests may take a long time, override the default to skip them +%{!?runselftest:%global runselftest 1} + +# Think about what you use here since the first step is to +# run a rm -rf +BuildRoot: %{_tmppath}/%{name}-%{version}-build + +# From the manual +%description +The MySQL(TM) software delivers a very fast, multi-threaded, multi-user, +and robust SQL (Structured Query Language) database server. MySQL Server +is intended for mission-critical, heavy-load production systems as well +as for embedding into mass-deployed software. MySQL is a trademark of +%{mysql_vendor} + +The MySQL software has Dual Licensing, which means you can use the MySQL +software free of charge under the GNU General Public License +(http://www.gnu.org/licenses/). You can also purchase commercial MySQL +licenses from %{mysql_vendor} if you do not wish to be bound by the terms of +the GPL. See the chapter "Licensing and Support" in the manual for +further info. + +The MySQL web site (http://www.mysql.com/) provides the latest +news and information about the MySQL software. Also please see the +documentation and the manual for more information. 
+ +############################################################################## +# Sub package definition +############################################################################## + +%package -n MySQL-server%{product_suffix} +Summary: MySQL: a very fast and reliable SQL database server +Group: Applications/Databases +Requires: %{distro_requires} +%if 0%{?commercial} +Obsoletes: MySQL-server +%else +Obsoletes: MySQL-server-advanced +%endif +Obsoletes: mysql-server < %{version}-%{release} +Obsoletes: mysql-server-advanced +Obsoletes: MySQL-server-classic MySQL-server-community MySQL-server-enterprise +Obsoletes: MySQL-server-advanced-gpl MySQL-server-enterprise-gpl +Provides: mysql-server = %{version}-%{release} +Provides: mysql-server%{?_isa} = %{version}-%{release} + +%description -n MySQL-server%{product_suffix} +The MySQL(TM) software delivers a very fast, multi-threaded, multi-user, +and robust SQL (Structured Query Language) database server. MySQL Server +is intended for mission-critical, heavy-load production systems as well +as for embedding into mass-deployed software. MySQL is a trademark of +%{mysql_vendor} + +The MySQL software has Dual Licensing, which means you can use the MySQL +software free of charge under the GNU General Public License +(http://www.gnu.org/licenses/). You can also purchase commercial MySQL +licenses from %{mysql_vendor} if you do not wish to be bound by the terms of +the GPL. See the chapter "Licensing and Support" in the manual for +further info. + +The MySQL web site (http://www.mysql.com/) provides the latest news and +information about the MySQL software. Also please see the documentation +and the manual for more information. + +This package includes the MySQL server binary as well as related utilities +to run and administer a MySQL server. + +If you want to access and work with the database, you have to install +package "MySQL-client%{product_suffix}" as well! 
+ +# ---------------------------------------------------------------------------- +%package -n MySQL-client%{product_suffix} +Summary: MySQL - Client +Group: Applications/Databases +%if 0%{?commercial} +Obsoletes: MySQL-client +%else +Obsoletes: MySQL-client-advanced +%endif +Obsoletes: mysql < %{version}-%{release} +Obsoletes: mysql-advanced < %{version}-%{release} +Obsoletes: MySQL-client-classic MySQL-client-community MySQL-client-enterprise +Obsoletes: MySQL-client-advanced-gpl MySQL-client-enterprise-gpl +Provides: mysql = %{version}-%{release} +Provides: mysql%{?_isa} = %{version}-%{release} + +%description -n MySQL-client%{product_suffix} +This package contains the standard MySQL clients and administration tools. + +For a description of MySQL see the base MySQL RPM or http://www.mysql.com/ + +# ---------------------------------------------------------------------------- +%package -n MySQL-test%{product_suffix} +Summary: MySQL - Test suite +Group: Applications/Databases +%if 0%{?commercial} +Requires: MySQL-client-advanced perl +Obsoletes: MySQL-test +%else +Requires: MySQL-client perl +Obsoletes: MySQL-test-advanced +%endif +Obsoletes: mysql-test < %{version}-%{release} +Obsoletes: mysql-test-advanced +Obsoletes: mysql-bench MySQL-bench +Obsoletes: MySQL-test-classic MySQL-test-community MySQL-test-enterprise +Obsoletes: MySQL-test-advanced-gpl MySQL-test-enterprise-gpl +Provides: mysql-test = %{version}-%{release} +Provides: mysql-test%{?_isa} = %{version}-%{release} +AutoReqProv: no + +%description -n MySQL-test%{product_suffix} +This package contains the MySQL regression test suite. 
+ +For a description of MySQL see the base MySQL RPM or http://www.mysql.com/ + +# ---------------------------------------------------------------------------- +%package -n MySQL-devel%{product_suffix} +Summary: MySQL - Development header files and libraries +Group: Applications/Databases +%if 0%{?commercial} +Obsoletes: MySQL-devel +%else +Obsoletes: MySQL-devel-advanced +%endif +Obsoletes: mysql-devel < %{version}-%{release} +Obsoletes: mysql-embedded-devel mysql-devel-advanced mysql-embedded-devel-advanced +Obsoletes: MySQL-devel-classic MySQL-devel-community MySQL-devel-enterprise +Obsoletes: MySQL-devel-advanced-gpl MySQL-devel-enterprise-gpl +Provides: mysql-devel = %{version}-%{release} +Provides: mysql-devel%{?_isa} = %{version}-%{release} + +%description -n MySQL-devel%{product_suffix} +This package contains the development header files and libraries necessary +to develop MySQL client applications. + +For a description of MySQL see the base MySQL RPM or http://www.mysql.com/ + +# ---------------------------------------------------------------------------- +%package -n MySQL-shared%{product_suffix} +Summary: MySQL - Shared libraries +Group: Applications/Databases +%if 0%{?commercial} +Obsoletes: MySQL-shared +%else +Obsoletes: MySQL-shared-advanced +%endif +Obsoletes: MySQL-shared-standard MySQL-shared-pro +Obsoletes: MySQL-shared-pro-cert MySQL-shared-pro-gpl +Obsoletes: MySQL-shared-pro-gpl-cert +Obsoletes: MySQL-shared-classic MySQL-shared-community MySQL-shared-enterprise +Obsoletes: MySQL-shared-advanced-gpl MySQL-shared-enterprise-gpl + +%description -n MySQL-shared%{product_suffix} +This package contains the shared libraries (*.so*) which certain languages +and applications need to dynamically load and use MySQL. 
+ +# ---------------------------------------------------------------------------- +%package -n MySQL-embedded%{product_suffix} +Summary: MySQL - Embedded library +Group: Applications/Databases +%if 0%{?commercial} +Requires: MySQL-devel-advanced +Obsoletes: MySQL-embedded +%else +Requires: MySQL-devel +Obsoletes: MySQL-embedded-advanced +%endif +Obsoletes: mysql-embedded < %{version}-%{release} +Obsoletes: mysql-embedded-advanced +Obsoletes: MySQL-embedded-pro +Obsoletes: MySQL-embedded-classic MySQL-embedded-community MySQL-embedded-enterprise +Obsoletes: MySQL-embedded-advanced-gpl MySQL-embedded-enterprise-gpl +Provides: mysql-embedded = %{version}-%{release} +Provides: mysql-emdedded%{?_isa} = %{version}-%{release} + +%description -n MySQL-embedded%{product_suffix} +This package contains the MySQL server as an embedded library. + +The embedded MySQL server library makes it possible to run a full-featured +MySQL server inside the client application. The main benefits are increased +speed and more simple management for embedded applications. + +The API is identical for the embedded MySQL version and the +client/server version. + +For a description of MySQL see the base MySQL RPM or http://www.mysql.com/ + +############################################################################## +%prep +%setup -T -a 0 -c -n %{src_dir} +pushd %{src_dir} +%{?el7:%patch0 -p1} +############################################################################## +%build + +# Fail quickly and obviously if user tries to build as root +%if %runselftest + if [ x"`id -u`" = x0 ]; then + echo "The MySQL regression tests may fail if run as root." + echo "If you really need to build the RPM as root, use" + echo "--define='runselftest 0' to skip the regression tests." + exit 1 + fi +%endif + +# Be strict about variables, bail at earliest opportunity, etc. 
+set -eu + +# Optional package files +touch optional-files-devel + +# +# Set environment in order of preference, MYSQL_BUILD_* first, then variable +# name, finally a default. RPM_OPT_FLAGS is assumed to be a part of the +# default RPM build environment. +# +# We set CXX=gcc by default to support so-called 'generic' binaries, where we +# do not have a dependancy on libgcc/libstdc++. This only works while we do +# not require C++ features such as exceptions, and may need to be removed at +# a later date. +# + +# This is a hack, $RPM_OPT_FLAGS on ia64 hosts contains flags which break +# the compile in cmd-line-utils/readline - needs investigation, but for now +# we simply unset it and use those specified directly in cmake. +%if "%{_arch}" == "ia64" +RPM_OPT_FLAGS= +%endif + +export PATH=${MYSQL_BUILD_PATH:-$PATH} +export CC=${MYSQL_BUILD_CC:-${CC:-gcc}} +export CXX=${MYSQL_BUILD_CXX:-${CXX:-gcc}} +export CFLAGS=${MYSQL_BUILD_CFLAGS:-${CFLAGS:-$RPM_OPT_FLAGS}} +export CXXFLAGS=${MYSQL_BUILD_CXXFLAGS:-${CXXFLAGS:-$RPM_OPT_FLAGS -felide-constructors -fno-exceptions -fno-rtti}} +export LDFLAGS=${MYSQL_BUILD_LDFLAGS:-${LDFLAGS:-}} +export CMAKE=${MYSQL_BUILD_CMAKE:-${CMAKE:-cmake}} +export MAKE_JFLAG=${MYSQL_BUILD_MAKE_JFLAG:-} + +# Build debug mysqld and libmysqld.a +mkdir debug +( + cd debug + # Attempt to remove any optimisation flags from the debug build + CFLAGS=`echo " ${CFLAGS} " | \ + sed -e 's/ -O[0-9]* / /' \ + -e 's/ -unroll2 / /' \ + -e 's/ -ip / /' \ + -e 's/^ //' \ + -e 's/ $//'` + CXXFLAGS=`echo " ${CXXFLAGS} " | \ + sed -e 's/ -O[0-9]* / /' \ + -e 's/ -unroll2 / /' \ + -e 's/ -ip / /' \ + -e 's/^ //' \ + -e 's/ $//'` + # XXX: MYSQL_UNIX_ADDR should be in cmake/* but mysql_version is included before + # XXX: install_layout so we can't just set it based on INSTALL_LAYOUT=RPM + ${CMAKE} ../%{src_dir} -DBUILD_CONFIG=mysql_release -DINSTALL_LAYOUT=RPM \ + -DCMAKE_BUILD_TYPE=Debug \ + -DENABLE_DTRACE=OFF \ + -DMYSQL_UNIX_ADDR="%{mysqldatadir}/mysql.sock" \ + 
-DFEATURE_SET="%{feature_set}" \ + -DCOMPILATION_COMMENT="%{compilation_comment_debug}" \ + -DMYSQL_SERVER_SUFFIX="%{server_suffix}" + echo BEGIN_DEBUG_CONFIG ; egrep '^#define' include/config.h ; echo END_DEBUG_CONFIG + make ${MAKE_JFLAG} VERBOSE=1 +) +# Build full release +mkdir release +( + cd release + # XXX: MYSQL_UNIX_ADDR should be in cmake/* but mysql_version is included before + # XXX: install_layout so we can't just set it based on INSTALL_LAYOUT=RPM + ${CMAKE} ../%{src_dir} -DBUILD_CONFIG=mysql_release -DINSTALL_LAYOUT=RPM \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DENABLE_DTRACE=OFF \ + -DMYSQL_UNIX_ADDR="%{mysqldatadir}/mysql.sock" \ + -DFEATURE_SET="%{feature_set}" \ + -DCOMPILATION_COMMENT="%{compilation_comment_release}" \ + -DMYSQL_SERVER_SUFFIX="%{server_suffix}" + echo BEGIN_NORMAL_CONFIG ; egrep '^#define' include/config.h ; echo END_NORMAL_CONFIG + make ${MAKE_JFLAG} VERBOSE=1 +) + +%if %runselftest + MTR_BUILD_THREAD=auto + export MTR_BUILD_THREAD + + (cd release && make test-bt-fast || true) +%endif + +############################################################################## +%install + +RBR=$RPM_BUILD_ROOT +MBD=$RPM_BUILD_DIR/%{src_dir} + +# Ensure that needed directories exists +install -d $RBR%{_sysconfdir}/{logrotate.d,init.d} +install -d $RBR%{mysqldatadir}/mysql +install -d $RBR%{_datadir}/mysql-test +install -d $RBR%{_datadir}/mysql/SELinux/RHEL4 +install -d $RBR%{_includedir} +install -d $RBR%{_libdir} +install -d $RBR%{_mandir} +install -d $RBR%{_sbindir} + +mkdir -p $RBR%{_sysconfdir}/my.cnf.d + +# Install all binaries +( + cd $MBD/release + make DESTDIR=$RBR install +) + +# For gcc builds, include libgcc.a in the devel subpackage (BUG 4921). Do +# this in a sub-shell to ensure we don't pollute the install environment +# with compiler bits. 
+( + PATH=${MYSQL_BUILD_PATH:-$PATH} + CC=${MYSQL_BUILD_CC:-${CC:-gcc}} + CFLAGS=${MYSQL_BUILD_CFLAGS:-${CFLAGS:-$RPM_OPT_FLAGS}} + if "${CC}" -v 2>&1 | grep '^gcc.version' >/dev/null 2>&1; then + libgcc=`${CC} ${CFLAGS} --print-libgcc-file` + if [ -f ${libgcc} ]; then + mkdir -p $RBR%{_libdir}/mysql + install -m 644 ${libgcc} $RBR%{_libdir}/mysql/libmygcc.a + echo "%{_libdir}/mysql/libmygcc.a" >>optional-files-devel + fi + fi +) + +# FIXME: at some point we should stop doing this and just install everything +# FIXME: directly into %{_libdir}/mysql - perhaps at the same time as renaming +# FIXME: the shared libraries to use libmysql*-$major.$minor.so syntax +mv -v $RBR/%{_libdir}/*.a $RBR/%{_libdir}/mysql/ + +# Install logrotate and autostart +install -m 644 $MBD/release/support-files/mysql-log-rotate $RBR%{_sysconfdir}/logrotate.d/mysql +install -m 755 $MBD/release/support-files/mysql.server $RBR%{_sysconfdir}/init.d/mysql + +# Create a symlink "rcmysql", pointing to the init.script. SuSE users +# will appreciate that, as all services usually offer this. +ln -s %{_sysconfdir}/init.d/mysql $RBR%{_sbindir}/rcmysql + +# Touch the place where the my.cnf config file might be located +# Just to make sure it's in the file list and marked as a config file +touch $RBR%{_sysconfdir}/my.cnf + +# Install SELinux files in datadir +install -m 600 $MBD/%{src_dir}/support-files/RHEL4-SElinux/mysql.{fc,te} \ + $RBR%{_datadir}/mysql/SELinux/RHEL4 + +%if %{WITH_TCMALLOC} +# Even though this is a shared library, put it under /usr/lib*/mysql, so it +# doesn't conflict with possible shared lib by the same name in /usr/lib*. See +# `mysql_config --variable=pkglibdir` and mysqld_safe for how this is used. +install -m 644 "%{malloc_lib_source}" \ + "$RBR%{_libdir}/mysql/%{malloc_lib_target}" +%endif + +# Remove man pages we explicitly do not want to package, avoids 'unpackaged +# files' warning. 
+# This has become obsolete: rm -f $RBR%{_mandir}/man1/make_win_bin_dist.1* + +############################################################################## +# Post processing actions, i.e. when installed +############################################################################## + +%pre -n MySQL-server%{product_suffix} +# This is the code running at the beginning of a RPM upgrade action, +# before replacing the old files with the new ones. + +# ATTENTION: Parts of this are duplicated in the "triggerpostun" ! + +# There are users who deviate from the default file system layout. +# Check local settings to support them. +if [ -x %{_bindir}/my_print_defaults ] +then + mysql_datadir=`%{_bindir}/my_print_defaults server mysqld | grep '^--datadir=' | sed -n 's/--datadir=//p'` + PID_FILE_PATT=`%{_bindir}/my_print_defaults server mysqld | grep '^--pid-file=' | sed -n 's/--pid-file=//p'` +fi +if [ -z "$mysql_datadir" ] +then + mysql_datadir=%{mysqldatadir} +fi +if [ -z "$PID_FILE_PATT" ] +then + PID_FILE_PATT="$mysql_datadir/*.pid" +fi + +# Check if we can safely upgrade. An upgrade is only safe if it's from one +# of our RPMs in the same version family. + +# Handle both ways of spelling the capability. +installed=`rpm -q --whatprovides mysql-server 2> /dev/null` +if [ $? -ne 0 -o -z "$installed" ]; then + installed=`rpm -q --whatprovides MySQL-server 2> /dev/null` +fi +if [ $? 
-eq 0 -a -n "$installed" ]; then + installed=`echo $installed | sed 's/\([^ ]*\) .*/\1/'` # Tests have shown duplicated package names + vendor=`rpm -q --queryformat='%{VENDOR}' "$installed" 2>&1` + version=`rpm -q --queryformat='%{VERSION}' "$installed" 2>&1` + myoldvendor='%{mysql_old_vendor}' + myvendor_2='%{mysql_vendor_2}' + myvendor='%{mysql_vendor}' + myversion='%{mysql_version}' + + old_family=`echo $version \ + | sed -n -e 's,^\([1-9][0-9]*\.[0-9][0-9]*\)\..*$,\1,p'` + new_family=`echo $myversion \ + | sed -n -e 's,^\([1-9][0-9]*\.[0-9][0-9]*\)\..*$,\1,p'` + + [ -z "$vendor" ] && vendor='' + [ -z "$old_family" ] && old_family="" + [ -z "$new_family" ] && new_family="" + + error_text= + if [ "$vendor" != "$myoldvendor" \ + -a "$vendor" != "$myvendor_2" \ + -a "$vendor" != "$myvendor" ]; then + error_text="$error_text +The current MySQL server package is provided by a different +vendor ($vendor) than $myoldvendor, $myvendor_2, or $myvendor. +Some files may be installed to different locations, including log +files and the service startup script in %{_sysconfdir}/init.d/. +" + fi + + if [ "$old_family" != "$new_family" ]; then + error_text="$error_text +Upgrading directly from MySQL $old_family to MySQL $new_family may not +be safe in all cases. A manual dump and restore using mysqldump is +recommended. It is important to review the MySQL manual's Upgrading +section for version-specific incompatibilities. +" + fi + + if [ -n "$error_text" ]; then + cat <<HERE >&2 + +****************************************************************** +A MySQL server package ($installed) is installed. +$error_text +A manual upgrade is required. + +- Ensure that you have a complete, working backup of your data and my.cnf + files +- Shut down the MySQL server cleanly +- Remove the existing MySQL packages. 
Usually this command will + list the packages you should remove: + rpm -qa | grep -i '^mysql-' + + You may choose to use 'rpm --nodeps -ev ' to remove + the package which contains the mysqlclient shared library. The + library will be reinstalled by the MySQL-shared-compat package. +- Install the new MySQL packages supplied by $myvendor +- Ensure that the MySQL server is started +- Run the 'mysql_upgrade' program + +This is a brief description of the upgrade process. Important details +can be found in the MySQL manual, in the Upgrading section. +****************************************************************** +HERE + exit 1 + fi +fi + +# We assume that if there is exactly one ".pid" file, +# it contains the valid PID of a running MySQL server. +NR_PID_FILES=`ls -1 $PID_FILE_PATT 2>/dev/null | wc -l` +case $NR_PID_FILES in + 0 ) SERVER_TO_START='' ;; # No "*.pid" file == no running server + 1 ) SERVER_TO_START='true' ;; + * ) SERVER_TO_START='' # Situation not clear + SEVERAL_PID_FILES=true ;; +esac +# That logic may be debated: We might check whether it is non-empty, +# contains exactly one number (possibly a PID), and whether "ps" finds it. +# OTOH, if there is no such process, it means a crash without a cleanup - +# is that a reason not to start a new server after upgrade? + +STATUS_FILE=$mysql_datadir/RPM_UPGRADE_MARKER + +if [ -f $STATUS_FILE ]; then + echo "Some previous upgrade was not finished:" + ls -ld $STATUS_FILE + echo "Please check its status, then do" + echo " rm $STATUS_FILE" + echo "before repeating the MySQL upgrade." + exit 1 +elif [ -n "$SEVERAL_PID_FILES" ] ; then + echo "You have more than one PID file:" + ls -ld $PID_FILE_PATT + echo "Please check which one (if any) corresponds to a running server" + echo "and delete all others before repeating the MySQL upgrade." + exit 1 +fi + +NEW_VERSION=%{mysql_version}-%{release} + +# The "pre" section code is also run on a first installation, +# when there is no data directory yet. 
Protect against error messages. +if [ -d $mysql_datadir ] ; then + echo "MySQL RPM upgrade to version $NEW_VERSION" > $STATUS_FILE + echo "'pre' step running at `date`" >> $STATUS_FILE + echo >> $STATUS_FILE + fcount=`ls -ltr $mysql_datadir/*.err 2>/dev/null | wc -l` + if [ $fcount -gt 0 ] ; then + echo "ERR file(s):" >> $STATUS_FILE + ls -ltr $mysql_datadir/*.err >> $STATUS_FILE + echo >> $STATUS_FILE + echo "Latest 'Version' line in latest file:" >> $STATUS_FILE + grep '^Version' `ls -tr $mysql_datadir/*.err | tail -1` | \ + tail -1 >> $STATUS_FILE + echo >> $STATUS_FILE + fi + + if [ -n "$SERVER_TO_START" ] ; then + # There is only one PID file, race possibility ignored + echo "PID file:" >> $STATUS_FILE + ls -l $PID_FILE_PATT >> $STATUS_FILE + cat $PID_FILE_PATT >> $STATUS_FILE + echo >> $STATUS_FILE + echo "Server process:" >> $STATUS_FILE + ps -fp `cat $PID_FILE_PATT` >> $STATUS_FILE + echo >> $STATUS_FILE + echo "SERVER_TO_START=$SERVER_TO_START" >> $STATUS_FILE + else + # Take a note we checked it ... + echo "PID file:" >> $STATUS_FILE + ls -l $PID_FILE_PATT >> $STATUS_FILE 2>&1 + fi +fi + +# Shut down a previously installed server first +# Note we *could* make that depend on $SERVER_TO_START, but we rather don't, +# so a "stop" is attempted even if there is no PID file. +# (Maybe the "stop" doesn't work then, but we might fix that in itself.) +if [ -x %{_sysconfdir}/init.d/mysql ] ; then + %{_sysconfdir}/init.d/mysql stop > /dev/null 2>&1 + echo "Giving mysqld 5 seconds to exit nicely" + sleep 5 +fi + +%post -n MySQL-server%{product_suffix} +# This is the code running at the end of a RPM install or upgrade action, +# after the (new) files have been written. + +# ATTENTION: Parts of this are duplicated in the "triggerpostun" ! + +# There are users who deviate from the default file system layout. +# Check local settings to support them. 
+if [ -x %{_bindir}/my_print_defaults ] +then + mysql_datadir=`%{_bindir}/my_print_defaults server mysqld | grep '^--datadir=' | sed -n 's/--datadir=//p'` +fi +if [ -z "$mysql_datadir" ] +then + mysql_datadir=%{mysqldatadir} +fi + +NEW_VERSION=%{mysql_version}-%{release} +STATUS_FILE=$mysql_datadir/RPM_UPGRADE_MARKER + +# ---------------------------------------------------------------------- +# Create data directory if needed, check whether upgrade or install +# ---------------------------------------------------------------------- +if [ ! -d $mysql_datadir ] ; then mkdir -m 755 $mysql_datadir; fi +if [ -f $STATUS_FILE ] ; then + SERVER_TO_START=`grep '^SERVER_TO_START=' $STATUS_FILE | cut -c17-` +else + SERVER_TO_START='' +fi +# echo "Analyzed: SERVER_TO_START=$SERVER_TO_START" +if [ ! -d $mysql_datadir/mysql ] ; then + mkdir $mysql_datadir/mysql $mysql_datadir/test + echo "MySQL RPM installation of version $NEW_VERSION" >> $STATUS_FILE +else + # If the directory exists, we may assume it is an upgrade. + echo "MySQL RPM upgrade to version $NEW_VERSION" >> $STATUS_FILE +fi + +# ---------------------------------------------------------------------- +# Make MySQL start/shutdown automatically when the machine does it. +# ---------------------------------------------------------------------- +# NOTE: This still needs to be debated. Should we check whether these links +# for the other run levels exist(ed) before the upgrade? +# use chkconfig on Enterprise Linux and newer SuSE releases +if [ -x /sbin/chkconfig ] ; then + /sbin/chkconfig --add mysql +# use insserv for older SuSE Linux versions +elif [ -x /sbin/insserv ] ; then + /sbin/insserv %{_sysconfdir}/init.d/mysql +fi + +# ---------------------------------------------------------------------- +# Create a MySQL user and group. Do not report any problems if it already +# exists. 
+# ---------------------------------------------------------------------- +groupadd -r %{mysqld_group} 2> /dev/null || true +useradd -M -r -d $mysql_datadir -s /bin/bash -c "MySQL server" \ + -g %{mysqld_group} %{mysqld_user} 2> /dev/null || true +# The user may already exist, make sure it has the proper group nevertheless +# (BUG#12823) +usermod -g %{mysqld_group} %{mysqld_user} 2> /dev/null || true + +# ---------------------------------------------------------------------- +# Change permissions so that the user that will run the MySQL daemon +# owns all database files. +# ---------------------------------------------------------------------- +chown -R %{mysqld_user}:%{mysqld_group} $mysql_datadir + +# ---------------------------------------------------------------------- +# Initiate databases if needed +# ---------------------------------------------------------------------- +if ! grep '^MySQL RPM upgrade' $STATUS_FILE >/dev/null 2>&1 ; then + # Fix bug#45415: no "mysql_install_db" on an upgrade + # Do this as a negative to err towards more "install" runs + # rather than to miss one. + %{_bindir}/mysql_install_db --rpm --user=%{mysqld_user} +fi + +# ---------------------------------------------------------------------- +# Upgrade databases if needed would go here - but it cannot be automated yet +# ---------------------------------------------------------------------- + +# ---------------------------------------------------------------------- +# Change permissions again to fix any new files. +# ---------------------------------------------------------------------- +chown -R %{mysqld_user}:%{mysqld_group} $mysql_datadir + +# ---------------------------------------------------------------------- +# Fix permissions for the permission database so that only the user +# can read them. 
+# ---------------------------------------------------------------------- +chmod -R og-rw $mysql_datadir/mysql + +# ---------------------------------------------------------------------- +# install SELinux files - but don't override existing ones +# ---------------------------------------------------------------------- +SETARGETDIR=/etc/selinux/targeted/src/policy +SEDOMPROG=$SETARGETDIR/domains/program +SECONPROG=$SETARGETDIR/file_contexts/program +if [ -f /etc/redhat-release ] \ + && (grep -q "Red Hat Enterprise Linux .. release 4" /etc/redhat-release \ + || grep -q "CentOS release 4" /etc/redhat-release) ; then + echo + echo + echo 'Notes regarding SELinux on this platform:' + echo '=========================================' + echo + echo 'The default policy might cause server startup to fail because it is' + echo 'not allowed to access critical files. In this case, please update' + echo 'your installation.' + echo + echo 'The default policy might also cause inavailability of SSL related' + echo 'features because the server is not allowed to access /dev/random' + echo 'and /dev/urandom. If this is a problem, please do the following:' + echo + echo ' 1) install selinux-policy-targeted-sources from your OS vendor' + echo ' 2) add the following two lines to '$SEDOMPROG/mysqld.te':' + echo ' allow mysqld_t random_device_t:chr_file read;' + echo ' allow mysqld_t urandom_device_t:chr_file read;' + echo ' 3) cd to '$SETARGETDIR' and issue the following command:' + echo ' make load' + echo + echo +fi + +if [ -x sbin/restorecon ] ; then + sbin/restorecon -R var/lib/mysql +fi + +# Was the server running before the upgrade? If so, restart the new one. +if [ "$SERVER_TO_START" = "true" ] ; then + # Restart in the same way that mysqld will be started normally. + if [ -x %{_sysconfdir}/init.d/mysql ] ; then + %{_sysconfdir}/init.d/mysql start + echo "Giving mysqld 5 seconds to start" + sleep 5 + fi +fi + +# Collect an upgrade history ... 
+echo "Upgrade/install finished at `date`" >> $STATUS_FILE +echo >> $STATUS_FILE +echo "=====" >> $STATUS_FILE +STATUS_HISTORY=$mysql_datadir/RPM_UPGRADE_HISTORY +cat $STATUS_FILE >> $STATUS_HISTORY +mv -f $STATUS_FILE ${STATUS_FILE}-LAST # for "triggerpostun" + + +#echo "Thank you for installing the MySQL Community Server! For Production +#systems, we recommend MySQL Enterprise, which contains enterprise-ready +#software, intelligent advisory services, and full production support with +#scheduled service packs and more. Visit www.mysql.com/enterprise for more +#information." + +%preun -n MySQL-server%{product_suffix} + +# Which '$1' does this refer to? Fedora docs have info: +# " ... a count of the number of versions of the package that are installed. +# Action Count +# Install the first time 1 +# Upgrade 2 or higher (depending on the number of versions installed) +# Remove last version of package 0 " +# +# http://docs.fedoraproject.org/en-US/Fedora_Draft_Documentation/0.1/html/RPM_Guide/ch09s04s05.html + +if [ $1 = 0 ] ; then + # Stop MySQL before uninstalling it + if [ -x %{_sysconfdir}/init.d/mysql ] ; then + %{_sysconfdir}/init.d/mysql stop > /dev/null + # Remove autostart of MySQL + # use chkconfig on Enterprise Linux and newer SuSE releases + if [ -x /sbin/chkconfig ] ; then + /sbin/chkconfig --del mysql + # For older SuSE Linux versions + elif [ -x /sbin/insserv ] ; then + /sbin/insserv -r %{_sysconfdir}/init.d/mysql + fi + fi +fi + +# We do not remove the mysql user since it may still own a lot of +# database files. + +%triggerpostun -n MySQL-server%{product_suffix} --MySQL-server-community + +# Setup: We renamed this package, so any existing "server-community" +# package will be removed when this "server" is installed. +# Problem: RPM will first run the "pre" and "post" sections of this script, +# and only then the "preun" of that old community server. 
+# But this "preun" includes stopping the server and uninstalling the service, +# "chkconfig --del mysql" which removes the symlinks to the start script. +# Solution: *After* the community server got removed, restart this server +# and re-install the service. +# +# For information about triggers in spec files, see the Fedora docs: +# http://docs.fedoraproject.org/en-US/Fedora_Draft_Documentation/0.1/html/RPM_Guide/ch10s02.html +# For all details of this code, see the "pre" and "post" sections. + +# There are users who deviate from the default file system layout. +# Check local settings to support them. +if [ -x %{_bindir}/my_print_defaults ] +then + mysql_datadir=`%{_bindir}/my_print_defaults server mysqld | grep '^--datadir=' | sed -n 's/--datadir=//p'` +fi +if [ -z "$mysql_datadir" ] +then + mysql_datadir=%{mysqldatadir} +fi + +NEW_VERSION=%{mysql_version}-%{release} +STATUS_FILE=$mysql_datadir/RPM_UPGRADE_MARKER-LAST # Note the difference! +STATUS_HISTORY=$mysql_datadir/RPM_UPGRADE_HISTORY + +if [ -f $STATUS_FILE ] ; then + SERVER_TO_START=`grep '^SERVER_TO_START=' $STATUS_FILE | cut -c17-` +else + # This should never happen, but let's be prepared + SERVER_TO_START='' +fi +echo "Analyzed: SERVER_TO_START=$SERVER_TO_START" + +if [ -x /sbin/chkconfig ] ; then + /sbin/chkconfig --add mysql +# use insserv for older SuSE Linux versions +elif [ -x /sbin/insserv ] ; then + /sbin/insserv %{_sysconfdir}/init.d/mysql +fi + +# Was the server running before the upgrade? If so, restart the new one. +if [ "$SERVER_TO_START" = "true" ] ; then + # Restart in the same way that mysqld will be started normally. 
+ if [ -x %{_sysconfdir}/init.d/mysql ] ; then + %{_sysconfdir}/init.d/mysql start + echo "Giving mysqld 5 seconds to start" + sleep 5 + fi +fi + +echo "Trigger 'postun --community' finished at `date`" >> $STATUS_HISTORY +echo >> $STATUS_HISTORY +echo "=====" >> $STATUS_HISTORY + + +# ---------------------------------------------------------------------- +# Clean up the BuildRoot after build is done +# ---------------------------------------------------------------------- +%clean +[ "$RPM_BUILD_ROOT" != "/" ] && [ -d $RPM_BUILD_ROOT ] \ + && rm -rf $RPM_BUILD_ROOT; + +############################################################################## +# Files section +############################################################################## + +%files -n MySQL-server%{product_suffix} -f release/support-files/plugins.files +%defattr(-,root,root,0755) + +%if %{defined license_files_server} +%doc %{license_files_server} +%endif +%doc %{src_dir}/Docs/ChangeLog +%doc %{src_dir}/Docs/INFO_SRC* +%doc release/Docs/INFO_BIN* +%doc release/support-files/my-*.cnf + +%if 0%{?commercial} +%doc %attr(644, root, root) %{_infodir}/mysql.info* +%endif + +%doc %attr(644, root, man) %{_mandir}/man1/innochecksum.1* +%doc %attr(644, root, man) %{_mandir}/man1/my_print_defaults.1* +%doc %attr(644, root, man) %{_mandir}/man1/myisam_ftdump.1* +%doc %attr(644, root, man) %{_mandir}/man1/myisamchk.1* +%doc %attr(644, root, man) %{_mandir}/man1/myisamlog.1* +%doc %attr(644, root, man) %{_mandir}/man1/myisampack.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysql_convert_table_format.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysql_fix_extensions.1* +%doc %attr(644, root, man) %{_mandir}/man8/mysqld.8* +%doc %attr(644, root, man) %{_mandir}/man1/mysqld_multi.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysqld_safe.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysqldumpslow.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysql_install_db.1* +%doc %attr(644, root, man) 
%{_mandir}/man1/mysql_plugin.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysql_secure_installation.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysql_setpermission.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysql_upgrade.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysqlhotcopy.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysqlman.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysql.server.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysqltest.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysql_tzinfo_to_sql.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysql_zap.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysqlbug.1* +%doc %attr(644, root, man) %{_mandir}/man1/perror.1* +%doc %attr(644, root, man) %{_mandir}/man1/replace.1* +%doc %attr(644, root, man) %{_mandir}/man1/resolve_stack_dump.1* +%doc %attr(644, root, man) %{_mandir}/man1/resolveip.1* + +%ghost %config(noreplace,missingok) %{_sysconfdir}/my.cnf +%dir %{_sysconfdir}/my.cnf.d + +%attr(755, root, root) %{_bindir}/innochecksum +%attr(755, root, root) %{_bindir}/my_print_defaults +%attr(755, root, root) %{_bindir}/myisam_ftdump +%attr(755, root, root) %{_bindir}/myisamchk +%attr(755, root, root) %{_bindir}/myisamlog +%attr(755, root, root) %{_bindir}/myisampack +%attr(755, root, root) %{_bindir}/mysql_convert_table_format +%attr(755, root, root) %{_bindir}/mysql_fix_extensions +%attr(755, root, root) %{_bindir}/mysql_install_db +%attr(755, root, root) %{_bindir}/mysql_plugin +%attr(755, root, root) %{_bindir}/mysql_secure_installation +%attr(755, root, root) %{_bindir}/mysql_setpermission +%attr(755, root, root) %{_bindir}/mysql_tzinfo_to_sql +%attr(755, root, root) %{_bindir}/mysql_upgrade +%attr(755, root, root) %{_bindir}/mysql_zap +%attr(755, root, root) %{_bindir}/mysqlbug +%attr(755, root, root) %{_bindir}/mysqld_multi +%attr(755, root, root) %{_bindir}/mysqld_safe +%attr(755, root, root) %{_bindir}/mysqldumpslow +%attr(755, root, root) %{_bindir}/mysqlhotcopy +%attr(755, root, root) 
%{_bindir}/mysqltest +%attr(755, root, root) %{_bindir}/perror +%attr(755, root, root) %{_bindir}/replace +%attr(755, root, root) %{_bindir}/resolve_stack_dump +%attr(755, root, root) %{_bindir}/resolveip + +%attr(755, root, root) %{_sbindir}/mysqld +%attr(755, root, root) %{_sbindir}/mysqld-debug +%attr(755, root, root) %{_sbindir}/rcmysql +%attr(755, root, root) %{_libdir}/mysql/plugin/daemon_example.ini + +%if %{WITH_TCMALLOC} +%attr(755, root, root) %{_libdir}/mysql/%{malloc_lib_target} +%endif + +%attr(644, root, root) %config(noreplace,missingok) %{_sysconfdir}/logrotate.d/mysql +%attr(755, root, root) %{_sysconfdir}/init.d/mysql + +%attr(755, root, root) %{_datadir}/mysql/ + +# ---------------------------------------------------------------------------- +%files -n MySQL-client%{product_suffix} + +%defattr(-, root, root, 0755) +%attr(755, root, root) %{_bindir}/msql2mysql +%attr(755, root, root) %{_bindir}/mysql +%attr(755, root, root) %{_bindir}/mysql_find_rows +%attr(755, root, root) %{_bindir}/mysql_waitpid +%attr(755, root, root) %{_bindir}/mysqlaccess +# XXX: This should be moved to %{_sysconfdir} +%attr(644, root, root) %{_bindir}/mysqlaccess.conf +%attr(755, root, root) %{_bindir}/mysqladmin +%attr(755, root, root) %{_bindir}/mysqlbinlog +%attr(755, root, root) %{_bindir}/mysqlcheck +%attr(755, root, root) %{_bindir}/mysqldump +%attr(755, root, root) %{_bindir}/mysqlimport +%attr(755, root, root) %{_bindir}/mysqlshow +%attr(755, root, root) %{_bindir}/mysqlslap + +%doc %attr(644, root, man) %{_mandir}/man1/msql2mysql.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysql.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysql_find_rows.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysql_waitpid.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysqlaccess.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysqladmin.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysqlbinlog.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysqlcheck.1* +%doc %attr(644, root, man) 
%{_mandir}/man1/mysqldump.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysqlimport.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysqlshow.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysqlslap.1* + +# ---------------------------------------------------------------------------- +%files -n MySQL-devel%{product_suffix} -f optional-files-devel +%defattr(-, root, root, 0755) +%doc %attr(644, root, man) %{_mandir}/man1/comp_err.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysql_config.1* +%attr(755, root, root) %{_bindir}/mysql_config +%dir %attr(755, root, root) %{_includedir}/mysql +%dir %attr(755, root, root) %{_libdir}/mysql +%{_includedir}/mysql/* +%{_datadir}/aclocal/mysql.m4 +%{_libdir}/mysql/libmysqlclient.a +%{_libdir}/mysql/libmysqlclient_r.a +%{_libdir}/mysql/libmysqlservices.a + +# ---------------------------------------------------------------------------- +%files -n MySQL-shared%{product_suffix} +%defattr(-, root, root, 0755) +# Shared libraries (omit for architectures that don't support them) +%{_libdir}/libmysql*.so* + +%post -n MySQL-shared%{product_suffix} +/sbin/ldconfig + +%postun -n MySQL-shared%{product_suffix} +/sbin/ldconfig + +# ---------------------------------------------------------------------------- +%files -n MySQL-test%{product_suffix} +%defattr(-, root, root, 0755) +%attr(-, root, root) %{_datadir}/mysql-test +%attr(755, root, root) %{_bindir}/mysql_client_test +%attr(755, root, root) %{_bindir}/mysql_client_test_embedded +%attr(755, root, root) %{_bindir}/mysqltest_embedded +%doc %attr(644, root, man) %{_mandir}/man1/mysql_client_test.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysql-stress-test.pl.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysql-test-run.pl.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysql_client_test_embedded.1* +%doc %attr(644, root, man) %{_mandir}/man1/mysqltest_embedded.1* + +# ---------------------------------------------------------------------------- +%files -n MySQL-embedded%{product_suffix} 
+%defattr(-, root, root, 0755) +%attr(755, root, root) %{_bindir}/mysql_embedded +%attr(644, root, root) %{_libdir}/mysql/libmysqld.a +%attr(644, root, root) %{_libdir}/mysql/libmysqld-debug.a + +############################################################################## +# The spec file changelog only includes changes made to the spec file +# itself - note that they must be ordered by date (important when +# merging BK trees) +############################################################################## +%changelog +* Wed Jul 02 2014 Bjorn Munch +- Disable dtrace unconditionally, breaks after we install Oracle dtrace + +* Wed Oct 30 2013 Balasubramanian Kandasamy +- Removed non gpl file docs/mysql.info from community packages + +* Mon Sep 09 2013 Balasubramanian Kandasamy +- Updated logic to get the correct count of PID files + +* Fri Aug 16 2013 Balasubramanian Kandasamy +- Added provides lowercase mysql tags + +* Wed Jun 26 2013 Balasubramanian Kandasamy +- Cleaned up spec file to resolve rpm dependencies. + +* Tue Jul 24 2012 Joerg Bruehe + +- Add a macro "runselftest": + if set to 1 (default), the test suite will be run during the RPM build; + this can be oveeridden via the command line by adding + --define "runselftest 0" + Failures of the test suite will NOT make the RPM build fail! + +* Mon Jun 11 2012 Joerg Bruehe + +- Make sure newly added "SPECIFIC-ULN/" directory does not disturb packaging. + +* Wed Sep 28 2011 Joerg Bruehe + +- Fix duplicate mentioning of "mysql_plugin" and its manual page, + it is better to keep alphabetic order in the files list (merging!). + +* Wed Sep 14 2011 Joerg Bruehe + +- Let the RPM capabilities ("obsoletes" etc) ensure that an upgrade may replace + the RPMs of any configuration (of the current or the preceding release series) + by the new ones. 
This is done by not using the implicitly generated capabilities + (which include the configuration name) and relying on more generic ones which + just list the function ("server", "client", ...). + The implicit generation cannot be prevented, so all these capabilities must be + explicitly listed in "Obsoletes:" + +* Tue Sep 13 2011 Jonathan Perkin + +- Add support for Oracle Linux 6 and Red Hat Enterprise Linux 6. Due to + changes in RPM behaviour ($RPM_BUILD_ROOT is removed prior to install) + this necessitated a move of the libmygcc.a installation to the install + phase, which is probably where it belonged in the first place. + +* Tue Sep 13 2011 Joerg Bruehe + +- "make_win_bin_dist" and its manual are dropped, cmake does it different. + +* Thu Sep 08 2011 Daniel Fischer + +- Add mysql_plugin man page. + +* Tue Aug 30 2011 Joerg Bruehe + +- Add the manual page for "mysql_plugin" to the server package. + +* Fri Aug 19 2011 Joerg Bruehe + +- Null-upmerge the fix of bug#37165: This spec file is not affected. +- Replace "/var/lib/mysql" by the spec file variable "%{mysqldatadir}". + +* Fri Aug 12 2011 Daniel Fischer + +- Source plugin library files list from cmake-generated file. + +* Mon Jul 25 2011 Chuck Bell + +- Added the mysql_plugin client - enables or disables plugins. + +* Thu Jul 21 2011 Sunanda Menon + +- Fix bug#12561297: Added the MySQL embedded binary + +* Thu Jul 07 2011 Joerg Bruehe + +- Fix bug#45415: "rpm upgrade recreates test database" + Let the creation of the "test" database happen only during a new installation, + not in an RPM upgrade. + This affects both the "mkdir" and the call of "mysql_install_db". + +* Thu Feb 09 2011 Joerg Bruehe + +- Fix bug#56581: If an installation deviates from the default file locations + ("datadir" and "pid-file"), the mechanism to detect a running server (on upgrade) + should still work, and use these locations. + The problem was that the fix for bug#27072 did not check for local settings. 
+ +* Mon Jan 31 2011 Joerg Bruehe + +- Install the new "manifest" files: "INFO_SRC" and "INFO_BIN". + +* Tue Nov 23 2010 Jonathan Perkin + +- EXCEPTIONS-CLIENT has been deleted, remove it from here too +- Support MYSQL_BUILD_MAKE_JFLAG environment variable for passing + a '-j' argument to make. + +* Mon Nov 1 2010 Georgi Kodinov + +- Added test authentication (WL#1054) plugin binaries + +* Wed Oct 6 2010 Georgi Kodinov + +- Added example external authentication (WL#1054) plugin binaries + +* Wed Aug 11 2010 Joerg Bruehe + +- With a recent spec file cleanup, names have changed: A "-community" part was dropped. + Reflect that in the "Obsoletes" specifications. +- Add a "triggerpostun" to handle the uninstall of the "-community" server RPM. +- This fixes bug#55015 "MySQL server is not restarted properly after RPM upgrade". + +* Tue Jun 15 2010 Joerg Bruehe + +- Change the behaviour on installation and upgrade: + On installation, do not autostart the server. + *Iff* the server was stopped before the upgrade is started, this is taken as a + sign the administrator is handling that manually, and so the new server will + not be started automatically at the end of the upgrade. + The start/stop scripts will still be installed, so the server will be started + on the next machine boot. + This is the 5.5 version of fixing bug#27072 (RPM autostarting the server). + +* Tue Jun 1 2010 Jonathan Perkin + +- Implement SELinux checks from distribution-specific spec file. + +* Wed May 12 2010 Jonathan Perkin + +- Large number of changes to build using CMake +- Introduce distribution-specific RPMs +- Drop debuginfo, build all binaries with debug/symbols +- Remove __os_install_post, use native macro +- Remove _unpackaged_files_terminate_build, make it an error to have + unpackaged files +- Remove cluster RPMs + +* Wed Mar 24 2010 Joerg Bruehe + +- Add "--with-perfschema" to the configure options. 
+ +* Mon Mar 22 2010 Joerg Bruehe + +- User "usr/lib*" to allow for both "usr/lib" and "usr/lib64", + mask "rmdir" return code 1. +- Remove "ha_example.*" files from the list, they aren't built. + +* Wed Mar 17 2010 Joerg Bruehe + +- Fix a wrong path name in handling the debug plugins. + +* Wed Mar 10 2010 Joerg Bruehe + +- Take the result of the debug plugin build and put it into the optimized tree, + so that it becomes part of the final installation; + include the files in the packlist. Part of the fixes for bug#49022. + +* Mon Mar 01 2010 Joerg Bruehe + +- Set "Oracle and/or its affiliates" as the vendor and copyright owner, + accept upgrading from packages showing MySQL or Sun as vendor. + +* Fri Feb 12 2010 Joerg Bruehe + +- Formatting changes: + Have a consistent structure of separator lines and of indentation + (8 leading blanks => tab). +- Introduce the variable "src_dir". +- Give the environment variables "MYSQL_BUILD_CC(CXX)" precedence + over "CC" ("CXX"). +- Drop the old "with_static" argument analysis, this is not supported + in 5.1 since ages. +- Introduce variables to control the handlers individually, as well + as other options. +- Use the new "--with-plugin" notation for the table handlers. +- Drop handling "/etc/rc.d/init.d/mysql", the switch to "/etc/init.d/mysql" + was done back in 2002 already. +- Make "--with-zlib-dir=bundled" the default, add an option to disable it. +- Add missing manual pages to the file list. +- Improve the runtime check for "libgcc.a", protect it against being tried + with the Intel compiler "icc". + +* Mon Jan 11 2010 Joerg Bruehe + +- Change RPM file naming: + - Suffix like "-m2", "-rc" becomes part of version as "_m2", "_rc". + - Release counts from 1, not 0. + +* Wed Dec 23 2009 Joerg Bruehe + +- The "semisync" plugin file name has lost its introductory "lib", + adapt the file lists for the subpackages. + This is a part missing from the fix for bug#48351. 
+- Remove the "fix_privilege_tables" manual, it does not exist in 5.5 + (and likely, the whole script will go, too). + +* Mon Nov 16 2009 Joerg Bruehe + +- Fix some problems with the directives around "tcmalloc" (experimental), + remove erroneous traces of the InnoDB plugin (that is 5.1 only). + +* Fri Oct 06 2009 Magnus Blaudd + +- Removed mysql_fix_privilege_tables + +* Fri Oct 02 2009 Alexander Nozdrin + +- "mysqlmanager" got removed from version 5.4, all references deleted. + +* Fri Aug 28 2009 Joerg Bruehe + +- Merge up from 5.1 to 5.4: Remove handling for the InnoDB plugin. + +* Thu Aug 27 2009 Joerg Bruehe + +- This version does not contain the "Instance manager", "mysqlmanager": + Remove it from the spec file so that packaging succeeds. + +* Mon Aug 24 2009 Jonathan Perkin + +- Add conditionals for bundled zlib and innodb plugin + +* Fri Aug 21 2009 Jonathan Perkin + +- Install plugin libraries in appropriate packages. +- Disable libdaemon_example and ftexample plugins. + +* Thu Aug 20 2009 Jonathan Perkin + +- Update variable used for mysql-test suite location to match source. + +* Fri Nov 07 2008 Joerg Bruehe + +- Correct yesterday's fix, so that it also works for the last flag, + and fix a wrong quoting: un-quoted quote marks must not be escaped. + +* Thu Nov 06 2008 Kent Boortz + +- Removed "mysql_upgrade_shell" +- Removed some copy/paste between debug and normal build + +* Thu Nov 06 2008 Joerg Bruehe + +- Modify CFLAGS and CXXFLAGS such that a debug build is not optimized. + This should cover both gcc and icc flags. Fixes bug#40546. 
+ +* Fri Aug 29 2008 Kent Boortz + +- Removed the "Federated" storage engine option, and enabled in all + +* Tue Aug 26 2008 Joerg Bruehe + +- Get rid of the "warning: Installed (but unpackaged) file(s) found:" + Some generated files aren't needed in RPMs: + - the "sql-bench/" subdirectory + Some files were missing: + - /usr/share/aclocal/mysql.m4 ("devel" subpackage) + - Manual "mysqlbug" ("server" subpackage) + - Program "innochecksum" and its manual ("server" subpackage) + - Manual "mysql_find_rows" ("client" subpackage) + - Script "mysql_upgrade_shell" ("client" subpackage) + - Program "ndb_cpcd" and its manual ("ndb-extra" subpackage) + - Manuals "ndb_mgm" + "ndb_restore" ("ndb-tools" subpackage) + +* Mon Mar 31 2008 Kent Boortz + +- Made the "Federated" storage engine an option +- Made the "Cluster" storage engine and sub packages an option + +* Wed Mar 19 2008 Joerg Bruehe + +- Add the man pages for "ndbd" and "ndb_mgmd". + +* Mon Feb 18 2008 Timothy Smith + +- Require a manual upgrade if the alread-installed mysql-server is + from another vendor, or is of a different major version. + +* Wed May 02 2007 Joerg Bruehe + +- "ndb_size.tmpl" is not needed any more, + "man1/mysql_install_db.1" lacked the trailing '*'. + +* Sat Apr 07 2007 Kent Boortz + +- Removed man page for "mysql_create_system_tables" + +* Wed Mar 21 2007 Daniel Fischer + +- Add debug server. + +* Mon Mar 19 2007 Daniel Fischer + +- Remove Max RPMs; the server RPMs contain a mysqld compiled with all + features that previously only were built into Max. + +* Fri Mar 02 2007 Joerg Bruehe + +- Add several man pages for NDB which are now created. + +* Fri Jan 05 2007 Kent Boortz + +- Put back "libmygcc.a", found no real reason it was removed. + +- Add CFLAGS to gcc call with --print-libgcc-file, to make sure the + correct "libgcc.a" path is returned for the 32/64 bit architecture. + +* Mon Dec 18 2006 Joerg Bruehe + +- Fix the move of "mysqlmanager" to section 8: Directory name was wrong. 
+ +* Thu Dec 14 2006 Joerg Bruehe + +- Include the new man pages for "my_print_defaults" and "mysql_tzinfo_to_sql" + in the server RPM. +- The "mysqlmanager" man page got moved from section 1 to 8. + +* Thu Nov 30 2006 Joerg Bruehe + +- Call "make install" using "benchdir_root=%{_datadir}", + because that is affecting the regression test suite as well. + +* Thu Nov 16 2006 Joerg Bruehe + +- Explicitly note that the "MySQL-shared" RPMs (as built by MySQL AB) + replace "mysql-shared" (as distributed by SuSE) to allow easy upgrading + (bug#22081). + +* Mon Nov 13 2006 Joerg Bruehe + +- Add "--with-partition" to all server builds. + +- Use "--report-features" in one test run per server build. + +* Tue Aug 15 2006 Joerg Bruehe + +- The "max" server is removed from packages, effective from 5.1.12-beta. + Delete all steps to build, package, or install it. + +* Mon Jul 10 2006 Joerg Bruehe + +- Fix a typing error in the "make" target for the Perl script to run the tests. + +* Tue Jul 04 2006 Joerg Bruehe + +- Use the Perl script to run the tests, because it will automatically check + whether the server is configured with SSL. + +* Tue Jun 27 2006 Joerg Bruehe + +- move "mysqldumpslow" from the client RPM to the server RPM (bug#20216) + +- Revert all previous attempts to call "mysql_upgrade" during RPM upgrade, + there are some more aspects which need to be solved before this is possible. + For now, just ensure the binary "mysql_upgrade" is delivered and installed. + +* Thu Jun 22 2006 Joerg Bruehe + +- Close a gap of the previous version by explicitly using + a newly created temporary directory for the socket to be used + in the "mysql_upgrade" operation, overriding any local setting. + +* Tue Jun 20 2006 Joerg Bruehe + +- To run "mysql_upgrade", we need a running server; + start it in isolation and skip password checks. + +* Sat May 20 2006 Kent Boortz + +- Always compile for PIC, position independent code. 
+ +* Wed May 10 2006 Kent Boortz + +- Use character set "all" when compiling with Cluster, to make Cluster + nodes independent on the character set directory, and the problem + that two RPM sub packages both wants to install this directory. + +* Mon May 01 2006 Kent Boortz + +- Use "./libtool --mode=execute" instead of searching for the + executable in current directory and ".libs". + +* Fri Apr 28 2006 Kent Boortz + +- Install and run "mysql_upgrade" + +* Wed Apr 12 2006 Jim Winstead + +- Remove sql-bench, and MySQL-bench RPM (will be built as an independent + project from the mysql-bench repository) + +* Tue Apr 11 2006 Jim Winstead + +- Remove old mysqltestmanager and related programs +* Sat Apr 01 2006 Kent Boortz + +- Set $LDFLAGS from $MYSQL_BUILD_LDFLAGS + +* Wed Mar 07 2006 Kent Boortz + +- Changed product name from "Community Edition" to "Community Server" + +* Mon Mar 06 2006 Kent Boortz + +- Fast mutexes is now disabled by default, but should be + used in Linux builds. + +* Mon Feb 20 2006 Kent Boortz + +- Reintroduced a max build +- Limited testing of 'debug' and 'max' servers +- Berkeley DB only in 'max' + +* Mon Feb 13 2006 Joerg Bruehe + +- Use "-i" on "make test-force"; + this is essential for later evaluation of this log file. + +* Thu Feb 09 2006 Kent Boortz + +- Pass '-static' to libtool, link static with our own libraries, dynamic + with system libraries. Link with the bundled zlib. + +* Wed Feb 08 2006 Kristian Nielsen + +- Modified RPM spec to match new 5.1 debug+max combined community packaging. 
+ +* Sun Dec 18 2005 Kent Boortz + +- Added "client/mysqlslap" + +* Mon Dec 12 2005 Rodrigo Novo + +- Added zlib to the list of (static) libraries installed +- Added check against libtool wierdness (WRT: sql/mysqld || sql/.libs/mysqld) +- Compile MySQL with bundled zlib +- Fixed %packager name to "MySQL Production Engineering Team" + +* Mon Dec 05 2005 Joerg Bruehe + +- Avoid using the "bundled" zlib on "shared" builds: + As it is not installed (on the build system), this gives dependency + problems with "libtool" causing the build to fail. + (Change was done on Nov 11, but left uncommented.) + +* Tue Nov 22 2005 Joerg Bruehe + +- Extend the file existence check for "init.d/mysql" on un-install + to also guard the call to "insserv"/"chkconfig". + +* Thu Oct 27 2005 Lenz Grimmer + +- added more man pages + +* Wed Oct 19 2005 Kent Boortz + +- Made yaSSL support an option (off by default) + +* Wed Oct 19 2005 Kent Boortz + +- Enabled yaSSL support + +* Sat Oct 15 2005 Kent Boortz + +- Give mode arguments the same way in all places +- Moved copy of mysqld.a to "standard" build, but + disabled it as we don't do embedded yet in 5.0 + +* Fri Oct 14 2005 Kent Boortz + +- For 5.x, always compile with --with-big-tables +- Copy the config.log file to location outside + the build tree + +* Fri Oct 14 2005 Kent Boortz + +- Removed unneeded/obsolete configure options +- Added archive engine to standard server +- Removed the embedded server from experimental server +- Changed suffix "-Max" => "-max" +- Changed comment string "Max" => "Experimental" + +* Thu Oct 13 2005 Lenz Grimmer + +- added a usermod call to assign a potential existing mysql user to the + correct user group (BUG#12823) +- Save the perror binary built during Max build so it supports the NDB + error codes (BUG#13740) +- added a separate macro "mysqld_group" to be able to define the + user group of the mysql user seperately, if desired. 
+ +* Thu Sep 29 2005 Lenz Grimmer + +- fixed the removing of the RPM_BUILD_ROOT in the %clean section (the + $RBR variable did not get expanded, thus leaving old build roots behind) + +* Thu Aug 04 2005 Lenz Grimmer + +- Fixed the creation of the mysql user group account in the postinstall + section (BUG 12348) +- Fixed enabling the Archive storage engine in the Max binary + +* Tue Aug 02 2005 Lenz Grimmer + +- Fixed the Requires: tag for the server RPM (BUG 12233) + +* Fri Jul 15 2005 Lenz Grimmer + +- create a "mysql" user group and assign the mysql user account to that group + in the server postinstall section. (BUG 10984) + +* Tue Jun 14 2005 Lenz Grimmer + +- Do not build statically on i386 by default, only when adding either "--with + static" or "--define '_with_static 1'" to the RPM build options. Static + linking really only makes sense when linking against the specially patched + glibc 2.2.5. + +* Mon Jun 06 2005 Lenz Grimmer + +- added mysql_client_test to the "bench" subpackage (BUG 10676) +- added the libndbclient static and shared libraries (BUG 10676) + +* Wed Jun 01 2005 Lenz Grimmer + +- use "mysqldatadir" variable instead of hard-coding the path multiple times +- use the "mysqld_user" variable on all occasions a user name is referenced +- removed (incomplete) Brazilian translations +- removed redundant release tags from the subpackage descriptions + +* Wed May 25 2005 Joerg Bruehe + +- Added a "make clean" between separate calls to "BuildMySQL". + +* Thu May 12 2005 Guilhem Bichot + +- Removed the mysql_tableinfo script made obsolete by the information schema + +* Wed Apr 20 2005 Lenz Grimmer + +- Enabled the "blackhole" storage engine for the Max RPM + +* Wed Apr 13 2005 Lenz Grimmer + +- removed the MySQL manual files (html/ps/texi) - they have been removed + from the MySQL sources and are now available seperately. 
+ +* Mon Apr 4 2005 Petr Chardin + +- old mysqlmanager, mysqlmanagerc and mysqlmanager-pwger renamed into + mysqltestmanager, mysqltestmanager and mysqltestmanager-pwgen respectively + +* Fri Mar 18 2005 Lenz Grimmer + +- Disabled RAID in the Max binaries once and for all (it has finally been + removed from the source tree) + +* Sun Feb 20 2005 Petr Chardin + +- Install MySQL Instance Manager together with mysqld, touch mysqlmanager + password file + +* Mon Feb 14 2005 Lenz Grimmer + +- Fixed the compilation comments and moved them into the separate build sections + for Max and Standard + +* Mon Feb 7 2005 Tomas Ulin + +- enabled the "Ndbcluster" storage engine for the max binary +- added extra make install in ndb subdir after Max build to get ndb binaries +- added packages for ndbcluster storage engine + +* Fri Jan 14 2005 Lenz Grimmer + +- replaced obsoleted "BuildPrereq" with "BuildRequires" instead + +* Thu Jan 13 2005 Lenz Grimmer + +- enabled the "Federated" storage engine for the max binary + +* Tue Jan 04 2005 Petr Chardin + +- ISAM and merge storage engines were purged. 
As well as appropriate + tools and manpages (isamchk and isamlog) + +* Thu Dec 31 2004 Lenz Grimmer + +- enabled the "Archive" storage engine for the max binary +- enabled the "CSV" storage engine for the max binary +- enabled the "Example" storage engine for the max binary + +* Thu Aug 26 2004 Lenz Grimmer + +- MySQL-Max now requires MySQL-server instead of MySQL (BUG 3860) + +* Fri Aug 20 2004 Lenz Grimmer + +- do not link statically on IA64/AMD64 as these systems do not have + a patched glibc installed + +* Tue Aug 10 2004 Lenz Grimmer + +- Added libmygcc.a to the devel subpackage (required to link applications + against the the embedded server libmysqld.a) (BUG 4921) + +* Mon Aug 09 2004 Lenz Grimmer + +- Added EXCEPTIONS-CLIENT to the "devel" package + +* Thu Jul 29 2004 Lenz Grimmer + +- disabled OpenSSL in the Max binaries again (the RPM packages were the + only exception to this anyway) (BUG 1043) + +* Wed Jun 30 2004 Lenz Grimmer + +- fixed server postinstall (mysql_install_db was called with the wrong + parameter) + +* Thu Jun 24 2004 Lenz Grimmer + +- added mysql_tzinfo_to_sql to the server subpackage +- run "make clean" instead of "make distclean" + +* Mon Apr 05 2004 Lenz Grimmer + +- added ncurses-devel to the build prerequisites (BUG 3377) + +* Thu Feb 12 2004 Lenz Grimmer + +- when using gcc, _always_ use CXX=gcc +- replaced Copyright with License field (Copyright is obsolete) + +* Tue Feb 03 2004 Lenz Grimmer + +- added myisam_ftdump to the Server package + +* Tue Jan 13 2004 Lenz Grimmer + +- link the mysql client against libreadline instead of libedit (BUG 2289) + +* Mon Dec 22 2003 Lenz Grimmer + +- marked /etc/logrotate.d/mysql as a config file (BUG 2156) + +* Fri Dec 13 2003 Lenz Grimmer + +- fixed file permissions (BUG 1672) + +* Thu Dec 11 2003 Lenz Grimmer + +- made testing for gcc3 a bit more robust + +* Fri Dec 05 2003 Lenz Grimmer + +- added missing file mysql_create_system_tables to the server subpackage + +* Fri Nov 21 2003 Lenz 
Grimmer + +- removed dependency on MySQL-client from the MySQL-devel subpackage + as it is not really required. (BUG 1610) + +* Fri Aug 29 2003 Lenz Grimmer + +- Fixed BUG 1162 (removed macro names from the changelog) +- Really fixed BUG 998 (disable the checking for installed but + unpackaged files) + +* Tue Aug 05 2003 Lenz Grimmer + +- Fixed BUG 959 (libmysqld not being compiled properly) +- Fixed BUG 998 (RPM build errors): added missing files to the + distribution (mysql_fix_extensions, mysql_tableinfo, mysqldumpslow, + mysql_fix_privilege_tables.1), removed "-n" from install section. + +* Wed Jul 09 2003 Lenz Grimmer + +- removed the GIF Icon (file was not included in the sources anyway) +- removed unused variable shared_lib_version +- do not run automake before building the standard binary + (should not be necessary) +- add server suffix '-standard' to standard binary (to be in line + with the binary tarball distributions) +- Use more RPM macros (_exec_prefix, _sbindir, _libdir, _sysconfdir, + _datadir, _includedir) throughout the spec file. +- allow overriding CC and CXX (required when building with other compilers) + +* Fri May 16 2003 Lenz Grimmer + +- re-enabled RAID again + +* Wed Apr 30 2003 Lenz Grimmer + +- disabled MyISAM RAID (--with-raid) - it throws an assertion which + needs to be investigated first. 
+ +* Mon Mar 10 2003 Lenz Grimmer + +- added missing file mysql_secure_installation to server subpackage + (BUG 141) + +* Tue Feb 11 2003 Lenz Grimmer + +- re-added missing pre- and post(un)install scripts to server subpackage +- added config file /etc/my.cnf to the file list (just for completeness) +- make sure to create the datadir with 755 permissions + +* Mon Jan 27 2003 Lenz Grimmer + +- removed unused CC and CXX variables +- CFLAGS and CXXFLAGS should honor RPM_OPT_FLAGS + +* Fri Jan 24 2003 Lenz Grimmer + +- renamed package "MySQL" to "MySQL-server" +- fixed Copyright tag +- added mysql_waitpid to client subpackage (required for mysql-test-run) + +* Wed Nov 27 2002 Lenz Grimmer + +- moved init script from /etc/rc.d/init.d to /etc/init.d (the majority of + Linux distributions now support this scheme as proposed by the LSB either + directly or via a compatibility symlink) +- Use new "restart" init script action instead of starting and stopping + separately +- Be more flexible in activating the automatic bootup - use insserv (on + older SuSE versions) or chkconfig (Red Hat, newer SuSE versions and + others) to create the respective symlinks + +* Wed Sep 25 2002 Lenz Grimmer + +- MySQL-Max now requires MySQL >= 4.0 to avoid version mismatches + (mixing 3.23 and 4.0 packages) + +* Fri Aug 09 2002 Lenz Grimmer + +- Turn off OpenSSL in MySQL-Max for now until it works properly again +- enable RAID for the Max binary instead +- added compatibility link: safe_mysqld -> mysqld_safe to ease the + transition from 3.23 + +* Thu Jul 18 2002 Lenz Grimmer + +- Reworked the build steps a little bit: the Max binary is supposed + to include OpenSSL, which cannot be linked statically, thus trying + to statically link against a special glibc is futile anyway +- because of this, it is not required to make yet another build run + just to compile the shared libs (saves a lot of time) +- updated package description of the Max subpackage +- clean up the BuildRoot directory afterwards 
+ +* Mon Jul 15 2002 Lenz Grimmer + +- Updated Packager information +- Fixed the build options: the regular package is supposed to + include InnoDB and linked statically, while the Max package + should include BDB and SSL support + +* Fri May 03 2002 Lenz Grimmer + +- Use more RPM macros (e.g. infodir, mandir) to make the spec + file more portable +- reorganized the installation of documentation files: let RPM + take care of this +- reorganized the file list: actually install man pages along + with the binaries of the respective subpackage +- do not include libmysqld.a in the devel subpackage as well, if we + have a special "embedded" subpackage +- reworked the package descriptions + +* Mon Oct 8 2001 Monty + +- Added embedded server as a separate RPM + +* Fri Apr 13 2001 Monty + +- Added mysqld-max to the distribution + +* Tue Jan 2 2001 Monty + +- Added mysql-test to the bench package + +* Fri Aug 18 2000 Tim Smith + +- Added separate libmysql_r directory; now both a threaded + and non-threaded library is shipped. + +* Wed Sep 28 1999 David Axmark + +- Added the support-files/my-example.cnf to the docs directory. + +- Removed devel dependency on base since it is about client + development. + +* Wed Sep 8 1999 David Axmark + +- Cleaned up some for 3.23. + +* Thu Jul 1 1999 David Axmark + +- Added support for shared libraries in a separate sub + package. Original fix by David Fox (dsfox@cogsci.ucsd.edu) + +- The --enable-assembler switch is now automatically disables on + platforms there assembler code is unavailable. This should allow + building this RPM on non i386 systems. + +* Mon Feb 22 1999 David Axmark + +- Removed unportable cc switches from the spec file. The defaults can + now be overridden with environment variables. This feature is used + to compile the official RPM with optimal (but compiler version + specific) switches. + +- Removed the repetitive description parts for the sub rpms. Maybe add + again if RPM gets a multiline macro capability. 
+ +- Added support for a pt_BR translation. Translation contributed by + Jorge Godoy . + +* Wed Nov 4 1998 David Axmark + +- A lot of changes in all the rpm and install scripts. This may even + be a working RPM :-) + +* Sun Aug 16 1998 David Axmark + +- A developers changelog for MySQL is available in the source RPM. And + there is a history of major user visible changed in the Reference + Manual. Only RPM specific changes will be documented here. diff -Nru mariadb-5.5-5.5.39/support-files/mysql.spec.sh mariadb-5.5-5.5.40/support-files/mysql.spec.sh --- mariadb-5.5-5.5.39/support-files/mysql.spec.sh 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/support-files/mysql.spec.sh 2014-10-08 13:19:52.000000000 +0000 @@ -30,7 +30,7 @@ %global mysqld_group mysql %global mysqldatadir /var/lib/mysql -%global release 2 +%global release 1 # @@ -142,9 +142,9 @@ %else %if %(test -f /etc/oracle-release && echo 1 || echo 0) %define elver %(rpm -qf --qf '%%{version}\\n' /etc/oracle-release | sed -e 's/^\\([0-9]*\\).*/\\1/g') - %if "%elver" == "6" - %define distro_description Oracle Linux 6 - %define distro_releasetag el6 + %if "%elver" == "6" || "%elver" == "7" + %define distro_description Oracle Linux %elver + %define distro_releasetag el%elver %define distro_buildreq gcc-c++ ncurses-devel perl readline-devel time zlib-devel cmake libaio-devel %define distro_requires chkconfig coreutils grep procps shadow-utils net-tools %else @@ -251,6 +251,7 @@ Packager: MySQL Release Engineering Vendor: %{mysql_vendor} BuildRequires: %{distro_buildreq} +%{?el7:Patch0: mysql-5.5-libmysqlclient-symbols.patch} # Regression tests may take a long time, override the default to skip them %{!?runselftest:%global runselftest 1} @@ -443,7 +444,8 @@ ############################################################################## %prep %setup -T -a 0 -c -n %{src_dir} - +pushd %{src_dir} +%{?el7:%patch0 -p1} ############################################################################## %build @@ 
-559,6 +561,8 @@ install -d $RBR%{_mandir} install -d $RBR%{_sbindir} +mkdir -p $RBR%{_sysconfdir}/my.cnf.d + # Install all binaries ( cd $MBD/release @@ -1094,6 +1098,7 @@ %doc %attr(644, root, man) %{_mandir}/man1/resolveip.1* %ghost %config(noreplace,missingok) %{_sysconfdir}/my.cnf +%dir %{_sysconfdir}/my.cnf.d %attr(755, root, root) %{_bindir}/innochecksum %attr(755, root, root) %{_bindir}/my_print_defaults diff -Nru mariadb-5.5-5.5.39/tests/async_queries.c mariadb-5.5-5.5.40/tests/async_queries.c --- mariadb-5.5-5.5.39/tests/async_queries.c 2014-08-03 12:00:36.000000000 +0000 +++ mariadb-5.5-5.5.40/tests/async_queries.c 2014-10-08 13:19:51.000000000 +0000 @@ -21,16 +21,16 @@ API, and compare to running same queries with the normal blocking API. */ -#include -#include -#include -#include - #include #include #include #include +#include +#include +#include +#include + #include diff -Nru mariadb-5.5-5.5.39/VERSION mariadb-5.5-5.5.40/VERSION --- mariadb-5.5-5.5.39/VERSION 2014-08-03 12:00:34.000000000 +0000 +++ mariadb-5.5-5.5.40/VERSION 2014-10-08 13:19:52.000000000 +0000 @@ -1,4 +1,4 @@ MYSQL_VERSION_MAJOR=5 MYSQL_VERSION_MINOR=5 -MYSQL_VERSION_PATCH=39 +MYSQL_VERSION_PATCH=40 MYSQL_VERSION_EXTRA=