diff -Nru mlpack-3.1.0/CMakeLists.txt mlpack-3.1.1/CMakeLists.txt --- mlpack-3.1.0/CMakeLists.txt 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/CMakeLists.txt 2019-05-27 02:18:49.000000000 +0000 @@ -336,14 +336,17 @@ "${CMAKE_BINARY_DIR}/deps/${ENSMALLEN_INCLUDE_DIR}/include") message(STATUS "Successfully downloaded ensmallen into ${CMAKE_BINARY_DIR}/deps/${ENSMALLEN_INCLUDE_DIR}/") + + # Now we have to also ensure these header files get installed. + install(DIRECTORY ${CMAKE_BINARY_DIR}/deps/${ENSMALLEN_INCLUDE_DIR}/include/ensmallen_bits/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/ensmallen_bits) + install(FILES ${CMAKE_BINARY_DIR}/deps/${ENSMALLEN_INCLUDE_DIR}/include/ensmallen.hpp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) else () message(FATAL_ERROR "Problem unpacking ensmallen! Expected only one directory ensmallen-x.y.z/; found ${ENS_DIRECTORIES}. Try removing the directory ${CMAKE_BINARY_DIR}/deps and reconfiguring.") endif () else () list(GET ENS_DOWNLOAD_STATUS_LIST 1 ENS_DOWNLOAD_ERROR) message(FATAL_ERROR - "Could not download ensmallen! Error code ${ENS_DOWNLOAD_STATUS}: -${ENS_DOWNLOAD_ERROR}! Error log: ${ENS_DOWBLOAD_LOG}") + "Could not download ensmallen! Error code ${ENS_DOWNLOAD_STATUS}: ${ENS_DOWNLOAD_ERROR}! Error log: ${ENS_DOWNLOAD_LOG}") endif () else () # Release versions will have ensmallen packaged with the release so we can diff -Nru mlpack-3.1.0/debian/changelog mlpack-3.1.1/debian/changelog --- mlpack-3.1.0/debian/changelog 2019-05-02 10:59:24.000000000 +0000 +++ mlpack-3.1.1/debian/changelog 2019-06-06 12:42:17.000000000 +0000 @@ -1,3 +1,15 @@ +mlpack (3.1.1-1) unstable; urgency=medium + + * new upstream version + * add some missing run-time dependencies + * patch a grammatical error + * install mysterious formula.repository file + * account for upstream installing foo.1 in man/foo.1 instead of man/man1/foo.1 + * scour away stray .gitignore installed in docs + * set --doc-main-package whatever that is + + -- Barak A. Pearlmutter Thu, 06 Jun 2019 13:42:17 +0100 + mlpack (3.1.0-1) unstable; urgency=medium * fix debian/watch uscan support file diff -Nru mlpack-3.1.0/debian/control mlpack-3.1.1/debian/control --- mlpack-3.1.0/debian/control 2019-05-02 10:59:24.000000000 +0000 +++ mlpack-3.1.1/debian/control 2019-06-06 12:42:17.000000000 +0000 @@ -26,7 +26,8 @@ Multi-Arch: same Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, libmlpack3 (= ${binary:Version}), - libarmadillo-dev (>= 1:3.6.0), liblapack-dev, libxml2-dev, libboost-dev + libarmadillo-dev (>= 1:3.6.0), liblapack-dev, libxml2-dev, + libboost-dev, libboost-program-options-dev, libboost-serialization-dev, libboost-test-dev Suggests: mlpack-doc Description: intuitive, fast, scalable C++ machine learning library (development libs) This package contains the mlpack Library development files. 
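A quick illustration of why the CMakeLists.txt hunk above matters: when CMake auto-downloads ensmallen at build time, any downstream program compiled against libmlpack-dev transitively includes <ensmallen.hpp>, so those headers must land under the install prefix as well. The sketch below is illustrative only (not part of this diff) and assumes ensmallen's documented optimizer API:

@code
// Minimal downstream program; it compiles only if the ensmallen headers
// were installed alongside mlpack's own headers.
#include <ensmallen.hpp>

// Toy differentiable objective f(x) = (x - 3)^2, minimized at x = 3.
struct Quadratic
{
  double EvaluateWithGradient(const arma::mat& x, arma::mat& g)
  {
    g = 2.0 * (x - 3.0);
    return arma::accu(arma::square(x - 3.0));
  }
};

int main()
{
  Quadratic f;
  arma::mat x(1, 1, arma::fill::zeros);
  ens::L_BFGS opt;
  opt.Optimize(f, x);  // x should converge to roughly 3.0.
  return 0;
}
@endcode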
diff -Nru mlpack-3.1.0/debian/mlpack-bin.manpages mlpack-3.1.1/debian/mlpack-bin.manpages --- mlpack-3.1.0/debian/mlpack-bin.manpages 2019-04-30 13:55:24.000000000 +0000 +++ mlpack-3.1.1/debian/mlpack-bin.manpages 2019-06-06 12:42:17.000000000 +0000 @@ -1 +1 @@ -/usr/share/man/man1/* +/usr/share/man/*.* diff -Nru mlpack-3.1.0/debian/mlpack-doc.doc-base.mlpack-manual mlpack-3.1.1/debian/mlpack-doc.doc-base.mlpack-manual --- mlpack-3.1.0/debian/mlpack-doc.doc-base.mlpack-manual 2019-04-30 13:55:24.000000000 +0000 +++ mlpack-3.1.1/debian/mlpack-doc.doc-base.mlpack-manual 2019-06-06 12:42:17.000000000 +0000 @@ -7,7 +7,7 @@ Section: Programming/C++ Format: PDF -Files: /usr/share/doc/mlpack-doc/refman.pdf +Files: /usr/share/doc/libmlpack-dev/refman.pdf Format: HTML Index: /usr/share/doc/mlpack-doc/html/index.html diff -Nru mlpack-3.1.0/debian/patches/0001-build-Doxygen.patch mlpack-3.1.1/debian/patches/0001-build-Doxygen.patch --- mlpack-3.1.0/debian/patches/0001-build-Doxygen.patch 2019-05-02 10:59:24.000000000 +0000 +++ mlpack-3.1.1/debian/patches/0001-build-Doxygen.patch 2019-06-06 12:42:17.000000000 +0000 @@ -11,10 +11,10 @@ 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt -index 26dfd6e..9ea1272 100644 +index aebd9cc..8f304bc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt -@@ -569,7 +569,7 @@ if (DOXYGEN_FOUND) +@@ -572,7 +572,7 @@ if (DOXYGEN_FOUND) ) # Generate documentation. diff -Nru mlpack-3.1.0/debian/patches/0003-grammar.patch mlpack-3.1.1/debian/patches/0003-grammar.patch --- mlpack-3.1.0/debian/patches/0003-grammar.patch 1970-01-01 00:00:00.000000000 +0000 +++ mlpack-3.1.1/debian/patches/0003-grammar.patch 2019-06-06 12:42:17.000000000 +0000 @@ -0,0 +1,55 @@ +From: "Barak A. Pearlmutter" +Date: Thu, 6 Jun 2019 13:36:46 +0100 +Subject: grammar + +--- + src/mlpack/core/util/sfinae_utility.hpp | 8 ++++---- + src/mlpack/methods/kde/kde_main.cpp | 2 +- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/src/mlpack/core/util/sfinae_utility.hpp b/src/mlpack/core/util/sfinae_utility.hpp +index 5164af6..bbd69df 100644 +--- a/src/mlpack/core/util/sfinae_utility.hpp ++++ b/src/mlpack/core/util/sfinae_utility.hpp +@@ -220,7 +220,7 @@ struct NAME \ + #define SINGLE_ARG(...) __VA_ARGS__ + + /** +- * HAS_METHOD_FORM generates a template that allows to check at compile time ++ * HAS_METHOD_FORM generates a template that allows a compile time check for + * whether a given class has a method of the requested form. For example, for + * the following class + * +@@ -254,8 +254,8 @@ struct NAME \ + HAS_METHOD_FORM_BASE(SINGLE_ARG(METHOD), SINGLE_ARG(NAME), 7) + + /** +- * HAS_EXACT_METHOD_FORM generates a template that allows to check at compile +- * time whether a given class has a method of the requested form. For example, ++ * HAS_EXACT_METHOD_FORM generates a template that allows a compile time check ++ * whether a given class has a method of the requested form. For example, + * for the following class + * + * class A +@@ -291,7 +291,7 @@ struct NAME \ + * A version of HAS_METHOD_FORM() where the maximum number of extra arguments is + * set to the default of 7. + * +- * HAS_METHOD_FORM generates a template that allows to check at compile time ++ * HAS_METHOD_FORM generates a template that allows a compile time check as to + * whether a given class has a method of the requested form. 
For example, for + * the following class + * +diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp +index 74e6603..9b19bb8 100644 +--- a/src/mlpack/methods/kde/kde_main.cpp ++++ b/src/mlpack/methods/kde/kde_main.cpp +@@ -37,7 +37,7 @@ PROGRAM_INFO("Kernel Density Estimation", + "performance as it uses an approximate dual or single tree algorithm for " + "acceleration." + "\n\n" +- "Dual or single tree optimization allows to avoid lots of barely relevant " ++ "Dual or single tree optimization allows one to avoid lots of barely relevant " + "calculations (as kernel function values decrease with distance), so it is " + "an approximate computation. You can specify the maximum relative error " + "tolerance for each query value with " + PRINT_PARAM_STRING("rel_error") + diff -Nru mlpack-3.1.0/debian/patches/series mlpack-3.1.1/debian/patches/series --- mlpack-3.1.0/debian/patches/series 2019-05-02 10:59:24.000000000 +0000 +++ mlpack-3.1.1/debian/patches/series 2019-06-06 12:42:17.000000000 +0000 @@ -1,2 +1,3 @@ 0001-build-Doxygen.patch 0002-Doxygen-timestamp.patch +0003-grammar.patch diff -Nru mlpack-3.1.0/debian/rules mlpack-3.1.1/debian/rules --- mlpack-3.1.0/debian/rules 2019-05-02 10:59:24.000000000 +0000 +++ mlpack-3.1.1/debian/rules 2019-06-06 12:42:17.000000000 +0000 @@ -64,8 +64,25 @@ ln --verbose --symbolic --force /usr/share/javascript/jquery/jquery.js $$f; \ done +override_dh_installdocs: + dh_installdocs -pmlpack-doc --doc-main-package libmlpack-dev + dh_installdocs -Nmlpack-doc + +override_dh_installexamples: + dh_installexamples -pmlpack-doc --doc-main-package libmlpack-dev + dh_installexamples -Nmlpack-doc + @echo Search and destroy stray VCS support file + find debian/mlpack-doc -name .gitignore -ls -delete || true + override_dh_installchangelogs: dh_installchangelogs HISTORY.md +override_dh_installexamples: + dh_installexamples + @echo Remove empty directory + -rmdir debian/mlpack-doc/usr/share/doc/libmlpack-dev/examples/sample-ml-app/sample-ml-app/data/ + @echo Remove stray VCS file + -find debian/mlpack-doc -name .gitignore -print -delete + override_dh_compress: dh_compress -Xrefman.pdf -Xdoc/html/ diff -Nru mlpack-3.1.0/doc/examples/sample-ml-app/sample-ml-app/sample-ml-app.vcxproj mlpack-3.1.1/doc/examples/sample-ml-app/sample-ml-app/sample-ml-app.vcxproj --- mlpack-3.1.0/doc/examples/sample-ml-app/sample-ml-app/sample-ml-app.vcxproj 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/doc/examples/sample-ml-app/sample-ml-app/sample-ml-app.vcxproj 2019-05-27 02:18:49.000000000 +0000 @@ -104,16 +104,16 @@ true _DEBUG;_CONSOLE;%(PreprocessorDefinitions) false - C:\boost\boost_1_66_0;C:\mlpack\armadillo-8.500.1\include;C:\mlpack\mlpack-3.0.4\build\include;%(AdditionalIncludeDirectories) + C:\boost\boost_1_66_0;C:\mlpack\armadillo-8.500.1\include;C:\mlpack\mlpack-3.1.1\build\include;%(AdditionalIncludeDirectories) Console true - C:\mlpack\mlpack-3.0.4\build\Debug\mlpack.lib;C:\boost\boost_1_66_0\lib64-msvc-14.1\libboost_serialization-vc141-mt-gd-x64-1_66.lib;C:\boost\boost_1_66_0\lib64-msvc-14.1\libboost_program_options-vc141-mt-gd-x64-1_66.lib;%(AdditionalDependencies) + C:\mlpack\mlpack-3.1.1\build\Debug\mlpack.lib;C:\boost\boost_1_66_0\lib64-msvc-14.1\libboost_serialization-vc141-mt-gd-x64-1_66.lib;C:\boost\boost_1_66_0\lib64-msvc-14.1\libboost_program_options-vc141-mt-gd-x64-1_66.lib;%(AdditionalDependencies) - xcopy /y "C:\mlpack\mlpack-3.0.4\build\Debug\mlpack.dll" $(OutDir) -xcopy /y 
"C:\mlpack\mlpack-3.0.4\packages\OpenBLAS.0.2.14.1\lib\native\bin\x64\*.dll" $(OutDir) + xcopy /y "C:\mlpack\mlpack-3.1.1\build\Debug\mlpack.dll" $(OutDir) +xcopy /y "C:\mlpack\mlpack-3.1.1\packages\OpenBLAS.0.2.14.1\lib\native\bin\x64\*.dll" $(OutDir) xcopy /y "$(ProjectDir)..\..\..\..\src\mlpack\tests\data\german.csv" "$(ProjectDir)data\german.csv*" diff -Nru mlpack-3.1.0/doc/guide/build.hpp mlpack-3.1.1/doc/guide/build.hpp --- mlpack-3.1.0/doc/guide/build.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/doc/guide/build.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -29,7 +29,7 @@ is based on older versions). You can download the latest mlpack release from here: -mlpack-3.0.4 +mlpack-3.1.1 @section build_simple Simple Linux build instructions @@ -37,9 +37,9 @@ below directly to build and install mlpack. @code -$ wget https://www.mlpack.org/files/mlpack-3.0.4.tar.gz -$ tar -xvzpf mlpack-3.0.4.tar.gz -$ mkdir mlpack-3.0.4/build && cd mlpack-3.0.4/build +$ wget https://www.mlpack.org/files/mlpack-3.1.1.tar.gz +$ tar -xvzpf mlpack-3.1.1.tar.gz +$ mkdir mlpack-3.1.1/build && cd mlpack-3.1.1/build $ cmake ../ $ make -j4 # The -j is the number of cores you want to use for a build. $ sudo make install @@ -64,8 +64,8 @@ First we should unpack the mlpack source and create a build directory. @code -$ tar -xvzpf mlpack-3.0.4.tar.gz -$ cd mlpack-3.0.4 +$ tar -xvzpf mlpack-3.1.1.tar.gz +$ cd mlpack-3.1.1 $ mkdir build @endcode diff -Nru mlpack-3.1.0/doc/guide/build_windows.hpp mlpack-3.1.1/doc/guide/build_windows.hpp --- mlpack-3.1.0/doc/guide/build_windows.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/doc/guide/build_windows.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -8,16 +8,10 @@ @section build_windows_intro Introduction -This document discusses how to build mlpack for Windows from source, so you can -later create your own C++ applications. There are a couple of other tutorials -for Windows, but they may be out of date: - - * Github wiki Windows Build page
- * Keon's tutorial for mlpack 2.0.3
- * Kirizaki's tutorial for mlpack 2
- -Those guides could be used in addition to this tutorial. Furthermore, mlpack is -now available for Windows installation through vcpkg: +This tutorial will show you how to build mlpack for Windows from source, so you can +later create your own C++ applications. Before you try building mlpack, you may +want to install mlpack using vcpkg for Windows. If you don't want to install +using vcpkg, skip this section and continue with the build tutorial. - Install Git (https://git-scm.com/downloads and execute setup) @@ -25,7 +19,7 @@ - Install vcpkg (https://github.com/Microsoft/vcpkg and execute setup) -- To install only mlpack library: +- To install the mlpack library only: @code PS> .\vcpkg install mlpack:x64-windows @@ -41,12 +35,12 @@ (via preprocessor directives) and used in your project without additional configuration. -@section build_windows_env Environment +@section build_windows_env Build Environment This tutorial has been designed and tested using: - Windows 10 - Visual Studio 2017 (toolset v141) -- mlpack-3.0.4 +- mlpack - OpenBLAS.0.2.14.1 - boost_1_66_0-msvc-14.1-64 - armadillo-8.500.1 @@ -64,10 +58,10 @@ @section build_windows_instructions Windows build instructions -- Unzip mlpack to "C:\mlpack\mlpack-3.0.4" +- Unzip mlpack to "C:\mlpack\mlpack" - Open Visual Studio and select: File > New > Project from Existing Code - Type of project: Visual C++ - - Project location: "C:\mlpack\mlpack-3.0.4" + - Project location: "C:\mlpack\mlpack" - Project name: mlpack - Finish - We will use this Visual Studio project to get the OpenBLAS dependency in the next section @@ -86,72 +80,86 @@ You can either get Boost via NuGet or you can download the prebuilt Windows binaries separately. This tutorial follows the second approach for simplicity. -- Download the "Prebuilt Windows binaries" of the Boost library ("boost_1_66_0-msvc-14.1-64") from +- Download the "Prebuilt Windows binaries" of the Boost library ("boost_1_66_0-msvc-14.1-64") from Sourceforge @note Make sure you download the MSVC version that matches your Visual Studio -- Install or unzip to "C:\boost\boost_1_66_0" +- Install or unzip to "C:\boost\" Armadillo Dependency -- Download "Armadillo" (armadillo-8.500.1.tar.xz) from Sourceforge -- Unzip to "C:\mlpack\armadillo-8.500.1" -- Create a "build" directory into "C:\mlpack\armadillo-8.500.1\" -- Open the Command Prompt and navigate to "C:\mlpack\armadillo-8.500.1\build" -- Run cmake: +- Download the newest version of Armadillo from Sourceforge +- Unzip to "C:\mlpack\armadillo" +- Create a "build" directory into "C:\mlpack\armadillo\" +- Open the Command Prompt and navigate to "C:\mlpack\armadillo\build" +- Run cmake: @code -cmake -G "Visual Studio 15 2017 Win64" -DBLAS_LIBRARY:FILEPATH="C:/mlpack/mlpack-3.0.4/packages/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a" -DLAPACK_LIBRARY:FILEPATH="C:/mlpack/mlpack-3.0.4/packages/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a" -DCMAKE_PREFIX:FILEPATH="C:/mlpack/armadillo" .. +cmake -G "Visual Studio 15 2017 Win64" -DBLAS_LIBRARY:FILEPATH="C:/mlpack/mlpack/packages/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a" -DLAPACK_LIBRARY:FILEPATH="C:/mlpack/mlpack/packages/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a" .. @endcode @note If you are using different directory paths, a different configuration (e.g. Release) or a different VS version, update the cmake command accordingly. 
-- Once it has successfully finished, open "C:\mlpack\armadillo-8.500.1\build\armadillo.sln" +- Once it has successfully finished, open "C:\mlpack\armadillo\build\armadillo.sln" - Build > Build Solution - Once it has successfully finished, close Visual Studio @section build_windows_mlpack Building mlpack -- Create a "build" directory into "C:\mlpack\mlpack-3.0.4\" -- Use either the CMake GUI or the CMake command line to configure Armadillo. +- Create a "build" directory into "C:\mlpack\mlpack\" +- You can generate the project using either cmake via command line or GUI. If you prefer to use GUI, refer to the \ref build_windows_appendix "appendix" +- To use the CMake command line prompt, open the Command Prompt and navigate to "C:\mlpack\mlpack\build" +- Run cmake: + +@code +cmake -G "Visual Studio 15 2017 Win64" -DBLAS_LIBRARY:FILEPATH="C:/mlpack/mlpack/packages/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a" -DLAPACK_LIBRARY:FILEPATH="C:/mlpack/mlpack/packages/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a" -DARMADILLO_INCLUDE_DIR="C:/mlpack/armadillo/include" -DARMADILLO_LIBRARY:FILEPATH="C:/mlpack/armadillo/build/Debug/armadillo.lib" -DBOOST_INCLUDEDIR:PATH="C:/boost/" -DBOOST_LIBRARYDIR:PATH="C:/boost/lib64-msvc-14.1" -DDEBUG=OFF -DPROFILE=OFF .. +@endcode + +@note cmake will attempt to automatically download the ensmallen dependency. If for some reason cmake can't download the dependency, you will need to manually download ensmallen from http://ensmallen.org/ and extract it to "C:\mlpack\mlpack\deps\". Then, specify the path to ensmallen using the flag: -DENSMALLEN_INCLUDE_DIR=C:/mlpack/mlpack/deps/ensmallen/include + +- Once CMake configuration has successfully finished, open "C:\mlpack\mlpack\build\mlpack.sln" +- Build > Build Solution (this may be by default in Debug mode) +- Once it has sucessfully finished, you will find the library files you need in: "C:\mlpack\mlpack\build\Debug" (or "C:\mlpack\mlpack\build\Release" if you changed to Release mode) + +You are ready to create your first application, take a look at the @ref sample_ml_app "Sample C++ ML App" + +@section build_windows_appendix Appendix + +If you prefer to use cmake GUI, follow these instructions: + - To use the CMake GUI, open "CMake". 
- - For "Where is the source code:" set `C:\mlpack\mlpack-3.0.4\` - - For "Where to build the binaries:" set `C:\mlpack\mlpack-3.0.4\build` + - For "Where is the source code:" set `C:\mlpack\mlpack\` + - For "Where to build the binaries:" set `C:\mlpack\mlpack\build` - Click `Configure` - If there is an error and Armadillo is not found, try "Add Entry" with the following variables and reconfigure: - - Name: `ARMADILLO_INCLUDE_DIR`; type `PATH`; value `C:/mlpack/armadillo-8.500.1/include/` - - Name: `ARMADILLO_LIBRARY`; type `FILEPATH`; value `C:/mlpack/armadillo-8.500.1/build/Debug/armadillo.lib` - - Name: `BLAS_LIBRARY`; type `FILEPATH`; value `C:/mlpack/mlpack-3.0.4/packages/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a` - - Name: `LAPACK_LIBRARY`; type `FILEPATH`; value `C:/mlpack/mlpack-3.0.4/packages/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a` + - Name: `ARMADILLO_INCLUDE_DIR`; type `PATH`; value `C:/mlpack/armadillo/include/` + - Name: `ARMADILLO_LIBRARY`; type `FILEPATH`; value `C:/mlpack/armadillo/build/Debug/armadillo.lib` + - Name: `BLAS_LIBRARY`; type `FILEPATH`; value `C:/mlpack/mlpack/packages/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a` + - Name: `LAPACK_LIBRARY`; type `FILEPATH`; value `C:/mlpack/mlpack/packages/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a` - If there is an error and Boost is not found, try "Add Entry" with the following variables and reconfigure: - - Name: `BOOST_INCLUDEDIR`; type `PATH`; value `C:/boost/boost_1_66_0/` - - Name: `BOOST_LIBRARYDIR`; type `PATH`; value `C:/boost/boost_1_66_0/lib64-msvc-14.1` + - Name: `BOOST_INCLUDEDIR`; type `PATH`; value `C:/boost/` + - Name: `BOOST_LIBRARYDIR`; type `PATH`; value `C:/boost/lib64-msvc-14.1` - If Boost is still not found, try adding the following variables and reconfigure: - - Name: `Boost_INCLUDE_DIR`; type `PATH`; value `C:/boost/boost_1_66_0/` - - Name: `Boost_PROGRAM_OPTIONS_LIBRARY_DEBUG`; type `FILEPATH`; value should be `C:/boost/boost_1_66_0/lib64-msvc-14.1/boost_program_options-vc141-mt-gd-x64-1_66.lib` - - Name: `Boost_PROGRAM_OPTIONS_LIBRARY_RELEASE`; type `FILEPATH`; value should be `C:/boost/boost_1_66_0/lib64-msvc-14.1/boost_program_options-vc141-mt-x64-1_66.lib` - - Name: `Boost_SERIALIZATION_LIBRARY_DEBUG`; type `FILEPATH`; value should be `C:/boost/boost_1_66_0/lib64-msvc-14.1/boost_serialization-vc141-mt-gd-x64-1_66.lib` - - Name: `Boost_SERIALIZATION_LIBRARY_RELEASE`; type `FILEPATH`; value should be `C:/boost/boost_1_66_0/lib64-msvc-14.1/boost_program_options-vc141-mt-x64-1_66.lib` - - Name: `Boost_UNIT_TEST_FRAMEWORK_LIBRARY_DEBUG`; type `FILEPATH`; value should be `C:/boost/boost_1_66_0/lib64-msvc-14.1/boost_unit_test_framework-vc141-mt-gd-x64-1_66.lib` - - Name: `Boost_UNIT_TEST_FRAMEWORK_LIBRARY_RELEASE`; type `FILEPATH`; value should be `C:/boost/boost_1_66_0/lib64-msvc-14.1/boost_unit_test_framework-vc141-mt-x64-1_66.lib` + - Name: `Boost_INCLUDE_DIR`; type `PATH`; value `C:/boost/` + - Name: `Boost_PROGRAM_OPTIONS_LIBRARY_DEBUG`; type `FILEPATH`; value should be `C:/boost/lib64-msvc-14.1/boost_program_options-vc141-mt-gd-x64-1_66.lib` + - Name: `Boost_PROGRAM_OPTIONS_LIBRARY_RELEASE`; type `FILEPATH`; value should be `C:/boost/lib64-msvc-14.1/boost_program_options-vc141-mt-x64-1_66.lib` + - Name: `Boost_SERIALIZATION_LIBRARY_DEBUG`; type `FILEPATH`; value should be `C:/boost/lib64-msvc-14.1/boost_serialization-vc141-mt-gd-x64-1_66.lib` + - Name: `Boost_SERIALIZATION_LIBRARY_RELEASE`; type `FILEPATH`; value should be 
`C:/boost/lib64-msvc-14.1/boost_program_options-vc141-mt-x64-1_66.lib` + - Name: `Boost_UNIT_TEST_FRAMEWORK_LIBRARY_DEBUG`; type `FILEPATH`; value should be `C:/boost/lib64-msvc-14.1/boost_unit_test_framework-vc141-mt-gd-x64-1_66.lib` + - Name: `Boost_UNIT_TEST_FRAMEWORK_LIBRARY_RELEASE`; type `FILEPATH`; value should be `C:/boost/lib64-msvc-14.1/boost_unit_test_framework-vc141-mt-x64-1_66.lib` - Once CMake has configured successfully, hit "Generate" to create the `.sln` file. - - To use the CMake command line prompt: - - Open the Command Prompt and navigate to "C:\mlpack\mlpack-3.0.4\build" - - Run cmake: -@code -cmake -G "Visual Studio 15 2017 Win64" -DBLAS_LIBRARY:FILEPATH="C:/mlpack/mlpack-3.0.4/packages/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a" -DLAPACK_LIBRARY:FILEPATH="C:/mlpack/mlpack-3.0.4/packages/OpenBLAS.0.2.14.1/lib/native/lib/x64/libopenblas.dll.a" -DARMADILLO_INCLUDE_DIR="C:/mlpack/armadillo-8.500.1/include" -DARMADILLO_LIBRARY:FILEPATH="C:/mlpack/armadillo-8.500.1/build/Debug/armadillo.lib" -DBOOST_INCLUDEDIR:PATH="C:/boost/boost_1_66_0/" -DBOOST_LIBRARYDIR:PATH="C:/boost/boost_1_66_0/lib64-msvc-14.1" -DDEBUG=OFF -DPROFILE=OFF .. -@endcode +@section build_windows_additional_information Additional Information -- Once CMake configuration has successfully finished, open "C:\mlpack\mlpack-3.0.4\build\mlpack.sln" -- Build > Build Solution (this may be by default in Debug mode) -- Once it has sucessfully finished, you will find the library files you need in: "C:\mlpack\mlpack-3.0.4\build\Debug" (or "C:\mlpack\mlpack-3.0.4\build\Release" if you changed to Release mode) +If you are facing issues during the build process of mlpack, you may take a look at other third-party tutorials for Windows, but they may be out of date: -You are ready to create your first application, take a look at the @ref sample_ml_app "Sample C++ ML App" + * Github wiki Windows Build page
+ * Keon's tutorial for mlpack 2.0.3
+ * Kirizaki's tutorial for mlpack 2
*/ diff -Nru mlpack-3.1.0/doc/guide/python_quickstart.hpp mlpack-3.1.1/doc/guide/python_quickstart.hpp --- mlpack-3.1.0/doc/guide/python_quickstart.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/doc/guide/python_quickstart.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -31,9 +31,9 @@ @code{.sh} sudo apt-get install libboost-all-dev g++ cmake libarmadillo-dev python-pip wget sudo pip install cython setuptools distutils numpy pandas -wget https://www.mlpack.org/files/mlpack-3.0.4.tar.gz -tar -xvzpf mlpack-3.0.4.tar.gz -mkdir -p mlpack-3.0.4/build/ && cd mlpack-3.0.4/build/ +wget https://www.mlpack.org/files/mlpack-3.1.1.tar.gz +tar -xvzpf mlpack-3.1.1.tar.gz +mkdir -p mlpack-3.1.1/build/ && cd mlpack-3.1.1/build/ cmake ../ && make -j4 && sudo make install @endcode @@ -99,7 +99,8 @@ # Now print the accuracy. The 'probabilities' output could also be used # to generate an ROC curve. -correct = np.sum(output['predictions'] == test_labels) +correct = np.sum( + output['predictions'] == np.reshape(test_labels, (test_labels.shape[0],))) print(str(correct) + ' correct out of ' + str(len(test_labels)) + ' (' + str(100 * float(correct) / float(len(test_labels))) + '%).') @endcode diff -Nru mlpack-3.1.0/doc/guide/sample_ml_app.hpp mlpack-3.1.1/doc/guide/sample_ml_app.hpp --- mlpack-3.1.0/doc/guide/sample_ml_app.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/doc/guide/sample_ml_app.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -29,18 +29,18 @@ @code - C:\boost\boost_1_66_0 - C:\mlpack\armadillo-8.500.1\include - - C:\mlpack\mlpack-3.0.4\build\include + - C:\mlpack\mlpack-3.1.1\build\include @endcode - Under Linker > Input > Additional Dependencies add: @code - - C:\mlpack\mlpack-3.0.4\build\Debug\mlpack.lib + - C:\mlpack\mlpack-3.1.1\build\Debug\mlpack.lib - C:\boost\boost_1_66_0\lib64-msvc-14.1\libboost_serialization-vc141-mt-gd-x64-1_66.lib - C:\boost\boost_1_66_0\lib64-msvc-14.1\libboost_program_options-vc141-mt-gd-x64-1_66.lib @endcode - Under Build Events > Post-Build Event > Command Line add: @code - - xcopy /y "C:\mlpack\mlpack-3.0.4\build\Debug\mlpack.dll" $(OutDir) - - xcopy /y "C:\mlpack\mlpack-3.0.4\packages\OpenBLAS.0.2.14.1\lib\native\bin\x64\*.dll" $(OutDir) + - xcopy /y "C:\mlpack\mlpack-3.1.1\build\Debug\mlpack.dll" $(OutDir) + - xcopy /y "C:\mlpack\mlpack-3.1.1\packages\OpenBLAS.0.2.14.1\lib\native\bin\x64\*.dll" $(OutDir) @endcode @note Recent versions of Visual Studio set "Conformance Mode" enabled by default. This causes some issues with diff -Nru mlpack-3.1.0/HISTORY.md mlpack-3.1.1/HISTORY.md --- mlpack-3.1.0/HISTORY.md 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/HISTORY.md 2019-05-27 02:18:49.000000000 +0000 @@ -1,3 +1,29 @@ +### mlpack 3.1.1 +###### 2019-05-26 + * Fix random forest bug for numerical-only data (#1887). + + * Significant speedups for random forest (#1887). + + * Random forest now has `minimum_gain_split` and `subspace_dim` parameters + (#1887). + + * Decision tree parameter `print_training_error` deprecated in favor of + `print_training_accuracy`. + + * `output` option changed to `predictions` for adaboost and perceptron + binding. Old options are now deprecated and will be preserved until mlpack + 4.0.0 (#1882). + + * Concatenated ReLU layer (#1843). + + * Accelerate NormalizeLabels function using hashing instead of linear search + (see `src/mlpack/core/data/normalize_labels_impl.hpp`) (#1780). + + * Add `ConfusionMatrix()` function for checking performance of classifiers + (#1798). 
+ + * Install ensmallen headers when it is downloaded during build (#1900). + ### mlpack 3.1.0 ###### 2019-04-25 * Add DiagonalGaussianDistribution and DiagonalGMM classes to speed up the diff -Nru mlpack-3.1.0/README.md mlpack-3.1.1/README.md --- mlpack-3.1.0/README.md 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/README.md 2019-05-27 02:18:49.000000000 +0000 @@ -23,7 +23,7 @@

Download: - current stable version (3.0.4) + current stable version (3.1.0)

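The HISTORY.md entry above credits #1780 with replacing the linear search in NormalizeLabels() by a hash map; the actual hunk appears further down in this diff (normalize_labels_impl.hpp). The payoff is that mapping n points over k distinct labels drops from O(nk) comparisons to an expected O(n) pass. A condensed sketch of the idea, with hypothetical names (not mlpack's own API):

@code
#include <cstddef>
#include <unordered_map>
#include <vector>

// Assign each distinct label a contiguous id 0..k-1 in a single pass,
// instead of scanning the list of already-seen labels for every point.
template<typename T>
std::vector<std::size_t> NormalizeLabelsSketch(const std::vector<T>& in,
                                               std::vector<T>& mapping)
{
  std::unordered_map<T, std::size_t> ids;  // label -> contiguous id
  std::vector<std::size_t> out(in.size());
  for (std::size_t i = 0; i < in.size(); ++i)
  {
    // emplace() is a no-op if the label was seen before; ids.size() is
    // evaluated before insertion, so it is the next unused id.
    out[i] = ids.emplace(in[i], ids.size()).first->second;
  }
  mapping.resize(ids.size());
  for (const auto& kv : ids)  // invert the hash map into the mapping array
    mapping[kv.second] = kv.first;
  return out;
}
@endcode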
diff -Nru mlpack-3.1.0/src/mlpack/bindings/markdown/binding_info.cpp mlpack-3.1.1/src/mlpack/bindings/markdown/binding_info.cpp --- mlpack-3.1.0/src/mlpack/bindings/markdown/binding_info.cpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/bindings/markdown/binding_info.cpp 2019-05-27 02:18:49.000000000 +0000 @@ -28,9 +28,8 @@ return GetSingleton().map.at(bindingName); } -/** - * Register a ProgramDoc object with the given bindingName. - */ + +//! Register a ProgramDoc object with the given bindingName. void BindingInfo::RegisterProgramDoc(const std::string& bindingName, const util::ProgramDoc& programDoc) { diff -Nru mlpack-3.1.0/src/mlpack/bindings/markdown/binding_info.hpp mlpack-3.1.1/src/mlpack/bindings/markdown/binding_info.hpp --- mlpack-3.1.0/src/mlpack/bindings/markdown/binding_info.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/bindings/markdown/binding_info.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -1,5 +1,5 @@ /** - * @file binding_name.hpp + * @file binding_info.hpp * @author Ryan Curtin * * This file defines the BindingInfo singleton class that is used specifically @@ -30,14 +30,10 @@ class BindingInfo { public: - /** - * Return a ProgramDoc object for a given bindingName. - */ + //! Return a ProgramDoc object for a given bindingName. static util::ProgramDoc& GetProgramDoc(const std::string& bindingName); - /** - * Register a ProgramDoc object with the given bindingName. - */ + //! Register a ProgramDoc object with the given bindingName. static void RegisterProgramDoc(const std::string& bindingName, const util::ProgramDoc& programDoc); diff -Nru mlpack-3.1.0/src/mlpack/bindings/markdown/default_param.hpp mlpack-3.1.1/src/mlpack/bindings/markdown/default_param.hpp --- mlpack-3.1.0/src/mlpack/bindings/markdown/default_param.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/bindings/markdown/default_param.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -10,6 +10,7 @@ * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ + #ifndef MLPACK_BINDINGS_MARKDOWN_DEFAULT_PARAM_HPP #define MLPACK_BINDINGS_MARKDOWN_DEFAULT_PARAM_HPP diff -Nru mlpack-3.1.0/src/mlpack/bindings/markdown/generate_markdown.binding.cpp.in mlpack-3.1.1/src/mlpack/bindings/markdown/generate_markdown.binding.cpp.in --- mlpack-3.1.0/src/mlpack/bindings/markdown/generate_markdown.binding.cpp.in 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/bindings/markdown/generate_markdown.binding.cpp.in 2019-05-27 02:18:49.000000000 +0000 @@ -5,7 +5,13 @@ * Print Markdown for a specific binding. This provides two utility * methods---one that prints info for a table of contents, and one that prints * the Markdown bindings themselves. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
*/ + #define BINDING_NAME "${BINDING}" #include diff -Nru mlpack-3.1.0/src/mlpack/bindings/markdown/generate_markdown.binding.hpp.in mlpack-3.1.1/src/mlpack/bindings/markdown/generate_markdown.binding.hpp.in --- mlpack-3.1.0/src/mlpack/bindings/markdown/generate_markdown.binding.hpp.in 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/bindings/markdown/generate_markdown.binding.hpp.in 2019-05-27 02:18:49.000000000 +0000 @@ -5,6 +5,11 @@ * Print Markdown for a specific binding. This provides two utility * methods---one that prints info for a table of contents, and one that prints * the Markdown bindings themselves. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ #ifndef MLPACK_BINDINGS_MARKDOWN_GENERATE_MARKDOWN_${BINDING}_HPP #define MLPACK_BINDINGS_MARKDOWN_GENERATE_MARKDOWN_${BINDING}_HPP diff -Nru mlpack-3.1.0/src/mlpack/bindings/markdown/print_doc_functions_impl.hpp mlpack-3.1.1/src/mlpack/bindings/markdown/print_doc_functions_impl.hpp --- mlpack-3.1.0/src/mlpack/bindings/markdown/print_doc_functions_impl.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/bindings/markdown/print_doc_functions_impl.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -2,7 +2,7 @@ * @file print_doc_functions_impl.hpp * @author Ryan Curtin * - * Call out to different printing functionality for different binding languages. + * Calls out to different printing functionality for different binding languages. * If a new binding is added, this code must be modified. * * mlpack is free software; you may redistribute it and/or modify it under the @@ -67,7 +67,7 @@ } /** - * Print any imports that need to be done before using the binding. + * Print any import that needs to be done before using the binding. */ inline std::string PrintImport(const std::string& bindingName) { @@ -129,7 +129,7 @@ } /** - * Print details about the different types for a language. + * Print details about the different types of a language. */ inline std::string PrintTypeDocs() { @@ -145,7 +145,7 @@ oss << "mlpack bindings for " << PrintLanguage(BindingInfo::Language()) << " take and return a restricted set of types, for simplicity. These " << "include primitive types, matrix/vector types, categorical matrix " - << "types, and model types. Each type is detailed below." << std::endl; + << "types, and model types. Each type is detailed below." << std::endl; oss << std::endl; // Create fake ParamData to pass around. diff -Nru mlpack-3.1.0/src/mlpack/bindings/markdown/print_docs.cpp mlpack-3.1.1/src/mlpack/bindings/markdown/print_docs.cpp --- mlpack-3.1.0/src/mlpack/bindings/markdown/print_docs.cpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/bindings/markdown/print_docs.cpp 2019-05-27 02:18:49.000000000 +0000 @@ -62,7 +62,7 @@ } cout << endl; - // Next we want to print the logical name of the binding (that's known by + // Next, print the logical name of the binding (that's known by // ProgramInfo). 
cout << "#### " << programDoc.programName << endl; cout << endl; diff -Nru mlpack-3.1.0/src/mlpack/bindings/markdown/print_type_doc.hpp mlpack-3.1.1/src/mlpack/bindings/markdown/print_type_doc.hpp --- mlpack-3.1.0/src/mlpack/bindings/markdown/print_type_doc.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/bindings/markdown/print_type_doc.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -40,7 +40,7 @@ else { throw std::invalid_argument("PrintTypeDoc(): unknown " - "BindingInfo::Language()" + BindingInfo::Language() + "!"); + "BindingInfo::Language() " + BindingInfo::Language() + "!"); } } diff -Nru mlpack-3.1.0/src/mlpack/bindings/markdown/res/change_language.js mlpack-3.1.1/src/mlpack/bindings/markdown/res/change_language.js --- mlpack-3.1.0/src/mlpack/bindings/markdown/res/change_language.js 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/bindings/markdown/res/change_language.js 2019-05-27 02:18:49.000000000 +0000 @@ -1,7 +1,15 @@ /** + * @file change_language.js + * @author Ryan Curtin + * * A utility function to change the language displayed on the page. This * function should be called whenever the language is changed from the * drop-down. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ function changeLanguage() { diff -Nru mlpack-3.1.0/src/mlpack/bindings/markdown/res/formatting.css mlpack-3.1.1/src/mlpack/bindings/markdown/res/formatting.css --- mlpack-3.1.0/src/mlpack/bindings/markdown/res/formatting.css 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/bindings/markdown/res/formatting.css 2019-05-27 02:18:49.000000000 +0000 @@ -1,3 +1,12 @@ +/* + * @file formatting.css + * @author Ryan Curtin + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
+ */ body { background: #000000; diff -Nru mlpack-3.1.0/src/mlpack/bindings/python/print_input_processing.hpp mlpack-3.1.1/src/mlpack/bindings/python/print_input_processing.hpp --- mlpack-3.1.0/src/mlpack/bindings/python/print_input_processing.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/bindings/python/print_input_processing.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -323,44 +323,43 @@ { if (T::is_row || T::is_col) { - std::cout << prefix << " " << d.name << "_tuple = to_matrix(" - << d.name << ", dtype=" << GetNumpyType() + std::cout << prefix << d.name << "_tuple = to_matrix(" << d.name + << ", dtype=" << GetNumpyType() << ", copy=CLI.HasParam('copy_all_inputs'))" << std::endl; - std::cout << prefix << " if len(" << d.name << "_tuple[0].shape) > 1:" + std::cout << prefix << "if len(" << d.name << "_tuple[0].shape) > 1:" << std::endl; - std::cout << prefix << " if " << d.name << "_tuple[0]" - << ".shape[0] == 1 or " << d.name << "_tuple[0].shape[1] == 1:" - << std::endl; - std::cout << prefix << " " << d.name << "_tuple[0].shape = (" + std::cout << prefix << " if " << d.name << "_tuple[0].shape[0] == 1 or " + << d.name << "_tuple[0].shape[1] == 1:" << std::endl; + std::cout << prefix << " " << d.name << "_tuple[0].shape = (" << d.name << "_tuple[0].size,)" << std::endl; - std::cout << prefix << " " << d.name << "_mat = arma_numpy.numpy_to_" + std::cout << prefix << d.name << "_mat = arma_numpy.numpy_to_" << GetArmaType() << "_" << GetNumpyTypeChar() << "(" << d.name << "_tuple[0], " << d.name << "_tuple[1])" << std::endl; - std::cout << prefix << " SetParam[" << GetCythonType(d) + std::cout << prefix << "SetParam[" << GetCythonType(d) << "]( '" << d.name << "', dereference(" << d.name << "_mat))"<< std::endl; - std::cout << prefix << " CLI.SetPassed( '" << d.name - << "')" << std::endl; - std::cout << prefix << " del " << d.name << "_mat" << std::endl; + std::cout << prefix << "CLI.SetPassed( '" << d.name << "')" + << std::endl; + std::cout << prefix << "del " << d.name << "_mat" << std::endl; } else { - std::cout << prefix << " " << d.name << "_tuple = to_matrix(" - << d.name << ", dtype=" << GetNumpyType() + std::cout << prefix << d.name << "_tuple = to_matrix(" << d.name + << ", dtype=" << GetNumpyType() << ", copy=CLI.HasParam('copy_all_inputs'))" << std::endl; - std::cout << prefix << " if len(" << d.name << "_tuple[0].shape) > 2:" + std::cout << prefix << "if len(" << d.name << "_tuple[0].shape) > 2:" << std::endl; - std::cout << prefix << " " << d.name << "_tuple[0].shape = (" << d.name + std::cout << prefix << " " << d.name << "_tuple[0].shape = (" << d.name << "_tuple[0].shape[0], 1)" << std::endl; - std::cout << prefix << " " << d.name << "_mat = arma_numpy.numpy_to_" + std::cout << prefix << d.name << "_mat = arma_numpy.numpy_to_" << GetArmaType() << "_" << GetNumpyTypeChar() << "(" << d.name << "_tuple[0], " << d.name << "_tuple[1])" << std::endl; - std::cout << prefix << " SetParam[" << GetCythonType(d) - << "]( '" << d.name << "', dereference(" - << d.name << "_mat))"<< std::endl; - std::cout << prefix << " CLI.SetPassed( '" << d.name - << "')" << std::endl; - std::cout << prefix << " del " << d.name << "_mat" << std::endl; + std::cout << prefix << "SetParam[" << GetCythonType(d) + << "]( '" << d.name << "', dereference(" << d.name + << "_mat))" << std::endl; + std::cout << prefix << "CLI.SetPassed( '" << d.name << "')" + << std::endl; + std::cout << prefix << "del " << d.name << "_mat" << std::endl; } } std::cout << std::endl; diff -Nru 
mlpack-3.1.0/src/mlpack/bindings/python/tests/test_python_binding_main.cpp mlpack-3.1.1/src/mlpack/bindings/python/tests/test_python_binding_main.cpp --- mlpack-3.1.0/src/mlpack/bindings/python/tests/test_python_binding_main.cpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/bindings/python/tests/test_python_binding_main.cpp 2019-05-27 02:18:49.000000000 +0000 @@ -27,6 +27,10 @@ PARAM_STRING_IN_REQ("string_in", "Input string, must be 'hello'.", "s"); PARAM_INT_IN_REQ("int_in", "Input int, must be 12.", "i"); PARAM_DOUBLE_IN_REQ("double_in", "Input double, must be 4.0.", "d"); +PARAM_MATRIX_IN_REQ("mat_req_in", "Input matrix, must be 1x1 and contain '1'.", + ""); +PARAM_COL_IN_REQ("col_req_in", "Input column, must have '1' as the only " + "element.", ""); PARAM_FLAG("flag1", "Input flag, must be specified.", "f"); PARAM_FLAG("flag2", "Input flag, must not be specified.", "F"); PARAM_MATRIX_IN("matrix_in", "Input matrix.", "m"); @@ -86,6 +90,20 @@ CLI::GetParam("double_out") = 5.0; } + const arma::mat& matReqIn = CLI::GetParam("mat_req_in"); + const arma::vec& colReqIn = CLI::GetParam("col_req_in"); + if (matReqIn.n_rows != 1 || matReqIn.n_cols != 1 || matReqIn(0, 0) != 1.0) + { + throw std::invalid_argument("mat_req_in must be 1x1 and contain only " + "'1.0'!"); + } + + if (colReqIn.n_elem != 1 || colReqIn(0) != 1.0) + { + throw std::invalid_argument("col_req_in must have '1.0' as its only " + "single element!"); + } + // Input matrices should be at least 5 rows; the 5th row will be dropped and // the 3rd row will be multiplied by two. if (CLI::HasParam("matrix_in")) diff -Nru mlpack-3.1.0/src/mlpack/bindings/python/tests/test_python_binding.py mlpack-3.1.1/src/mlpack/bindings/python/tests/test_python_binding.py --- mlpack-3.1.0/src/mlpack/bindings/python/tests/test_python_binding.py 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/bindings/python/tests/test_python_binding.py 2019-05-27 02:18:49.000000000 +0000 @@ -29,6 +29,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True) self.assertEqual(output['string_out'], 'hello2') @@ -41,7 +43,9 @@ """ output = test_python_binding(string_in='hello', int_in=12, - double_in=4.0) + double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0]) self.assertNotEqual(output['string_out'], 'hello2') self.assertNotEqual(output['int_out'], 13) @@ -54,6 +58,8 @@ output = test_python_binding(string_in='goodbye', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True) self.assertNotEqual(output['string_out'], 'hello2') @@ -65,6 +71,8 @@ output = test_python_binding(string_in='hello', int_in=15, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True) self.assertNotEqual(output['int_out'], 13) @@ -76,6 +84,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=2.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True) self.assertNotEqual(output['double_out'], 5.0) @@ -87,6 +97,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True, flag2=True) @@ -105,6 +117,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], matrix_in=z) self.assertEqual(output['matrix_out'].shape[0], 100) @@ -127,6 +141,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], matrix_in=x, copy_all_inputs=True) @@ -152,6 +168,8 
@@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], matrix_in=z) self.assertEqual(output['matrix_out'].shape[0], 100) @@ -174,6 +192,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], matrix_in=x, copy_all_inputs=True) @@ -197,6 +217,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], smatrix_in=z) self.assertEqual(output['smatrix_out'].shape[0], 100) @@ -215,6 +237,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], smatrix_in=x, copy_all_inputs=True) @@ -234,6 +258,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], s_umatrix_in=z) self.assertEqual(output['s_umatrix_out'].shape[0], 100) @@ -252,6 +278,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], s_umatrix_in=x, copy_all_inputs=True) @@ -271,6 +299,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], col_in=z) self.assertEqual(output['col_out'].shape[0], 100) @@ -288,6 +318,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], col_in=x, copy_all_inputs=True) @@ -308,6 +340,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], matrix_in=z) self.assertEqual(output['matrix_out'].shape[0], 100) @@ -330,6 +364,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], matrix_in=x, copy_all_inputs=True) @@ -354,6 +390,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], matrix_in=x) self.assertEqual(output['matrix_out'].shape[0], 3) @@ -383,6 +421,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], matrix_in=x, copy_all_inputs=True) @@ -414,6 +454,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], umatrix_in=z) self.assertEqual(output['umatrix_out'].shape[0], 100) @@ -435,6 +477,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], umatrix_in=x, copy_all_inputs=True) @@ -459,6 +503,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], umatrix_in=x) self.assertEqual(output['umatrix_out'].shape[0], 3) @@ -488,6 +534,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], umatrix_in=x, copy_all_inputs=True) @@ -519,6 +567,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], col_in=z) self.assertEqual(output['col_out'].shape[0], 100) @@ -536,6 +586,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], col_in=x, copy_all_inputs=True) @@ -555,6 +607,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], ucol_in=z) self.assertEqual(output['ucol_out'].shape[0], 100) @@ -571,6 +625,8 @@ 
output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], ucol_in=x, copy_all_inputs=True) @@ -589,6 +645,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], row_in=z) self.assertEqual(output['row_out'].shape[0], 100) @@ -606,6 +664,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], row_in=x, copy_all_inputs=True) @@ -625,6 +685,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], urow_in=z) self.assertEqual(output['urow_out'].shape[0], 100) @@ -642,6 +704,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], urow_in=x, copy_all_inputs=True) @@ -661,6 +725,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], matrix_and_info_in=z) self.assertEqual(output['matrix_and_info_out'].shape[0], 100) @@ -679,6 +745,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], matrix_and_info_in=x, copy_all_inputs=True) @@ -701,6 +769,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], matrix_and_info_in=z) self.assertEqual(output['matrix_and_info_out'].shape[0], 10) @@ -726,6 +796,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], matrix_and_info_in=x, copy_all_inputs=True) @@ -751,6 +823,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], vector_in=x) self.assertEqual(output['vector_out'], [1, 2, 3, 4]) @@ -765,6 +839,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], str_vector_in=x) self.assertEqual(output['str_vector_out'], @@ -778,11 +854,15 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], build_model=True) output2 = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], model_in=output['model_out']) self.assertEqual(output2['model_bw_out'], 20.0) @@ -797,6 +877,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], smatrix_in=z) self.assertEqual(output['smatrix_out'].shape[0], 100) @@ -815,6 +897,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], smatrix_in=x, copy_all_inputs=True) @@ -834,6 +918,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], s_umatrix_in=z) self.assertEqual(output['s_umatrix_out'].shape[0], 100) @@ -851,6 +937,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], s_umatrix_in=x, copy_all_inputs=True) @@ -870,6 +958,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], col_in=z) self.assertEqual(output['col_out'].shape[0], 100) @@ -887,6 +977,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], col_in=x, copy_all_inputs=True) @@ 
-906,6 +998,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], ucol_in=z) self.assertEqual(output['ucol_out'].shape[0], 100) @@ -922,6 +1016,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], ucol_in=x, copy_all_inputs=True) @@ -940,6 +1036,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], row_in=x) self.assertEqual(output['row_out'].shape[0], 100) @@ -957,6 +1055,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], row_in=x, copy_all_inputs=True) @@ -976,6 +1076,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], urow_in=z) self.assertEqual(output['urow_out'].shape[0], 100) @@ -993,6 +1095,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], urow_in=x, copy_all_inputs=True) @@ -1012,6 +1116,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], matrix_and_info_in=z[0]) self.assertEqual(output['matrix_and_info_out'].shape[0], 10) @@ -1028,6 +1134,8 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], matrix_and_info_in=x[0], copy_all_inputs=True) @@ -1045,24 +1153,32 @@ lambda : test_python_binding(string_in=10, int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True)) self.assertRaises(TypeError, lambda : test_python_binding(string_in='hello', int_in=10.0, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True)) self.assertRaises(TypeError, lambda : test_python_binding(string_in='hello', int_in=12, double_in='bad', + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True)) self.assertRaises(TypeError, lambda : test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True, flag2=10)) @@ -1070,6 +1186,8 @@ lambda : test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True, matrix_in= 10.0)) @@ -1077,6 +1195,8 @@ lambda : test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True, matrix_in= 1)) @@ -1084,6 +1204,8 @@ lambda : test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True, matrix_and_info_in = 10.0)) @@ -1091,6 +1213,8 @@ lambda : test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True, copy_all_inputs = 10.0)) @@ -1098,6 +1222,8 @@ lambda : test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True, col_in = 10)) @@ -1105,6 +1231,8 @@ lambda : test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True, row_in = 10.0)) @@ -1112,6 +1240,8 @@ lambda : test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True, str_vector_in = 'bad')) @@ -1119,6 +1249,8 @@ lambda : test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True, urow_in = 10.0)) @@ -1126,6 +1258,8 @@ lambda : 
test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True, ucol_in = 10.0)) @@ -1133,6 +1267,8 @@ lambda : test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True, umatrix_in = 10.0)) @@ -1140,6 +1276,8 @@ lambda : test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True, verbose = 10)) @@ -1147,9 +1285,27 @@ lambda : test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], flag1=True, vector_in = 10.0)) + self.assertRaises(TypeError, + lambda : test_python_binding(string_in='hello', + int_in=12, + double_in=4.0, + mat_req_in=False, + col_req_in=[1.0], + flag1=True)) + + self.assertRaises(TypeError, + lambda : test_python_binding(string_in='hello', + int_in=12, + double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=False, + flag1=True)) + def testModelForceCopy(self): """ First create a GaussianKernel object, then send it back and make sure we get @@ -1158,17 +1314,23 @@ output = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], build_model=True) output2 = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], model_in=output['model_out'], copy_all_inputs=True) output3 = test_python_binding(string_in='hello', int_in=12, double_in=4.0, + mat_req_in=[[1.0]], + col_req_in=[1.0], model_in=output['model_out']) self.assertEqual(output2['model_bw_out'], 20.0) diff -Nru mlpack-3.1.0/src/mlpack/core/data/CMakeLists.txt mlpack-3.1.1/src/mlpack/core/data/CMakeLists.txt --- mlpack-3.1.0/src/mlpack/core/data/CMakeLists.txt 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/core/data/CMakeLists.txt 2019-05-27 02:18:49.000000000 +0000 @@ -24,6 +24,9 @@ split_data.hpp imputer.hpp binarize.hpp + confusion_matrix.hpp + one_hot_encoding.hpp + one_hot_encoding_impl.hpp ) # add directory name to sources diff -Nru mlpack-3.1.0/src/mlpack/core/data/confusion_matrix.hpp mlpack-3.1.1/src/mlpack/core/data/confusion_matrix.hpp --- mlpack-3.1.0/src/mlpack/core/data/confusion_matrix.hpp 1970-01-01 00:00:00.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/core/data/confusion_matrix.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -0,0 +1,65 @@ +/** + * @file confusion_matrix_impl.hpp + * @author Jeffin Sam + * + * Compute confusion matrix to evaluate the accuracy of a classification. + * The function works only for discrete data/categorical data. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_CORE_DATA_CONFUSION_MATRIX_HPP +#define MLPACK_CORE_DATA_CONFUSION_MATRIX_HPP + +#include + +namespace mlpack { +namespace data { + +/** + * A confusion matrix is a summary of prediction results on a classification + * problem. The number of correct and incorrect predictions are summarized + * by count and broken down by each class. 
+ * For example, for 2 classes, the function call will be + * + * @code + * ConfusionMatrix(predictors, responses, output, 2) + * @endcode + * + * In this case, the output matrix will be of size 2 * 2: + * + * @code + * 0 1 + * 0 TP FN + * 1 FP TN + * @endcode + * + * The confusion matrix for two labels will look like what is shown above. In + * this confusion matrix, TP represents the number of true positives, FP + * represents the number of false positives, FN represents the number of false + * negatives, and TN represents the number of true negatives. + * + * When generalizing to 2 or more classes, the row index of the confusion matrix + * represents the predicted classes and column index represents the actual + * class. + * + * @param predictors Vector of data points. + * @param responses The measured data for each point. + * @param output Matrix which is represented as confusion matrix. + * @param numClasses Number of classes. + */ +template<typename eT> +void ConfusionMatrix(const arma::Row<size_t> predictors, + const arma::Row<size_t> responses, + arma::Mat<eT>& output, + const size_t numClasses); + +} // namespace data +} // namespace mlpack + +// Include implementation. +#include "confusion_matrix_impl.hpp" + +#endif diff -Nru mlpack-3.1.0/src/mlpack/core/data/confusion_matrix_impl.hpp mlpack-3.1.1/src/mlpack/core/data/confusion_matrix_impl.hpp --- mlpack-3.1.0/src/mlpack/core/data/confusion_matrix_impl.hpp 1970-01-01 00:00:00.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/core/data/confusion_matrix_impl.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -0,0 +1,66 @@ +/** + * @file confusion_matrix_impl.hpp + * @author Jeffin Sam + * + * Compute confusion matrix to evaluate the accuracy of a classification. + * The function works only for discrete data/categorical data. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_CORE_DATA_CONFUSION_MATRIX_IMPL_HPP +#define MLPACK_CORE_DATA_CONFUSION_MATRIX_IMPL_HPP + +// In case it hasn't been included yet. +#include "confusion_matrix.hpp" + +namespace mlpack { +namespace data { + +/** + * A confusion matrix is a summary of prediction results on a classification + * problem. The number of correct and incorrect predictions are summarized + * by count and broken down by each class. + * For example, for 2 classes, the function call will be + * + * @code + * ConfusionMatrix(predictors, responses, output, 2) + * @endcode + * + * In this case, the output matrix will be of size 2 * 2: + * + * @code + * 0 1 + * 0 TP FN + * 1 FP TN + * @endcode + * + * The confusion matrix for two labels will look like what is shown above. In + * this confusion matrix, TP represents the number of true positives, FP + * represents the number of false positives, FN represents the number of false + * negatives, and TN represents the number of true negatives. + * + * When generalizing to 2 or more classes, the row index of the confusion matrix + * represents the predicted classes and column index represents the actual + * class. + */ +template<typename eT> +void ConfusionMatrix(const arma::Row<size_t> predictors, + const arma::Row<size_t> responses, + arma::Mat<eT>& output, + const size_t numClasses) +{ + // Loop over the actual labels and predicted labels and add the count.
+ output = arma::zeros<arma::Mat<eT>>(numClasses, numClasses); + for (size_t i = 0; i < predictors.n_elem; ++i) + { + output.at(predictors[i], responses[i])++; + } +} + +} // namespace data +} // namespace mlpack + +#endif diff -Nru mlpack-3.1.0/src/mlpack/core/data/normalize_labels_impl.hpp mlpack-3.1.1/src/mlpack/core/data/normalize_labels_impl.hpp --- mlpack-3.1.0/src/mlpack/core/data/normalize_labels_impl.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/core/data/normalize_labels_impl.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -39,32 +39,31 @@ // we'll resize it back down to its actual size. mapping.set_size(labelsIn.n_elem); labels.set_size(labelsIn.n_elem); + // Map for mapping each labelsIn value to its label. + std::unordered_map<eT, size_t> labelMap; size_t curLabel = 0; for (size_t i = 0; i < labelsIn.n_elem; ++i) { - bool found = false; - for (size_t j = 0; j < curLabel; ++j) + // If labelsIn[i] is already in the map, use the existing label. + if (labelMap.count(labelsIn[i]) > 0) { - // Is the label already in the list of labels we have seen? - if (labelsIn[i] == mapping[j]) - { - labels[i] = j; - found = true; - break; - } + labels[i] = labelMap[labelsIn[i]]; } - - // Do we need to add this new label? - if (!found) + else { - mapping[curLabel] = labelsIn[i]; + // If labelsIn[i] is not in the map yet, add it. + labelMap[labelsIn[i]] = curLabel; labels[i] = curLabel; ++curLabel; } } - // Resize mapping back down to necessary size. mapping.resize(curLabel); + // Build the mapping array from the encoded labels. + for (auto it = labelMap.begin(); it != labelMap.end(); ++it) + { + mapping[it->second] = it->first; + } } /** diff -Nru mlpack-3.1.0/src/mlpack/core/data/one_hot_encoding.hpp mlpack-3.1.1/src/mlpack/core/data/one_hot_encoding.hpp --- mlpack-3.1.0/src/mlpack/core/data/one_hot_encoding.hpp 1970-01-01 00:00:00.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/core/data/one_hot_encoding.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -0,0 +1,41 @@ +/** + * @file one_hot_encoding.hpp + * @author Jeffin Sam + * + * One hot encoding functions. The purpose of these functions is to convert + * categorical variables to binary vectors. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_CORE_DATA_ONE_HOT_ENCODING_HPP +#define MLPACK_CORE_DATA_ONE_HOT_ENCODING_HPP + +#include + +namespace mlpack { +namespace data { + +/** + * Given a set of labels of a particular datatype, convert them to a binary + * vector. The categorical values are mapped to integer values. + * Then, each integer value is represented as a binary vector that is + * all zeros except at the index of the integer, which is marked + * with a 1. + * + * @param labelsIn Input labels of arbitrary datatype. + * @param output Binary matrix. + */ +template<typename eT, typename RowType> +void OneHotEncoding(const RowType& labelsIn, + arma::Mat<eT>& output); + +} // namespace data +} // namespace mlpack +
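A short usage sketch for the declaration above (hypothetical labels; the output shape follows the implementation in one_hot_encoding_impl.hpp):

#include <mlpack/core/data/one_hot_encoding.hpp>

arma::Row<size_t> labels = {1, 3, 1, 2};
arma::mat encoded;
mlpack::data::OneHotEncoding(labels, encoded);
// encoded is 4 x 3: one row per input point and one column per distinct
// label, with columns ordered by first appearance (1, 3, 2); for example,
// encoded(1, 1) == 1 because the second point holds the second new label.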
+#include "one_hot_encoding_impl.hpp" + +#endif diff -Nru mlpack-3.1.0/src/mlpack/core/data/one_hot_encoding_impl.hpp mlpack-3.1.1/src/mlpack/core/data/one_hot_encoding_impl.hpp --- mlpack-3.1.0/src/mlpack/core/data/one_hot_encoding_impl.hpp 1970-01-01 00:00:00.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/core/data/one_hot_encoding_impl.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -0,0 +1,70 @@ +/** + * @file one_hot_encoding_impl.hpp + * @author Jeffin Sam + * + * Implementation of one hot encoding functions; categorical variables as binary + * vectors. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_CORE_DATA_ONE_HOT_ENCODING_IMPL_HPP +#define MLPACK_CORE_DATA_ONE_HOT_ENCODING_IMPL_HPP + +// In case it hasn't been included yet. +#include "one_hot_encoding.hpp" + + +namespace mlpack { +namespace data { + +/** + * Given a set of labels of a particular datatype, convert them to binary + * vector. The categorical values be mapped to integer values. + * Then, each integer value is represented as a binary vector that is + * all zero values except the index of the integer, which is marked + * with a 1. + * + * @param labelsIn Input labels of arbitrary datatype. + * @param output Binary matrix. + */ +template +void OneHotEncoding(const RowType& labelsIn, + arma::Mat& output) +{ + arma::Row labels; + labels.set_size(labelsIn.n_elem); + + // Loop over the input labels, and develop the mapping. + std::unordered_map labelMap; // Map for labelsIn to labels. + size_t curLabel = 0; + for (size_t i = 0; i < labelsIn.n_elem; ++i) + { + // If labelsIn[i] is already in the map, use the existing label. + if (labelMap.count(labelsIn[i]) != 0) + { + labels[i] = labelMap[labelsIn[i]] - 1; + } + else + { + // If labelsIn[i] not there then add it to the map. + labelMap[labelsIn[i]] = curLabel + 1; + labels[i] = curLabel; + ++curLabel; + } + } + // Resize output matrix to necessary size, and fill it with zeros. + output.zeros(labelsIn.n_elem, curLabel); + // Fill ones in at the required places. + for (size_t i = 0; i < labelsIn.n_elem; ++i) + { + output(i, labels[i]) = 1; + } + labelMap.clear(); +} +} // namespace data +} // namespace mlpack + +#endif diff -Nru mlpack-3.1.0/src/mlpack/core/dists/gamma_distribution.hpp mlpack-3.1.1/src/mlpack/core/dists/gamma_distribution.hpp --- mlpack-3.1.0/src/mlpack/core/dists/gamma_distribution.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/core/dists/gamma_distribution.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -6,7 +6,7 @@ * Implementation of a Gamma distribution of multidimensional data that fits * gamma parameters (alpha, beta) to data. * The fitting is done independently for each dataset dimension (row), based on - * the assumption each dimension is fully indepeendent. + * the assumption each dimension is fully independent. * * Based on "Estimating a Gamma Distribution" by Thomas P. Minka: * research.microsoft.com/~minka/papers/minka-gamma.pdf @@ -154,7 +154,7 @@ * @param x The 1-dimensional observation. * @param dim The dimension for which to calculate the probability. 
*/ - double Probability(double x, size_t dim) const; + double Probability(double x, const size_t dim) const; /** * This function returns the logarithm of the probability of a group of @@ -179,12 +179,12 @@ /** * This function returns the logarithm of the probability of a single - * observation. + * observation. * * @param x The 1-dimensional observation. * @param dim The dimension for which to calculate the probability. */ - double LogProbability(double x, size_t dim) const; + double LogProbability(double x, const size_t dim) const; /** * This function returns an observation of this distribution. diff -Nru mlpack-3.1.0/src/mlpack/core/metrics/ip_metric.hpp mlpack-3.1.1/src/mlpack/core/metrics/ip_metric.hpp --- mlpack-3.1.0/src/mlpack/core/metrics/ip_metric.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/core/metrics/ip_metric.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -41,6 +41,12 @@ //! Destroy the IPMetric object. ~IPMetric(); + //! Copy the parameters of the given metric. + IPMetric(const IPMetric& other); + + //! Assign this metric to be a copy of the given metric. + IPMetric& operator=(const IPMetric& other); + /** * Evaluate the metric. * diff -Nru mlpack-3.1.0/src/mlpack/core/metrics/ip_metric_impl.hpp mlpack-3.1.1/src/mlpack/core/metrics/ip_metric_impl.hpp --- mlpack-3.1.0/src/mlpack/core/metrics/ip_metric_impl.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/core/metrics/ip_metric_impl.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -13,7 +13,7 @@ #define MLPACK_METHODS_FASTMKS_IP_METRIC_IMPL_HPP // In case it hasn't been included yet. -#include "ip_metric_impl.hpp" +#include "ip_metric.hpp" #include #include @@ -48,6 +48,28 @@ } template<typename KernelType> +IPMetric<KernelType>::IPMetric(const IPMetric& other) : + kernel(other.kernel), + kernelOwner(other.kernelOwner) +{ + // Nothing to do. +} + +template<typename KernelType> +IPMetric<KernelType>& IPMetric<KernelType>::operator=(const IPMetric& other) +{ + if (this == &other) + return *this; + + if (kernelOwner) + delete kernel; + + kernel = new KernelType(*other.kernel); + kernelOwner = true; + return *this; +} + +template<typename KernelType> template<typename Vec1Type, typename Vec2Type> inline typename Vec1Type::elem_type IPMetric<KernelType>::Evaluate( const Vec1Type& a, diff -Nru mlpack-3.1.0/src/mlpack/core/util/param.hpp mlpack-3.1.1/src/mlpack/core/util/param.hpp --- mlpack-3.1.0/src/mlpack/core/util/param.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/core/util/param.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -536,6 +536,32 @@ PARAM_COL(ID, DESC, ALIAS, false, true, true) /** + * Define a required vector input parameter (type arma::vec). From the command + * line, the user can specify the file that holds the vector, using the name of + * the vector parameter with "_file" appended (and the same alias). So for + * instance, if the name of the vector parameter was "vec", the user could + * specify that the "vec" vector was held in vec.csv by giving the parameter: + * + * @code + * --vec_file vec.csv + * @endcode + * + * @param ID Name of the parameter. + * @param DESC Description of the parameter (1-2 sentences). + * @param ALIAS An alias for the parameter (one letter). + * + * @bug + * The __COUNTER__ variable is used in most cases to guarantee a unique global + * identifier for options declared using the PARAM_*() macros. However, not all + * compilers have this support--most notably, gcc < 4.3. In that case, the + * __LINE__ macro is used as an attempt to get a unique global identifier, but + * collisions are still possible, and they produce bizarre error messages.
See + * https://github.com/mlpack/mlpack/issues/100 for more information. + */ +#define PARAM_COL_IN_REQ(ID, DESC, ALIAS) \ + PARAM_COL(ID, DESC, ALIAS, true, true, true) + +/** * Define a row vector input parameter (type arma::rowvec). From the command * line, the user can specify the file that holds the vector, using the name of * the vector parameter with "_file" appended (and the same alias). So for diff -Nru mlpack-3.1.0/src/mlpack/core/util/version.hpp mlpack-3.1.1/src/mlpack/core/util/version.hpp --- mlpack-3.1.0/src/mlpack/core/util/version.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/core/util/version.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -18,7 +18,7 @@ // with higher number than the most recent release. #define MLPACK_VERSION_MAJOR 3 #define MLPACK_VERSION_MINOR 1 -#define MLPACK_VERSION_PATCH 0 +#define MLPACK_VERSION_PATCH 1 // The name of the version (for use by --version). namespace mlpack { diff -Nru mlpack-3.1.0/src/mlpack/core.hpp mlpack-3.1.1/src/mlpack/core.hpp --- mlpack-3.1.0/src/mlpack/core.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/core.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -88,6 +88,8 @@ #include #include #include +#include +#include // mlpack::backtrace only for linux #ifdef HAS_BFD_DL diff -Nru mlpack-3.1.0/src/mlpack/methods/adaboost/adaboost_main.cpp mlpack-3.1.1/src/mlpack/methods/adaboost/adaboost_main.cpp --- mlpack-3.1.0/src/mlpack/methods/adaboost/adaboost_main.cpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/adaboost/adaboost_main.cpp 2019-05-27 02:18:49.000000000 +0000 @@ -77,10 +77,17 @@ "predictions for a given test dataset. A test dataset may be specified " "with the " + PRINT_PARAM_STRING("test") + " parameter. The predicted " "classes for each point in the test dataset are output to the " + - PRINT_PARAM_STRING("output") + " output parameter. The AdaBoost model " - "itself is output to the " + PRINT_PARAM_STRING("output_model") + + PRINT_PARAM_STRING("predictions") + " output parameter. The AdaBoost " + "model itself is output to the " + PRINT_PARAM_STRING("output_model") + " output parameter." "\n\n" + "Note: the following parameter is deprecated and " + "will be removed in mlpack 4.0.0: " + PRINT_PARAM_STRING("output") + + "." + "\n" + "Use " + PRINT_PARAM_STRING("predictions") + " instead of " + + PRINT_PARAM_STRING("output") + '.' + + "\n\n" "For example, to run AdaBoost on an input dataset " + PRINT_DATASET("data") + " with perceptrons as the weak learner type, " "storing the trained model in " + PRINT_MODEL("model") + ", one could " @@ -95,7 +102,7 @@ PRINT_DATASET("predictions") + " with the following command: " "\n\n" + PRINT_CALL("adaboost", "input_model", "model", "test", "test_data", - "output", "predictions"), + "predictions", "predictions"), // See also... SEE_ALSO("AdaBoost on Wikipedia", "https://en.wikipedia.org/wiki/AdaBoost"), SEE_ALSO("Improved boosting algorithms using confidence-rated predictions " @@ -111,7 +118,9 @@ // Classification options. PARAM_MATRIX_IN("test", "Test dataset.", "T"); +// PARAM_UROW_OUT("output") is deprecated and will be removed in mlpack 4.0.0. PARAM_UROW_OUT("output", "Predicted labels for the test set.", "o"); +PARAM_UROW_OUT("predictions", "Predicted labels for the test set.", "P"); // Training options. 
PARAM_INT_IN("iterations", "The maximum number of boosting iterations to be run" @@ -155,10 +164,11 @@ if (CLI::HasParam("input_model")) RequireAtLeastOnePassed({ "test" }, false, "no task will be performed"); - RequireAtLeastOnePassed({ "output_model", "output" }, false, + RequireAtLeastOnePassed({ "output_model", "output", "predictions" }, false, "no results will be saved"); - ReportIgnoredParam({{ "test", false }}, "output"); + // "output" will be removed in mlpack 4.0.0. + ReportIgnoredParam({{ "test", false }}, "predictions"); AdaBoostModel* m; if (CLI::HasParam("training")) @@ -230,7 +240,11 @@ Row results; data::RevertLabels(predictedLabels, m->Mappings(), results); - CLI::GetParam>("output") = std::move(results); + // Save the predicted labels. + if (CLI::HasParam("output")) + CLI::GetParam>("output") = results; + if (CLI::HasParam("predictions")) + CLI::GetParam>("predictions") = std::move(results); } CLI::GetParam("output_model") = m; diff -Nru mlpack-3.1.0/src/mlpack/methods/ann/layer/CMakeLists.txt mlpack-3.1.1/src/mlpack/methods/ann/layer/CMakeLists.txt --- mlpack-3.1.0/src/mlpack/methods/ann/layer/CMakeLists.txt 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/ann/layer/CMakeLists.txt 2019-05-27 02:18:49.000000000 +0000 @@ -86,6 +86,8 @@ transposed_convolution_impl.hpp vr_class_reward.hpp vr_class_reward_impl.hpp + c_relu.hpp + c_relu_impl.hpp ) # Add directory name to sources. diff -Nru mlpack-3.1.0/src/mlpack/methods/ann/layer/concat.hpp mlpack-3.1.1/src/mlpack/methods/ann/layer/concat.hpp --- mlpack-3.1.0/src/mlpack/methods/ann/layer/concat.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/ann/layer/concat.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -1,6 +1,7 @@ /** * @file concat.hpp * @author Marcus Edel + * @author Mehul Kumar Nirala * * Definition of the Concat class, which acts as a concatenation contain. * @@ -54,6 +55,19 @@ const bool run = true); /** + * Create the Concat object using the specified parameters. + * + * @param inputSize A vector denoting input size of each layer added. + * @param axis Concat axis. + * @param model Expose all network modules. + * @param run Call the Forward/Backward method before the output is merged. + */ + Concat(arma::Row& inputSize, + const size_t axis, + const bool model = false, + const bool run = true); + + /** * Destroy the layers held by the model. */ ~Concat(); @@ -194,6 +208,15 @@ void serialize(Archive& /* ar */, const unsigned int /* version */); private: + //! Parameter which indicates the input size of modules. + arma::Row inputSize; + + //! Parameter which indicates the axis of concatenation. + size_t axis; + + //! Parameter which indicates whether to use the axis of concatenation. + bool useAxis; + //! Parameter which indicates if the modules should be exposed. bool model; @@ -201,6 +224,9 @@ //! before merging the output. bool run; + //! Parameter to store channels. + size_t channels; + //! Locally-stored network modules. std::vector > network; diff -Nru mlpack-3.1.0/src/mlpack/methods/ann/layer/concat_impl.hpp mlpack-3.1.1/src/mlpack/methods/ann/layer/concat_impl.hpp --- mlpack-3.1.0/src/mlpack/methods/ann/layer/concat_impl.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/ann/layer/concat_impl.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -1,6 +1,7 @@ /** * @file concat_impl.hpp * @author Marcus Edel + * @author Mehul Kumar Nirala * * Implementation of the Concat class, which acts as a concatenation contain. 
* @@ -25,13 +26,68 @@ template Concat::Concat( - const bool model, const bool run) : model(model), run(run) + const bool model, const bool run) : + axis(0), + useAxis(false), + model(model), + run(run), + channels(1) { parameters.set_size(0, 0); } template +Concat::Concat( + arma::Row<size_t>& inputSize, + const size_t axis, + const bool model, + const bool run) : + inputSize(inputSize), + axis(axis), + useAxis(true), + model(model), + run(run) +{ + parameters.set_size(0, 0); + + // Parameters to help calculate the number of channels. + size_t oldColSize = 1, newColSize = 1; + // The axis was specified, so useAxis is true. + if (useAxis) + { + // If the axis is specified but no input dimensions were given, we cannot + // compute the number of channels, so throw an error (see below). + if (inputSize.n_elem > 0) + { + // Calculate newColSize based on the axis + // of concatenation. Finally concat along cols and + // reshape to original format i.e. (input, batch_size). + size_t i = std::min(axis + 1, (size_t) inputSize.n_elem); + for (; i < inputSize.n_elem; ++i) + { + newColSize *= inputSize[i]; + } + } + else + { + throw std::logic_error("Input dimensions not specified."); + } + } + else + { + channels = 1; + } + if (newColSize <= 0) + { + throw std::logic_error("Col size is zero."); + } + channels = newColSize / oldColSize; + inputSize.clear(); +} + +template Concat::~Concat() { // Clear memory. @@ -54,13 +110,24 @@ network[i]); } } - // Vertically concatentate output from each layer. + output = boost::apply_visitor(outputParameterVisitor, network.front()); + + // Reshape output to incorporate the channels. + output.reshape(output.n_rows / channels, output.n_cols * channels); + for (size_t i = 1; i < network.size(); ++i) { - output = arma::join_cols(output, - boost::apply_visitor(outputParameterVisitor, network[i])); + arma::Mat<eT> out = boost::apply_visitor(outputParameterVisitor, + network[i]); + + out.reshape(out.n_rows / channels, out.n_cols * channels); + + // Vertically concatenate output from each layer. + output = arma::join_cols(output, out); } + // Reshape output to its original shape. + output.reshape(output.n_rows * channels, output.n_cols / channels); } template delta; + gy.reshape(gy.n_rows / channels, gy.n_cols * channels); for (size_t i = 0; i < network.size(); ++i) { // Use rows from the error corresponding to the output from each layer. size_t rows = boost::apply_visitor( outputParameterVisitor, network[i]).n_rows; - delta = gy.rows(rowCount, rowCount + rows - 1); + + // Extract from gy the parameters for the i-th network. + delta = gy.rows(rowCount / channels, (rowCount + rows) / channels - 1); + delta.reshape(delta.n_rows * channels, delta.n_cols / channels); + boost::apply_visitor(BackwardVisitor(std::move( boost::apply_visitor(outputParameterVisitor, network[i])), std::move(delta), std::move( @@ -91,6 +163,7 @@ { g += boost::apply_visitor(deltaVisitor, network[i]); } + gy.reshape(gy.n_rows * channels, gy.n_cols / channels); } else { @@ -106,16 +179,28 @@ const size_t index) { size_t rowCount = 0, rows = 0; + for (size_t i = 0; i < index; ++i) { - rowCount += boost::apply_visitor(outputParameterVisitor, network[i]).n_rows; + rowCount += boost::apply_visitor( + outputParameterVisitor, network[i]).n_rows; } rows = boost::apply_visitor(outputParameterVisitor, network[index]).n_rows; - arma::mat delta = gy.rows(rowCount, rowCount + rows - 1); + + // Reshape gy to extract the i-th layer gy.
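// (How the channel trick works throughout this file: reshaping from
// (n_rows, n_cols) to (n_rows / channels, n_cols * channels) splits each
// column-major column into `channels` consecutive sub-columns, so a plain
// row range then selects one layer's block across every channel; the
// inverse reshape afterwards restores the (input, batch_size) layout.)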
+ gy.reshape(gy.n_rows / channels, gy.n_cols * channels); + + arma::Mat<eT> delta = gy.rows(rowCount / channels, (rowCount + rows) / + channels - 1); + delta.reshape(delta.n_rows * channels, delta.n_cols / channels); + boost::apply_visitor(BackwardVisitor(std::move(boost::apply_visitor( outputParameterVisitor, network[index])), std::move(delta), std::move( boost::apply_visitor(deltaVisitor, network[index]))), network[index]); + // Reshape gy to its original shape. + gy.reshape(gy.n_rows * channels, gy.n_cols / channels); + g = boost::apply_visitor(deltaVisitor, network[index]); } @@ -130,14 +215,25 @@ if (run) { size_t rowCount = 0; + // Reshape error to extract the i-th layer error. + error.reshape(error.n_rows / channels, error.n_cols * channels); for (size_t i = 0; i < network.size(); ++i) { size_t rows = boost::apply_visitor( outputParameterVisitor, network[i]).n_rows; + + // Extract from error the parameters for the i-th network. + arma::Mat<eT> err = error.rows(rowCount / channels, (rowCount + rows) / + channels - 1); + err.reshape(err.n_rows * channels, err.n_cols / channels); + boost::apply_visitor(GradientVisitor(std::move(input), - std::move(error.rows(rowCount, rowCount + rows - 1))), network[i]); + std::move(err)), network[i]); rowCount += rows; } + + // Reshape error to its original shape. + error.reshape(error.n_rows * channels, error.n_cols / channels); } } @@ -153,12 +249,21 @@ size_t rowCount = 0; for (size_t i = 0; i < index; ++i) { - rowCount += boost::apply_visitor(outputParameterVisitor, network[i]).n_rows; + rowCount += boost::apply_visitor(outputParameterVisitor, + network[i]).n_rows; } size_t rows = boost::apply_visitor( outputParameterVisitor, network[index]).n_rows; + + error.reshape(error.n_rows / channels, error.n_cols * channels); + arma::Mat<eT> err = error.rows(rowCount / channels, (rowCount + rows) / + channels - 1); + err.reshape(err.n_rows * channels, err.n_cols / channels); + boost::apply_visitor(GradientVisitor(std::move(input), - std::move(error.rows(rowCount, rowCount + rows - 1))), network[index]); + std::move(err)), network[index]); + + error.reshape(error.n_rows * channels, error.n_cols / channels); } template + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { +/** + * + * A concatenated ReLU has two outputs, one ReLU and one negative ReLU, + * concatenated together. In other words, for positive x it produces [x, 0], + * and for negative x it produces [0, x]. Because it has two outputs, + * CReLU doubles the output dimension. + * + * Note: + * The CReLU doubles the output size. + * + * For more information, see the following. + * + * @code + * @inproceedings{ICML2016, + * title = {Understanding and Improving Convolutional Neural Networks + * via Concatenated Rectified Linear Units}, + * author = {Wenling Shang, Kihyuk Sohn, Diogo Almeida, Honglak Lee}, + * year = {2016} + * } + * @endcode + * + * @tparam InputDataType Type of the input data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat, + * arma::sp_mat or arma::cube). + */ +template < + typename InputDataType = arma::mat, + typename OutputDataType = arma::mat +> +class CReLU +{ + public: + /** + * Create the CReLU object. + */ + CReLU(); + + /** + * Ordinary feed forward pass of a neural network, evaluating the function + * f(x) by propagating the activity forward through f. + * Works only for 2D tensors. + * + * @param input Input data used for evaluating the specified function.
+ * @param output Resulting output activation. + */ + template + void Forward(const InputType&& input, OutputType&& output); + + /** + * Ordinary feed backward pass of a neural network, calculating the function + * f(x) by propagating x backwards through f. Using the results from the feed + * forward pass. + * + * @param input The propagated input activation. + * @param gy The backpropagated error. + * @param g The calculated gradient. + */ + template + void Backward(const DataType&& input, DataType&& gy, DataType&& g); + + //! Get the output parameter. + OutputDataType const& OutputParameter() const { return outputParameter; } + //! Modify the output parameter. + OutputDataType& OutputParameter() { return outputParameter; } + + //! Get the delta. + OutputDataType const& Delta() const { return delta; } + //! Modify the delta. + OutputDataType& Delta() { return delta; } + + /** + * Serialize the layer. + */ + template + void serialize(Archive& /* ar */, const unsigned int /* version */); + + private: + //! Locally-stored delta object. + OutputDataType delta; + + //! Locally-stored output parameter object. + OutputDataType outputParameter; +}; // class CReLU + +} // namespace ann +} // namespace mlpack + +// Include implementation. +#include "c_relu_impl.hpp" + +#endif diff -Nru mlpack-3.1.0/src/mlpack/methods/ann/layer/c_relu_impl.hpp mlpack-3.1.1/src/mlpack/methods/ann/layer/c_relu_impl.hpp --- mlpack-3.1.0/src/mlpack/methods/ann/layer/c_relu_impl.hpp 1970-01-01 00:00:00.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/ann/layer/c_relu_impl.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -0,0 +1,59 @@ +/** + * @file c_relu_impl.hpp + * @author Jeffin Sam + * + * Implementation of CReLU layer. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_ANN_LAYER_C_RELU_IMPL_HPP +#define MLPACK_METHODS_ANN_LAYER_C_RELU_IMPL_HPP + +// In case it hasn't yet been included. +#include "c_relu.hpp" + +namespace mlpack { +namespace ann /** Artificial Neural Network. */ { + +template +CReLU::CReLU() +{ + // Nothing to do here. +} + +template +template +void CReLU::Forward( + const InputType&& input, OutputType&& output) +{ + output = arma::join_cols(arma::max(input, 0.0 * input), arma::max( + (-1 * input), 0.0 * input)); +} + +template +template +void CReLU::Backward( + const DataType&& input, DataType&& gy, DataType&& g) +{ + DataType temp; + temp = gy % (input >= 0.0); + g = temp.rows(0, (input.n_rows / 2 - 1)) - temp.rows(input.n_rows / 2, + (input.n_rows - 1)); +} + +template +template +void CReLU::serialize( + Archive& /* ar */, + const unsigned int /* version */) +{ + // Nothing to do here. 
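// (For intuition about the layer as a whole: Forward() on the column
// [2, -3] produces [2, 0, 0, 3] -- max(x, 0) stacked on top of max(-x, 0) --
// which is exactly why CReLU doubles the output dimension and why Backward()
// splits its incoming error into a top half and a bottom half.)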
+} + +} // namespace ann +} // namespace mlpack + +#endif diff -Nru mlpack-3.1.0/src/mlpack/methods/ann/layer/layer_types.hpp mlpack-3.1.1/src/mlpack/methods/ann/layer/layer_types.hpp --- mlpack-3.1.0/src/mlpack/methods/ann/layer/layer_types.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/ann/layer/layer_types.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -175,6 +176,7 @@ Join*, LayerNorm*, LeakyReLU*, + CReLU*, Linear*, LinearNoBias*, LogSoftMax*, diff -Nru mlpack-3.1.0/src/mlpack/methods/decision_tree/all_categorical_split_impl.hpp mlpack-3.1.1/src/mlpack/methods/decision_tree/all_categorical_split_impl.hpp --- mlpack-3.1.0/src/mlpack/methods/decision_tree/all_categorical_split_impl.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/decision_tree/all_categorical_split_impl.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -53,7 +53,7 @@ // If each child will have the minimum number of points in it, we can split. // Otherwise we can't. if (arma::min(counts) < minimumLeafSize) - return bestGain; + return DBL_MAX; // Calculate the gain of the split. First we have to calculate the labels // that would be assigned to each child. @@ -106,7 +106,7 @@ } // Otherwise there was no improvement. - return bestGain; + return DBL_MAX; } template diff -Nru mlpack-3.1.0/src/mlpack/methods/decision_tree/all_dimension_select.hpp mlpack-3.1.1/src/mlpack/methods/decision_tree/all_dimension_select.hpp --- mlpack-3.1.0/src/mlpack/methods/decision_tree/all_dimension_select.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/decision_tree/all_dimension_select.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -25,7 +25,7 @@ /** * Construct the AllDimensionSelect object for the given number of dimensions. */ - AllDimensionSelect(const size_t dimensions) : i(0), dimensions(dimensions) { } + AllDimensionSelect() : i(0), dimensions(0) { } /** * Get the first dimension to select from. @@ -46,11 +46,16 @@ */ size_t Next() { return ++i; } + //! Get the number of dimensions. + size_t Dimensions() const { return dimensions; } + //! Modify the number of dimensions. + size_t& Dimensions() { return dimensions; } + private: //! The current dimension we are looking at. size_t i; //! The number of dimensions to select from. - const size_t dimensions; + size_t dimensions; }; } // namespace tree diff -Nru mlpack-3.1.0/src/mlpack/methods/decision_tree/best_binary_numeric_split_impl.hpp mlpack-3.1.1/src/mlpack/methods/decision_tree/best_binary_numeric_split_impl.hpp --- mlpack-3.1.0/src/mlpack/methods/decision_tree/best_binary_numeric_split_impl.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/decision_tree/best_binary_numeric_split_impl.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -30,7 +30,9 @@ { // First sanity check: if we don't have enough points, we can't split. if (data.n_elem < (minimumLeafSize * 2)) - return bestGain; + return DBL_MAX; + if (bestGain == 0.0) + return DBL_MAX; // It can't be outperformed. // Next, sort the data. arma::uvec sortedIndices = arma::sort_index(data); @@ -39,6 +41,11 @@ for (size_t i = 0; i < sortedLabels.n_elem; ++i) sortedLabels[i] = labels[sortedIndices[i]]; + // Sanity check: if the first element is the same as the last, we can't split + // in this dimension. + if (data[sortedIndices[0]] == data[sortedIndices[sortedIndices.n_elem - 1]]) + return DBL_MAX; + // Only initialize if we are using weights. 
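// (The rewritten split search that follows keeps running class counts -- or
// class weight sums -- for the left and right children and updates them in
// O(1) as the candidate split point advances, so each candidate is scored in
// O(numClasses) via EvaluatePtr() instead of re-scanning label subvectors;
// the whole sweep becomes linear rather than quadratic in the number of
// points.)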
if (UseWeights) { @@ -50,10 +57,68 @@ // Loop through all possible split points, choosing the best one. Also, force // a minimum leaf size of 1 (empty children don't make sense). - double bestFoundGain = bestGain; + double bestFoundGain = std::min(bestGain + minimumGainSplit, 0.0); + bool improved = false; const size_t minimum = std::max(minimumLeafSize, (size_t) 1); - for (size_t index = minimum; index < data.n_elem - (minimum - 1); ++index) + + // We need to count the number of points for each class. + arma::Mat classCounts; + arma::mat classWeightSums; + double totalWeight = 0.0; + double totalLeftWeight = 0.0; + double totalRightWeight = 0.0; + if (UseWeights) + { + classWeightSums.zeros(numClasses, 2); + totalWeight = arma::accu(sortedWeights); + bestFoundGain *= totalWeight; + + // Initialize the counts. + // These points have to be on the left. + for (size_t i = 0; i < minimum - 1; ++i) + { + classWeightSums(sortedLabels[i], 0) += sortedWeights[i]; + totalLeftWeight += sortedWeights[i]; + } + + // These points have to be on the right. + for (size_t i = minimum - 1; i < data.n_elem; ++i) + { + classWeightSums(sortedLabels[i], 1) += sortedWeights[i]; + totalRightWeight += sortedWeights[i]; + } + } + else + { + classCounts.zeros(numClasses, 2); + bestFoundGain *= data.n_elem; + + // Initialize the counts. + // These points have to be on the left. + for (size_t i = 0; i < minimum - 1; ++i) + ++classCounts(sortedLabels[i], 0); + + // These points have to be on the right. + for (size_t i = minimum - 1; i < data.n_elem; ++i) + ++classCounts(sortedLabels[i], 1); + } + + for (size_t index = minimum; index < data.n_elem - minimum; ++index) { + // Update class weight sums or counts. + if (UseWeights) + { + classWeightSums(sortedLabels[index - 1], 1) -= sortedWeights[index - 1]; + classWeightSums(sortedLabels[index - 1], 0) += sortedWeights[index - 1]; + totalLeftWeight += sortedWeights[index - 1]; + totalRightWeight -= sortedWeights[index - 1]; + } + else + { + --classCounts(sortedLabels[index - 1], 1); + ++classCounts(sortedLabels[index - 1], 0); + } + // Make sure that the value has changed. if (data[sortedIndices[index]] == data[sortedIndices[index - 1]]) continue; @@ -61,36 +126,26 @@ // Calculate the gain for the left and right child. Only use weights if // needed. const double leftGain = UseWeights ? - FitnessFunction::template Evaluate(sortedLabels.subvec(0, - index - 1), numClasses, sortedWeights.subvec(0, index - 1)) : - FitnessFunction::template Evaluate(sortedLabels.subvec(0, - index - 1), numClasses, sortedWeights /* ignored */); + FitnessFunction::template EvaluatePtr(classWeightSums.colptr(0), + numClasses, totalLeftWeight) : + FitnessFunction::template EvaluatePtr(classCounts.colptr(0), + numClasses, index); const double rightGain = UseWeights ? 
- FitnessFunction::template Evaluate(sortedLabels.subvec(index, - sortedLabels.n_elem - 1), numClasses, sortedWeights.subvec(index, - sortedLabels.n_elem - 1)) : - FitnessFunction::template Evaluate(sortedLabels.subvec(index, - sortedLabels.n_elem - 1), numClasses, sortedWeights /* ignored */); + FitnessFunction::template EvaluatePtr(classWeightSums.colptr(1), + numClasses, totalRightWeight) : + FitnessFunction::template EvaluatePtr(classCounts.colptr(1), + numClasses, size_t(sortedLabels.n_elem - index)); double gain; if (UseWeights) { - const double leftWeights = arma::accu(sortedWeights.subvec(0, index - 1)); - const double rightWeights = arma::accu(sortedWeights.subvec(index, - sortedWeights.n_elem - 1)); - const double fullWeight = leftWeights + rightWeights; - - gain = (leftWeights / fullWeight) * leftGain + - (rightWeights / fullWeight) * rightGain; + gain = totalLeftWeight * leftGain + totalRightWeight * rightGain; } else { - // Calculate the fraction of points in the left and right children. - const double leftRatio = double(index) / double(sortedLabels.n_elem); - const double rightRatio = 1.0 - leftRatio; - // Calculate the gain at this split point. - gain = leftRatio * leftGain + rightRatio * rightGain; + gain = double(index) * leftGain + + double(sortedLabels.n_elem - index) * rightGain; } // Corner case: is this the best possible split? @@ -103,18 +158,30 @@ // and index. classProbabilities[0] = (data[sortedIndices[index - 1]] + data[sortedIndices[index]]) / 2.0; + return gain; } - else if (gain > bestFoundGain + minimumGainSplit) + else if (gain > bestFoundGain) { // We still have a better split. bestFoundGain = gain; classProbabilities.set_size(1); classProbabilities[0] = (data[sortedIndices[index - 1]] + data[sortedIndices[index]]) / 2.0; + improved = true; } } + // If we didn't improve, return the original gain exactly as we got it + // (without introducing floating point errors). + if (!improved) + return DBL_MAX; + + if (UseWeights) + bestFoundGain /= totalWeight; + else + bestFoundGain /= sortedLabels.n_elem; + return bestFoundGain; } diff -Nru mlpack-3.1.0/src/mlpack/methods/decision_tree/decision_tree.hpp mlpack-3.1.1/src/mlpack/methods/decision_tree/decision_tree.hpp --- mlpack-3.1.0/src/mlpack/methods/decision_tree/decision_tree.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/decision_tree/decision_tree.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -64,6 +64,7 @@ * @param numClasses Number of classes in the dataset. * @param minimumLeafSize Minimum number of points in each leaf node. * @param minimumGainSplit Minimum gain for the node to split. + * @param dimensionSelector Instantiated dimension selection policy. */ template DecisionTree(MatType data, @@ -71,7 +72,9 @@ LabelsType labels, const size_t numClasses, const size_t minimumLeafSize = 10, - const double minimumGainSplit = 1e-7); + const double minimumGainSplit = 1e-7, + DimensionSelectionType dimensionSelector = + DimensionSelectionType()); /** * Construct the decision tree on the given data and labels, assuming that the @@ -86,13 +89,16 @@ * @param numClasses Number of classes in the dataset. * @param minimumLeafSize Minimum number of points in each leaf node. * @param minimumGainSplit Minimum gain for the node to split. + * @param dimensionSelector Instantiated dimension selection policy. 
*/ template DecisionTree(MatType data, LabelsType labels, const size_t numClasses, const size_t minimumLeafSize = 10, - const double minimumGainSplit = 1e-7); + const double minimumGainSplit = 1e-7, + DimensionSelectionType dimensionSelector = + DimensionSelectionType()); /** * Construct the decision tree on the given data and labels with weights, @@ -110,6 +116,7 @@ * @param weights The weight list of given label. * @param minimumLeafSize Minimum number of points in each leaf node. * @param minimumGainSplit Minimum gain for the node to split. + * @param dimensionSelector Instantiated dimension selection policy. */ template DecisionTree(MatType data, @@ -119,6 +126,8 @@ WeightsType weights, const size_t minimumLeafSize = 10, const double minimumGainSplit = 1e-7, + DimensionSelectionType dimensionSelector = + DimensionSelectionType(), const std::enable_if_t::type>::value>* = 0); @@ -138,6 +147,7 @@ * @param weights The Weight list of given labels. * @param minimumLeafSize Minimum number of points in each leaf node. * @param minimumGainSplit Minimum gain for the node to split. + * @param dimensionSelector Instantiated dimension selection policy. */ template DecisionTree(MatType data, @@ -146,6 +156,8 @@ WeightsType weights, const size_t minimumLeafSize = 10, const double minimumGainSplit = 1e-7, + DimensionSelectionType dimensionSelector = + DimensionSelectionType(), const std::enable_if_t::type>::value>* = 0); @@ -210,6 +222,7 @@ * @param weights Weights of all the labels * @param minimumLeafSize Minimum number of points in each leaf node. * @param minimumGainSplit Minimum gain for the node to split. + * @param dimensionSelector Instantiated dimension selection policy. * @return The final entropy of decision tree. */ template @@ -218,7 +231,9 @@ LabelsType labels, const size_t numClasses, const size_t minimumLeafSize = 10, - const double minimumGainSplit = 1e-7); + const double minimumGainSplit = 1e-7, + DimensionSelectionType dimensionSelector = + DimensionSelectionType()); /** * Train the decision tree on the given data, assuming that all dimensions are @@ -234,6 +249,7 @@ * @param weights Weights of all the labels * @param minimumLeafSize Minimum number of points in each leaf node. * @param minimumGainSplit Minimum gain for the node to split. + * @param dimensionSelector Instantiated dimension selection policy. * @return The final entropy of decision tree. */ template @@ -241,7 +257,9 @@ LabelsType labels, const size_t numClasses, const size_t minimumLeafSize = 10, - const double minimumGainSplit = 1e-7); + const double minimumGainSplit = 1e-7, + DimensionSelectionType dimensionSelector = + DimensionSelectionType()); /** * Train the decision tree on the given weighted data. This will overwrite @@ -260,6 +278,7 @@ * @param weights Weights of all the labels * @param minimumLeafSize Minimum number of points in each leaf node. * @param minimumGainSplit Minimum gain for the node to split. + * @param dimensionSelector Instantiated dimension selection policy. * @return The final entropy of decision tree. */ template @@ -270,6 +289,8 @@ WeightsType weights, const size_t minimumLeafSize = 10, const double minimumGainSplit = 1e-7, + DimensionSelectionType dimensionSelector = + DimensionSelectionType(), const std::enable_if_t::type>::value>* = 0); @@ -288,6 +309,7 @@ * @param weights Weights of all the labels * @param minimumLeafSize Minimum number of points in each leaf node. * @param minimumGainSplit Minimum gain for the node to split. 
+ * @param dimensionSelector Instantiated dimension selection policy. * @return The final entropy of decision tree. */ template @@ -297,6 +319,8 @@ WeightsType weights, const size_t minimumLeafSize = 10, const double minimumGainSplit = 1e-7, + DimensionSelectionType dimensionSelector = + DimensionSelectionType(), const std::enable_if_t::type>::value>* = 0); @@ -363,6 +387,10 @@ //! Modify the child of the given index (be careful!). DecisionTree& Child(const size_t i) { return *children[i]; } + //! Get the split dimension (only meaningful if this is a non-leaf in a + //! trained tree). + size_t SplitDimension() const { return splitDimension; } + /** * Given a point and that this node is not a leaf, calculate the index of the * child node this point would go towards. This method is primarily used by @@ -435,8 +463,9 @@ arma::Row& labels, const size_t numClasses, arma::rowvec& weights, - const size_t minimumLeafSize = 10, - const double minimumGainSplit = 1e-7); + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType& dimensionSelector); /** * Corresponding to the public Train() method, this method is designed for @@ -460,8 +489,9 @@ arma::Row& labels, const size_t numClasses, arma::rowvec& weights, - const size_t minimumLeafSize = 10, - const double minimumGainSplit = 1e-7); + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType& dimensionSelector); }; /** diff -Nru mlpack-3.1.0/src/mlpack/methods/decision_tree/decision_tree_impl.hpp mlpack-3.1.1/src/mlpack/methods/decision_tree/decision_tree_impl.hpp --- mlpack-3.1.0/src/mlpack/methods/decision_tree/decision_tree_impl.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/decision_tree/decision_tree_impl.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -28,12 +28,14 @@ CategoricalSplitType, DimensionSelectionType, ElemType, - NoRecursion>::DecisionTree(MatType data, - const data::DatasetInfo& datasetInfo, - LabelsType labels, - const size_t numClasses, - const size_t minimumLeafSize, - const double minimumGainSplit) + NoRecursion>::DecisionTree( + MatType data, + const data::DatasetInfo& datasetInfo, + LabelsType labels, + const size_t numClasses, + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType dimensionSelector) { using TrueMatType = typename std::decay::type; using TrueLabelsType = typename std::decay::type; @@ -42,10 +44,13 @@ TrueMatType tmpData(std::move(data)); TrueLabelsType tmpLabels(std::move(labels)); + // Set the correct dimensionality for the dimension selector. + dimensionSelector.Dimensions() = tmpData.n_rows; + // Pass off work to the Train() method. arma::rowvec weights; // Fake weights, not used. Train(tmpData, 0, tmpData.n_cols, datasetInfo, tmpLabels, numClasses, - weights, minimumLeafSize, minimumGainSplit); + weights, minimumLeafSize, minimumGainSplit, dimensionSelector); } //! Construct and train. 
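// (This pattern repeats in every constructor and Train() overload in this
// file: the selection policy is now default-constructed, so its Dimensions()
// member must be set to tmpData.n_rows before recursing -- this is what
// allowed the dimension-count constructor arguments to be dropped from the
// selection policies.)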
@@ -61,11 +66,13 @@ CategoricalSplitType, DimensionSelectionType, ElemType, - NoRecursion>::DecisionTree(MatType data, - LabelsType labels, - const size_t numClasses, - const size_t minimumLeafSize, - const double minimumGainSplit) + NoRecursion>::DecisionTree( + MatType data, + LabelsType labels, + const size_t numClasses, + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType dimensionSelector) { using TrueMatType = typename std::decay::type; using TrueLabelsType = typename std::decay::type; @@ -74,10 +81,13 @@ TrueMatType tmpData(std::move(data)); TrueLabelsType tmpLabels(std::move(labels)); + // Set the correct dimensionality for the dimension selector. + dimensionSelector.Dimensions() = tmpData.n_rows; + // Pass off work to the Train() method. arma::rowvec weights; // Fake weights, not used. Train(tmpData, 0, tmpData.n_cols, tmpLabels, numClasses, weights, - minimumLeafSize, minimumGainSplit); + minimumLeafSize, minimumGainSplit, dimensionSelector); } //! Construct and train with weights. @@ -93,17 +103,19 @@ CategoricalSplitType, DimensionSelectionType, ElemType, - NoRecursion>::DecisionTree(MatType data, - const data::DatasetInfo& datasetInfo, - LabelsType labels, - const size_t numClasses, - WeightsType weights, - const size_t minimumLeafSize, - const double minimumGainSplit, - const std::enable_if_t< - arma::is_arma_type< - typename std::remove_reference< - WeightsType>::type>::value>*) + NoRecursion>::DecisionTree( + MatType data, + const data::DatasetInfo& datasetInfo, + LabelsType labels, + const size_t numClasses, + WeightsType weights, + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType dimensionSelector, + const std::enable_if_t< + arma::is_arma_type< + typename std::remove_reference< + WeightsType>::type>::value>*) { using TrueMatType = typename std::decay::type; using TrueLabelsType = typename std::decay::type; @@ -114,9 +126,12 @@ TrueLabelsType tmpLabels(std::move(labels)); TrueWeightsType tmpWeights(std::move(weights)); + // Set the correct dimensionality for the dimension selector. + dimensionSelector.Dimensions() = tmpData.n_rows; + // Pass off work to the weighted Train() method. Train(tmpData, 0, tmpData.n_cols, datasetInfo, tmpLabels, numClasses, - tmpWeights, minimumLeafSize, minimumGainSplit); + tmpWeights, minimumLeafSize, minimumGainSplit, dimensionSelector); } //! Construct and train with weights. @@ -132,16 +147,18 @@ CategoricalSplitType, DimensionSelectionType, ElemType, - NoRecursion>::DecisionTree(MatType data, - LabelsType labels, - const size_t numClasses, - WeightsType weights, - const size_t minimumLeafSize, - const double minimumGainSplit, - const std::enable_if_t< - arma::is_arma_type< - typename std::remove_reference< - WeightsType>::type>::value>*) + NoRecursion>::DecisionTree( + MatType data, + LabelsType labels, + const size_t numClasses, + WeightsType weights, + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType dimensionSelector, + const std::enable_if_t< + arma::is_arma_type< + typename std::remove_reference< + WeightsType>::type>::value>*) { using TrueMatType = typename std::decay::type; using TrueLabelsType = typename std::decay::type; @@ -152,9 +169,12 @@ TrueLabelsType tmpLabels(std::move(labels)); TrueWeightsType tmpWeights(std::move(weights)); + // Set the correct dimensionality for the dimension selector. + dimensionSelector.Dimensions() = tmpData.n_rows; + // Pass off work to the weighted Train() method. 
Train(tmpData, 0, tmpData.n_cols, tmpLabels, numClasses, tmpWeights, - minimumLeafSize, minimumGainSplit); + minimumLeafSize, minimumGainSplit, dimensionSelector); } //! Construct, don't train. @@ -345,12 +365,14 @@ CategoricalSplitType, DimensionSelectionType, ElemType, - NoRecursion>::Train(MatType data, - const data::DatasetInfo& datasetInfo, - LabelsType labels, - const size_t numClasses, - const size_t minimumLeafSize, - const double minimumGainSplit) + NoRecursion>::Train( + MatType data, + const data::DatasetInfo& datasetInfo, + LabelsType labels, + const size_t numClasses, + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType dimensionSelector) { // Sanity check on data. if (data.n_cols != labels.n_elem) @@ -369,10 +391,14 @@ TrueMatType tmpData(std::move(data)); TrueLabelsType tmpLabels(std::move(labels)); + // Set the correct dimensionality for the dimension selector. + dimensionSelector.Dimensions() = tmpData.n_rows; + // Pass off work to the Train() method. arma::rowvec weights; // Fake weights, not used. return Train(tmpData, 0, tmpData.n_cols, datasetInfo, tmpLabels, - numClasses, weights, minimumLeafSize, minimumGainSplit); + numClasses, weights, minimumLeafSize, minimumGainSplit, + dimensionSelector); } //! Train on the given data, assuming all dimensions are numeric. @@ -388,11 +414,13 @@ CategoricalSplitType, DimensionSelectionType, ElemType, - NoRecursion>::Train(MatType data, - LabelsType labels, - const size_t numClasses, - const size_t minimumLeafSize, - const double minimumGainSplit) + NoRecursion>::Train( + MatType data, + LabelsType labels, + const size_t numClasses, + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType dimensionSelector) { // Sanity check on data. if (data.n_cols != labels.n_elem) @@ -411,10 +439,13 @@ TrueMatType tmpData(std::move(data)); TrueLabelsType tmpLabels(std::move(labels)); + // Set the correct dimensionality for the dimension selector. + dimensionSelector.Dimensions() = tmpData.n_rows; + // Pass off work to the Train() method. arma::rowvec weights; // Fake weights, not used. return Train(tmpData, 0, tmpData.n_cols, tmpLabels, numClasses, - weights, minimumLeafSize, minimumGainSplit); + weights, minimumLeafSize, minimumGainSplit, dimensionSelector); } //! Train on the given weighted data. @@ -430,17 +461,19 @@ CategoricalSplitType, DimensionSelectionType, ElemType, - NoRecursion>::Train(MatType data, - const data::DatasetInfo& datasetInfo, - LabelsType labels, - const size_t numClasses, - WeightsType weights, - const size_t minimumLeafSize, - const double minimumGainSplit, - const std::enable_if_t< - arma::is_arma_type< - typename std::remove_reference< - WeightsType>::type>::value>*) + NoRecursion>::Train( + MatType data, + const data::DatasetInfo& datasetInfo, + LabelsType labels, + const size_t numClasses, + WeightsType weights, + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType dimensionSelector, + const std::enable_if_t< + arma::is_arma_type< + typename std::remove_reference< + WeightsType>::type>::value>*) { // Sanity check on data. if (data.n_cols != labels.n_elem) @@ -461,9 +494,13 @@ TrueLabelsType tmpLabels(std::move(labels)); TrueWeightsType tmpWeights(std::move(weights)); + // Set the correct dimensionality for the dimension selector. + dimensionSelector.Dimensions() = tmpData.n_rows; + // Pass off work to the Train() method. 
return Train(tmpData, 0, tmpData.n_cols, datasetInfo, tmpLabels, - numClasses, tmpWeights, minimumLeafSize, minimumGainSplit); + numClasses, tmpWeights, minimumLeafSize, minimumGainSplit, + dimensionSelector); } //! Train on the given weighted data. @@ -479,16 +516,18 @@ CategoricalSplitType, DimensionSelectionType, ElemType, - NoRecursion>::Train(MatType data, - LabelsType labels, - const size_t numClasses, - WeightsType weights, - const size_t minimumLeafSize, - const double minimumGainSplit, - const std::enable_if_t< - arma::is_arma_type< - typename std::remove_reference< - WeightsType>::type>::value>*) + NoRecursion>::Train( + MatType data, + LabelsType labels, + const size_t numClasses, + WeightsType weights, + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType dimensionSelector, + const std::enable_if_t< + arma::is_arma_type< + typename std::remove_reference< + WeightsType>::type>::value>*) { // Sanity check on data. if (data.n_cols != labels.n_elem) @@ -509,9 +548,12 @@ TrueLabelsType tmpLabels(std::move(labels)); TrueWeightsType tmpWeights(std::move(weights)); + // Set the correct dimensionality for the dimension selector. + dimensionSelector.Dimensions() = tmpData.n_rows; + // Pass off work to the Train() method. return Train(tmpData, 0, tmpData.n_cols, tmpLabels, numClasses, - tmpWeights, minimumLeafSize, minimumGainSplit); + tmpWeights, minimumLeafSize, minimumGainSplit, dimensionSelector); } //! Train on the given data. @@ -527,15 +569,17 @@ CategoricalSplitType, DimensionSelectionType, ElemType, - NoRecursion>::Train(MatType& data, - const size_t begin, - const size_t count, - const data::DatasetInfo& datasetInfo, - arma::Row& labels, - const size_t numClasses, - arma::rowvec& weights, - const size_t minimumLeafSize, - const double minimumGainSplit) + NoRecursion>::Train( + MatType& data, + const size_t begin, + const size_t count, + const data::DatasetInfo& datasetInfo, + arma::Row& labels, + const size_t numClasses, + arma::rowvec& weights, + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType& dimensionSelector) { // Clear children if needed. for (size_t i = 0; i < children.size(); ++i) @@ -552,9 +596,9 @@ numClasses, UseWeights ? weights.subvec(begin, begin + count - 1) : weights); size_t bestDim = datasetInfo.Dimensionality(); // This means "no split". - DimensionSelectionType dimensions(datasetInfo.Dimensionality()); - for (size_t i = dimensions.Begin(); i != dimensions.End(); - i = dimensions.Next()) + const size_t end = dimensionSelector.End(); + for (size_t i = dimensionSelector.Begin(); i != end; + i = dimensionSelector.Next()) { double dimGain = -DBL_MAX; if (datasetInfo.Type(i) == data::Datatype::categorical) @@ -583,12 +627,14 @@ *this); } + // If the splitter reported that it did not split, move to the next + // dimension. + if (dimGain == DBL_MAX) + continue; + // Was there an improvement? If so mark that it's the new best dimension. - if (dimGain > bestGain) - { - bestDim = i; - bestGain = dimGain; - } + bestDim = i; + bestGain = dimGain; // If the gain is the best possible, no need to keep looking. if (bestGain >= 0.0) @@ -660,14 +706,15 @@ { child->Train(data, currentChildBegin, currentCol - currentChildBegin, datasetInfo, labels, numClasses, - weights, currentCol - currentChildBegin, minimumGainSplit); + weights, currentCol - currentChildBegin, minimumGainSplit, + dimensionSelector); } else { // During recursion entropy of child node may change. 
double childGain = child->Train(data, currentChildBegin, currentCol - currentChildBegin, datasetInfo, labels, numClasses, - weights, minimumLeafSize, minimumGainSplit); + weights, minimumLeafSize, minimumGainSplit, dimensionSelector); bestGain += double(childCounts[i]) / double(count) * (-childGain); } children.push_back(child); @@ -701,14 +748,16 @@ CategoricalSplitType, DimensionSelectionType, ElemType, - NoRecursion>::Train(MatType& data, - const size_t begin, - const size_t count, - arma::Row& labels, - const size_t numClasses, - arma::rowvec& weights, - const size_t minimumLeafSize, - const double minimumGainSplit) + NoRecursion>::Train( + MatType& data, + const size_t begin, + const size_t count, + arma::Row& labels, + const size_t numClasses, + arma::rowvec& weights, + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType& dimensionSelector) { // Clear children if needed. for (size_t i = 0; i < children.size(); ++i) @@ -728,7 +777,8 @@ numClasses, UseWeights ? weights.subvec(begin, begin + count - 1) : weights); size_t bestDim = data.n_rows; // This means "no split". - for (size_t i = 0; i < data.n_rows; ++i) + for (size_t i = dimensionSelector.Begin(); i != dimensionSelector.End(); + i = dimensionSelector.Next()) { const double dimGain = NumericSplitType::template SplitIfBetter(bestGain, @@ -743,11 +793,13 @@ classProbabilities, *this); - if (dimGain > bestGain) - { - bestDim = i; - bestGain = dimGain; - } + // If the splitter did not report that it improved, then move to the next + // dimension. + if (dimGain == DBL_MAX) + continue; + + bestDim = i; + bestGain = dimGain; // If the gain is the best possible, no need to keep looking. if (bestGain >= 0.0) @@ -806,14 +858,15 @@ { child->Train(data, currentChildBegin, currentCol - currentChildBegin, labels, numClasses, weights, - currentCol - currentChildBegin, minimumGainSplit); + currentCol - currentChildBegin, minimumGainSplit, + dimensionSelector); } else { // During recursion entropy of child node may change. double childGain = child->Train(data, currentChildBegin, currentCol - currentChildBegin, labels, numClasses, weights, - minimumLeafSize, minimumGainSplit); + minimumLeafSize, minimumGainSplit, dimensionSelector); bestGain += double(childCounts[i]) / double(count) * (-childGain); } children.push_back(child); diff -Nru mlpack-3.1.0/src/mlpack/methods/decision_tree/decision_tree_main.cpp mlpack-3.1.1/src/mlpack/methods/decision_tree/decision_tree_main.cpp --- mlpack-3.1.0/src/mlpack/methods/decision_tree/decision_tree_main.cpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/decision_tree/decision_tree_main.cpp 2019-05-27 02:18:49.000000000 +0000 @@ -67,7 +67,7 @@ "\n\n" + PRINT_CALL("decision_tree", "training", "data", "labels", "labels", "output_model", "tree", "minimum_leaf_size", 20, "minimum_gain_split", - 1e-3, "print_training_error", true) + + 1e-3, "print_training_accuracy", true) + "\n\n" "Then, to use that model to classify points in " + PRINT_DATASET("test_set") + " and print the test error given the " @@ -92,7 +92,7 @@ PARAM_UROW_IN("labels", "Training labels.", "l"); PARAM_MATRIX_AND_INFO_IN("test", "Testing dataset (may be categorical).", "T"); PARAM_MATRIX_IN("weights", "The weight of labels", "w"); -PARAM_UMATRIX_IN("test_labels", "Test point labels, if accuracy calculation " +PARAM_UROW_IN("test_labels", "Test point labels, if accuracy calculation " "is desired.", "L"); // Training parameters. 
@@ -100,7 +100,10 @@ 20); PARAM_DOUBLE_IN("minimum_gain_split", "Minimum gain for node splitting.", "g", 1e-7); -PARAM_FLAG("print_training_error", "Print the training error.", "e"); +// This is deprecated and should be removed in mlpack 4.0.0. +PARAM_FLAG("print_training_error", "Print the training error (deprecated; will " + "be removed in mlpack 4.0.0).", "e"); +PARAM_FLAG("print_training_accuracy", "Print the training accuracy.", "a"); // Output parameters. PARAM_MATRIX_OUT("probabilities", "Class probabilities for each test point.", @@ -146,7 +149,7 @@ ReportIgnoredParam({{ "test", false }}, "test_labels"); RequireAtLeastOnePassed({ "output_model", "probabilities", "predictions" }, false, "no output will be saved"); - ReportIgnoredParam({{ "training", false }}, "print_training_error"); + ReportIgnoredParam({{ "training", false }}, "print_training_accuracy"); ReportIgnoredParam({{ "test", false }}, "predictions"); ReportIgnoredParam({{ "test", false }}, "predictions"); @@ -158,6 +161,12 @@ { return (x > 0.0 && x < 1.0); }, true, "gain split must be a fraction in range [0,1]"); + if (CLI::HasParam("print_training_error")) + { + Log::Warn << "The option " << PRINT_PARAM_STRING("print_training_error") + << " is deprecated and will be removed in mlpack 4.0.0." << std::endl; + } + // Load the model or build the tree. DecisionTreeModel* model; arma::mat trainingSet; @@ -194,7 +203,8 @@ { arma::Row weights = std::move(CLI::GetParam>("weights")); - if (CLI::HasParam("print_training_error")) + if (CLI::HasParam("print_training_error") || + CLI::HasParam("print_training_accuracy")) { model->tree = DecisionTree<>(trainingSet, model->info, labels, numClasses, std::move(weights), minLeafSize, minimumGainSplit); @@ -234,7 +244,7 @@ ++correct; // Print number of correct points. - Log::Info << double(correct) / double(trainingSet.n_cols) * 100 << "%% " + Log::Info << double(correct) / double(trainingSet.n_cols) * 100 << "% " << "correct on training set (" << correct << " / " << trainingSet.n_cols << ")." << endl; } @@ -267,7 +277,7 @@ ++correct; // Print number of correct points. - Log::Info << double(correct) / double(testPoints.n_cols) * 100 << "%% " + Log::Info << double(correct) / double(testPoints.n_cols) * 100 << "% " << "correct on test set (" << correct << " / " << testPoints.n_cols << ")." << endl; } diff -Nru mlpack-3.1.0/src/mlpack/methods/decision_tree/gini_gain.hpp mlpack-3.1.1/src/mlpack/methods/decision_tree/gini_gain.hpp --- mlpack-3.1.0/src/mlpack/methods/decision_tree/gini_gain.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/decision_tree/gini_gain.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -28,6 +28,24 @@ { public: /** + * Evaluate the Gini impurity given a vector of class weight counts. + */ + template + static double EvaluatePtr(const CountType* counts, + const size_t countLength, + const CountType totalCount) + { + if (totalCount == 0) + return 0.0; + + CountType impurity = 0.0; + for (size_t i = 0; i < countLength; ++i) + impurity += counts[i] * (totalCount - counts[i]); + + return -((double) impurity / ((double) std::pow(totalCount, 2))); + } + + /** * Evaluate the Gini impurity on the given set of labels. RowType should be * an Armadillo vector that holds size_t objects. 
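The pointer-based overload above makes the scoring arithmetic easy to check by hand (hypothetical counts; EvaluatePtr() as defined in this hunk):

size_t counts[2] = {3, 1};
// impurity = 3 * (4 - 3) + 1 * (4 - 1) = 6, so the result is
// -(6 / 4^2) = -0.375, the negated Gini impurity 1 - (3/4)^2 - (1/4)^2.
const double gain = mlpack::tree::GiniGain::EvaluatePtr(counts, 2, (size_t) 4);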
* diff -Nru mlpack-3.1.0/src/mlpack/methods/decision_tree/information_gain.hpp mlpack-3.1.1/src/mlpack/methods/decision_tree/information_gain.hpp --- mlpack-3.1.0/src/mlpack/methods/decision_tree/information_gain.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/decision_tree/information_gain.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -26,6 +26,26 @@ { public: /** + * Evaluate the Gini impurity given a vector of class weight counts. + */ + template + static double EvaluatePtr(const CountType* counts, + const size_t countLength, + const CountType totalCount) + { + double gain = 0.0; + + for (size_t i = 0; i < countLength; ++i) + { + const double f = ((double) counts[i] / (double) totalCount); + if (f > 0.0) + gain += f * std::log2(f); + } + + return gain; + } + + /** * Given a set of labels, calculate the information gain of those labels. * Note that it is possible that due to floating-point representation issues, * it is possible that the gain returned can be very slightly greater than 0! diff -Nru mlpack-3.1.0/src/mlpack/methods/decision_tree/multiple_random_dimension_select.hpp mlpack-3.1.1/src/mlpack/methods/decision_tree/multiple_random_dimension_select.hpp --- mlpack-3.1.0/src/mlpack/methods/decision_tree/multiple_random_dimension_select.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/decision_tree/multiple_random_dimension_select.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -17,21 +17,36 @@ /** * This dimension selection policy allows the selection from a few random - * dimensions. The number of random dimensions to use is specified by the - * NumDimensions template parameter. - * - * @tparam NumDimensions Number of random dimensions to select. + * dimensions. The number of random dimensions to use is specified in the + * constructor. */ -template class MultipleRandomDimensionSelect { public: /** * Instantiate the MultipleRandomDimensionSelect object. + * + * @param numDimensions Number of random dimensions to try for each split. */ - MultipleRandomDimensionSelect(const size_t dimensions) + MultipleRandomDimensionSelect(const size_t numDimensions = 0) : + numDimensions(numDimensions), + i(0), + dimensions(0) + { } + + /** + * Get the first random value. + */ + size_t Begin() { - for (size_t i = 0; i < NumDimensions; ++i) + // Reset if possible. + if (numDimensions == 0 || numDimensions > dimensions) + numDimensions = (size_t) std::sqrt(dimensions); + + values.set_size(numDimensions + 1); + + // Try setting new values. + for (size_t i = 0; i < numDimensions; ++i) { // Generate random different numbers. bool unique = false; @@ -55,14 +70,8 @@ values[i] = value; } - values[NumDimensions] = std::numeric_limits::max(); - } + values[numDimensions] = std::numeric_limits::max(); - /** - * Get the first random value. - */ - size_t Begin() - { i = 0; return values[0]; } @@ -80,11 +89,20 @@ return values[++i]; } + //! Get the number of dimensions. + size_t Dimensions() const { return dimensions; } + //! Set the number of dimensions. + size_t& Dimensions() { return dimensions; } + private: + //! The number of dimensions. + size_t numDimensions; //! The values we select from. - size_t values[NumDimensions + 1]; + arma::Col values; //! The current value we are looking at. size_t i; + //! Number of dimensions. 
+ size_t dimensions; }; } // namespace tree diff -Nru mlpack-3.1.0/src/mlpack/methods/decision_tree/random_dimension_select.hpp mlpack-3.1.1/src/mlpack/methods/decision_tree/random_dimension_select.hpp --- mlpack-3.1.0/src/mlpack/methods/decision_tree/random_dimension_select.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/decision_tree/random_dimension_select.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -25,7 +25,7 @@ * Construct the RandomDimensionSelect object with the given number of * dimensions. */ - RandomDimensionSelect(const size_t dimensions) : dimensions(dimensions) { } + RandomDimensionSelect() : dimensions(0) { } /** * Get the first dimension to select from. @@ -43,9 +43,14 @@ */ size_t Next() const { return dimensions; } + //! Get the number of dimensions. + size_t Dimensions() const { return dimensions; } + //! Set the number of dimensions. + size_t& Dimensions() { return dimensions; } + private: //! The number of dimensions to select from. - const size_t dimensions; + size_t dimensions; }; } // namespace tree diff -Nru mlpack-3.1.0/src/mlpack/methods/fastmks/fastmks.hpp mlpack-3.1.1/src/mlpack/methods/fastmks/fastmks.hpp --- mlpack-3.1.0/src/mlpack/methods/fastmks/fastmks.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/fastmks/fastmks.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -105,6 +105,37 @@ const bool naive = false); /** + * Create the FastMKS object with the given reference set (this is the set + * that is searched), taking ownership of the reference set. Optionally, + * specify whether or not single-tree search or naive (brute-force) search + * should be used. + * + * @param referenceSet Set of reference data. + * @param singleMode Whether or not to run single-tree search. + * @param naive Whether or not to run brute-force (naive) search. + */ + FastMKS(MatType&& referenceSet, + const bool singleMode = false, + const bool naive = false); + + /** + * Create the FastMKS object using the reference set (this is the set that is + * searched) with an initialized kernel, taking ownership of the reference + * set. This is useful for when the kernel stores state. Optionally, specify + * whether or not single-tree search or naive (brute-force) search should be + * used. + * + * @param referenceSet Reference set of data for FastMKS. + * @param kernel Initialized kernel. + * @param single Whether or not to run single-tree search. + * @param naive Whether or not to run brute-force (naive) search. + */ + FastMKS(MatType&& referenceSet, + KernelType& kernel, + const bool singleMode = false, + const bool naive = false); + + /** * Create the FastMKS object with an already-initialized tree built on the * reference points. Be sure that the tree is built with the metric type * IPMetric. Optionally, whether or not to run single-tree search @@ -155,6 +186,25 @@ void Train(const MatType& referenceSet, KernelType& kernel); /** + * "Train" the FastMKS model on the given reference set (this will just build + * a tree, if the current search mode is not naive mode). This takes + * ownership of the reference set. + * + * @param referenceSet Set of reference points. + */ + void Train(MatType&& referenceSet); + + /** + * "Train" the FastMKS model on the given reference set and use the given + * kernel. This will just build a tree and replace the metric, if the current + * search mode is not naive mode. This takes ownership of the reference set. + * + * @param referenceSet Set of reference points. + * @param kernel Kernel to use for search. 
+ */ + void Train(MatType&& referenceSet, KernelType& kernel); + + /** * Train the FastMKS model on the given reference tree. This takes ownership * of the tree, so you do not need to delete it! This will throw an exception * if the model is searching in naive mode (i.e. if Naive() == true). diff -Nru mlpack-3.1.0/src/mlpack/methods/fastmks/fastmks_impl.hpp mlpack-3.1.1/src/mlpack/methods/fastmks/fastmks_impl.hpp --- mlpack-3.1.0/src/mlpack/methods/fastmks/fastmks_impl.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/fastmks/fastmks_impl.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -93,6 +93,62 @@ Timer::Stop("tree_building"); } +// No instantiated kernel. +template class TreeType> +FastMKS::FastMKS( + MatType&& referenceSet, + const bool singleMode, + const bool naive) : + referenceSet(naive ? new MatType(std::move(referenceSet)) : NULL), + referenceTree(NULL), + treeOwner(true), + setOwner(naive), + singleMode(singleMode), + naive(naive) +{ + Timer::Start("tree_building"); + if (!naive) + { + referenceTree = new Tree(std::move(referenceSet)); + referenceSet = &referenceTree->Dataset(); + } + Timer::Stop("tree_building"); +} + +// Instantiated kernel. +template class TreeType> +FastMKS::FastMKS(MatType&& referenceSet, + KernelType& kernel, + const bool singleMode, + const bool naive) : + referenceSet(naive ? new MatType(std::move(referenceSet)) : NULL), + referenceTree(NULL), + treeOwner(true), + setOwner(naive), + singleMode(singleMode), + naive(naive), + metric(kernel) +{ + Timer::Start("tree_building"); + + // If necessary, the reference tree should be built. There is no query tree. + if (!naive) + { + referenceTree = new Tree(referenceSet, metric); + referenceSet = &referenceTree->Dataset(); + } + + Timer::Stop("tree_building"); +} + // One dataset, pre-built tree. template& FastMKS::operator=(const FastMKS& other) { + if (this == &other) + return *this; + // Clear anything we currently have. if (treeOwner) delete referenceTree; @@ -256,6 +315,60 @@ template class TreeType> +void FastMKS::Train(MatType&& referenceSet) +{ + if (setOwner) + delete this->referenceSet; + + if (!naive) + { + if (treeOwner && referenceTree) + delete referenceTree; + referenceTree = new Tree(std::move(referenceSet), metric); + referenceSet = referenceTree->Dataset(); + treeOwner = true; + setOwner = false; + } + else + { + this->referenceSet = new MatType(std::move(referenceSet)); + this->setOwner = true; + } +} + +template class TreeType> +void FastMKS::Train(MatType&& referenceSet, + KernelType& kernel) +{ + if (setOwner) + delete this->referenceSet; + + this->metric = metric::IPMetric(kernel); + + if (!naive) + { + if (treeOwner && referenceTree) + delete referenceTree; + referenceTree = new Tree(std::move(referenceSet), metric); + treeOwner = true; + setOwner = false; + } + else + { + this->referenceSet = new MatType(std::move(referenceSet)); + this->setOwner = true; + } +} + +template class TreeType> void FastMKS::Train(Tree* tree) { if (naive) @@ -295,6 +408,15 @@ throw std::invalid_argument(ss.str()); } + if (querySet.n_rows != referenceSet->n_rows) + { + std::stringstream ss; + ss << "The number of dimensions in the query set (" << querySet.n_rows + << ") must be equal to the number of dimensions in the reference set (" + << referenceSet->n_rows << ")!"; + throw std::invalid_argument(ss.str()); + } + Timer::Start("computing_products"); // No remapping will be necessary because we are using the cover tree. 
@@ -387,6 +509,14 @@ << "points in the reference set (" << referenceSet->n_cols << ")"; throw std::invalid_argument(ss.str()); } + if (queryTree->Dataset().n_rows != referenceSet->n_rows) + { + std::stringstream ss; + ss << "The number of dimensions in the query set (" + << queryTree->Dataset().n_rows << ") must be equal to the number of " + << "dimensions in the reference set (" << referenceSet->n_rows << ")!"; + throw std::invalid_argument(ss.str()); + } // If either naive mode or single mode is specified, this must fail. if (naive || singleMode) diff -Nru mlpack-3.1.0/src/mlpack/methods/fastmks/fastmks_main.cpp mlpack-3.1.1/src/mlpack/methods/fastmks/fastmks_main.cpp --- mlpack-3.1.0/src/mlpack/methods/fastmks/fastmks_main.cpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/fastmks/fastmks_main.cpp 2019-05-27 02:18:49.000000000 +0000 @@ -152,43 +152,43 @@ { LinearKernel lk; model->KernelType() = FastMKSModel::LINEAR_KERNEL; - model->BuildModel(referenceData, lk, single, naive, base); + model->BuildModel(std::move(referenceData), lk, single, naive, base); } else if (kernelType == "polynomial") { PolynomialKernel pk(degree, offset); model->KernelType() = FastMKSModel::POLYNOMIAL_KERNEL; - model->BuildModel(referenceData, pk, single, naive, base); + model->BuildModel(std::move(referenceData), pk, single, naive, base); } else if (kernelType == "cosine") { CosineDistance cd; model->KernelType() = FastMKSModel::COSINE_DISTANCE; - model->BuildModel(referenceData, cd, single, naive, base); + model->BuildModel(std::move(referenceData), cd, single, naive, base); } else if (kernelType == "gaussian") { GaussianKernel gk(bandwidth); model->KernelType() = FastMKSModel::GAUSSIAN_KERNEL; - model->BuildModel(referenceData, gk, single, naive, base); + model->BuildModel(std::move(referenceData), gk, single, naive, base); } else if (kernelType == "epanechnikov") { EpanechnikovKernel ek(bandwidth); model->KernelType() = FastMKSModel::EPANECHNIKOV_KERNEL; - model->BuildModel(referenceData, ek, single, naive, base); + model->BuildModel(std::move(referenceData), ek, single, naive, base); } else if (kernelType == "triangular") { TriangularKernel tk(bandwidth); model->KernelType() = FastMKSModel::TRIANGULAR_KERNEL; - model->BuildModel(referenceData, tk, single, naive, base); + model->BuildModel(std::move(referenceData), tk, single, naive, base); } else if (kernelType == "hyptan") { HyperbolicTangentKernel htk(scale, offset); model->KernelType() = FastMKSModel::HYPTAN_KERNEL; - model->BuildModel(referenceData, htk, single, naive, base); + model->BuildModel(std::move(referenceData), htk, single, naive, base); } } else diff -Nru mlpack-3.1.0/src/mlpack/methods/fastmks/fastmks_model.hpp mlpack-3.1.1/src/mlpack/methods/fastmks/fastmks_model.hpp --- mlpack-3.1.0/src/mlpack/methods/fastmks/fastmks_model.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/fastmks/fastmks_model.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -70,7 +70,7 @@ * to the correct entry in KernelTypes for the given KernelType class! 
*/ template - void BuildModel(const arma::mat& referenceData, + void BuildModel(arma::mat&& referenceData, TKernelType& kernel, const bool singleMode, const bool naive, diff -Nru mlpack-3.1.0/src/mlpack/methods/fastmks/fastmks_model_impl.hpp mlpack-3.1.1/src/mlpack/methods/fastmks/fastmks_model_impl.hpp --- mlpack-3.1.0/src/mlpack/methods/fastmks/fastmks_model_impl.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/fastmks/fastmks_model_impl.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -21,13 +21,18 @@ template void BuildFastMKSModel(FastMKS& f, KernelType& k, - const arma::mat& referenceData, + arma::mat&& referenceData, const double base) { // Do we need to build the tree? + if (base <= 1.0) + { + throw std::invalid_argument("base must be greater than 1"); + } + if (f.Naive()) { - f.Train(referenceData, k); + f.Train(std::move(referenceData), k); } else { @@ -35,7 +40,8 @@ Timer::Start("tree_building"); metric::IPMetric metric(k); typename FastMKS::Tree* tree = - new typename FastMKS::Tree(referenceData, metric, base); + new typename FastMKS::Tree(std::move(referenceData), + metric, base); Timer::Stop("tree_building"); f.Train(tree); @@ -47,7 +53,7 @@ typename FastMKSType> void BuildFastMKSModel(FastMKSType& /* f */, KernelType& /* k */, - const arma::mat& /* referenceData */, + arma::mat&& /* referenceData */, const double /* base */) { throw std::invalid_argument("FastMKSModel::BuildModel(): given kernel type is" @@ -55,7 +61,7 @@ } template -void FastMKSModel::BuildModel(const arma::mat& referenceData, +void FastMKSModel::BuildModel(arma::mat&& referenceData, TKernelType& kernel, const bool singleMode, const bool naive, @@ -90,37 +96,37 @@ { case LINEAR_KERNEL: linear = new FastMKS(singleMode, naive); - BuildFastMKSModel(*linear, kernel, referenceData, base); + BuildFastMKSModel(*linear, kernel, std::move(referenceData), base); break; case POLYNOMIAL_KERNEL: polynomial = new FastMKS(singleMode, naive); - BuildFastMKSModel(*polynomial, kernel, referenceData, base); + BuildFastMKSModel(*polynomial, kernel, std::move(referenceData), base); break; case COSINE_DISTANCE: cosine = new FastMKS(singleMode, naive); - BuildFastMKSModel(*cosine, kernel, referenceData, base); + BuildFastMKSModel(*cosine, kernel, std::move(referenceData), base); break; case GAUSSIAN_KERNEL: gaussian = new FastMKS(singleMode, naive); - BuildFastMKSModel(*gaussian, kernel, referenceData, base); + BuildFastMKSModel(*gaussian, kernel, std::move(referenceData), base); break; case EPANECHNIKOV_KERNEL: epan = new FastMKS(singleMode, naive); - BuildFastMKSModel(*epan, kernel, referenceData, base); + BuildFastMKSModel(*epan, kernel, std::move(referenceData), base); break; case TRIANGULAR_KERNEL: triangular = new FastMKS(singleMode, naive); - BuildFastMKSModel(*triangular, kernel, referenceData, base); + BuildFastMKSModel(*triangular, kernel, std::move(referenceData), base); break; case HYPTAN_KERNEL: hyptan = new FastMKS(singleMode, naive); - BuildFastMKSModel(*hyptan, kernel, referenceData, base); + BuildFastMKSModel(*hyptan, kernel, std::move(referenceData), base); break; } } diff -Nru mlpack-3.1.0/src/mlpack/methods/perceptron/perceptron_main.cpp mlpack-3.1.1/src/mlpack/methods/perceptron/perceptron_main.cpp --- mlpack-3.1.0/src/mlpack/methods/perceptron/perceptron_main.cpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/perceptron/perceptron_main.cpp 2019-05-27 02:18:49.000000000 +0000 @@ -47,10 +47,18 @@ " parameter), or both those things at once. 
In addition, this program " "allows classification on a test dataset (via the " + PRINT_PARAM_STRING("test") + " parameter) and the classification results " - "on the test set may be saved with the " + PRINT_PARAM_STRING("output") + - "output parameter. The perceptron model may be saved with the " + + "on the test set may be saved with the " + + PRINT_PARAM_STRING("predictions") + + " output parameter. The perceptron model may be saved with the " + PRINT_PARAM_STRING("output_model") + " output parameter." "\n\n" + "Note: the following parameter is deprecated and " + "will be removed in mlpack 4.0.0: " + PRINT_PARAM_STRING("output") + + "." + "\n" + "Use " + PRINT_PARAM_STRING("predictions") + " instead of " + + PRINT_PARAM_STRING("output") + '.' + + "\n\n" "The training data given with the " + PRINT_PARAM_STRING("training") + " option may have class labels as its last dimension (so, if the training " "data is in CSV format, labels should be the last column). Alternately, " @@ -71,7 +79,7 @@ "saving the predicted classes to " + PRINT_DATASET("predictions") + "." "\n\n" + PRINT_CALL("perceptron", "input_model", "perceptron_model", "test", - "test_data", "output", "predictions") + + "test_data", "predictions", "predictions") + "\n\n" "Note that all of the options may be specified at once: predictions may be " "calculated right after training a model, and model training can occur even" @@ -126,8 +134,11 @@ // Testing/classification parameters. PARAM_MATRIX_IN("test", "A matrix containing the test set.", "T"); +// PARAM_UROW_OUT("output") is deprecated and will be removed in PARAM_UROW_OUT("output", "The matrix in which the predicted labels for the" " test set will be written.", "o"); +PARAM_UROW_OUT("predictions", "The matrix in which the predicted labels for the" + " test set will be written.", "P"); static void mlpackMain() { @@ -139,9 +150,10 @@ // If the user isn't going to save the output model or any predictions, we // should issue a warning. - RequireAtLeastOnePassed({ "output_model", "output" }, false, + RequireAtLeastOnePassed({ "output_model", "output", "predictions" }, false, "no output will be saved"); - ReportIgnoredParam({{ "test", false }}, "output"); + // "output" will be removed in mlpack 4.0.0. + ReportIgnoredParam({{ "test", false }}, "predictions"); // Check parameter validity. RequireParamValue("max_iterations", [](int x) { return x >= 0; }, @@ -296,7 +308,9 @@ // Save the predicted labels. if (CLI::HasParam("output")) - CLI::GetParam>("output") = std::move(results); + CLI::GetParam>("output") = results; + if (CLI::HasParam("predictions")) + CLI::GetParam>("predictions") = std::move(results); } // Lastly, save the output model. diff -Nru mlpack-3.1.0/src/mlpack/methods/random_forest/random_forest.hpp mlpack-3.1.1/src/mlpack/methods/random_forest/random_forest.hpp --- mlpack-3.1.0/src/mlpack/methods/random_forest/random_forest.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/random_forest/random_forest.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -20,7 +20,7 @@ namespace tree { template, + typename DimensionSelectionType = MultipleRandomDimensionSelect, template class NumericSplitType = BestBinaryNumericSplit, template class CategoricalSplitType = AllCategoricalSplit, typename ElemType = double> @@ -39,26 +39,35 @@ /** * Create a random forest, training on the given labeled training data with - * the given number of trees. The minimumLeafSize parameter is given to each - * individual decision tree during tree building. + * the given number of trees. 
The minimumLeafSize and minimumGainSplit + * parameters are given to each individual decision tree during tree building. + * Optionally, you may specify a DimensionSelectionType to set parameters for + * the strategy used to choose dimensions. * * @param dataset Dataset to train on. * @param labels Labels for dataset. * @param numClasses Number of classes in dataset. * @param numTrees Number of trees in the forest. * @param minimumLeafSize Minimum number of points in each tree's leaf nodes. + * @param minimumGainSplit Minimum gain for splitting a decision tree node. + * @param dimensionSelector Instantiated dimension selection policy. */ template RandomForest(const MatType& dataset, const arma::Row& labels, const size_t numClasses, - const size_t numTrees = 50, - const size_t minimumLeafSize = 20); + const size_t numTrees = 20, + const size_t minimumLeafSize = 1, + const double minimumGainSplit = 1e-7, + DimensionSelectionType dimensionSelector = + DimensionSelectionType()); /** * Create a random forest, training on the given labeled training data with * the given dataset info and the given number of trees. The minimumLeafSize - * parameter is given to each individual decision tree during tree building. + * and minimumGainSplit parameters are given to each individual decision tree + * during tree building. Optionally, you may specify a DimensionSelectionType + * to set parameters for the strategy used to choose dimensions. * This constructor can be used to train on categorical data. * * @param dataset Dataset to train on. @@ -67,14 +76,19 @@ * @param numClasses Number of classes in dataset. * @param numTrees Number of trees in the forest. * @param minimumLeafSize Minimum number of points in each tree's leaf nodes. + * @param minimumGainSplit Minimum gain for splitting a decision tree node. + * @param dimensionSelector Instantiated dimension selection policy. */ template RandomForest(const MatType& dataset, const data::DatasetInfo& datasetInfo, const arma::Row& labels, const size_t numClasses, - const size_t numTrees = 50, - const size_t minimumLeafSize = 20); + const size_t numTrees = 20, + const size_t minimumLeafSize = 1, + const double minimumGainSplit = 1e-7, + DimensionSelectionType dimensionSelector = + DimensionSelectionType()); /** * Create a random forest, training on the given weighted labeled training @@ -93,14 +107,19 @@ const arma::Row& labels, const size_t numClasses, const arma::rowvec& weights, - const size_t numTrees = 50, - const size_t minimumLeafSize = 20); + const size_t numTrees = 20, + const size_t minimumLeafSize = 1, + const double minimumGainSplit = 1e-7, + DimensionSelectionType dimensionSelector = + DimensionSelectionType()); /** * Create a random forest, training on the given weighted labeled training * data with the given dataset info and the given number of trees. The - * minimumLeafSize parameter is given to each individual decision tree during - * tree building. This can be used for categorical weighted training. + * minimumLeafSize and minimumGainSplit parameters are given to each + * individual decision tree during tree building. Optionally, you may specify + * a DimensionSelectionType to set parameters for the strategy used to choose + * dimensions. This can be used for categorical weighted training. * * @param dataset Dataset to train on. * @param datasetInfo Dimension info for the dataset. @@ -109,6 +128,8 @@ * @param weights Weights (importances) of each point in the dataset. * @param numTrees Number of trees in the forest. 
* @param minimumLeafSize Minimum number of points in each tree's leaf nodes. + * @param minimumGainSplit Minimum gain for splitting a decision tree node. + * @param dimensionSelector Instantiated dimension selection policy. */ template RandomForest(const MatType& dataset, @@ -116,32 +137,45 @@ const arma::Row& labels, const size_t numClasses, const arma::rowvec& weights, - const size_t numTrees = 50, - const size_t minimumLeafSize = 20); + const size_t numTrees = 20, + const size_t minimumLeafSize = 1, + const double minimumGainSplit = 1e-7, + DimensionSelectionType dimensionSelector = + DimensionSelectionType()); /** * Train the random forest on the given labeled training data with the given - * number of trees. The minimumLeafSize parameter is given to each individual - * decision tree during tree building. + * number of trees. The minimumLeafSize and minimumGainSplit parameters are + * given to each individual decision tree during tree building. Optionally, + * you may specify a DimensionSelectionType to set parameters for the strategy + * used to choose dimensions. * * @param data Dataset to train on. * @param labels Labels for dataset. * @param numClasses Number of classes in dataset. * @param numTrees Number of trees in the forest. * @param minimumLeafSize Minimum number of points in each tree's leaf nodes. + * @param minimumGainSplit Minimum gain for splitting a decision tree node. + * @param dimensionSelector Instantiated dimension selection policy. * @return The average entropy of all the decision trees trained under forest. */ template double Train(const MatType& data, const arma::Row& labels, const size_t numClasses, - const size_t numTrees = 50, - const size_t minimumLeafSize = 20); + const size_t numTrees = 20, + const size_t minimumLeafSize = 1, + const double minimumGainSplit = 1e-7, + DimensionSelectionType dimensionSelector = + DimensionSelectionType()); /** * Train the random forest on the given labeled training data with the given * dataset info and the given number of trees. The minimumLeafSize parameter - * is given to each individual decision tree during tree building. This + * is given to each individual decision tree during tree building. + * Optionally, you may specify a DimensionSelectionType to set parameters for + * the strategy used to choose dimensions. + * This * overload can be used to train on categorical data. * * @param data Dataset to train on. @@ -150,6 +184,8 @@ * @param numClasses Number of classes in dataset. * @param numTrees Number of trees in the forest. * @param minimumLeafSize Minimum number of points in each tree's leaf nodes. + * @param minimumGainSplit Minimum gain for splitting a decision tree node. + * @param dimensionSelector Instantiated dimension selection policy. * @return The average entropy of all the decision trees trained under forest. */ template @@ -157,13 +193,18 @@ const data::DatasetInfo& datasetInfo, const arma::Row& labels, const size_t numClasses, - const size_t numTrees = 50, - const size_t minimumLeafSize = 20); + const size_t numTrees = 20, + const size_t minimumLeafSize = 1, + const double minimumGainSplit = 1e-7, + DimensionSelectionType dimensionSelector = + DimensionSelectionType()); /** * Train the random forest on the given weighted labeled training data with - * the given number of trees. The minimumLeafSize parameter is given to each - * individual decision tree during tree building. + * the given number of trees. 
The minimumLeafSize and minimumGainSplit + * parameters are given to each individual decision tree during tree building. + * Optionally, you may specify a DimensionSelectionType to set parameters for + * the strategy used to choose dimensions. * * @param data Dataset to train on. * @param labels Labels for dataset. @@ -171,6 +212,8 @@ * @param weights Weights (importances) of each point in the dataset. * @param numTrees Number of trees in the forest. * @param minimumLeafSize Minimum number of points in each tree's leaf nodes. + * @param minimumGainSplit Minimum gain for splitting a decision tree node. + * @param dimensionSelector Instantiated dimension selection policy. * @return The average entropy of all the decision trees trained under forest. */ template @@ -178,14 +221,19 @@ const arma::Row& labels, const size_t numClasses, const arma::rowvec& weights, - const size_t numTrees = 50, - const size_t minimumLeafSize = 20); + const size_t numTrees = 20, + const size_t minimumLeafSize = 1, + const double minimumGainSplit = 1e-7, + DimensionSelectionType dimensionSelector = + DimensionSelectionType()); /** * Train the random forest on the given weighted labeled training data with * the given dataset info and the given number of trees. The minimumLeafSize - * parameter is given to each individual decision tree during tree building. - * This overload can be used for categorical weighted training. + * and minimumGainSplit parameters are given to each individual decision tree + * during tree building. Optionally, you may specify a DimensionSelectionType + * to set parameters for the strategy used to choose dimensions. This + * overload can be used for categorical weighted training. * * @param data Dataset to train on. * @param datasetInfo Dimension info for the dataset. @@ -194,6 +242,8 @@ * @param weights Weights (importances) of each point in the dataset. * @param numTrees Number of trees in the forest. * @param minimumLeafSize Minimum number of points in each tree's leaf nodes. + * @param minimumGainSplit Minimum gain for splitting a decision tree node. + * @param dimensionSelector Instantiated dimension selection policy. * @return The average entropy of all the decision trees trained under forest. */ template @@ -202,8 +252,11 @@ const arma::Row& labels, const size_t numClasses, const arma::rowvec& weights, - const size_t numTrees = 50, - const size_t minimumLeafSize = 20); + const size_t numTrees = 20, + const size_t minimumLeafSize = 1, + const double minimumGainSplit = 1e-7, + DimensionSelectionType dimensionSelector = + DimensionSelectionType()); /** * Predict the class of the given point. If the random forest has not been @@ -280,6 +333,8 @@ * @param weights Weights for each point in the dataset (may be ignored). * @param numTrees Number of trees in the forest. * @param minimumLeafSize Minimum number of points in each leaf node. + * @param minimumGainSplit Minimum gain for splitting a decision tree node. + * @param dimensionSelector Instantiated dimension selection policy. * @tparam UseWeights Whether or not to use the weights parameter. * @tparam UseDatasetInfo Whether or not to use the datasetInfo parameter. * @tparam MatType The type of data matrix (i.e. arma::mat). @@ -292,7 +347,9 @@ const size_t numClasses, const arma::rowvec& weights, const size_t numTrees, - const size_t minimumLeafSize); + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType& dimensionSelector); //! The trees in the forest. 
std::vector trees; diff -Nru mlpack-3.1.0/src/mlpack/methods/random_forest/random_forest_impl.hpp mlpack-3.1.1/src/mlpack/methods/random_forest/random_forest_impl.hpp --- mlpack-3.1.0/src/mlpack/methods/random_forest/random_forest_impl.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/random_forest/random_forest_impl.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -36,13 +36,15 @@ const arma::Row& labels, const size_t numClasses, const size_t numTrees, - const size_t minimumLeafSize) + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType dimensionSelector) { // Pass off work to the Train() method. data::DatasetInfo info; // Ignored. arma::rowvec weights; // Fake weights, not used. Train(dataset, info, labels, numClasses, weights, numTrees, - minimumLeafSize); + minimumLeafSize, minimumGainSplit, dimensionSelector); } template< @@ -64,12 +66,14 @@ const arma::Row& labels, const size_t numClasses, const size_t numTrees, - const size_t minimumLeafSize) + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType dimensionSelector) { // Pass off work to the Train() method. arma::rowvec weights; // Fake weights, not used. Train(dataset, datasetInfo, labels, numClasses, weights, - numTrees, minimumLeafSize); + numTrees, minimumLeafSize, minimumGainSplit, dimensionSelector); } template< @@ -91,12 +95,14 @@ const size_t numClasses, const arma::rowvec& weights, const size_t numTrees, - const size_t minimumLeafSize) + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType dimensionSelector) { // Pass off work to the Train() method. data::DatasetInfo info; // Ignored by Train(). Train(dataset, info, labels, numClasses, weights, numTrees, - minimumLeafSize); + minimumLeafSize, minimumGainSplit, dimensionSelector); } template< @@ -119,11 +125,13 @@ const size_t numClasses, const arma::rowvec& weights, const size_t numTrees, - const size_t minimumLeafSize) + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType dimensionSelector) { // Pass off work to the Train() method. Train(dataset, datasetInfo, labels, numClasses, weights, numTrees, - minimumLeafSize); + minimumLeafSize, minimumGainSplit, dimensionSelector); } template< @@ -144,13 +152,15 @@ const arma::Row& labels, const size_t numClasses, const size_t numTrees, - const size_t minimumLeafSize) + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType dimensionSelector) { // Pass off to Train(). data::DatasetInfo info; // Ignored by Train(). arma::rowvec weights; // Ignored by Train(). return Train(dataset, info, labels, numClasses, weights, - numTrees, minimumLeafSize); + numTrees, minimumLeafSize, minimumGainSplit, dimensionSelector); } template< @@ -172,12 +182,14 @@ const arma::Row& labels, const size_t numClasses, const size_t numTrees, - const size_t minimumLeafSize) + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType dimensionSelector) { // Pass off to Train(). arma::rowvec weights; // Ignored by Train(). 
return Train(dataset, datasetInfo, labels, numClasses, weights, - numTrees, minimumLeafSize); + numTrees, minimumLeafSize, minimumGainSplit, dimensionSelector); } template< @@ -199,12 +211,14 @@ const size_t numClasses, const arma::rowvec& weights, const size_t numTrees, - const size_t minimumLeafSize) + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType dimensionSelector) { // Pass off to Train(). data::DatasetInfo info; // Ignored by Train(). - return Train(dataset, info, labels, numClasses, weights, - numTrees, minimumLeafSize); + return Train(dataset, info, labels, numClasses, weights, + numTrees, minimumLeafSize, minimumGainSplit, dimensionSelector); } template< @@ -227,11 +241,13 @@ const size_t numClasses, const arma::rowvec& weights, const size_t numTrees, - const size_t minimumLeafSize) + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType dimensionSelector) { // Pass off to Train(). return Train(dataset, datasetInfo, labels, numClasses, weights, - numTrees, minimumLeafSize); + numTrees, minimumLeafSize, minimumGainSplit, dimensionSelector); } template< @@ -335,7 +351,9 @@ #pragma omp parallel for for (omp_size_t i = 0; i < data.n_cols; ++i) + { predictions[i] = Classify(data.col(i)); + } } template< @@ -427,7 +445,9 @@ const size_t numClasses, const arma::rowvec& weights, const size_t numTrees, - const size_t minimumLeafSize) + const size_t minimumLeafSize, + const double minimumGainSplit, + DimensionSelectionType& dimensionSelector) { // Train each tree individually. trees.resize(numTrees); // This will fill the vector with untrained trees. @@ -436,38 +456,46 @@ #pragma omp parallel for reduction( + : avgGain) for (omp_size_t i = 0; i < numTrees; ++i) { + Timer::Start("bootstrap"); MatType bootstrapDataset; arma::Row bootstrapLabels; arma::rowvec bootstrapWeights; Bootstrap(dataset, labels, weights, bootstrapDataset, bootstrapLabels, bootstrapWeights); + Timer::Stop("bootstrap"); // Now build the decision tree. 
+ Timer::Start("train_tree"); if (UseWeights) { if (UseDatasetInfo) { - avgGain += trees[i].Train(dataset, datasetInfo, labels, numClasses, - weights, minimumLeafSize); + avgGain += trees[i].Train(bootstrapDataset, datasetInfo, + bootstrapLabels, numClasses, bootstrapWeights, minimumLeafSize, + minimumGainSplit, dimensionSelector); } else { - avgGain += trees[i].Train(dataset, labels, numClasses, weights, - minimumLeafSize); + avgGain += trees[i].Train(bootstrapDataset, bootstrapLabels, numClasses, + bootstrapWeights, minimumLeafSize, minimumGainSplit, + dimensionSelector); } } else { if (UseDatasetInfo) { - avgGain += trees[i].Train(dataset, datasetInfo, labels, numClasses, - minimumLeafSize); + avgGain += trees[i].Train(bootstrapDataset, datasetInfo, + bootstrapLabels, numClasses, minimumLeafSize, minimumGainSplit, + dimensionSelector); } else { - avgGain += trees[i].Train(dataset, labels, numClasses, minimumLeafSize); + avgGain += trees[i].Train(bootstrapDataset, bootstrapLabels, numClasses, + minimumLeafSize, minimumGainSplit, dimensionSelector); } } + Timer::Stop("train_tree"); } return avgGain / numTrees; } diff -Nru mlpack-3.1.0/src/mlpack/methods/random_forest/random_forest_main.cpp mlpack-3.1.1/src/mlpack/methods/random_forest/random_forest_main.cpp --- mlpack-3.1.0/src/mlpack/methods/random_forest/random_forest_main.cpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/random_forest/random_forest_main.cpp 2019-05-27 02:18:49.000000000 +0000 @@ -47,7 +47,12 @@ " parameter specifies the minimum number of training points that must fall " "into each leaf for it to be split. The " + PRINT_PARAM_STRING("num_trees") + - " controls the number of trees in the random forest. If " + + " controls the number of trees in the random forest. The " + + PRINT_PARAM_STRING("minimum_gain_split") + " parameter controls the minimum" + " required gain for a decision tree node to split. Larger values will " + "force higher-confidence splits. The " + + PRINT_PARAM_STRING("subspace_dim") + " parameter is used to control the " + "number of random dimensions chosen for an individual node's split. If " + PRINT_PARAM_STRING("print_training_accuracy") + " is specified, the " "calculated accuracy on the training set will be printed." "\n\n" @@ -99,13 +104,21 @@ PARAM_INT_IN("num_trees", "Number of trees in the random forest.", "N", 10); PARAM_INT_IN("minimum_leaf_size", "Minimum number of points in each leaf " - "node.", "n", 20); + "node.", "n", 1); PARAM_MATRIX_OUT("probabilities", "Predicted class probabilities for each " "point in the test set.", "P"); PARAM_UROW_OUT("predictions", "Predicted classes for each point in the test " "set.", "p"); +PARAM_DOUBLE_IN("minimum_gain_split", "Minimum gain needed to make a split " + "when building a tree.", "g", 0); +PARAM_INT_IN("subspace_dim", "Dimensionality of random subspace to use for " + "each split. '0' will autoselect the square root of data dimensionality.", + "d", 0); + +PARAM_INT_IN("seed", "Random seed. If 0, 'std::time(NULL)' is used.", "s", 0); + /** * This is the class that we will serialize. It is a pretty simple wrapper * around DecisionTree<>. In order to support categoricals, it will need to @@ -135,21 +148,20 @@ static void mlpackMain() { + // Initialize random seed if needed. + if (CLI::GetParam("seed") != 0) + math::RandomSeed((size_t) CLI::GetParam("seed")); + else + math::RandomSeed((size_t) std::time(NULL)); + // Check for incompatible input parameters. 
RequireOnlyOnePassed({ "training", "input_model" }, true); ReportIgnoredParam({{ "training", false }}, "print_training_accuracy"); - - if (CLI::HasParam("test")) - { - RequireAtLeastOnePassed({ "probabilities", "predictions" }, "no test output" - " will be saved"); - } - ReportIgnoredParam({{ "test", false }}, "test_labels"); RequireAtLeastOnePassed({ "test", "output_model", "print_training_accuracy" }, - "the trained forest model will not be used or saved"); + false, "the trained forest model will not be used or saved"); if (CLI::HasParam("training")) { @@ -165,6 +177,11 @@ RequireParamValue("minimum_leaf_size", [](int x) { return x > 0; }, true, "minimum leaf size must be greater than 0"); + RequireParamValue("subspace_dim", [](int x) { return x >= 0; }, true, + "subspace dimensionality must be nonnegative"); + RequireParamValue("minimum_gain_split", + [](double x) { return x >= 0.0; }, true, + "minimum gain for splitting must be nonnegative"); ReportIgnoredParam({{ "training", false }}, "num_trees"); ReportIgnoredParam({{ "training", false }}, "minimum_leaf_size"); @@ -172,15 +189,27 @@ RandomForestModel* rfModel; if (CLI::HasParam("training")) { + Timer::Start("rf_training"); rfModel = new RandomForestModel(); // Train the model on the given input data. arma::mat data = std::move(CLI::GetParam("training")); arma::Row labels = std::move(CLI::GetParam>("labels")); + + // Make sure the subspace dimensionality is valid. + RequireParamValue("subspace_dim", + [data](int x) { return (size_t) x <= data.n_rows; }, true, "subspace " + "dimensionality must not be greater than data dimensionality"); + const size_t numTrees = (size_t) CLI::GetParam("num_trees"); const size_t minimumLeafSize = (size_t) CLI::GetParam("minimum_leaf_size"); + const double minimumGainSplit = CLI::GetParam("minimum_gain_split"); + const size_t randomDims = (CLI::GetParam("subspace_dim") == 0) ? + (size_t) std::sqrt(data.n_rows) : + (size_t) CLI::GetParam("subspace_dim"); + MultipleRandomDimensionSelect mrds(randomDims); Log::Info << "Training random forest with " << numTrees << " trees..." << endl; @@ -188,11 +217,14 @@ const size_t numClasses = arma::max(labels) + 1; // Train the model. - rfModel->rf.Train(data, labels, numClasses, numTrees, minimumLeafSize); + rfModel->rf.Train(data, labels, numClasses, numTrees, minimumLeafSize, + minimumGainSplit, mrds); + Timer::Stop("rf_training"); // Did we want training accuracy? if (CLI::HasParam("print_training_accuracy")) { + Timer::Start("rf_prediction"); arma::Row predictions; rfModel->rf.Classify(data, predictions); @@ -201,6 +233,7 @@ Log::Info << correct << " of " << labels.n_elem << " correct on training" << " set (" << (double(correct) / double(labels.n_elem) * 100) << ")." << endl; + Timer::Stop("rf_prediction"); } } else @@ -212,6 +245,7 @@ if (CLI::HasParam("test")) { arma::mat testData = std::move(CLI::GetParam("test")); + Timer::Start("rf_prediction"); // Get predictions and probabilities. arma::Row predictions; @@ -229,6 +263,7 @@ Log::Info << correct << " of " << testLabels.n_elem << " correct on test" << " set (" << (double(correct) / double(testLabels.n_elem) * 100) << ")." << endl; + Timer::Stop("rf_prediction"); } // Save the outputs. 
diff -Nru mlpack-3.1.0/src/mlpack/methods/reinforcement_learning/policy/greedy_policy.hpp mlpack-3.1.1/src/mlpack/methods/reinforcement_learning/policy/greedy_policy.hpp --- mlpack-3.1.0/src/mlpack/methods/reinforcement_learning/policy/greedy_policy.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/reinforcement_learning/policy/greedy_policy.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -1,6 +1,7 @@ /** * @file greedy_policy.hpp * @author Shangtong Zhang + * @author Abhinav Sagar * * This file is an implementation of epsilon greedy policy. * @@ -41,13 +42,16 @@ * @param annealInterval The steps during which the probability to explore * will anneal. * @param minEpsilon Epsilon will never be less than this value. + * @param decayRate How much to change the model in response to the + * estimated error each time the model weights are updated. */ GreedyPolicy(const double initialEpsilon, const size_t annealInterval, - const double minEpsilon) : + const double minEpsilon, + const double decayRate = 1.0) : epsilon(initialEpsilon), minEpsilon(minEpsilon), - delta((initialEpsilon - minEpsilon) / annealInterval) + delta(((initialEpsilon - minEpsilon) * decayRate) / annealInterval) { /* Nothing to do here. */ } /** diff -Nru mlpack-3.1.0/src/mlpack/methods/reinforcement_learning/q_learning.hpp mlpack-3.1.1/src/mlpack/methods/reinforcement_learning/q_learning.hpp --- mlpack-3.1.0/src/mlpack/methods/reinforcement_learning/q_learning.hpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/methods/reinforcement_learning/q_learning.hpp 2019-05-27 02:18:49.000000000 +0000 @@ -100,6 +100,16 @@ */ const size_t& TotalSteps() const { return totalSteps; } + //! Modify the state of the agent. + StateType& State() { return state; } + //! Get the state of the agent. + const StateType& State() const { return state; } + + //! Modify the environment in which the agent is. + EnvironmentType& Environment() { return environment; } + //! Get the environment in which the agent is. + const EnvironmentType& Environment() const { return environment; } + //! Modify the training mode / test mode indicator. bool& Deterministic() { return deterministic; } //! Get the indicator of training mode / test mode. diff -Nru mlpack-3.1.0/src/mlpack/tests/activation_functions_test.cpp mlpack-3.1.1/src/mlpack/tests/activation_functions_test.cpp --- mlpack-3.1.0/src/mlpack/tests/activation_functions_test.cpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/tests/activation_functions_test.cpp 2019-05-27 02:18:49.000000000 +0000 @@ -566,6 +566,36 @@ } /** + * Basic test of the CReLU function. + */ +BOOST_AUTO_TEST_CASE(CReLUFunctionTest) +{ + const arma::colvec desiredActivations("0 3.2 4.5 0 \ + 1 0 2 0 2 0 0 \ + 100.2 0 1 0 0"); + + const arma::colvec desiredDerivatives("0 0 0 0 \ + 0 0 0 0"); + CReLU<> crelu; + // Test the activation function using the entire vector as input. + arma::colvec activations; + crelu.Forward(std::move(activationData), std::move(activations)); + arma::colvec derivatives; + // This error vector will be set to 1 to get the derivatives. 
+ arma::colvec error = arma::ones(desiredActivations.n_elem); + crelu.Backward(std::move(desiredActivations), std::move(error), + std::move(derivatives)); + for (size_t i = 0; i < activations.n_elem; i++) + { + BOOST_REQUIRE_CLOSE(activations.at(i), desiredActivations.at(i), 1e-3); + } + for (size_t i = 0; i < derivatives.n_elem; i++) + { + BOOST_REQUIRE_CLOSE(derivatives.at(i), desiredDerivatives.at(i), 1e-3); + } +} + +/** * Basic test of the swish function. */ BOOST_AUTO_TEST_CASE(SwishFunctionTest) diff -Nru mlpack-3.1.0/src/mlpack/tests/ann_layer_test.cpp mlpack-3.1.1/src/mlpack/tests/ann_layer_test.cpp --- mlpack-3.1.0/src/mlpack/tests/ann_layer_test.cpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/tests/ann_layer_test.cpp 2019-05-27 02:18:49.000000000 +0000 @@ -1208,6 +1208,89 @@ } /** + * Test to check Concat layer along different axes. + */ +BOOST_AUTO_TEST_CASE(ConcatAlongAxisTest) +{ + arma::mat output, input, error, outputA, outputB; + size_t inputWidth = 4, inputHeight = 4, inputChannel = 2; + size_t outputWidth, outputHeight, outputChannel = 2; + size_t kW = 3, kH = 3; + size_t batch = 1; + + // Using Convolution<> layer as inout to Concat<> layer. + // Compute the output shape of convolution layer. + outputWidth = (inputWidth - kW) + 1; + outputHeight = (inputHeight - kH) + 1; + + input = arma::ones(inputWidth * inputHeight * inputChannel, batch); + + Convolution<> moduleA(inputChannel, outputChannel, kW, kH, 1, 1, 0, 0, + inputWidth, inputHeight); + Convolution<> moduleB(inputChannel, outputChannel, kW, kH, 1, 1, 0, 0, + inputWidth, inputHeight); + + moduleA.Reset(); + moduleA.Parameters().randu(); + moduleB.Reset(); + moduleB.Parameters().randu(); + + // Compute output of each layer. + moduleA.Forward(std::move(input), std::move(outputA)); + moduleB.Forward(std::move(input), std::move(outputB)); + + arma::cube A(outputA.memptr(), outputWidth, outputHeight, outputChannel); + arma::cube B(outputB.memptr(), outputWidth, outputHeight, outputChannel); + + error = arma::ones(outputWidth * outputHeight * outputChannel * 2, 1); + + for (size_t axis = 0; axis < 3; ++axis) + { + size_t x = 1, y = 1, z = 1; + arma::cube calculatedOut; + if (axis == 0) + { + calculatedOut.set_size(2 * outputWidth, outputHeight, outputChannel); + for (size_t i = 0; i < A.n_slices; ++i) + { + arma::mat aMat = A.slice(i); + arma::mat bMat = B.slice(i); + calculatedOut.slice(i) = arma::join_cols(aMat, bMat); + } + x = 2; + } + if (axis == 1) + { + calculatedOut.set_size(outputWidth, 2 * outputHeight, outputChannel); + for (size_t i = 0; i < A.n_slices; ++i) + { + arma::mat aMat = A.slice(i); + arma::mat bMat = B.slice(i); + calculatedOut.slice(i) = arma::join_rows(aMat, bMat); + } + y = 2; + } + if (axis == 2) + { + calculatedOut = arma::join_slices(A, B); + z = 2; + } + + // Compute output of Concat<> layer. + arma::Row inputSize{outputWidth, outputHeight, outputChannel}; + Concat<> module(inputSize, axis); + module.Add(moduleA); + module.Add(moduleB); + module.Forward(std::move(input), std::move(output)); + arma::cube concatOut(output.memptr(), x * outputWidth, + y * outputHeight, z * outputChannel); + + // Verify if the output reshaped to cubes are similar. + CheckMatrices(concatOut, calculatedOut, 1e-12); + } +} + +/** * Concat layer numerical gradient test. 
*/ BOOST_AUTO_TEST_CASE(GradientConcatLayerTest) diff -Nru mlpack-3.1.0/src/mlpack/tests/CMakeLists.txt mlpack-3.1.1/src/mlpack/tests/CMakeLists.txt --- mlpack-3.1.0/src/mlpack/tests/CMakeLists.txt 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/tests/CMakeLists.txt 2019-05-27 02:18:49.000000000 +0000 @@ -118,6 +118,7 @@ main_tests/det_test.cpp main_tests/decision_tree_test.cpp main_tests/decision_stump_test.cpp + main_tests/fastmks_test.cpp main_tests/kde_test.cpp main_tests/linear_regression_test.cpp main_tests/logistic_regression_test.cpp diff -Nru mlpack-3.1.0/src/mlpack/tests/cv_test.cpp mlpack-3.1.1/src/mlpack/tests/cv_test.cpp --- mlpack-3.1.0/src/mlpack/tests/cv_test.cpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/tests/cv_test.cpp 2019-05-27 02:18:49.000000000 +0000 @@ -31,7 +31,7 @@ #include #include #include - +#include #include #include @@ -43,6 +43,7 @@ using namespace mlpack::naive_bayes; using namespace mlpack::regression; using namespace mlpack::tree; +using namespace mlpack::data; BOOST_AUTO_TEST_SUITE(CVTest); @@ -74,6 +75,25 @@ } /** + * Test for confusion matrix. + */ +BOOST_AUTO_TEST_CASE(ConfusionMatrixTest) +{ + // Labels that will be considered as "ground truth". + arma::Row labels("0 0 1 0 0 1 0 1 0 1"); + + // Predicted labels. + arma::Row predictedLabels("0 0 0 0 0 1 1 1 1 1"); + // Confusion matrix. + arma::Mat output; + data::ConfusionMatrix(predictedLabels, labels, output, 2); + BOOST_REQUIRE_EQUAL(output(0, 0), 4); + BOOST_REQUIRE_EQUAL(output(0, 1), 1); + BOOST_REQUIRE_EQUAL(output(1, 0), 2); + BOOST_REQUIRE_EQUAL(output(1, 1), 3); +} + +/** * Test metrics for multiclass classification. */ BOOST_AUTO_TEST_CASE(MulticlassClassificationMetricsTest) diff -Nru mlpack-3.1.0/src/mlpack/tests/dcgan_test.cpp mlpack-3.1.1/src/mlpack/tests/dcgan_test.cpp --- mlpack-3.1.0/src/mlpack/tests/dcgan_test.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/tests/dcgan_test.cpp 2019-05-27 02:18:49.000000000 +0000 @@ -0,0 +1,287 @@ +/** + * @file dcgan_test.cpp + * @author Shikhar Jaiswal + * + * Tests the DCGAN network. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include "test_tools.hpp" + +using namespace mlpack; +using namespace mlpack::ann; +using namespace mlpack::math; +using namespace mlpack::regression; +using namespace std::placeholders; + +BOOST_AUTO_TEST_SUITE(DCGANNetworkTest); + +/* + * Tests the DCGAN implementation on the MNIST dataset. + * It's not viable to train on bigger parameters due to time constraints. + * Please refer mlpack/models repository for the tutorial. 
+ */ +BOOST_AUTO_TEST_CASE(DCGANMNISTTest) +{ + size_t dNumKernels = 32; + size_t discriminatorPreTrain = 5; + size_t batchSize = 5; + size_t noiseDim = 100; + size_t generatorUpdateStep = 1; + size_t numSamples = 10; + double stepSize = 0.0003; + double eps = 1e-8; + size_t numEpoches = 1; + double tolerance = 1e-5; + int datasetMaxCols = 10; + bool shuffle = true; + double multiplier = 10; + + Log::Info << std::boolalpha + << " batchSize = " << batchSize << std::endl + << " generatorUpdateStep = " << generatorUpdateStep << std::endl + << " noiseDim = " << noiseDim << std::endl + << " numSamples = " << numSamples << std::endl + << " stepSize = " << stepSize << std::endl + << " numEpoches = " << numEpoches << std::endl + << " tolerance = " << tolerance << std::endl + << " shuffle = " << shuffle << std::endl; + + arma::mat trainData; + trainData.load("mnist_first250_training_4s_and_9s.arm"); + Log::Info << arma::size(trainData) << std::endl; + + trainData = trainData.cols(0, datasetMaxCols - 1); + + size_t numIterations = trainData.n_cols * numEpoches; + numIterations /= batchSize; + + Log::Info << "Dataset loaded (" << trainData.n_rows << ", " + << trainData.n_cols << ")" << std::endl; + Log::Info << trainData.n_rows << "--------" << trainData.n_cols << std::endl; + + // Create the Discriminator network + FFN > discriminator; + discriminator.Add >(1, dNumKernels, 4, 4, 2, 2, 1, 1, 28, 28); + discriminator.Add >(0.2); + discriminator.Add >(dNumKernels, 2 * dNumKernels, 4, 4, 2, 2, + 1, 1, 14, 14); + discriminator.Add >(0.2); + discriminator.Add >(2 * dNumKernels, 4 * dNumKernels, 4, 4, + 2, 2, 1, 1, 7, 7); + discriminator.Add >(0.2); + discriminator.Add >(4 * dNumKernels, 8 * dNumKernels, 4, 4, + 2, 2, 2, 2, 3, 3); + discriminator.Add >(0.2); + discriminator.Add >(8 * dNumKernels, 1, 4, 4, 1, 1, + 1, 1, 2, 2); + + // Create the Generator network + FFN > generator; + generator.Add >(noiseDim, 8 * dNumKernels, 2, 2, + 1, 1, 1, 1, 1, 1); + generator.Add >(1024); + generator.Add >(); + generator.Add >(8 * dNumKernels, 4 * dNumKernels, + 2, 2, 1, 1, 0, 0, 2, 2); + generator.Add >(1152); + generator.Add >(); + generator.Add >(4 * dNumKernels, 2 * dNumKernels, + 5, 5, 2, 2, 1, 1, 3, 3); + generator.Add >(3136); + generator.Add >(); + generator.Add >(2 * dNumKernels, dNumKernels, 8, 8, + 1, 1, 1, 1, 7, 7); + generator.Add >(6272); + generator.Add >(); + generator.Add >(dNumKernels, 1, 15, 15, 1, 1, 1, 1, + 14, 14); + generator.Add >(); + + // Create DCGAN + GaussianInitialization gaussian(0, 1); + ens::Adam optimizer(stepSize, batchSize, 0.9, 0.999, eps, numIterations, + tolerance, shuffle); + std::function noiseFunction = [] () { + return math::RandNormal(0, 1);}; + GAN >, GaussianInitialization, + std::function, DCGAN> dcgan(trainData, generator, discriminator, + gaussian, noiseFunction, noiseDim, batchSize, generatorUpdateStep, + discriminatorPreTrain, multiplier); + + Log::Info << "Training..." << std::endl; + double objVal = dcgan.Train(optimizer); + + // Test that objective value returned by GAN::Train() is finite. + BOOST_REQUIRE_EQUAL(std::isfinite(objVal), true); + + // Generate samples + Log::Info << "Sampling..." 
<< std::endl; + arma::mat noise(noiseDim, 1); + size_t dim = std::sqrt(trainData.n_rows); + arma::mat generatedData(2 * dim, dim * numSamples); + + for (size_t i = 0; i < numSamples; i++) + { + arma::mat samples; + noise.imbue( [&]() { return noiseFunction(); } ); + + dcgan.Generator().Forward(noise, samples); + samples.reshape(dim, dim); + samples = samples.t(); + + generatedData.submat(0, i * dim, dim - 1, i * dim + dim - 1) = samples; + + samples = trainData.col(math::RandInt(0, trainData.n_cols)); + samples.reshape(dim, dim); + samples = samples.t(); + + generatedData.submat(dim, + i * dim, 2 * dim - 1, i * dim + dim - 1) = samples; + } + + Log::Info << "Output generated!" << std::endl; +} + +/* + * Tests the DCGAN implementation on the CelebA dataset. + * It's currently not possible to run this every time due to time constraints. + * Please refer mlpack/models repository for the tutorial. + +BOOST_AUTO_TEST_CASE(DCGANCelebATest) +{ + size_t dNumKernels = 64; + size_t discriminatorPreTrain = 300; + size_t batchSize = 1; + size_t noiseDim = 100; + size_t generatorUpdateStep = 1; + size_t numSamples = 10; + double stepSize = 0.0003; + double eps = 1e-8; + size_t numEpoches = 20; + double tolerance = 1e-5; + int datasetMaxCols = -1; + bool shuffle = true; + double multiplier = 10; + + Log::Info << std::boolalpha + << " batchSize = " << batchSize << std::endl + << " generatorUpdateStep = " << generatorUpdateStep << std::endl + << " noiseDim = " << noiseDim << std::endl + << " numSamples = " << numSamples << std::endl + << " stepSize = " << stepSize << std::endl + << " numEpoches = " << numEpoches << std::endl + << " tolerance = " << tolerance << std::endl + << " shuffle = " << shuffle << std::endl; + + arma::mat trainData; + trainData.load("celeba.csv"); + Log::Info << arma::size(trainData) << std::endl; + + if (datasetMaxCols > 0) + trainData = trainData.cols(0, datasetMaxCols - 1); + + size_t numIterations = trainData.n_cols * numEpoches; + numIterations /= batchSize; + + Log::Info << "Dataset loaded (" << trainData.n_rows << ", " + << trainData.n_cols << ")" << std::endl; + Log::Info << trainData.n_rows << "--------" << trainData.n_cols << std::endl; + + // Create the Discriminator network + FFN > discriminator; + discriminator.Add >(3, dNumKernels, 4, 4, 2, 2, 1, 1, 64, 64); + discriminator.Add >(0.2); + discriminator.Add >(dNumKernels, 2 * dNumKernels, 4, 4, 2, 2, + 1, 1, 32, 32); + discriminator.Add >(0.2); + discriminator.Add >(2 * dNumKernels, 4 * dNumKernels, 4, 4, + 2, 2, 1, 1, 16, 16); + discriminator.Add >(0.2); + discriminator.Add >(4 * dNumKernels, 8 * dNumKernels, 4, 4, + 2, 2, 1, 1, 8, 8); + discriminator.Add >(0.2); + discriminator.Add >(8 * dNumKernels, 1, 4, 4, 1, 1, + 0, 0, 4, 4); + + // Create the Generator network + FFN > generator; + generator.Add >(noiseDim, 8 * dNumKernels, 4, 4, + 1, 1, 2, 2, 1, 1); + generator.Add >(4096); + generator.Add >(); + generator.Add >(8 * dNumKernels, 4 * dNumKernels, + 5, 5, 1, 1, 1, 1, 4, 4); + generator.Add >(8192); + generator.Add >(); + generator.Add >(4 * dNumKernels, 2 * dNumKernels, + 9, 9, 1, 1, 1, 1, 8, 8); + generator.Add >(16384); + generator.Add >(); + generator.Add >(2 * dNumKernels, dNumKernels, 17, 17, + 1, 1, 1, 1, 16, 16); + generator.Add >(32768); + generator.Add >(); + generator.Add >(dNumKernels, 3, 33, 33, 1, 1, 1, 1, + 32, 32); + generator.Add >(); + + // Create DCGAN + GaussianInitialization gaussian(0, 1); + ens::Adam optimizer(stepSize, batchSize, 0.9, 0.999, eps, numIterations, + tolerance, shuffle); + 
+  std::function<double()> noiseFunction = [] () {
+      return math::RandNormal(0, 1);};
+  GAN<FFN<SigmoidCrossEntropyError<> >, GaussianInitialization,
+      std::function<double()>, DCGAN> dcgan(trainData, generator,
+      discriminator, gaussian, noiseFunction, noiseDim, batchSize,
+      generatorUpdateStep, discriminatorPreTrain, multiplier);
+
+  Log::Info << "Training..." << std::endl;
+  dcgan.Train(optimizer);
+
+  // Generate samples.
+  Log::Info << "Sampling..." << std::endl;
+  arma::mat noise(noiseDim, 1);
+  size_t dim = std::sqrt(trainData.n_rows);
+  arma::mat generatedData(2 * dim, dim * numSamples);
+
+  for (size_t i = 0; i < numSamples; i++)
+  {
+    arma::mat samples;
+    noise.imbue( [&]() { return noiseFunction(); } );
+
+    dcgan.Generator().Forward(noise, samples);
+    samples.reshape(dim, dim);
+    samples = samples.t();
+
+    generatedData.submat(0, i * dim, dim - 1, i * dim + dim - 1) = samples;
+
+    samples = trainData.col(math::RandInt(0, trainData.n_cols));
+    samples.reshape(dim, dim);
+    samples = samples.t();
+
+    generatedData.submat(dim,
+        i * dim, 2 * dim - 1, i * dim + dim - 1) = samples;
+  }
+
+  Log::Info << "Output generated!" << std::endl;
+}
+*/
+
+BOOST_AUTO_TEST_SUITE_END();
diff -Nru mlpack-3.1.0/src/mlpack/tests/decision_tree_test.cpp mlpack-3.1.1/src/mlpack/tests/decision_tree_test.cpp
--- mlpack-3.1.0/src/mlpack/tests/decision_tree_test.cpp	2019-04-26 05:07:37.000000000 +0000
+++ mlpack-3.1.1/src/mlpack/tests/decision_tree_test.cpp	2019-05-27 02:18:49.000000000 +0000
@@ -296,7 +296,7 @@
   // Make sure that a split was made.
   BOOST_REQUIRE_GT(gain, bestGain);
 
-  // Make sure weight works and make no different with no weighted one
+  // Make sure weight works and is not different than the unweighted one.
   BOOST_REQUIRE_EQUAL(gain, weightedGain);
 
   // The split is perfect, so we should be able to accomplish a gain of 0.
@@ -332,7 +332,7 @@
       labels, 2, weights, 8, 1e-7, classProbabilities, aux);
 
   // Make sure that no split was made.
-  BOOST_REQUIRE_EQUAL(gain, bestGain);
+  BOOST_REQUIRE_EQUAL(gain, DBL_MAX);
   BOOST_REQUIRE_EQUAL(gain, weightedGain);
   BOOST_REQUIRE_EQUAL(classProbabilities.n_elem, 0);
 }
@@ -363,7 +363,7 @@
       bestGain, values, labels, 2, weights, 10, 1e-7, classProbabilities, aux);
 
   // Make sure there was no split.
-  BOOST_REQUIRE_EQUAL(gain, bestGain);
+  BOOST_REQUIRE_EQUAL(gain, DBL_MAX);
   BOOST_REQUIRE_EQUAL(classProbabilities.n_elem, 0);
 }
@@ -424,7 +424,7 @@
       aux);
 
   // Make sure it's not split.
-  BOOST_REQUIRE_EQUAL(gain, bestGain);
+  BOOST_REQUIRE_EQUAL(gain, DBL_MAX);
   BOOST_REQUIRE_EQUAL(classProbabilities.n_elem, 0);
 }
@@ -460,7 +460,7 @@
       labels, 3, weights, 10, 1e-7, classProbabilities, aux);
 
   // Make sure that there was no split.
-  BOOST_REQUIRE_EQUAL(gain, bestGain);
+  BOOST_REQUIRE_EQUAL(gain, DBL_MAX);
   BOOST_REQUIRE_EQUAL(gain, weightedGain);
   BOOST_REQUIRE_EQUAL(classProbabilities.n_elem, 0);
 }
@@ -471,14 +471,21 @@
  */
 BOOST_AUTO_TEST_CASE(BasicConstructionTest)
 {
-  arma::mat dataset(10, 1000, arma::fill::randu);
-  arma::Row<size_t> labels(1000);
-
-  for (size_t i = 0; i < 1000; ++i)
-    labels[i] = i % 3; // 3 classes.
+  arma::mat dataset(10, 100, arma::fill::randu);
+  arma::Row<size_t> labels(100);
+  for (size_t i = 0; i < 50; ++i)
+  {
+    dataset(3, i) = 0.0;
+    labels[i] = 0;
+  }
+  for (size_t i = 50; i < 100; ++i)
+  {
+    dataset(3, i) = 1.0;
+    labels[i] = 1;
+  }
 
   // Use default parameters.
-  DecisionTree<> d(dataset, labels, 3, 50);
+  DecisionTree<> d(dataset, labels, 2, 10);
 
   // Now require that we have some children.
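   // Aside, not part of the upstream patch: the rewritten fixture plants a
   // perfect split in dimension 3 (class 0 points carry value 0.0, class 1
   // points carry value 1.0), so a gain-based splitter is guaranteed to
   // create children; the old fully random dataset could legitimately end
   // in a childless root. Classifying a single point afterwards looks like:
   //
   //   size_t prediction;
   //   arma::vec probabilities;
   //   d.Classify(dataset.col(0), prediction, probabilities);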
   BOOST_REQUIRE_GT(d.NumChildren(), 0);
@@ -489,17 +496,24 @@
  */
 BOOST_AUTO_TEST_CASE(BasicConstructionTestWithWeight)
 {
-  arma::mat dataset(10, 1000, arma::fill::randu);
-  arma::Row<size_t> labels(1000);
+  arma::mat dataset(10, 100, arma::fill::randu);
+  arma::Row<size_t> labels(100);
+  for (size_t i = 0; i < 50; ++i)
+  {
+    dataset(3, i) = 0.0;
+    labels[i] = 0;
+  }
+  for (size_t i = 50; i < 100; ++i)
+  {
+    dataset(3, i) = 1.0;
+    labels[i] = 1;
+  }
   arma::rowvec weights(labels.n_elem);
   weights.ones();
-  for (size_t i = 0; i < 1000; ++i)
-    labels[i] = i % 3; // 3 classes.
-
   // Use default parameters.
-  DecisionTree<> wd(dataset, labels, 3, weights, 50);
-  DecisionTree<> d(dataset, labels, 3, 50);
+  DecisionTree<> wd(dataset, labels, 2, weights, 10);
+  DecisionTree<> d(dataset, labels, 2, 10);
 
   // Now require that we have some children.
   BOOST_REQUIRE_GT(wd.NumChildren(), 0);
@@ -512,25 +526,30 @@
  */
 BOOST_AUTO_TEST_CASE(PerfectTrainingSet)
 {
-  // Completely random dataset with no structure.
-  arma::mat dataset(10, 1000, arma::fill::randu);
-  arma::Row<size_t> labels(1000);
-  for (size_t i = 0; i < 1000; ++i)
-    labels[i] = i % 3; // 3 classes.
-  arma::rowvec weights(labels.n_elem);
-  weights.ones();
+  arma::mat dataset(10, 100, arma::fill::randu);
+  arma::Row<size_t> labels(100);
+  for (size_t i = 0; i < 50; ++i)
+  {
+    dataset(3, i) = 0.0;
+    labels[i] = 0;
+  }
+  for (size_t i = 50; i < 100; ++i)
+  {
+    dataset(3, i) = 1.0;
+    labels[i] = 1;
+  }
 
-  DecisionTree<> d(dataset, labels, 3, 1); // Minimum leaf size of 1.
+  DecisionTree<> d(dataset, labels, 2, 1, 0.0); // Minimum leaf size of 1.
 
   // Make sure that we can get perfect accuracy on the training set.
-  for (size_t i = 0; i < 1000; ++i)
+  for (size_t i = 0; i < 100; ++i)
   {
     size_t prediction;
     arma::vec probabilities;
     d.Classify(dataset.col(i), prediction, probabilities);
 
     BOOST_REQUIRE_EQUAL(prediction, labels[i]);
-    BOOST_REQUIRE_EQUAL(probabilities.n_elem, 3);
+    BOOST_REQUIRE_EQUAL(probabilities.n_elem, 2);
     for (size_t j = 0; j < 3; ++j)
     {
       if (labels[i] == j)
@@ -547,24 +566,33 @@
 BOOST_AUTO_TEST_CASE(PerfectTrainingSetWithWeight)
 {
   // Completely random dataset with no structure.
-  arma::mat dataset(10, 1000, arma::fill::randu);
-  arma::Row<size_t> labels(1000);
-  for (size_t i = 0; i < 1000; ++i)
-    labels[i] = i % 3; // 3 classes.
+  arma::mat dataset(10, 100, arma::fill::randu);
+  arma::Row<size_t> labels(100);
+  for (size_t i = 0; i < 50; ++i)
+  {
+    dataset(3, i) = 0.0;
+    labels[i] = 0;
+  }
+  for (size_t i = 50; i < 100; ++i)
+  {
+    dataset(3, i) = 1.0;
+    labels[i] = 1;
+  }
   arma::rowvec weights(labels.n_elem);
   weights.ones();
 
-  DecisionTree<> d(dataset, labels, 3, weights, 1); // Minimum leaf size of 1.
+  // Minimum leaf size of 1.
+  DecisionTree<> d(dataset, labels, 2, weights, 1, 0.0);
 
   // This part of the code is duplicated from the unweighted test.
-  for (size_t i = 0; i < 1000; ++i)
+  for (size_t i = 0; i < 100; ++i)
   {
     size_t prediction;
     arma::vec probabilities;
     d.Classify(dataset.col(i), prediction, probabilities);
 
     BOOST_REQUIRE_EQUAL(prediction, labels[i]);
-    BOOST_REQUIRE_EQUAL(probabilities.n_elem, 3);
+    BOOST_REQUIRE_EQUAL(probabilities.n_elem, 2);
     for (size_t j = 0; j < 3; ++j)
     {
       if (labels[i] == j)
@@ -981,7 +1009,8 @@
  */
 BOOST_AUTO_TEST_CASE(RandomDimensionSelectTest)
 {
-  RandomDimensionSelect r(10);
+  RandomDimensionSelect r;
+  r.Dimensions() = 10;
 
   BOOST_REQUIRE_LT(r.Begin(), 10);
   BOOST_REQUIRE_EQUAL(r.Next(), r.End());
@@ -995,7 +1024,11 @@
 BOOST_AUTO_TEST_CASE(RandomDimensionSelectRandomTest)
 {
   // We'll check that 4 values are not all the same.
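   // Aside, not part of the upstream patch: as the hunks above and below
   // show, 3.1.1 moves the dimension count out of the RandomDimensionSelect
   // constructor and into an accessor, so construction becomes two steps:
   //
   //   RandomDimensionSelect r;   // was: RandomDimensionSelect r(100000);
   //   r.Dimensions() = 100000;   // now set through the Dimensions() accessor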
-  RandomDimensionSelect r1(100000), r2(100000), r3(100000), r4(100000);
+  RandomDimensionSelect r1, r2, r3, r4;
+  r1.Dimensions() = 100000;
+  r2.Dimensions() = 100000;
+  r3.Dimensions() = 100000;
+  r4.Dimensions() = 100000;
 
   BOOST_REQUIRE((r1.Begin() != r2.Begin()) ||
                 (r1.Begin() != r3.Begin()) ||
@@ -1008,7 +1041,8 @@
  */
 BOOST_AUTO_TEST_CASE(MultipleRandomDimensionSelectTest)
 {
-  MultipleRandomDimensionSelect<5> r(10);
+  MultipleRandomDimensionSelect r(5);
+  r.Dimensions() = 10;
 
   // Make sure we get five elements.
   BOOST_REQUIRE_LT(r.Begin(), 10);
@@ -1024,7 +1058,8 @@
  */
 BOOST_AUTO_TEST_CASE(MultipleRandomDimensionAllSelectTest)
 {
-  MultipleRandomDimensionSelect<3> r(3);
+  MultipleRandomDimensionSelect r(3);
+  r.Dimensions() = 3;
 
   bool found[3];
   found[0] = found[1] = found[2] = false;
diff -Nru mlpack-3.1.0/src/mlpack/tests/fastmks_test.cpp mlpack-3.1.1/src/mlpack/tests/fastmks_test.cpp
--- mlpack-3.1.0/src/mlpack/tests/fastmks_test.cpp	2019-04-26 05:07:37.000000000 +0000
+++ mlpack-3.1.1/src/mlpack/tests/fastmks_test.cpp	2019-05-27 02:18:49.000000000 +0000
@@ -325,7 +325,7 @@
   arma::mat data = arma::randu(5, 5);
 
   FastMKSModel m(FastMKSModel::LINEAR_KERNEL);
-  BOOST_REQUIRE_THROW(m.BuildModel(data, pk, false, false, 2.0),
+  BOOST_REQUIRE_THROW(m.BuildModel(std::move(data), pk, false, false, 2.0),
       std::invalid_argument);
 }
 
@@ -334,6 +334,9 @@
 {
   LinearKernel lk;
   arma::mat referenceData = arma::randu(10, 100);
+  arma::mat referenceCopy1(referenceData);
+  arma::mat referenceCopy2(referenceData);
+  arma::mat referenceCopy3(referenceData);
 
   FastMKS<LinearKernel> f(referenceData, lk);
 
@@ -341,9 +344,9 @@
   FastMKSModel mNaive(FastMKSModel::LINEAR_KERNEL);
   FastMKSModel mSingle(FastMKSModel::LINEAR_KERNEL);
 
-  m.BuildModel(referenceData, lk, false, false, 2.0);
-  mNaive.BuildModel(referenceData, lk, false, true, 2.0);
-  mSingle.BuildModel(referenceData, lk, true, false, 2.0);
+  m.BuildModel(std::move(referenceCopy1), lk, false, false, 2.0);
+  mNaive.BuildModel(std::move(referenceCopy2), lk, false, true, 2.0);
+  mSingle.BuildModel(std::move(referenceCopy3), lk, true, false, 2.0);
 
   // Now search, first monochromatically.
   arma::Mat<size_t> indices, mIndices, mNaiveIndices, mSingleIndices;
@@ -440,6 +443,9 @@
 {
   PolynomialKernel pk(2.0);
   arma::mat referenceData = arma::randu(10, 100);
+  arma::mat referenceCopy1(referenceData);
+  arma::mat referenceCopy2(referenceData);
+  arma::mat referenceCopy3(referenceData);
 
   FastMKS<PolynomialKernel> f(referenceData, pk);
 
@@ -447,9 +453,9 @@
   FastMKSModel mNaive(FastMKSModel::POLYNOMIAL_KERNEL);
   FastMKSModel mSingle(FastMKSModel::POLYNOMIAL_KERNEL);
 
-  m.BuildModel(referenceData, pk, false, false, 2.0);
-  mNaive.BuildModel(referenceData, pk, false, true, 2.0);
-  mSingle.BuildModel(referenceData, pk, true, false, 2.0);
+  m.BuildModel(std::move(referenceCopy1), pk, false, false, 2.0);
+  mNaive.BuildModel(std::move(referenceCopy2), pk, false, true, 2.0);
+  mSingle.BuildModel(std::move(referenceCopy3), pk, true, false, 2.0);
 
   // Now search, first monochromatically.
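   // Aside, not part of the upstream patch: the referenceCopyN matrices are
   // needed because BuildModel() now takes the dataset by rvalue reference;
   // each std::move() above leaves its source matrix in a moved-from state,
   // so reusing one matrix for all three models would hand the later models
   // empty data. The pattern per model is:
   //
   //   arma::mat copy(referenceData);  // referenceData itself stays intact
   //   m.BuildModel(std::move(copy), pk, false, false, 2.0);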
   arma::Mat<size_t> indices, mIndices, mNaiveIndices, mSingleIndices;
@@ -546,6 +552,9 @@
 {
   CosineDistance ck;
   arma::mat referenceData = arma::randu(10, 100);
+  arma::mat referenceCopy1(referenceData);
+  arma::mat referenceCopy2(referenceData);
+  arma::mat referenceCopy3(referenceData);
 
   FastMKS<CosineDistance> f(referenceData, ck);
 
@@ -553,9 +562,9 @@
   FastMKSModel mNaive(FastMKSModel::COSINE_DISTANCE);
   FastMKSModel mSingle(FastMKSModel::COSINE_DISTANCE);
 
-  m.BuildModel(referenceData, ck, false, false, 2.0);
-  mNaive.BuildModel(referenceData, ck, false, true, 2.0);
-  mSingle.BuildModel(referenceData, ck, true, false, 2.0);
+  m.BuildModel(std::move(referenceCopy1), ck, false, false, 2.0);
+  mNaive.BuildModel(std::move(referenceCopy2), ck, false, true, 2.0);
+  mSingle.BuildModel(std::move(referenceCopy3), ck, true, false, 2.0);
 
   // Now search, first monochromatically.
   arma::Mat<size_t> indices, mIndices, mNaiveIndices, mSingleIndices;
@@ -652,6 +661,9 @@
 {
   GaussianKernel gk(1.5);
   arma::mat referenceData = arma::randu(10, 100);
+  arma::mat referenceCopy1(referenceData);
+  arma::mat referenceCopy2(referenceData);
+  arma::mat referenceCopy3(referenceData);
 
   FastMKS<GaussianKernel> f(referenceData, gk);
 
@@ -659,9 +671,9 @@
   FastMKSModel mNaive(FastMKSModel::GAUSSIAN_KERNEL);
   FastMKSModel mSingle(FastMKSModel::GAUSSIAN_KERNEL);
 
-  m.BuildModel(referenceData, gk, false, false, 2.0);
-  mNaive.BuildModel(referenceData, gk, false, true, 2.0);
-  mSingle.BuildModel(referenceData, gk, true, false, 2.0);
+  m.BuildModel(std::move(referenceCopy1), gk, false, false, 2.0);
+  mNaive.BuildModel(std::move(referenceCopy2), gk, false, true, 2.0);
+  mSingle.BuildModel(std::move(referenceCopy3), gk, true, false, 2.0);
 
   // Now search, first monochromatically.
   arma::Mat<size_t> indices, mIndices, mNaiveIndices, mSingleIndices;
@@ -758,6 +770,9 @@
 {
   EpanechnikovKernel ek(2.5);
   arma::mat referenceData = arma::randu(10, 100);
+  arma::mat referenceCopy1(referenceData);
+  arma::mat referenceCopy2(referenceData);
+  arma::mat referenceCopy3(referenceData);
 
   FastMKS<EpanechnikovKernel> f(referenceData, ek);
 
@@ -765,9 +780,9 @@
   FastMKSModel mNaive(FastMKSModel::EPANECHNIKOV_KERNEL);
   FastMKSModel mSingle(FastMKSModel::EPANECHNIKOV_KERNEL);
 
-  m.BuildModel(referenceData, ek, false, false, 2.0);
-  mNaive.BuildModel(referenceData, ek, false, true, 2.0);
-  mSingle.BuildModel(referenceData, ek, true, false, 2.0);
+  m.BuildModel(std::move(referenceCopy1), ek, false, false, 2.0);
+  mNaive.BuildModel(std::move(referenceCopy2), ek, false, true, 2.0);
+  mSingle.BuildModel(std::move(referenceCopy3), ek, true, false, 2.0);
 
   // Now search, first monochromatically.
   arma::Mat<size_t> indices, mIndices, mNaiveIndices, mSingleIndices;
@@ -864,6 +879,9 @@
 {
   TriangularKernel tk(2.0);
   arma::mat referenceData = arma::randu(10, 100);
+  arma::mat referenceCopy1(referenceData);
+  arma::mat referenceCopy2(referenceData);
+  arma::mat referenceCopy3(referenceData);
 
   FastMKS<TriangularKernel> f(referenceData, tk);
 
@@ -871,9 +889,9 @@
   FastMKSModel mNaive(FastMKSModel::TRIANGULAR_KERNEL);
   FastMKSModel mSingle(FastMKSModel::TRIANGULAR_KERNEL);
 
-  m.BuildModel(referenceData, tk, false, false, 2.0);
-  mNaive.BuildModel(referenceData, tk, false, true, 2.0);
-  mSingle.BuildModel(referenceData, tk, true, false, 2.0);
+  m.BuildModel(std::move(referenceCopy1), tk, false, false, 2.0);
+  mNaive.BuildModel(std::move(referenceCopy2), tk, false, true, 2.0);
+  mSingle.BuildModel(std::move(referenceCopy3), tk, true, false, 2.0);
 
   // Now search, first monochromatically.
   arma::Mat<size_t> indices, mIndices, mNaiveIndices, mSingleIndices;
diff -Nru mlpack-3.1.0/src/mlpack/tests/gan_test.cpp mlpack-3.1.1/src/mlpack/tests/gan_test.cpp
--- mlpack-3.1.0/src/mlpack/tests/gan_test.cpp	1970-01-01 00:00:00.000000000 +0000
+++ mlpack-3.1.1/src/mlpack/tests/gan_test.cpp	2019-05-27 02:18:49.000000000 +0000
@@ -0,0 +1,249 @@
+/**
+ * @file gan_test.cpp
+ * @author Kris Singh
+ * @author Shikhar Jaiswal
+ *
+ * Tests the GAN network.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license. You should have received a copy of the
+ * 3-clause BSD license along with mlpack. If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#include <mlpack/core.hpp>
+
+#include <mlpack/methods/ann/gan/gan.hpp>
+#include <mlpack/methods/ann/init_rules/gaussian_init.hpp>
+#include <mlpack/methods/ann/layer/layer.hpp>
+#include <mlpack/methods/ann/loss_functions/sigmoid_cross_entropy_error.hpp>
+#include <mlpack/methods/softmax_regression/softmax_regression.hpp>
+
+#include <ensmallen.hpp>
+
+#include <boost/test/unit_test.hpp>
+#include "test_tools.hpp"
+
+using namespace mlpack;
+using namespace mlpack::ann;
+using namespace mlpack::math;
+using namespace mlpack::regression;
+using namespace std::placeholders;
+
+BOOST_AUTO_TEST_SUITE(GANNetworkTest);
+
+/*
+ * Load pre-trained network values for generating a distribution that is
+ * close to N(4, 0.5).
+ */
+BOOST_AUTO_TEST_CASE(GANTest)
+{
+  size_t generatorHiddenLayerSize = 8;
+  size_t discriminatorHiddenLayerSize = 8;
+  size_t generatorOutputSize = 1;
+  size_t discriminatorOutputSize = 1;
+  size_t discriminatorPreTrain = 0;
+  size_t batchSize = 8;
+  size_t noiseDim = 1;
+  size_t generatorUpdateStep = 1;
+  size_t numSamples = 10000;
+  double multiplier = 1;
+
+  arma::mat trainData(1, 10000);
+  trainData.imbue( [&]() { return arma::as_scalar(RandNormal(4, 0.5));});
+  trainData = arma::sort(trainData);
+
+  // Create the Discriminator network.
+  FFN<SigmoidCrossEntropyError<> > discriminator;
+  discriminator.Add<Linear<> > (
+      generatorOutputSize, discriminatorHiddenLayerSize * 2);
+  discriminator.Add<ReLULayer<> >();
+  discriminator.Add<Linear<> > (
+      discriminatorHiddenLayerSize * 2, discriminatorHiddenLayerSize * 2);
+  discriminator.Add<ReLULayer<> >();
+  discriminator.Add<Linear<> > (
+      discriminatorHiddenLayerSize * 2, discriminatorHiddenLayerSize * 2);
+  discriminator.Add<ReLULayer<> >();
+  discriminator.Add<Linear<> > (
+      discriminatorHiddenLayerSize * 2, discriminatorOutputSize);
+
+  // Create the Generator network.
+  FFN<SigmoidCrossEntropyError<> > generator;
+  generator.Add<Linear<> >(noiseDim, generatorHiddenLayerSize);
+  generator.Add<SoftPlusLayer<> >();
+  generator.Add<Linear<> >(generatorHiddenLayerSize, generatorOutputSize);
+
+  // Create the GAN.
+  GaussianInitialization gaussian(0, 0.1);
+  std::function<double()> noiseFunction = [](){ return math::Random(-8, 8) +
+      math::RandNormal(0, 1) * 0.01;};
+  GAN<FFN<SigmoidCrossEntropyError<> >,
+      GaussianInitialization,
+      std::function<double()> >
+      gan(trainData, generator, discriminator, gaussian, noiseFunction,
+      noiseDim, batchSize, generatorUpdateStep, discriminatorPreTrain,
+      multiplier);
+  gan.Reset();
+
+  Log::Info << "Loading Parameters" << std::endl;
+  arma::mat parameters, generatorParameters;
+  parameters.load("preTrainedGAN.arm");
+  gan.Parameters() = parameters;
+
+  // Generate samples.
+  Log::Info << "Sampling..."
+      << std::endl;
+  arma::mat noise(noiseDim, batchSize);
+
+  size_t dim = std::sqrt(trainData.n_rows);
+  arma::mat generatedData(2 * dim, dim * numSamples);
+
+  for (size_t i = 0; i < numSamples; i++)
+  {
+    arma::mat samples;
+    noise.imbue( [&]() { return noiseFunction(); } );
+
+    gan.Generator().Forward(noise, samples);
+    samples.reshape(dim, dim);
+    samples = samples.t();
+
+    generatedData.submat(0, i * dim, dim - 1, i * dim + dim - 1) = samples;
+
+    samples = trainData.col(math::RandInt(0, trainData.n_cols));
+    samples.reshape(dim, dim);
+    samples = samples.t();
+
+    generatedData.submat(dim,
+        i * dim, 2 * dim - 1, i * dim + dim - 1) = samples;
+  }
+
+  double generatedMean = arma::as_scalar(arma::mean(
+      generatedData.rows(0, dim - 1), 1));
+  double originalMean = arma::as_scalar(arma::mean(
+      generatedData.rows(dim, 2 * dim - 1), 1));
+  double generatedStd = arma::as_scalar(arma::stddev(
+      generatedData.rows(0, dim - 1), 0, 1));
+  double originalStd = arma::as_scalar(arma::stddev(
+      generatedData.rows(dim, 2 * dim - 1), 0, 1));
+
+  BOOST_REQUIRE_LE(generatedMean - originalMean, 0.2);
+  BOOST_REQUIRE_LE(generatedStd - originalStd, 0.2);
+}
+
+/*
+ * Tests the GAN implementation of the O'Reilly Test on the MNIST dataset.
+ * It's not viable to train on bigger parameters due to time constraints.
+ * Please refer to the mlpack/models repository for the tutorial.
+ */
+BOOST_AUTO_TEST_CASE(GANMNISTTest)
+{
+  size_t dNumKernels = 32;
+  size_t discriminatorPreTrain = 5;
+  size_t batchSize = 5;
+  size_t noiseDim = 100;
+  size_t generatorUpdateStep = 1;
+  size_t numSamples = 10;
+  double stepSize = 0.0003;
+  double eps = 1e-8;
+  size_t numEpoches = 1;
+  double tolerance = 1e-5;
+  int datasetMaxCols = 10;
+  bool shuffle = true;
+  double multiplier = 10;
+
+  Log::Info << std::boolalpha
+      << " batchSize = " << batchSize << std::endl
+      << " generatorUpdateStep = " << generatorUpdateStep << std::endl
+      << " noiseDim = " << noiseDim << std::endl
+      << " numSamples = " << numSamples << std::endl
+      << " stepSize = " << stepSize << std::endl
+      << " numEpoches = " << numEpoches << std::endl
+      << " tolerance = " << tolerance << std::endl
+      << " shuffle = " << shuffle << std::endl;
+
+  arma::mat trainData;
+  trainData.load("mnist_first250_training_4s_and_9s.arm");
+  Log::Info << arma::size(trainData) << std::endl;
+
+  trainData = trainData.cols(0, datasetMaxCols - 1);
+
+  size_t numIterations = trainData.n_cols * numEpoches;
+  numIterations /= batchSize;
+
+  Log::Info << "Dataset loaded (" << trainData.n_rows << ", "
+      << trainData.n_cols << ")" << std::endl;
+  Log::Info << trainData.n_rows << "--------" << trainData.n_cols << std::endl;
+
+  // Create the Discriminator network.
+  FFN<SigmoidCrossEntropyError<> > discriminator;
+  discriminator.Add<Convolution<> >(1, dNumKernels, 5, 5, 1, 1, 2, 2, 28, 28);
+  discriminator.Add<ReLULayer<> >();
+  discriminator.Add<MeanPooling<> >(2, 2, 2, 2);
+  discriminator.Add<Convolution<> >(dNumKernels, 2 * dNumKernels, 5, 5, 1, 1,
+      2, 2, 14, 14);
+  discriminator.Add<ReLULayer<> >();
+  discriminator.Add<MeanPooling<> >(2, 2, 2, 2);
+  discriminator.Add<Linear<> >(7 * 7 * 2 * dNumKernels, 1024);
+  discriminator.Add<ReLULayer<> >();
+  discriminator.Add<Linear<> >(1024, 1);
+
+  // Create the Generator network.
+  FFN<SigmoidCrossEntropyError<> > generator;
+  generator.Add<Linear<> >(noiseDim, 3136);
+  generator.Add<BatchNorm<> >(3136);
+  generator.Add<ReLULayer<> >();
+  generator.Add<Convolution<> >(1, noiseDim / 2, 3, 3, 2, 2, 1, 1, 56, 56);
+  generator.Add<BatchNorm<> >(39200);
+  generator.Add<ReLULayer<> >();
+  generator.Add<BilinearInterpolation<> >(28, 28, 56, 56, noiseDim / 2);
+  generator.Add<Convolution<> >(noiseDim / 2, noiseDim / 4, 3, 3, 2, 2, 1, 1,
+      56, 56);
+  generator.Add<BatchNorm<> >(19600);
+  generator.Add<ReLULayer<> >();
+  generator.Add<BilinearInterpolation<> >(28, 28, 56, 56,
+      noiseDim / 4);
+  generator.Add<Convolution<> >(noiseDim / 4, 1, 3, 3, 2, 2, 1, 1, 56, 56);
+  generator.Add<SigmoidLayer<> >();
+
+  // Create the GAN.
+  GaussianInitialization gaussian(0, 1);
+  ens::Adam optimizer(stepSize, batchSize, 0.9, 0.999, eps, numIterations,
+      tolerance, shuffle);
+  std::function<double()> noiseFunction = [] () {
+      return math::RandNormal(0, 1);};
+  GAN<FFN<SigmoidCrossEntropyError<> >, GaussianInitialization,
+      std::function<double()> > gan(trainData, generator, discriminator,
+      gaussian, noiseFunction, noiseDim, batchSize, generatorUpdateStep,
+      discriminatorPreTrain, multiplier);
+
+  Log::Info << "Training..." << std::endl;
+  double objVal = gan.Train(optimizer);
+  BOOST_REQUIRE_EQUAL(std::isfinite(objVal), true);
+
+  // Generate samples.
+  Log::Info << "Sampling..." << std::endl;
+  arma::mat noise(noiseDim, batchSize);
+  size_t dim = std::sqrt(trainData.n_rows);
+  arma::mat generatedData(2 * dim, dim * numSamples);
+
+  for (size_t i = 0; i < numSamples; i++)
+  {
+    arma::mat samples;
+    noise.imbue( [&]() { return noiseFunction(); } );
+
+    gan.Generator().Forward(noise, samples);
+    samples.reshape(dim, dim);
+    samples = samples.t();
+
+    generatedData.submat(0, i * dim, dim - 1, i * dim + dim - 1) = samples;
+
+    samples = trainData.col(math::RandInt(0, trainData.n_cols));
+    samples.reshape(dim, dim);
+    samples = samples.t();
+
+    generatedData.submat(dim,
+        i * dim, 2 * dim - 1, i * dim + dim - 1) = samples;
+  }
+
+  Log::Info << "Output generated!" << std::endl;
+}
+
+BOOST_AUTO_TEST_SUITE_END();
diff -Nru mlpack-3.1.0/src/mlpack/tests/load_save_test.cpp mlpack-3.1.1/src/mlpack/tests/load_save_test.cpp
--- mlpack-3.1.0/src/mlpack/tests/load_save_test.cpp	2019-04-26 05:07:37.000000000 +0000
+++ mlpack-3.1.1/src/mlpack/tests/load_save_test.cpp	2019-05-27 02:18:49.000000000 +0000
@@ -14,7 +14,6 @@
 #include
 #include
 #include
-#include
 #include
 #include "test_tools.hpp"
@@ -907,6 +906,30 @@
 #endif
 
 /**
+ * Test one-hot encoding.
+ */
+BOOST_AUTO_TEST_CASE(OneHotEncodingTest)
+{
+  arma::Mat<size_t> matrix;
+  matrix = "1 0;"
+           "0 1;"
+           "1 0;"
+           "1 0;"
+           "1 0;"
+           "1 0;"
+           "0 1;"
+           "1 0;";
+  // Output matrix to save one-hot encoding results.
+  arma::Mat<size_t> output;
+  arma::irowvec labels("-1 1 -1 -1 -1 -1 1 -1");
+  data::OneHotEncoding(labels, output);
+
+  BOOST_REQUIRE_EQUAL(matrix.n_cols, output.n_cols);
+  BOOST_REQUIRE_EQUAL(matrix.n_rows, output.n_rows);
+  CheckMatrices(output, matrix);
+}
+
+/**
  * Test normalization of labels.
  */
 BOOST_AUTO_TEST_CASE(NormalizeLabelSmallDatasetTest)
diff -Nru mlpack-3.1.0/src/mlpack/tests/main_tests/adaboost_test.cpp mlpack-3.1.1/src/mlpack/tests/main_tests/adaboost_test.cpp
--- mlpack-3.1.0/src/mlpack/tests/main_tests/adaboost_test.cpp	2019-04-26 05:07:37.000000000 +0000
+++ mlpack-3.1.1/src/mlpack/tests/main_tests/adaboost_test.cpp	2019-05-27 02:18:49.000000000 +0000
@@ -207,6 +207,29 @@
 }
 
 /**
+ * This test can be removed in mlpack 4.0.0. This tests that the output and
+ * predictions outputs are the same.
+ */
+BOOST_AUTO_TEST_CASE(AdaBoostOutputPredictionsTest)
+{
+  arma::mat trainData;
+  if (!data::Load("vc2.csv", trainData))
+    BOOST_FAIL("Unable to load train dataset vc2.csv!");
+
+  arma::Row<size_t> labels;
+  if (!data::Load("vc2_labels.txt", labels))
+    BOOST_FAIL("Unable to load label dataset vc2_labels.txt!");
+
+  SetInputParam("training", std::move(trainData));
+  SetInputParam("labels", std::move(labels));
+
+  mlpackMain();
+
+  CheckMatrices(CLI::GetParam<arma::Row<size_t>>("output"),
+      CLI::GetParam<arma::Row<size_t>>("predictions"));
+}
+
+/**
  * Weak learner should be either Decision Stump or Perceptron.
  */
 BOOST_AUTO_TEST_CASE(AdaBoostWeakLearnerTest)
diff -Nru mlpack-3.1.0/src/mlpack/tests/main_tests/fastmks_test.cpp mlpack-3.1.1/src/mlpack/tests/main_tests/fastmks_test.cpp
--- mlpack-3.1.0/src/mlpack/tests/main_tests/fastmks_test.cpp	1970-01-01 00:00:00.000000000 +0000
+++ mlpack-3.1.1/src/mlpack/tests/main_tests/fastmks_test.cpp	2019-05-27 02:18:49.000000000 +0000
@@ -0,0 +1,674 @@
+/**
+ * @file fastmks_test.cpp
+ * @author Yashwant Singh
+ * @author Prabhat Sharma
+ *
+ * Test mlpackMain() of fastmks_main.cpp.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license. You should have received a copy of the
+ * 3-clause BSD license along with mlpack. If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#include <string>
+
+#define BINDING_TYPE BINDING_TYPE_TEST
+static const std::string testName = "FastMaxKernelSearch";
+
+#include <mlpack/core.hpp>
+#include <mlpack/core/util/mlpack_main.hpp>
+#include "test_helper.hpp"
+#include <mlpack/methods/fastmks/fastmks_main.cpp>
+
+#include <boost/test/unit_test.hpp>
+#include "../test_tools.hpp"
+
+using namespace mlpack;
+
+struct FastMKSTestFixture
+{
+ public:
+  FastMKSTestFixture()
+  {
+    // Cache the options for this program.
+    CLI::RestoreSettings(testName);
+  }
+
+  ~FastMKSTestFixture()
+  {
+    // Clear the settings.
+    bindings::tests::CleanMemory();
+    CLI::ClearSettings();
+  }
+};
+
+BOOST_FIXTURE_TEST_SUITE(FastMKSMainTest, FastMKSTestFixture);
+
+/*
+ * Check that we can't provide reference and query matrices
+ * with different dimensions.
+ */
+BOOST_AUTO_TEST_CASE(FastMKSEqualDimensionTest)
+{
+  // 100 points in 3 dimensions.
+  arma::mat referenceData(3, 100, arma::fill::randu);
+
+  // Now we specify an invalid dimension (2) for the query data.
+  // Note that the number of points in the query and reference matrices
+  // is allowed to differ.
+  // 90 points in 2 dimensions.
+  arma::mat queryData(2, 90, arma::fill::randu);
+
+  // Random input, some k <= number of reference points.
+  SetInputParam("reference", std::move(referenceData));
+  SetInputParam("query", std::move(queryData));
+  SetInputParam("k", (int) 4);
+
+  Log::Fatal.ignoreInput = true;
+  BOOST_REQUIRE_THROW(mlpackMain(), std::invalid_argument);
+  Log::Fatal.ignoreInput = false;
+}
+
+/*
+ * Check that we can't specify an invalid k when only the reference
+ * matrix is given.
+ */
+BOOST_AUTO_TEST_CASE(FastMKSInvalidKTest)
+{
+  // 50 points in 3 dimensions.
+  arma::mat referenceData(3, 50, arma::fill::randu);
+
+  // Random input, some k > number of reference points.
+  SetInputParam("reference", referenceData);
+  SetInputParam("k", (int) 51); // Invalid.
+
+  Log::Fatal.ignoreInput = true;
+  BOOST_REQUIRE_THROW(mlpackMain(), std::invalid_argument);
+  Log::Fatal.ignoreInput = false;
+}
+
+/*
+ * Check that we can't specify an invalid k when both reference
+ * and query matrices are given.
+ */
+BOOST_AUTO_TEST_CASE(FastMKSInvalidKQueryDataTest)
+{
+  // 50 points in 3 dimensions.
+  arma::mat referenceData(3, 50, arma::fill::randu);
+  // 10 points in 3 dimensions.
+  arma::mat queryData(3, 10, arma::fill::randu);
+
+  // Random input, some k > number of reference points.
+  SetInputParam("reference", std::move(referenceData));
+  SetInputParam("query", std::move(queryData));
+  SetInputParam("k", (int) 51);
+
+  Log::Fatal.ignoreInput = true;
+  BOOST_REQUIRE_THROW(mlpackMain(), std::invalid_argument);
+  Log::Fatal.ignoreInput = false;
+}
+
+/*
+ * Check that we can't pass both input_model and a reference matrix.
+ */
+BOOST_AUTO_TEST_CASE(FastMKSRefModelTest)
+{
+  // 100 points in 3 dimensions.
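+  // Aside, not part of the upstream file: every test in this suite follows
+  // the same binding-test pattern -- stage inputs, invoke the binding, then
+  // read its outputs back through CLI:
+  //
+  //   SetInputParam("reference", referenceData);          // stage an input
+  //   mlpackMain();                                       // run the binding
+  //   arma::mat k = CLI::GetParam<arma::mat>("kernels");  // fetch an output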
+  arma::mat referenceData(3, 100, arma::fill::randu);
+
+  // Random input, some k <= number of reference points.
+  SetInputParam("reference", referenceData);
+  SetInputParam("k", (int) 10);
+
+  mlpackMain();
+
+  CLI::GetSingleton().Parameters()["reference"].wasPassed = false;
+  SetInputParam("reference", std::move(referenceData));
+  // Input pre-trained model.
+  SetInputParam("input_model",
+      std::move(CLI::GetParam<FastMKSModel*>("output_model")));
+
+  Log::Fatal.ignoreInput = true;
+  BOOST_REQUIRE_THROW(mlpackMain(), std::runtime_error);
+  Log::Fatal.ignoreInput = false;
+}
+
+/*
+ * Check that we can't pass an invalid kernel.
+ */
+BOOST_AUTO_TEST_CASE(FastMKSInvalidKernelTest)
+{
+  // 100 points in 3 dimensions.
+  arma::mat referenceData(3, 100, arma::fill::randu);
+  string kernelName = "dummy";
+
+  // Random input, some k <= number of reference points.
+  SetInputParam("reference", std::move(referenceData));
+  SetInputParam("k", (int) 10);
+  SetInputParam("kernel", std::move(kernelName)); // Invalid.
+
+  Log::Fatal.ignoreInput = true;
+  BOOST_REQUIRE_THROW(mlpackMain(), std::runtime_error);
+  Log::Fatal.ignoreInput = false;
+}
+
+/**
+ * Make sure that the dimensions of the indices and kernel
+ * matrices are correct given a value of k.
+ */
+BOOST_AUTO_TEST_CASE(FastMKSOutputDimensionTest)
+{
+  // 100 points in 3 dimensions.
+  arma::mat referenceData(3, 100, arma::fill::randu);
+
+  // Random input, some k <= number of reference points.
+  SetInputParam("reference", std::move(referenceData));
+  SetInputParam("k", (int) 10);
+
+  mlpackMain();
+
+  // Check the indices matrix has 10 points for each input point.
+  BOOST_REQUIRE_EQUAL(CLI::GetParam<arma::Mat<size_t>>
+      ("indices").n_rows, 10);
+  BOOST_REQUIRE_EQUAL(CLI::GetParam<arma::Mat<size_t>>
+      ("indices").n_cols, 100);
+
+  // Check the kernel matrix has 10 points for each input point.
+  BOOST_REQUIRE_EQUAL(CLI::GetParam<arma::mat>("kernels").n_rows, 10);
+  BOOST_REQUIRE_EQUAL(CLI::GetParam<arma::mat>("kernels").n_cols, 100);
+}
+
+/**
+ * Ensure that a saved model can be used again.
+ */
+BOOST_AUTO_TEST_CASE(FastMKSModelReuseTest)
+{
+  // 100 points in 3 dimensions.
+  arma::mat referenceData(3, 100, arma::fill::randu);
+  // 90 points in 3 dimensions.
+  arma::mat queryData(3, 90, arma::fill::randu);
+
+  // Random input, some k <= number of reference points.
+  SetInputParam("reference", std::move(referenceData));
+  SetInputParam("query", queryData);
+
+  mlpackMain();
+
+  arma::Mat<size_t> indices;
+  arma::mat kernel;
+  FastMKSModel* output_model;
+  indices = std::move(CLI::GetParam<arma::Mat<size_t>>("indices"));
+  kernel = std::move(CLI::GetParam<arma::mat>("kernels"));
+  output_model = std::move(CLI::GetParam<FastMKSModel*>("output_model"));
+
+  // Reset passed parameters.
+  CLI::GetSingleton().Parameters()["reference"].wasPassed = false;
+  CLI::GetSingleton().Parameters()["query"].wasPassed = false;
+
+  // Input the saved model, pass the same query, and keep k unchanged.
+  SetInputParam("input_model", output_model);
+  SetInputParam("query", queryData);
+
+  mlpackMain();
+
+  // Check that the initial output matrices and the output matrices using
+  // the saved model are equal.
+  CheckMatrices(indices, CLI::GetParam<arma::Mat<size_t>>("indices"));
+  CheckMatrices(kernel, CLI::GetParam<arma::mat>("kernels"));
+}
+
+/*
+ * Ensure that the reference dataset gives the same result when passed as
+ * a query dataset.
+ */
+BOOST_AUTO_TEST_CASE(FastMKSQueryRefTest)
+{
+  // 100 points in 3 dimensions.
+  arma::mat referenceData(3, 100, arma::fill::randu);
+  // Random input, some k <= number of reference points.
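+  // Aside, not part of the upstream file: both runs in this test pass the
+  // same matrix as reference and query, so in effect it checks that two
+  // identical invocations of the binding produce identical output:
+  //
+  //   run 1: indices and kernels matrices saved;
+  //   run 2: CheckMatrices() requires equality with run 1.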
+ SetInputParam("reference", referenceData); + SetInputParam("query", referenceData); + SetInputParam("k", (int) 10); + + mlpackMain(); + + arma::Mat indices; + arma::mat kernel; + indices = std::move(CLI::GetParam>("indices")); + kernel = std::move(CLI::GetParam("kernels")); + + bindings::tests::CleanMemory(); + + CLI::GetSingleton().Parameters()["reference"].wasPassed = false; + CLI::GetSingleton().Parameters()["query"].wasPassed = false; + + + SetInputParam("reference", referenceData); + SetInputParam("query", referenceData); + + mlpackMain(); + + CheckMatrices(indices, + CLI::GetParam>("indices")); + CheckMatrices(kernel, + CLI::GetParam("kernels")); +} + +/* + * Ensure that naive mode returns the same result as tree mode. + */ +BOOST_AUTO_TEST_CASE(FastMKSNaiveModeTest) +{ + // 100 points in 3 dimensions. + arma::mat referenceData(3, 100, arma::fill::randu); + + // Random input, some k <= number of reference points. + SetInputParam("reference", referenceData); + SetInputParam("k", (int) 10); + + mlpackMain(); + + arma::Mat indices; + arma::mat kernel; + indices = std::move(CLI::GetParam>("indices")); + kernel = std::move(CLI::GetParam("kernels")); + + bindings::tests::CleanMemory(); + + CLI::GetSingleton().Parameters()["reference"].wasPassed = false; + CLI::GetSingleton().Parameters()["k"].wasPassed = false; + + // Random input, some k <= number of reference points. + SetInputParam("reference", referenceData); + SetInputParam("k", (int) 10); + SetInputParam("naive", true); + + mlpackMain(); + + CheckMatrices(indices, + CLI::GetParam>("indices")); + CheckMatrices(kernel, + CLI::GetParam("kernels")); +} + +/* + * Ensure that single-tree search returns the same result as dual-tree search. + */ +BOOST_AUTO_TEST_CASE(FastMKSTreeTest) +{ + // 100 points in 3 dimensions. + arma::mat referenceData(3, 100, arma::fill::randu); + + // Random input, some k <= number of reference points. + SetInputParam("reference", referenceData); + SetInputParam("k", (int) 10); + + mlpackMain(); + + arma::Mat indices; + arma::mat kernel; + indices = std::move(CLI::GetParam>("indices")); + kernel = std::move(CLI::GetParam("kernels")); + + bindings::tests::CleanMemory(); + + CLI::GetSingleton().Parameters()["reference"].wasPassed = false; + CLI::GetSingleton().Parameters()["k"].wasPassed = false; + + SetInputParam("reference", std::move(referenceData)); + SetInputParam("k", (int) 10); + SetInputParam("single", true); + + mlpackMain(); + + CheckMatrices(indices, + CLI::GetParam>("indices")); + CheckMatrices(kernel, + CLI::GetParam("kernels")); +} + +/* + * Ensure that we get almost same results in cover tree search mode when + * different basis is specified. + */ +BOOST_AUTO_TEST_CASE(FastMKSBasisTest) +{ + // 100 points in 3 dimensions. + arma::mat referenceData(3, 100, arma::fill::randu); + + // Random input, some k <= number of reference points. 
+ SetInputParam("reference", referenceData); + SetInputParam("k", (int) 10); + SetInputParam("base", 3.0); + + mlpackMain(); + + arma::Mat indices; + arma::mat kernel; + indices = std::move(CLI::GetParam>("indices")); + kernel = std::move(CLI::GetParam("kernels")); + + bindings::tests::CleanMemory(); + + CLI::GetSingleton().Parameters()["reference"].wasPassed = false; + CLI::GetSingleton().Parameters()["k"].wasPassed = false; + + SetInputParam("reference", std::move(referenceData)); + SetInputParam("k", (int) 10); + SetInputParam("base", 4.0); + + mlpackMain(); + + arma::Mat newindices; + arma::mat newkernel; + newindices = std::move(CLI::GetParam>("indices")); + newkernel = std::move(CLI::GetParam("kernels")); + + CheckMatrices(indices, newindices); + CheckMatrices(kernel, newkernel); +} + +/** + * Check that we can't specify base less than 1. + */ +BOOST_AUTO_TEST_CASE(FastMKSBaseTest) +{ + // 100 points in 3 dimensions. + arma::mat referenceData(3, 100, arma::fill::randu); + + // Random input, invalid base. + SetInputParam("reference", std::move(referenceData)); + SetInputParam("k", (int) 10); + SetInputParam("base", 0.0); // Invalid. + + Log::Fatal.ignoreInput = true; + BOOST_REQUIRE_THROW(mlpackMain(), std::invalid_argument); + Log::Fatal.ignoreInput = false; +} + +/** + * Ensure that different kernels returns different results. + */ +BOOST_AUTO_TEST_CASE(FastMKSKernelTest) +{ + std::string kerneltypes[] = {"polynomial", "cosine", "gaussian", + "epanechnikov", "triangular", "hyptan"}; + const int nofkerneltypes = 6; + // 100 points in 3 dimensions. + arma::mat referenceData(3, 100, arma::fill::randu); + // 90 points in 3 dimensions. + arma::mat queryData(3, 90, arma::fill::randu); + // Keep some k <= number of reference points same over all. + SetInputParam("k", (int) 10); + // For Hyptan Kernel + arma::mat inputData; + if (!data::Load("data_3d_mixed.txt", inputData)) + BOOST_FAIL("Cannot load test dataset data_3d_ind.txt!"); + + arma::Mat indicesCompare; + arma::mat kernelsCompare; + + arma::Mat indices; + arma::mat kernels; + + // Looping over all the kernels + for (size_t i = 0; i < nofkerneltypes; i++) + { + if (kerneltypes[i] == "hyptan") + { + // Same random inputs, different algorithms. + SetInputParam("reference", inputData); + SetInputParam("query", inputData); + SetInputParam("kernel", kerneltypes[i]); + } + else + { + // Same random inputs, different algorithms. + SetInputParam("reference", referenceData); + SetInputParam("query", queryData); + SetInputParam("kernel", kerneltypes[i]); + } + mlpackMain(); + + if (i == 0) + { + indicesCompare = + std::move(CLI::GetParam>("indices")); + kernelsCompare = std::move(CLI::GetParam("kernels")); + } + else + { + indices = std::move(CLI::GetParam>("indices")); + kernels = std::move(CLI::GetParam("kernels")); + + CheckMatricesNotEqual(indicesCompare, indices); + CheckMatricesNotEqual(kernelsCompare, kernels); + } + + // Reset passed parameters. + CLI::GetSingleton().Parameters()["reference"].wasPassed = false; + CLI::GetSingleton().Parameters()["query"].wasPassed = false; + CLI::GetSingleton().Parameters()["kernel"].wasPassed = false; + } +} + +/** + * Ensure that offset affects the final result of polynomial and hyptan kernel. + */ +BOOST_AUTO_TEST_CASE(FastMKSOffsetTest) +{ + // 100 points in 3 dimensions. + arma::mat referenceData(3, 100, arma::fill::randu); + // Random input, some k <= number of reference points. 
+ SetInputParam("reference", referenceData); + SetInputParam("k", (int) 10); + SetInputParam("kernel", (string)"polynomial"); + SetInputParam("offset", 1.0); + + mlpackMain(); + + arma::mat polyKernel; + polyKernel = std::move(CLI::GetParam("kernels")); + + bindings::tests::CleanMemory(); + + CLI::GetSingleton().Parameters()["reference"].wasPassed = false; + CLI::GetSingleton().Parameters()["offset"].wasPassed = false; + + SetInputParam("reference", referenceData); + SetInputParam("offset", 4.0); + + mlpackMain(); + + CheckMatricesNotEqual(polyKernel, + CLI::GetParam("kernels")); + + bindings::tests::CleanMemory(); + + arma::mat inputData; + if (!data::Load("data_3d_mixed.txt", inputData)) + BOOST_FAIL("Cannot load test dataset data_3d_ind.txt!"); + + CLI::GetSingleton().Parameters()["reference"].wasPassed = false; + CLI::GetSingleton().Parameters()["kernel"].wasPassed = false; + CLI::GetSingleton().Parameters()["offset"].wasPassed = false; + + SetInputParam("reference", inputData); + SetInputParam("kernel", (std::string)"hyptan"); + SetInputParam("offset", 1.0); + + mlpackMain(); + + arma::mat hyptanKernel; + hyptanKernel = std::move(CLI::GetParam("kernels")); + + bindings::tests::CleanMemory(); + + CLI::GetSingleton().Parameters()["reference"].wasPassed = false; + CLI::GetSingleton().Parameters()["offset"].wasPassed = false; + + SetInputParam("reference", inputData); + SetInputParam("offset", 4.0); + mlpackMain(); + + CheckMatricesNotEqual(hyptanKernel, + CLI::GetParam("kernels")); +} + +/** + * Ensure that degree affects the final result of polynomial kernel. + */ +BOOST_AUTO_TEST_CASE(FastMKSDegreeTest) +{ + // 100 points in 3 dimensions. + arma::mat referenceData(3, 100, arma::fill::randu); + // Random input, some k <= number of reference points. + SetInputParam("reference", referenceData); + SetInputParam("k", (int) 10); + SetInputParam("kernel", (string)"polynomial"); + SetInputParam("degree", 2.0); // Default value. + + mlpackMain(); + + arma::mat polyKernel; + polyKernel = std::move(CLI::GetParam("kernels")); + + bindings::tests::CleanMemory(); + + CLI::GetSingleton().Parameters()["reference"].wasPassed = false; + CLI::GetSingleton().Parameters()["degree"].wasPassed = false; + + SetInputParam("reference", referenceData); + SetInputParam("degree", 4.0); + + mlpackMain(); + + CheckMatricesNotEqual(polyKernel, + CLI::GetParam("kernels")); +} + +/** + * Ensure that scale affects the final result of hyptan kernel. + */ +BOOST_AUTO_TEST_CASE(FastMKSScaleTest) +{ + arma::mat inputData; + if (!data::Load("data_3d_mixed.txt", inputData)) + BOOST_FAIL("Cannot load test dataset data_3d_ind.txt!"); + + // Random input, some k <= number of reference points. + SetInputParam("reference", inputData); + SetInputParam("k", (int) 10); + SetInputParam("kernel", (std::string)"hyptan"); + SetInputParam("scale", 1.0); // Default value. + + mlpackMain(); + + arma::mat hyptanKernel; + hyptanKernel = std::move(CLI::GetParam("kernels")); + + bindings::tests::CleanMemory(); + + CLI::GetSingleton().Parameters()["reference"].wasPassed = false; + CLI::GetSingleton().Parameters()["scale"].wasPassed = false; + + SetInputParam("reference", inputData); + SetInputParam("scale", 1.5); + + mlpackMain(); + + CheckMatricesNotEqual(hyptanKernel, + CLI::GetParam("kernels")); +} + +/** + * Ensure that bandwidth affects the final result of Gaussian, Epanechnikov, and + * triangular kernel. + */ +BOOST_AUTO_TEST_CASE(FastMKSBandwidthTest) +{ + // 100 points in 3 dimensions. 
+ arma::mat referenceData(3, 100, arma::fill::randu); + + // Random input, some k <= number of reference points. + SetInputParam("reference", referenceData); + SetInputParam("k", (int) 10); + SetInputParam("kernel", (string)"gaussian"); + SetInputParam("bandwidth", 1.0); // Default value. + + mlpackMain(); + + arma::mat gaussianKernel; + gaussianKernel = std::move(CLI::GetParam("kernels")); + + bindings::tests::CleanMemory(); + + CLI::GetSingleton().Parameters()["reference"].wasPassed = false; + CLI::GetSingleton().Parameters()["bandwidth"].wasPassed = false; + + SetInputParam("reference", referenceData); + SetInputParam("bandwidth", 4.0); + + mlpackMain(); + CheckMatricesNotEqual(gaussianKernel, + CLI::GetParam("kernels")); + + bindings::tests::CleanMemory(); + + CLI::GetSingleton().Parameters()["reference"].wasPassed = false; + CLI::GetSingleton().Parameters()["bandwidth"].wasPassed = false; + CLI::GetSingleton().Parameters()["kernel"].wasPassed = false; + + // Random input, some k <= number of reference points. + SetInputParam("reference", referenceData); + SetInputParam("kernel", (string)"epanechnikov"); + SetInputParam("bandwidth", 1.0); // Default value. + + mlpackMain(); + + arma::mat epanKernel; + epanKernel = std::move(CLI::GetParam("kernels")); + + bindings::tests::CleanMemory(); + + CLI::GetSingleton().Parameters()["reference"].wasPassed = false; + CLI::GetSingleton().Parameters()["bandwidth"].wasPassed = false; + + SetInputParam("reference", referenceData); + SetInputParam("bandwidth", 4.0); + + mlpackMain(); + CheckMatricesNotEqual(epanKernel, + CLI::GetParam("kernels")); + + bindings::tests::CleanMemory(); + + CLI::GetSingleton().Parameters()["reference"].wasPassed = false; + CLI::GetSingleton().Parameters()["bandwidth"].wasPassed = false; + CLI::GetSingleton().Parameters()["kernel"].wasPassed = false; + + // Random input, some k <= number of reference points. + SetInputParam("reference", referenceData); + SetInputParam("kernel", (string)"triangular"); + SetInputParam("bandwidth", 1.0); // Default value. + + mlpackMain(); + + arma::mat triKernel; + triKernel = std::move(CLI::GetParam("kernels")); + + bindings::tests::CleanMemory(); + + CLI::GetSingleton().Parameters()["reference"].wasPassed = false; + CLI::GetSingleton().Parameters()["bandwidth"].wasPassed = false; + + SetInputParam("reference", referenceData); + SetInputParam("bandwidth", 4.0); + + mlpackMain(); + + CheckMatricesNotEqual(triKernel, + CLI::GetParam("kernels")); +} + +BOOST_AUTO_TEST_SUITE_END(); diff -Nru mlpack-3.1.0/src/mlpack/tests/main_tests/perceptron_test.cpp mlpack-3.1.1/src/mlpack/tests/main_tests/perceptron_test.cpp --- mlpack-3.1.0/src/mlpack/tests/main_tests/perceptron_test.cpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/tests/main_tests/perceptron_test.cpp 2019-05-27 02:18:49.000000000 +0000 @@ -163,6 +163,39 @@ } /** + * This test can be removed in mlpack 4.0.0. This tests that the output and + * predictions outputs are the same. + */ +BOOST_AUTO_TEST_CASE(PerceptronOutputPredictionsCheck) +{ + arma::mat trainX1; + arma::Row labelsX1; + + // Loading a train data set with 3 classes. + if (!data::Load("vc2.csv", trainX1)) + { + BOOST_FAIL("Could not load the train data (vc2.csv)"); + } + + // Loading the corresponding labels to the dataset. + if (!data::Load("vc2_labels.txt", labelsX1)) + { + BOOST_FAIL("Could not load the train data (vc2_labels.csv)"); + } + + SetInputParam("training", std::move(trainX1)); // Training data. + // Labels for the training data. 
+ SetInputParam("labels", std::move(labelsX1)); + + // Training model using first training dataset. + mlpackMain(); + + // Check that the outputs are the same. + CheckMatrices(CLI::GetParam>("output"), + CLI::GetParam>("predictions")); +} + +/** * Ensure that saved model can be used again. */ BOOST_AUTO_TEST_CASE(PerceptronModelReuseTest) diff -Nru mlpack-3.1.0/src/mlpack/tests/main_tests/random_forest_test.cpp mlpack-3.1.1/src/mlpack/tests/main_tests/random_forest_test.cpp --- mlpack-3.1.0/src/mlpack/tests/main_tests/random_forest_test.cpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/tests/main_tests/random_forest_test.cpp 2019-05-27 02:18:49.000000000 +0000 @@ -215,7 +215,7 @@ } /** - * Ensure that training accuracy goes up as minimum_leaf_size decreases. + * Ensure that training accuracy changes as minimum_leaf_size decreases. */ BOOST_AUTO_TEST_CASE(RandomForestDiffMinLeafSizeTest) { @@ -228,62 +228,71 @@ if (!data::Load("vc2_labels.txt", labels)) BOOST_FAIL("Cannot load labels for vc2_labels.txt"); - // Input training data. - SetInputParam("training", inputData); - SetInputParam("labels", labels); - SetInputParam("minimum_leaf_size", (int) 20); + bool success = false; + for (size_t trial = 0; trial < 3; ++trial) + { + // Input training data. + SetInputParam("training", inputData); + SetInputParam("labels", labels); + SetInputParam("minimum_leaf_size", (int) 20); - mlpackMain(); + mlpackMain(); - // Calculate training accuracy. - arma::Row predictions; - CLI::GetParam("output_model")->rf.Classify(inputData, - predictions); + // Calculate training accuracy. + arma::Row predictions; + CLI::GetParam("output_model")->rf.Classify(inputData, + predictions); - size_t correct = arma::accu(predictions == labels); - double accuracy20 = (double(correct) / double(labels.n_elem) * 100); + size_t correct = arma::accu(predictions == labels); + double accuracy20 = (double(correct) / double(labels.n_elem) * 100); - bindings::tests::CleanMemory(); + bindings::tests::CleanMemory(); - // Train for minimum leaf size 10. + // Train for minimum leaf size 10. - // Input training data. - SetInputParam("training", inputData); - SetInputParam("labels", labels); - SetInputParam("minimum_leaf_size", (int) 10); + // Input training data. + SetInputParam("training", inputData); + SetInputParam("labels", labels); + SetInputParam("minimum_leaf_size", (int) 10); - mlpackMain(); + mlpackMain(); - // Calculate training accuracy. - CLI::GetParam("output_model")->rf.Classify(inputData, - predictions); + // Calculate training accuracy. + CLI::GetParam("output_model")->rf.Classify(inputData, + predictions); - correct = arma::accu(predictions == labels); - double accuracy10 = (double(correct) / double(labels.n_elem) * 100); + correct = arma::accu(predictions == labels); + double accuracy10 = (double(correct) / double(labels.n_elem) * 100); - bindings::tests::CleanMemory(); + bindings::tests::CleanMemory(); - // Train for minimum leaf size 1. + // Train for minimum leaf size 1. - // Input training data. - SetInputParam("training", inputData); - SetInputParam("labels", labels); - SetInputParam("minimum_leaf_size", (int) 1); + // Input training data. + SetInputParam("training", inputData); + SetInputParam("labels", labels); + SetInputParam("minimum_leaf_size", (int) 1); - mlpackMain(); + mlpackMain(); - // Calculate training accuracy. - CLI::GetParam("output_model")->rf.Classify(inputData, - predictions); + // Calculate training accuracy. 
+ CLI::GetParam("output_model")->rf.Classify(inputData, + predictions); - correct = arma::accu(predictions == labels); - double accuracy1 = (double(correct) / double(labels.n_elem) * 100); + correct = arma::accu(predictions == labels); + double accuracy1 = (double(correct) / double(labels.n_elem) * 100); - BOOST_REQUIRE(accuracy1 > accuracy10 && accuracy10 > accuracy20); + success = (accuracy1 != accuracy10 && accuracy10 != accuracy20); + if (success) + break; + } + + BOOST_REQUIRE_EQUAL(success, true); } /** - * Ensure that test accuracy goes up as num_trees increases. + * Ensure that test accuracy changes as num_trees increases (we aren't + * guaranteed that accuracy will go up). */ BOOST_AUTO_TEST_CASE(RandomForestDiffNumTreeTest) { @@ -304,56 +313,69 @@ if (!data::Load("vc2_test_labels.txt", testLabels)) BOOST_FAIL("Cannot load labels for vc2__test_labels.txt"); - // Input training data. - SetInputParam("training", inputData); - SetInputParam("labels", labels); - SetInputParam("num_trees", (int) 1); - - mlpackMain(); - - // Calculate training accuracy. - arma::Row predictions; - CLI::GetParam("output_model")->rf.Classify(testData, - predictions); - bindings::tests::CleanMemory(); - - size_t correct = arma::accu(predictions == testLabels); - double accuracy1 = (double(correct) / double(testLabels.n_elem) * 100); - - // Train for num_trees 5. - - // Input training data. - SetInputParam("training", inputData); - SetInputParam("labels", labels); - SetInputParam("num_trees", (int) 5); - - mlpackMain(); - - // Calculate training accuracy. - CLI::GetParam("output_model")->rf.Classify(testData, - predictions); - bindings::tests::CleanMemory(); - - correct = arma::accu(predictions == testLabels); - double accuracy5 = (double(correct) / double(testLabels.n_elem) * 100); - - // Train for num_trees 10. - - // Input training data. - SetInputParam("training", std::move(inputData)); - SetInputParam("labels", std::move(labels)); - SetInputParam("num_trees", (int) 10); - - mlpackMain(); - - // Calculate training accuracy. - CLI::GetParam("output_model")->rf.Classify(testData, - predictions); - - correct = arma::accu(predictions == testLabels); - double accuracy10 = (double(correct) / double(testLabels.n_elem) * 100); + bool success = false; + for (size_t trial = 0; trial < 3; ++trial) + { + // Input training data. + SetInputParam("training", inputData); + SetInputParam("labels", labels); + SetInputParam("num_trees", (int) 1); + SetInputParam("minimum_leaf_size", (int) 1); + + mlpackMain(); + + // Calculate training accuracy. + arma::Row predictions; + CLI::GetParam("output_model")->rf.Classify(testData, + predictions); + bindings::tests::CleanMemory(); + + size_t correct = arma::accu(predictions == testLabels); + double accuracy1 = (double(correct) / double(testLabels.n_elem) * 100); + + // Train for num_trees 5. + + // Input training data. + SetInputParam("training", inputData); + SetInputParam("labels", labels); + SetInputParam("num_trees", (int) 5); + SetInputParam("minimum_leaf_size", (int) 1); + + mlpackMain(); + + // Calculate training accuracy. + CLI::GetParam("output_model")->rf.Classify(testData, + predictions); + bindings::tests::CleanMemory(); + + correct = arma::accu(predictions == testLabels); + double accuracy5 = (double(correct) / double(testLabels.n_elem) * 100); + + // Train for num_trees 10. + + // Input training data. 
+ SetInputParam("training", std::move(inputData)); + SetInputParam("labels", std::move(labels)); + SetInputParam("num_trees", (int) 10); + SetInputParam("minimum_leaf_size", (int) 1); + + mlpackMain(); + + // Calculate training accuracy. + CLI::GetParam("output_model")->rf.Classify(testData, + predictions); + + correct = arma::accu(predictions == testLabels); + double accuracy10 = (double(correct) / double(testLabels.n_elem) * 100); + + // It's possible this might not work just because of randomness. The VC2 + // dataset is not very large. + success = (accuracy10 != accuracy5 || accuracy5 != accuracy1); + if (success) + break; + } - BOOST_REQUIRE(accuracy10 >= accuracy5 && accuracy5 >= accuracy1); + BOOST_REQUIRE_EQUAL(success, true); } BOOST_AUTO_TEST_SUITE_END(); diff -Nru mlpack-3.1.0/src/mlpack/tests/q_learning_test.cpp mlpack-3.1.1/src/mlpack/tests/q_learning_test.cpp --- mlpack-3.1.0/src/mlpack/tests/q_learning_test.cpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/tests/q_learning_test.cpp 2019-05-27 02:18:49.000000000 +0000 @@ -49,7 +49,7 @@ model.Add>(128, 2); // Set up the policy and replay method. - GreedyPolicy policy(1.0, 1000, 0.1); + GreedyPolicy policy(1.0, 1000, 0.1, 0.99); RandomReplay replayMethod(10, 10000); TrainingConfig config; @@ -122,7 +122,7 @@ model.Add>(20, 2); // Set up the policy and replay method. - GreedyPolicy policy(1.0, 1000, 0.1); + GreedyPolicy policy(1.0, 1000, 0.1, 0.99); RandomReplay replayMethod(10, 10000); TrainingConfig config; @@ -191,7 +191,7 @@ model.Add>(32, 3); // Set up the policy and replay method. - GreedyPolicy policy(1.0, 1000, 0.1); + GreedyPolicy policy(1.0, 1000, 0.1, 0.99); RandomReplay replayMethod(20, 10000); TrainingConfig config; @@ -268,7 +268,7 @@ model.Add>(32, 3); // Set up the policy and replay method. - GreedyPolicy policy(1.0, 1000, 0.1); + GreedyPolicy policy(1.0, 1000, 0.1, 0.99); RandomReplay replayMethod(20, 10000); TrainingConfig config; diff -Nru mlpack-3.1.0/src/mlpack/tests/random_forest_test.cpp mlpack-3.1.1/src/mlpack/tests/random_forest_test.cpp --- mlpack-3.1.0/src/mlpack/tests/random_forest_test.cpp 2019-04-26 05:07:37.000000000 +0000 +++ mlpack-3.1.1/src/mlpack/tests/random_forest_test.cpp 2019-05-27 02:18:49.000000000 +0000 @@ -129,8 +129,7 @@ data::Load("vc2_labels.txt", labels); // Build a random forest and a decision tree. - RandomForest rf(dataset, labels, 3, - 10 /* 10 trees */, 5); + RandomForest<> rf(dataset, labels, 3, 20 /* 20 trees */, 1, 1e-7); DecisionTree<> dt(dataset, labels, 3, 5); // Get performance statistics on test data. @@ -149,7 +148,7 @@ size_t rfCorrect = arma::accu(rfPredictions == testLabels); size_t dtCorrect = arma::accu(dtPredictions == testLabels); - BOOST_REQUIRE_GE(rfCorrect, dtCorrect); + BOOST_REQUIRE_GE(rfCorrect, dtCorrect * 0.9); BOOST_REQUIRE_GE(rfCorrect, size_t(0.7 * testDataset.n_cols)); } @@ -182,8 +181,7 @@ weights[i] = math::Random(0.0, 0.01); // Low weights for false points. // Train decision tree and random forest. - RandomForest rf(dataset, labels, 3, weights, - 10, 5); + RandomForest<> rf(dataset, labels, 3, weights, 20, 1); DecisionTree<> dt(dataset, labels, 3, weights, 5); // Get performance statistics on test data. 
@@ -202,7 +200,7 @@ size_t rfCorrect = arma::accu(rfPredictions == testLabels); size_t dtCorrect = arma::accu(dtPredictions == testLabels); - BOOST_REQUIRE_GE(rfCorrect, dtCorrect); + BOOST_REQUIRE_GE(rfCorrect, dtCorrect * 0.9); BOOST_REQUIRE_GE(rfCorrect, size_t(0.7 * testDataset.n_cols)); } @@ -224,7 +222,8 @@ arma::Row testLabels = l.subvec(2000, 3999); // Train a random forest and a decision tree. - RandomForest<> rf(trainingData, di, trainingLabels, 5, 15 /* 15 trees */, 5); + RandomForest<> rf(trainingData, di, trainingLabels, 5, 25 /* 25 trees */, 1, + 1e-7, MultipleRandomDimensionSelect(4)); DecisionTree<> dt(trainingData, di, trainingLabels, 5, 5); // Get performance statistics on test data. @@ -238,7 +237,7 @@ size_t rfCorrect = arma::accu(rfPredictions == testLabels); size_t dtCorrect = arma::accu(dtPredictions == testLabels); - BOOST_REQUIRE_GE(rfCorrect, dtCorrect - 50); + BOOST_REQUIRE_GE(rfCorrect, dtCorrect - 25); BOOST_REQUIRE_GE(rfCorrect, size_t(0.7 * testData.n_cols)); } @@ -281,7 +280,8 @@ arma::Row fullLabels = arma::join_rows(trainingLabels, randomLabels); // Build a random forest and a decision tree. - RandomForest<> rf(fullData, di, fullLabels, 5, weights, 15 /* 15 trees */, 5); + RandomForest<> rf(fullData, di, fullLabels, 5, weights, 25 /* 25 trees */, 1, + 1e-7, MultipleRandomDimensionSelect(4)); DecisionTree<> dt(fullData, di, fullLabels, 5, weights, 5); // Get performance statistics on test data. @@ -295,34 +295,11 @@ size_t rfCorrect = arma::accu(rfPredictions == testLabels); size_t dtCorrect = arma::accu(dtPredictions == testLabels); - BOOST_REQUIRE_GE(rfCorrect, dtCorrect - 50); + BOOST_REQUIRE_GE(rfCorrect, dtCorrect - 25); BOOST_REQUIRE_GE(rfCorrect, size_t(0.7 * testData.n_cols)); } /** - * Test that learning with a leaf size of 1 successfully memorizes the training - * set. - */ -BOOST_AUTO_TEST_CASE(LeafSize1Test) -{ - // Load the vc2 dataset. - arma::mat dataset; - data::Load("vc2.csv", dataset); - arma::Row labels; - data::Load("vc2_labels.txt", labels); - - // Build a random forest with a leaf size of 1. - RandomForest<> rf(dataset, labels, 3, 10 /* 10 trees */, 1); - - // Predict on the training set. - arma::Row predictions; - rf.Classify(dataset, predictions); - - const size_t correct = arma::accu(predictions == labels); - BOOST_REQUIRE_EQUAL(correct, dataset.n_cols); -} - -/** * Test that a leaf size equal to the dataset size learns nothing. */ BOOST_AUTO_TEST_CASE(LeafSizeDatasetTest) @@ -337,24 +314,20 @@ // dataset. RandomForest<> rf(dataset, labels, 3, 10 /* 10 trees */, dataset.n_cols); - // Calculate majority probabilities. - arma::vec majorityProbs(3, arma::fill::zeros); - for (size_t i = 0; i < dataset.n_cols; ++i) - majorityProbs[labels[i]]++; - majorityProbs /= dataset.n_cols; - arma::uword max; - majorityProbs.max(max); - size_t majorityClass = (size_t) max; - // Predict on the training set. arma::Row predictions; arma::mat probabilities; rf.Classify(dataset, predictions, probabilities); + // We want to check that all the classes and probabilities are the same for + // all predictions. 
@@ -337,24 +314,20 @@
   // dataset.
   RandomForest<> rf(dataset, labels, 3, 10 /* 10 trees */, dataset.n_cols);
 
-  // Calculate majority probabilities.
-  arma::vec majorityProbs(3, arma::fill::zeros);
-  for (size_t i = 0; i < dataset.n_cols; ++i)
-    majorityProbs[labels[i]]++;
-  majorityProbs /= dataset.n_cols;
-  arma::uword max;
-  majorityProbs.max(max);
-  size_t majorityClass = (size_t) max;
-
   // Predict on the training set.
   arma::Row<size_t> predictions;
   arma::mat probabilities;
   rf.Classify(dataset, predictions, probabilities);
 
+  // We want to check that all the classes and probabilities are the same for
+  // all predictions.
+  size_t majorityClass = predictions[0];
+  arma::vec majorityProbs = probabilities.col(0);
+
   BOOST_REQUIRE_EQUAL(probabilities.n_rows, 3);
   BOOST_REQUIRE_EQUAL(probabilities.n_cols, dataset.n_cols);
   BOOST_REQUIRE_EQUAL(predictions.n_elem, dataset.n_cols);
 
-  for (size_t i = 0; i < predictions.n_cols; ++i)
+  for (size_t i = 1; i < predictions.n_cols; ++i)
   {
     BOOST_REQUIRE_EQUAL(predictions[i], majorityClass);
     for (size_t j = 0; j < probabilities.n_rows; ++j)
@@ -371,14 +344,14 @@
   arma::Row<size_t> labels;
   data::Load("vc2_labels.txt", labels);
 
-  RandomForest<> rf(dataset, labels, 3, 10 /* 10 trees */, 10);
+  RandomForest<> rf(dataset, labels, 3, 10 /* 10 trees */, 1);
 
   arma::Row<size_t> beforePredictions;
   arma::mat beforeProbabilities;
   rf.Classify(dataset, beforePredictions, beforeProbabilities);
 
   RandomForest<> xmlForest, textForest, binaryForest;
-  binaryForest.Train(dataset, labels, 3, 3, 50);
+  binaryForest.Train(dataset, labels, 3, 3, 5, 1);
 
   SerializeObjectAll(rf, xmlForest, textForest, binaryForest);
 
   // Now check that we get the same results serializing other things.
@@ -425,13 +398,13 @@
 
   // Test random forest on unweighted numeric dataset.
   RandomForest<GiniGain, RandomDimensionSelect> rf;
-  double entropy = rf.Train(dataset, labels, 3, 10, 5);
+  double entropy = rf.Train(dataset, labels, 3, 10, 1);
 
   BOOST_REQUIRE_EQUAL(std::isfinite(entropy), true);
 
   // Test random forest on weighted numeric dataset.
   RandomForest<GiniGain, RandomDimensionSelect> wrf;
-  entropy = wrf.Train(dataset, labels, 3, weights, 10, 5);
+  entropy = wrf.Train(dataset, labels, 3, weights, 10, 1);
 
   BOOST_REQUIRE_EQUAL(std::isfinite(entropy), true);
 }
@@ -471,16 +444,47 @@
 
   // Test random forest on unweighted categorical dataset.
   RandomForest<> rf;
-  double entropy = rf.Train(fullData, di, fullLabels, 5, 15 /* 15 trees */, 5);
+  double entropy = rf.Train(fullData, di, fullLabels, 5, 15 /* 15 trees */, 1,
+      1e-7, MultipleRandomDimensionSelect(3));
 
   BOOST_REQUIRE_EQUAL(std::isfinite(entropy), true);
 
   // Test random forest on weighted categorical dataset.
   RandomForest<> wrf;
   entropy = wrf.Train(fullData, di, fullLabels, 5, weights, 15 /* 15 trees */,
-      5);
+      1, 1e-7, MultipleRandomDimensionSelect(3));
 
   BOOST_REQUIRE_EQUAL(std::isfinite(entropy), true);
 }
 
+/**
+ * Test that different trees get generated.
+ */
+BOOST_AUTO_TEST_CASE(DifferentTreesTest)
+{
+  arma::mat d(10, 100, arma::fill::randu);
+  arma::Row<size_t> l(100);
+  for (size_t i = 0; i < 50; ++i)
+    l(i) = 0;
+  for (size_t i = 50; i < 100; ++i)
+    l(i) = 1;
+
+  bool success = false;
+  size_t trial = 0;
+
+  // It's possible we might get the same random dimensions selected, so let's do
+  // multiple trials.
+  while (!success && trial < 5)
+  {
+    RandomForest<GiniGain, RandomDimensionSelect> rf;
+    rf.Train(d, l, 2, 2, 5);
+
+    success = (rf.Tree(0).SplitDimension() != rf.Tree(1).SplitDimension());
+
+    ++trial;
+  }
+
+  BOOST_REQUIRE_EQUAL(success, true);
+}
+
 BOOST_AUTO_TEST_SUITE_END();
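Two of the new tests above (the binding accuracy check and DifferentTreesTest) wrap a stochastic assertion in a bounded retry loop rather than asserting on a single run. The pattern in isolation, with the callable as a stand-in for one train-and-compare round:

    #include <cstddef>
    #include <functional>

    // Bounded-retry pattern used by the new tests above: a property that holds
    // with high probability on any single attempt is tried up to maxTrials
    // times, so the test fails only if every trial fails.
    bool RetryStochasticCheck(const std::function<bool()>& checkOnce,
                              const size_t maxTrials = 5)
    {
      bool success = false;
      for (size_t trial = 0; trial < maxTrials && !success; ++trial)
        success = checkOnce();
      // The caller then asserts, e.g. BOOST_REQUIRE_EQUAL(success, true).
      return success;
    }

If a single trial succeeds with probability p, all five independent trials fail together with probability (1 - p)^5; even a modest p = 0.9 drives the flake rate down to 1e-5.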
diff -Nru mlpack-3.1.0/src/mlpack/tests/reward_clipping_test.cpp mlpack-3.1.1/src/mlpack/tests/reward_clipping_test.cpp
--- mlpack-3.1.0/src/mlpack/tests/reward_clipping_test.cpp 2019-04-26 05:07:37.000000000 +0000
+++ mlpack-3.1.1/src/mlpack/tests/reward_clipping_test.cpp 2019-05-27 02:18:49.000000000 +0000
@@ -67,7 +67,7 @@
   model.Add<Linear<>>(32, 3);
 
   // Set up the policy and replay method.
-  GreedyPolicy<RewardClipping<Acrobot>> policy(1.0, 1000, 0.1);
+  GreedyPolicy<RewardClipping<Acrobot>> policy(1.0, 1000, 0.1, 0.99);
   RandomReplay<RewardClipping<Acrobot>> replayMethod(20, 10000);
 
   // Set up Acrobot task and reward clipping wrapper
diff -Nru mlpack-3.1.0/src/mlpack/tests/rl_components_test.cpp mlpack-3.1.1/src/mlpack/tests/rl_components_test.cpp
--- mlpack-3.1.0/src/mlpack/tests/rl_components_test.cpp 2019-04-26 05:07:37.000000000 +0000
+++ mlpack-3.1.1/src/mlpack/tests/rl_components_test.cpp 2019-05-27 02:18:49.000000000 +0000
@@ -50,7 +50,7 @@
 }
 
 /**
- * Constructs a Continuous MountainCar instance and check if the main rountine
+ * Constructs a Continuous MountainCar instance and check if the main routine
  * works as it should be.
  */
 BOOST_AUTO_TEST_CASE(SimpleContinuousMountainCarTest)
@@ -172,7 +172,7 @@
  */
 BOOST_AUTO_TEST_CASE(GreedyPolicyTest)
 {
-  GreedyPolicy<CartPole> policy(1.0, 10, 0.0);
+  GreedyPolicy<CartPole> policy(1.0, 10, 0.0, 0.99);
   for (size_t i = 0; i < 15; ++i)
     policy.Anneal();
   BOOST_REQUIRE_CLOSE(0.0, policy.Epsilon(), 1e-5);
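The GreedyPolicyTest change above makes the annealing arithmetic easy to check by hand. Under the same decay-scaled linear step assumed in the earlier sketch, each Anneal() of policy(1.0, 10, 0.0, 0.99) subtracts (1.0 - 0.0) * 0.99 / 10 = 0.099, so epsilon is clamped to the 0.0 floor by the 11th of the test's 15 calls; this worked check is based on that assumption, not on mlpack's exact implementation.

    #include <algorithm>
    #include <cassert>

    // Simulate the schedule used by GreedyPolicyTest above: 15 anneal steps of
    // size 0.099, clamped at the 0.0 floor.  After ceil(1.0 / 0.099) = 11
    // steps epsilon reaches 0.0, so the test's 15 calls are comfortably enough.
    int main()
    {
      double epsilon = 1.0;
      const double minEpsilon = 0.0;
      const double delta = (1.0 - minEpsilon) * 0.99 / 10;  // 0.099 per call

      for (int i = 0; i < 15; ++i)
        epsilon = std::max(minEpsilon, epsilon - delta);

      assert(epsilon == minEpsilon);  // matches BOOST_REQUIRE_CLOSE(0.0, ...)
      return 0;
    }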