diff -Nru r-cran-uwot-0.1.9/debian/changelog r-cran-uwot-0.1.10/debian/changelog --- r-cran-uwot-0.1.9/debian/changelog 2020-11-23 15:24:10.000000000 +0000 +++ r-cran-uwot-0.1.10/debian/changelog 2020-12-19 11:00:43.000000000 +0000 @@ -1,3 +1,10 @@ +r-cran-uwot (0.1.10-1) unstable; urgency=medium + + * Team upload. + * New upstream version + + -- Nilesh Patra Sat, 19 Dec 2020 16:30:43 +0530 + r-cran-uwot (0.1.9-1) unstable; urgency=medium * Team upload. diff -Nru r-cran-uwot-0.1.9/debian/control r-cran-uwot-0.1.10/debian/control --- r-cran-uwot-0.1.9/debian/control 2020-11-23 15:24:10.000000000 +0000 +++ r-cran-uwot-0.1.10/debian/control 2020-12-19 11:00:38.000000000 +0000 @@ -12,7 +12,7 @@ r-cran-rcpp, r-cran-fnn, r-cran-rspectra, - r-cran-rcppannoy, + r-cran-rcppannoy (>= 0.0.17), r-cran-irlba, r-cran-rcppprogress, r-cran-dqrng, diff -Nru r-cran-uwot-0.1.9/DESCRIPTION r-cran-uwot-0.1.10/DESCRIPTION --- r-cran-uwot-0.1.9/DESCRIPTION 2020-11-15 20:00:02.000000000 +0000 +++ r-cran-uwot-0.1.10/DESCRIPTION 2020-12-15 13:40:02.000000000 +0000 @@ -1,7 +1,7 @@ Package: uwot Title: The Uniform Manifold Approximation and Projection (UMAP) Method for Dimensionality Reduction -Version: 0.1.9 +Version: 0.1.10 Authors@R: c(person("James", "Melville", email = "jlmelville@gmail.com", role = c("aut", "cre")), person("Aaron", "Lun", role="ctb"),person("Mohamed Nadhir", "Djekidel", role="ctb"), @@ -23,13 +23,13 @@ RoxygenNote: 7.1.1 Depends: Matrix LinkingTo: Rcpp, RcppProgress, RcppAnnoy, dqrng -Imports: Rcpp, methods, FNN, RSpectra, RcppAnnoy (>= 0.0.11), irlba +Imports: Rcpp, methods, FNN, RSpectra, RcppAnnoy (>= 0.0.17), irlba NeedsCompilation: yes -Packaged: 2020-11-15 19:17:46 UTC; jlmel +Packaged: 2020-12-14 02:17:07 UTC; jlmel Author: James Melville [aut, cre], Aaron Lun [ctb], Mohamed Nadhir Djekidel [ctb], Yuhan Hao [ctb] Maintainer: James Melville Repository: CRAN -Date/Publication: 2020-11-15 20:00:02 UTC +Date/Publication: 2020-12-15 13:40:02 UTC diff -Nru r-cran-uwot-0.1.9/man/tumap.Rd r-cran-uwot-0.1.10/man/tumap.Rd --- r-cran-uwot-0.1.9/man/tumap.Rd 2020-08-03 05:15:48.000000000 +0000 +++ r-cran-uwot-0.1.10/man/tumap.Rd 2020-11-25 19:27:23.000000000 +0000 @@ -386,7 +386,9 @@ \code{"model"}), returns a list containing extra information that can be used to add new data to an existing embedding via \code{\link{umap_transform}}. In this case, the coordinates are available - in the list item \code{embedding}. + in the list item \code{embedding}. \bold{NOTE}: The contents of + the \code{model} list should \emph{not} be considered stable or part of + the public API, and are purposely left undocumented. \item if \code{ret_nn = TRUE} (or \code{ret_extra} contains \code{"nn"}), returns the nearest neighbor data as a list called \code{nn}. This contains one list for each \code{metric} calculated, itself containing a diff -Nru r-cran-uwot-0.1.9/man/umap.Rd r-cran-uwot-0.1.10/man/umap.Rd --- r-cran-uwot-0.1.9/man/umap.Rd 2020-11-12 16:51:45.000000000 +0000 +++ r-cran-uwot-0.1.10/man/umap.Rd 2020-11-25 19:26:00.000000000 +0000 @@ -415,7 +415,9 @@ \code{"model"}), returns a list containing extra information that can be used to add new data to an existing embedding via \code{\link{umap_transform}}. In this case, the coordinates are available - in the list item \code{embedding}. + in the list item \code{embedding}. \bold{NOTE}: The contents of + the \code{model} list should \emph{not} be considered stable or part of + the public API, and are purposely left undocumented. \item if \code{ret_nn = TRUE} (or \code{ret_extra} contains \code{"nn"}), returns the nearest neighbor data as a list called \code{nn}. This contains one list for each \code{metric} calculated, itself containing a diff -Nru r-cran-uwot-0.1.9/MD5 r-cran-uwot-0.1.10/MD5 --- r-cran-uwot-0.1.9/MD5 2020-11-15 20:00:02.000000000 +0000 +++ r-cran-uwot-0.1.10/MD5 2020-12-15 13:40:02.000000000 +0000 @@ -1,14 +1,14 @@ -e648f729fbdb825aee1e705ebcac3421 *DESCRIPTION +5cc1385bc8864015a161178ea824e753 *DESCRIPTION 2835b77ed7ea04968f6c4559e4bbb921 *NAMESPACE -e35cfdd1f069921adda67b19c89cfecd *NEWS.md +d9b1698a00ee1c3e18bd369891b2b255 *NEWS.md 8a70d13455317ad06535e5a3bc9bec63 *R/RcppExports.R 8c4849bee7ca6e9f04ce498ea63d24c1 *R/affinity.R 1badef676d44497787d884a6d9213418 *R/init.R fcaa30ee43afee0c3b9408bccd1a9324 *R/neighbors.R d5c7196f89f25c0be6446008596070ac *R/supervised.R 60397dba2da6de9c720ff4857bf3b472 *R/transform.R -87ec5ba133d8a2361371f298fab00aa9 *R/util.R -69fed60ebdd7f64865c76e8efa70db71 *R/uwot.R +bea9c21593cc8896ce09bec01f9cfc18 *R/util.R +deea229cf9baef085d69eeadccf935d9 *R/uwot.R 265735fea981ad819ef75ec54bcba81b *inst/include/RcppPerpendicular.h 93a073d84957f140072fc866ec94f528 *inst/include/uwot/connected_components.h d3cdf9a44a4afd1c0cfaae79ae54918b *inst/include/uwot/gradient.h @@ -23,15 +23,15 @@ f10ef03c0297422cd2b189b5241695cd *man/load_uwot.Rd 693ab1da7f2e8728a2a19246c0e6cf85 *man/lvish.Rd 731ae97a67a23467e87aefaa9922bed3 *man/save_uwot.Rd -7e1cd8ed03dc978d712f10cfc6ae5855 *man/tumap.Rd -bc0efa8a1b66baa1c36184199e0bf9f3 *man/umap.Rd +e7f6c7336ef9225fc82de5b0b284d58e *man/tumap.Rd +9abb945c780b5859afc510050225e01f *man/umap.Rd b96782c1e9c42c9deea29c1b170c4a42 *man/umap_transform.Rd 5093abbd7d39dd802d09f5d635106540 *man/unload_uwot.Rd -6437f29a46b5c1d81fed0733d9946108 *src/Makevars +5c305141d163878e56fb9b379a4b7f3a *src/Makevars fd6da8c7d08f58d98ccf4252e90ac571 *src/RcppExports.cpp 8b804bf880076d43811bb6374d8ccaea *src/connected_components.cpp 2413f7fc7b48e7988c2d8164f57357dd *src/nn_parallel.cpp -62c698d6011befaa3484eae8bea5d2ba *src/nn_parallel.h +69668471348ed2d76e708e6a4060300b *src/nn_parallel.h ac2788fa3d4929f72ceef648264d2a40 *src/optimize.cpp 5396ea1d1041d42f66d1307b1e3df898 *src/perplexity.cpp d8592140feeca94d7b0682aa7e60edfd *src/rng.h @@ -44,7 +44,7 @@ 894a66e93d95d7fa27c269bbbe15877d *tests/testthat/helper_fuzzy_sets.R f402a4131e20c4be58bcf488330fa2b5 *tests/testthat/test_curve.R 28948e38b76b2cf45da14b50d409e5c0 *tests/testthat/test_epochs.R -56bce59aef135df1859daf1e18ece623 *tests/testthat/test_errors.R +5241d6374c053458acde82afb3386127 *tests/testthat/test_errors.R 293d274f1ba615bb450190f0fc56f9be *tests/testthat/test_fuzzy_simplicial_set.R 42097917f3eeb862a865891d96a9dee0 *tests/testthat/test_knn_aff.R 6a06ce6ba4fbc2f42ed6993bbee31597 *tests/testthat/test_mixed_distances.R diff -Nru r-cran-uwot-0.1.9/NEWS.md r-cran-uwot-0.1.10/NEWS.md --- r-cran-uwot-0.1.9/NEWS.md 2020-11-15 18:20:56.000000000 +0000 +++ r-cran-uwot-0.1.10/NEWS.md 2020-12-06 17:55:31.000000000 +0000 @@ -1,3 +1,19 @@ +# uwot 0.1.10 + +This release is mainly to allow for some internal changes to keep compatibility +with RcppAnnoy, used for the nearest neighbor calculations. + +## Bug fixes and minor improvements + +* Passing in data with missing values will now raise an error early. Missing +data in factor columns intended for supervised UMAP is still ok. Thank you David +McGaughey for tweeting about this issue. +* The documentation for the return value of `umap` and `tumap` now note that the +contents of the `model` list are subject to change and not intended to be part +of the uwot public API. I recommend not relying on the structure of the `model`, +especially if your package is intended to appear on CRAN or Bioconductor, as any +breakages will delay future releases of uwot to CRAN. + # uwot 0.1.9 ## New features @@ -8,7 +24,7 @@ Backwards compatibility with models generated by previous versions using `ret_model = TRUE` should have been preserved. -## Big fixes and minor improvements +## Bug fixes and minor improvements * New parameter, `nn_method`, for `umap_transform`: pass a list containing pre-computed nearest neighbor data (identical to that used in the `umap` @@ -42,7 +58,7 @@ # uwot 0.1.8 -## Big fixes and minor improvements +## Bug fixes and minor improvements * default for `n_threads` is now `NULL` to provide a bit more protection from changing dependencies. diff -Nru r-cran-uwot-0.1.9/R/util.R r-cran-uwot-0.1.10/R/util.R --- r-cran-uwot-0.1.9/R/util.R 2020-08-02 18:23:56.000000000 +0000 +++ r-cran-uwot-0.1.10/R/util.R 2020-11-25 18:35:30.000000000 +0000 @@ -124,3 +124,8 @@ message("|") } +checkna <- function(X) { + if (!is.null(X) && any(is.na(X))) { + stop("Missing values found in 'X'") + } +} diff -Nru r-cran-uwot-0.1.9/R/uwot.R r-cran-uwot-0.1.10/R/uwot.R --- r-cran-uwot-0.1.9/R/uwot.R 2020-11-12 16:51:37.000000000 +0000 +++ r-cran-uwot-0.1.10/R/uwot.R 2020-11-25 19:27:17.000000000 +0000 @@ -338,7 +338,9 @@ #' \code{"model"}), returns a list containing extra information that can be #' used to add new data to an existing embedding via #' \code{\link{umap_transform}}. In this case, the coordinates are available -#' in the list item \code{embedding}. +#' in the list item \code{embedding}. \bold{NOTE}: The contents of +#' the \code{model} list should \emph{not} be considered stable or part of +#' the public API, and are purposely left undocumented. #' \item if \code{ret_nn = TRUE} (or \code{ret_extra} contains \code{"nn"}), #' returns the nearest neighbor data as a list called \code{nn}. This #' contains one list for each \code{metric} calculated, itself containing a @@ -778,7 +780,9 @@ #' \code{"model"}), returns a list containing extra information that can be #' used to add new data to an existing embedding via #' \code{\link{umap_transform}}. In this case, the coordinates are available -#' in the list item \code{embedding}. +#' in the list item \code{embedding}. \bold{NOTE}: The contents of +#' the \code{model} list should \emph{not} be considered stable or part of +#' the public API, and are purposely left undocumented. #' \item if \code{ret_nn = TRUE} (or \code{ret_extra} contains \code{"nn"}), #' returns the nearest neighbor data as a list called \code{nn}. This #' contains one list for each \code{metric} calculated, itself containing a @@ -1296,6 +1300,7 @@ if (ret_model) { stop("Can only create models with dense matrix or data frame input") } + checkna(X) n_vertices <- attr(X, "Size") tsmessage("Read ", n_vertices, " rows") } @@ -1303,6 +1308,7 @@ if (ret_model) { stop("Can only create models with dense matrix or data frame input") } + checkna(X) n_vertices <- nrow(X) if (ncol(X) != n_vertices) { stop("Sparse matrices are only supported as distance matrices") @@ -1331,6 +1337,7 @@ } X <- as.matrix(X[, indexes]) } + checkna(X) n_vertices <- nrow(X) tsmessage( "Read ", n_vertices, " rows and found ", ncol(X), diff -Nru r-cran-uwot-0.1.9/src/Makevars r-cran-uwot-0.1.10/src/Makevars --- r-cran-uwot-0.1.9/src/Makevars 2020-11-12 15:40:52.000000000 +0000 +++ r-cran-uwot-0.1.10/src/Makevars 2020-12-06 17:57:39.000000000 +0000 @@ -1,7 +1,5 @@ # Turn on C++11 support to get access to long long (guaranteed 64-bit ints) CXX_STD = CXX11 -# Add this define with Rcpp 0.16.2 or later: -D__RcppAnnoy_0_16_2__ -# Once that release is standard just remove the define here and its test (twice) in nn_parallel.h -PKG_CXXFLAGS = -DSTRICT_R_HEADERS -DRCPP_NO_RTTI +PKG_CXXFLAGS = -DRCPP_NO_RTTI PKG_CPPFLAGS = -I../inst/include/ diff -Nru r-cran-uwot-0.1.9/src/nn_parallel.h r-cran-uwot-0.1.10/src/nn_parallel.h --- r-cran-uwot-0.1.9/src/nn_parallel.h 2020-11-11 23:16:17.000000000 +0000 +++ r-cran-uwot-0.1.10/src/nn_parallel.h 2020-12-06 17:57:39.000000000 +0000 @@ -19,25 +19,11 @@ #include -#if defined(__MINGW32__) -#undef Realloc -#undef Free -#endif - -#define __ERROR_PRINTER_OVERRIDE__ REprintf - -#include -#include +#include "RcppAnnoy.h" #include "uwot/matrix.h" -#if defined __RcppAnnoy_0_16_2__ -#ifdef ANNOYLIB_MULTITHREADED_BUILD - typedef AnnoyIndexMultiThreadedBuildPolicy AnnoyIndexThreadedBuildPolicy; -#else - typedef AnnoyIndexSingleThreadedBuildPolicy AnnoyIndexThreadedBuildPolicy; -#endif -#endif +typedef AnnoyIndexSingleThreadedBuildPolicy AnnoyIndexThreadedBuildPolicy; struct UwotAnnoyEuclidean { using Distance = Euclidean; @@ -74,11 +60,7 @@ std::vector dists; AnnoyIndex -#else - typename UwotAnnoyDistance::Distance, Kiss64Random> -#endif index; NNWorker(const std::string &index_name, const std::vector &mat, diff -Nru r-cran-uwot-0.1.9/tests/testthat/test_errors.R r-cran-uwot-0.1.10/tests/testthat/test_errors.R --- r-cran-uwot-0.1.9/tests/testthat/test_errors.R 2020-09-02 15:09:06.000000000 +0000 +++ r-cran-uwot-0.1.10/tests/testthat/test_errors.R 2020-11-25 18:59:15.000000000 +0000 @@ -47,3 +47,18 @@ # #42: check init is a matrix or a string; complain otherwise expect_error(umap(iris10, n_neighbors = 4, init = as.matrix(iris[, 1:3])), "(10, 2)") expect_error(umap(iris10, n_neighbors = 4, init = iris), "matrix or string") + +# Don't use data with NA in it +test_that("Detect data with NA in", { +diris10na <- diris10 +diris10na[1] <- NA +expect_error(umap(diris10na), "missing", ignore.case = TRUE) + +dmiris10zna <- dmiris10z +dmiris10zna[2, 1] <- NA +expect_error(umap(dmiris10zna, n_neighbors = 4), "missing", ignore.case = TRUE) + +iris10na <- iris10 +iris10na[1, 1] <- NA +expect_error(umap(iris10na, n_neighbors = 4), "missing", ignore.case = TRUE) +})