diff -Nru r-cran-energy-1.7-4/debian/changelog r-cran-energy-1.7-5/debian/changelog --- r-cran-energy-1.7-4/debian/changelog 2018-06-01 14:51:07.000000000 +0000 +++ r-cran-energy-1.7-5/debian/changelog 2018-08-14 01:59:18.000000000 +0000 @@ -1,8 +1,11 @@ -r-cran-energy (1.7-4-1build1) cosmic; urgency=medium +r-cran-energy (1.7-5-1) unstable; urgency=medium - * No-change rebuild against r-api-3.5 + * New upstream release - -- Graham Inggs Fri, 01 Jun 2018 14:51:07 +0000 + * debian/control: Set Build-Depends: to current R version + * debian/control: Set Standards-Version: to current version + + -- Dirk Eddelbuettel Mon, 13 Aug 2018 20:59:18 -0500 r-cran-energy (1.7-4-1) unstable; urgency=medium diff -Nru r-cran-energy-1.7-4/debian/control r-cran-energy-1.7-5/debian/control --- r-cran-energy-1.7-4/debian/control 2018-05-30 02:56:12.000000000 +0000 +++ r-cran-energy-1.7-5/debian/control 2018-08-14 01:59:18.000000000 +0000 @@ -2,8 +2,8 @@ Section: gnu-r Priority: optional Maintainer: Dirk Eddelbuettel -Build-Depends: debhelper (>= 10), r-base-dev (>= 3.4.4), dh-r, r-cran-boot, r-cran-rcpp -Standards-Version: 4.1.4 +Build-Depends: debhelper (>= 10), r-base-dev (>= 3.5.1), dh-r, r-cran-boot, r-cran-rcpp +Standards-Version: 4.1.5 Vcs-Browser: https://salsa.debian.org/edd/r-cran-energy Vcs-Git: https://salsa.debian.org/edd/r-cran-energy.git Homepage: https://cran.r-project.org/package=energy diff -Nru r-cran-energy-1.7-4/DESCRIPTION r-cran-energy-1.7-5/DESCRIPTION --- r-cran-energy-1.7-4/DESCRIPTION 2018-05-27 21:03:00.000000000 +0000 +++ r-cran-energy-1.7-5/DESCRIPTION 2018-08-11 22:00:12.000000000 +0000 @@ -1,16 +1,17 @@ Package: energy Title: E-Statistics: Multivariate Inference via the Energy of Data -Version: 1.7-4 -Date: 2018-05-27 -Author: Maria L. Rizzo and Gabor J. Szekely +Version: 1.7-5 +Date: 2018-08-11 +Authors@R: c( + person("Maria", "Rizzo", , "mrizzo@bgsu.edu", c("aut", "cre")), + person("Gabor", "Szekely", , , "aut")) Description: E-statistics (energy) tests and statistics for multivariate and univariate inference, including distance correlation, one-sample, two-sample, and multi-sample tests for comparing multivariate distributions, are implemented. Measuring and testing multivariate independence based on distance correlation, partial distance correlation, - multivariate goodness-of-fit tests, clustering based on energy distance, testing for - multivariate normality, distance components (disco) for non-parametric analysis of - structured data, and other energy statistics/methods are implemented. -Maintainer: Maria Rizzo + multivariate goodness-of-fit tests, k-groups and hierarchical clustering based on energy + distance, testing for multivariate normality, distance components (disco) for non-parametric + analysis of structured data, and other energy statistics/methods are implemented. Imports: Rcpp (>= 0.12.6), stats, boot LinkingTo: Rcpp Suggests: MASS @@ -18,5 +19,8 @@ License: GPL (>= 2) NeedsCompilation: yes Repository: CRAN -Packaged: 2018-05-27 19:49:23 UTC; Maria -Date/Publication: 2018-05-27 21:03:00 UTC +Packaged: 2018-08-11 17:00:17 UTC; Maria +Author: Maria Rizzo [aut, cre], + Gabor Szekely [aut] +Maintainer: Maria Rizzo +Date/Publication: 2018-08-11 22:00:12 UTC diff -Nru r-cran-energy-1.7-4/man/dcov2d.Rd r-cran-energy-1.7-5/man/dcov2d.Rd --- r-cran-energy-1.7-4/man/dcov2d.Rd 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-energy-1.7-5/man/dcov2d.Rd 2018-06-19 09:28:15.000000000 +0000 @@ -0,0 +1,72 @@ +\name{Fast bivariate dcor and dcov} +\alias{dcor2d} +\alias{dcov2d} +\title{Fast dCor and dCov for bivariate data only} +\description{ +For bivariate data only, these are fast O(n log n) implementations of distance +correlation and distance covariance statistics. The U-statistic for dcov^2 is unbiased; +the V-statistic is the original definition in SRB 2007. These algorithms do not +store the distance matrices, so they are suitable for large samples. +} +\usage{ +dcor2d(x, y, type = c("V", "U")) +dcov2d(x, y, type = c("V", "U"), all.stats = FALSE) +} +\arguments{ + \item{x}{ numeric vector} + \item{y}{ numeric vector} + \item{type}{ "V" or "U", for V- or U-statistics} + \item{all.stats}{ logical} +} +\details{ +The unbiased (squared) dcov is documented in \code{dcovU}, for multivariate data in arbitrary, not necessarily equal dimensions. \code{dcov2d} and \code{dcor2d} provide a faster O(n log n) algorithm for bivariate (x, y) only (X and Y are real-valued random vectors). The O(n log n) algorithm was proposed by Huo and Szekely (2016). The algorithm is faster above a certain sample size n. It does not store the distance matrix so the sample size can be very large. +} +\value{ +By default, \code{dcor2d} returns the V-statistic dCor_n^2(x, y), and if type="U", it returns a bias-corrected estimator of squared dcor. + +By default, \code{dcov2} returns the V-statistic V_n^2 = dCov_n^2(x, y), and if type="U", it returns the U-statistic, unbiased for V^2(X, Y). The argument all.stats=TRUE is used internally when the function is called from \code{dcor2}. + +For \code{dcov2d} and \code{dcor2d}, direct computation using the C++ function \code{dcovU_stats} may be somewhat faster on small samples, depending on the platform. + +\code{dcor2d} and \code{dcov2d} do not store the distance matrices so these functions are helpful when sample size is large, the data is bivariate, and we simply require the statistics. There is not an efficient way to do the nonparametric test by permutations without storing distances. For a test of independence on moderate size samples, use \code{dcov.test} or \code{dcor.test}. +} +\note{ +Unbiased distance covariance (SR2014) is equivalent to the U-statistic +estimator of \eqn{\mathrm{dCov^2}}{dCov^2}. Since \code{dcovU} is an +unbiased statistic, it can be negative and its square root would be +complex so the square root of the U-statistic is not applied. +For the original distance covariance test of independence (SRB2007, +SR2009), the test statistic was the V-statistic \eqn{\mathrm{n\, dCov_n^2} = n \mathcal{V}_n^2}{n V_n^2}. +Similarly, \code{bcdcor} is bias-corrected, so we do not take the +square root as with \eqn{dCor_n^2}{dCor_n^2}. +} +\references{ +Huo, X. and Szekely, G.J. (2016). Fast computing for +distance covariance. Technometrics, 58(4), 435-447. + + Szekely, G.J. and Rizzo, M.L. (2014), + Partial Distance Correlation with Methods for Dissimilarities. + \emph{Annals of Statistics}, Vol. 42 No. 6, 2382-2412. + + Szekely, G.J., Rizzo, M.L., and Bakirov, N.K. (2007), + Measuring and Testing Dependence by Correlation of Distances, + \emph{Annals of Statistics}, Vol. 35 No. 6, pp. 2769-2794. + \cr \url{http://dx.doi.org/10.1214/009053607000000505} +} +\author{ Maria L. Rizzo \email{mrizzo @ bgsu.edu} and +Gabor J. Szekely +} +\examples{ + \donttest{ + ## these are equivalent, but 2d is faster for n > 50 + n <- 100 + x <- rnorm(100) + y <- rnorm(100) + all.equal(dcov(x, y)^2, dcov2d(x, y), check.attributes = FALSE) + all.equal(bcdcor(x, y), dcor2d(x, y, "U"), check.attributes = FALSE) + } +} +\concept{ independence } +\concept{ distance correlation } +\concept{ distance covariance } +\concept{ energy statistics } diff -Nru r-cran-energy-1.7-4/man/dcov.test.Rd r-cran-energy-1.7-5/man/dcov.test.Rd --- r-cran-energy-1.7-4/man/dcov.test.Rd 2018-05-27 19:19:13.000000000 +0000 +++ r-cran-energy-1.7-5/man/dcov.test.Rd 2018-06-17 15:33:15.000000000 +0000 @@ -100,8 +100,8 @@ Szekely, G.J. and Rizzo, M.L. (2009), Rejoinder: Brownian Distance Covariance, \emph{Annals of Applied Statistics}, Vol. 3, No. 4, 1303-1308. - } - \author{ Maria L. Rizzo \email{mrizzo @ bgsu.edu} and +} +\author{ Maria L. Rizzo \email{mrizzo @ bgsu.edu} and Gabor J. Szekely } \examples{ @@ -111,21 +111,7 @@ dcor.test(dist(x), dist(y), R=199) set.seed(1) dcov.test(x, y, R=199) - - \dontrun{ - set.seed(1) - dcov.test(dist(x), dist(y), R=199) #same thing - set.seed(1) - dcov.test(x, y, index=.5, R=199) - set.seed(1) - dcov.test(dist(x), dist(y), index=.5, R=199) #same thing - } - - ## Example with dvar=0 (so dcov=0 and pval=1) - x <- rep.int(1, 10) - y <- 1:10 - dcov.test(x, y, R=199) - } +} \keyword{ htest } \keyword{ multivariate } \keyword{ nonparametric } diff -Nru r-cran-energy-1.7-4/man/energy-defunct.Rd r-cran-energy-1.7-5/man/energy-defunct.Rd --- r-cran-energy-1.7-4/man/energy-defunct.Rd 2018-05-27 19:24:54.000000000 +0000 +++ r-cran-energy-1.7-5/man/energy-defunct.Rd 2018-06-17 15:33:15.000000000 +0000 @@ -2,7 +2,7 @@ \alias{indep.e} \alias{indep.etest} \title{ Energy Statistic Test of Independence} -\description{Deprecated: use \code{indep.test} with \code{method = mvI}. +\description{Defunct: use \code{indep.test} with \code{method = mvI}. Computes a multivariate nonparametric E-statistic and test of independence.} \usage{ indep.e(x, y) @@ -42,20 +42,7 @@ \author{ Maria L. Rizzo \email{mrizzo @ bgsu.edu} and Gabor J. Szekely } -\examples{ - \dontrun{ - ## independent univariate data - x <- sin(runif(30, 0, 2*pi) * 2) - y <- sin(runif(30, 0, 2*pi) * 4) - indep.etest(x, y, R = 99) - ## dependent multivariate data - Sigma <- matrix(c(1, .1, 0, 0 , 1, 0, 0 ,.1, 1), 3, 3) - x <- mvrnorm(30, c(0, 0, 0), diag(3)) - y <- mvrnorm(30, c(0, 0, 0), Sigma) * x - indep.etest(x, y, R = 99) - } -} \keyword{ htest } \keyword{ multivariate } \concept{ energy statistics } diff -Nru r-cran-energy-1.7-4/man/energy.hclust.Rd r-cran-energy-1.7-5/man/energy.hclust.Rd --- r-cran-energy-1.7-4/man/energy.hclust.Rd 2018-05-27 19:19:13.000000000 +0000 +++ r-cran-energy-1.7-5/man/energy.hclust.Rd 2018-07-09 17:31:04.000000000 +0000 @@ -66,9 +66,7 @@ Currently \code{stats::hclust} implements Ward's method by \code{method="ward.D2"}, which applies the squared distances. That method was previously \code{"ward"}. Because both \code{hclust} and energy use the same type of Lance-Williams recursive formula to update cluster distances, now with the additional option \code{method="ward.D"} in \code{hclust}, the -energy distance method is easily implemented by \code{hclust}. (Some "Ward" algorithms do not use Lance-Williams, however). Energy clustering (with \code{alpha=1}) and "ward.D" now return the same result, except that the cluster heights of {energy.hclust} with \code{alpha=1} are two times the heights from \code{hclust}. - -However, the implementation in the energy package is more than 100 times faster than \code{hclust}. in a recent benchmark. +energy distance method is easily implemented by \code{hclust}. (Some "Ward" algorithms do not use Lance-Williams, however). Energy clustering (with \code{alpha=1}) and "ward.D" now return the same result, except that the cluster heights of energy hierarchical clustering with \code{alpha=1} are two times the heights from \code{hclust}. In order to ensure compatibility with hclust methods, \code{energy.hclust} now passes arguments through to \code{hclust} after possibly applying the optional exponent to distance. } \references{ Szekely, G. J. and Rizzo, M. L. (2005) Hierarchical Clustering @@ -94,7 +92,6 @@ library(cluster) data(animals) plot(energy.hclust(dist(animals))) - } data(USArrests) ecl <- energy.hclust(dist(USArrests)) @@ -113,6 +110,7 @@ w <- hclust(d^2, method="ward.D2") list("E" = table(cutree(e, k=2) == g), "Ward" = table(cutree(w, k=2) == g), "Avg" = table(cutree(a, k=2) == g)) + } } \keyword{ multivariate } \keyword{ cluster } diff -Nru r-cran-energy-1.7-4/man/eqdist.etest.Rd r-cran-energy-1.7-5/man/eqdist.etest.Rd --- r-cran-energy-1.7-4/man/eqdist.etest.Rd 2018-05-27 19:19:13.000000000 +0000 +++ r-cran-energy-1.7-5/man/eqdist.etest.Rd 2018-06-17 15:33:15.000000000 +0000 @@ -144,15 +144,6 @@ # should match statistic in edist method="discoB", above set.seed(1234) disco.between(d, factors=g, distance=TRUE, R=199) - - \dontshow{ - eqdist.e(iris[,1:4], c(50,50,50)) - x <- matrix(rnorm(200), nrow=40) - y <- matrix(rnorm(250, mean=5), nrow=50) - x <- rbind(x, y) - eqdist.etest(dist(x), sizes=c(40, 50), distance=TRUE, R = 19) - eqdist.e(dist(x), sizes=c(40, 50), distance=TRUE) -} } \keyword{ multivariate } \keyword{ htest } diff -Nru r-cran-energy-1.7-4/man/kgroups.Rd r-cran-energy-1.7-5/man/kgroups.Rd --- r-cran-energy-1.7-4/man/kgroups.Rd 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-energy-1.7-5/man/kgroups.Rd 2018-08-11 13:47:07.000000000 +0000 @@ -0,0 +1,96 @@ +\name{kgroups} +\alias{kgroups} + +\title{ +K-Groups Clustering +} +\description{ +Perform k-groups clustering by energy distance. +} +\usage{ +kgroups(x, k, iter.max = 10, nstart = 1, cluster = NULL) +} +\arguments{ + \item{x}{Data frame or data matrix or distance object} + \item{k}{number of clusters} + \item{iter.max}{maximum number of iterations} + \item{nstart}{number of restarts} + \item{cluster}{initial clustering vector} +} + +\details{ +K-groups is based on the multisample energy distance for comparing distributions. +Based on the disco decomposition of total dispersion (a Gini type mean distance) the objective function should either maximize the total between cluster energy distance, or equivalently, minimize the total within cluster energy distance. It is more computationally efficient to minimize within distances, and that makes it possible to use a modified version of the Hartigan-Wong algorithm (1979) to implement K-groups clustering. + +The within cluster Gini mean distance is +\deqn{G(C_j) = \frac{1}{n_j^2} \sum_{i,m=1}^{n_j} |x_{i,j} - x_{m,j}|} +and the K-groups within cluster distance is +\deqn{W_j = \frac{n_j}{2}G(C_j) = \frac{1}{2 n_j} \sum_{i,m=1}^{n_j} |x_{i,j} - x_{m,j}.} +If z is the data matrix for cluster \eqn{C_j}, then \eqn{W_j} could be computed as +\code{sum(dist(z)) / nrow(z)}. + +If cluster is not NULL, the clusters are initialized by this vector (can be a factor or integer vector). Otherwise clusters are initialized with random labels in k approximately equal size clusters. + +If \code{x} is not a distance object (class(x) == "dist") then \code{x} is converted to a data matrix for analysis. + +Run up to \code{iter.max} complete passes through the data set until a local min is reached. If \code{nstart > 1}, on second and later starts, clusters are initialized at random, and the best result is returned. +} + +\value{ +An object of class \code{kgroups} containing the components +\item{call}{the function call} +\item{cluster}{vector of cluster indices} +\item{sizes}{cluster sizes} +\item{within}{vector of Gini within cluster distances} +\item{W}{sum of within cluster distances} +\item{count}{number of moves} +\item{iterations}{number of iterations} +\item{k}{number of clusters} + +\code{cluster} is a vector containing the group labels, 1 to k. \code{print.kgroups} +prints some of the components of the kgroups object. + +Expect that count is 0 if the algorithm converged to a local min (that is, 0 moves happened on the last iteration). If iterations equals iter.max and count is positive, then the algorithm did not converge to a local min. +} +\author{ +Maria Rizzo and Songzi Li +} + +\references{ +Li, Songzi (2015). +"K-groups: A Generalization of K-means by Energy Distance." +Ph.D. thesis, Bowling Green State University. + +Li, S. and Rizzo, M. L. (2017). +"K-groups: A Generalization of K-means Clustering". +ArXiv e-print 1711.04359. https://arxiv.org/abs/1711.04359 + +Szekely, G. J., and M. L. Rizzo. "Testing for equal distributions in high dimension." InterStat 5, no. 16.10 (2004). + +Rizzo, M. L., and G. J. Szekely. "Disco analysis: A nonparametric extension of analysis of variance." The Annals of Applied Statistics (2010): 1034-1055. + +Hartigan, J. A. and Wong, M. A. (1979). "Algorithm AS 136: A K-means clustering algorithm." Applied Statistics, 28, 100-108. doi: 10.2307/2346830. +} + +\examples{ + x <- as.matrix(iris[ ,1:4]) + set.seed(123) + kg <- kgroups(x, k = 3, iter.max = 5, nstart = 2) + kg + fitted(kg) + + \donttest{ + d <- dist(x) + set.seed(123) + kg <- kgroups(d, k = 3, iter.max = 5, nstart = 2) + kg + + kg$cluster + + fitted(kg) + fitted(kg, method = "groups") + } +} + +\keyword{ cluster } +\keyword{ multivariate } diff -Nru r-cran-energy-1.7-4/man/sortrank.Rd r-cran-energy-1.7-5/man/sortrank.Rd --- r-cran-energy-1.7-4/man/sortrank.Rd 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-energy-1.7-5/man/sortrank.Rd 2018-06-17 15:33:15.000000000 +0000 @@ -0,0 +1,34 @@ +\name{sortrank} +\alias{sortrank} +\title{ Sort, order and rank a vector } +\description{ +A utility that returns a list with the components +equivalent to sort(x), order(x), rank(x, ties.method = "first"). + } +\usage{ +sortrank(x) +} +\arguments{ + \item{x}{ vector compatible with sort(x)} +} +\details{ +This utility exists to save a little time on large vectors when two or all three of the sort(), order(), rank() results are required. In case of ties, the ranks component matches \code{rank(x, ties.method = "first")}. +} +\value{ +A list with components + \item{x}{the sorted input vector x} + \item{ix}{the permutation = order(x) which rearranges x into ascending order} + \item{r}{the ranks of x} +} +\note{ +This function was benchmarked faster than the combined calls to \code{sort} and \code{rank}. +} +\examples{ +sortrank(rnorm(5)) +} +\references{ +See \code{\link{sort}}. +} +\author{ Maria L. Rizzo \email{mrizzo @ bgsu.edu} +} + diff -Nru r-cran-energy-1.7-4/MD5 r-cran-energy-1.7-5/MD5 --- r-cran-energy-1.7-4/MD5 2018-05-27 21:03:00.000000000 +0000 +++ r-cran-energy-1.7-5/MD5 2018-08-11 22:00:12.000000000 +0000 @@ -1,50 +1,55 @@ -01c066c2d2dee9d655ac8ad2002def25 *DESCRIPTION -06ac70e0e00da5ebc888991d170fedc9 *NAMESPACE -3a7f4726295f81248f972a39dd1dd09d *NEWS.md -60c944921e1c68681babcce658ef512c *R/Ecluster.R +ba847bf59d39e38323b06dcfdb5cf6e3 *DESCRIPTION +d6a5033d2db37860f81c4c407be1c6aa *NAMESPACE +e286c6f096d71b36c98e6f6cb64279a0 *NEWS.md +8a4895ff48be4737c5df6ac1ed63a86e *R/Ecluster.R 8b428dedf82ffb7e15422ff9f49addf4 *R/Eeqdist.R 25c48c287980d25df5ddf285bef068ff *R/Eindep.R a0d3a418969b10f0312159869dfae2ae *R/Emvnorm.R ee422cbbaab6e95389cbf3c90db51441 *R/Epoisson.R -df14913c2355403486761709b1d0571e *R/RcppExports.R +12e51dd49f97f7825ca0fdbd5a358790 *R/RcppExports.R f09a74b96ef85633d5a8f3a875ef84db *R/centering.R beffe2a8c9fe38f6ce49401e59381a5f *R/dcorT.R ae0b5579d71db5b253d1ece0801b02fa *R/dcov.R +65cd8e0a3c4bcd168ec715562feaafaa *R/dcov2d.R aa0cd56cb4314d09fa6fe7381e2732f0 *R/dcovu.R 0a49640e318f561d44d80e3187e0a43d *R/disco.R ab6155ea80b037f78dfbee4e179d4c4f *R/edist.R +4bea70962c5502c02208a9e36bc81f0b *R/kgroups.R b59b194449218e1668ddbdc5811d010c *R/pdcor.R 99d4997266887901ee6a62763e5ecfb8 *R/pdcov-test.R -3026f2609ed4f9108d0071769683e014 *README.md +dcb8e55a7969ef5ffc554eaa07621836 *R/util.R +8ab90b7760f971c9b444b931fa17bb2e *README.md 5bb9ea0c6b352a1e910817d4a2b2a997 *man/U_product.Rd 0106d2186ba21e28b9eef5f747c224bb *man/centering.Rd 21887eadf04293b14c48b56186a9a7ab *man/dcor.ttest.Rd c3c4f6a98c07966047d0114a96dbebc4 *man/dcov.Rd -7abdb2f8b9fff293f984ba7c9e4b1f61 *man/dcov.test.Rd +3a8c4360bcc60446a3c754912a82fa79 *man/dcov.test.Rd +9052c8105f523f61faf10499ab850832 *man/dcov2d.Rd f09ec348f412b4567195a44b18c042ee *man/dcovU_stats.Rd c797819cd60e72d907fa8c51c577ae9d *man/dcovu.Rd b60ab330b854d639ce6768aea4678c58 *man/disco.Rd bc9faea760233d2ff7c38702cf061d4c *man/edist.Rd -e4429a99d2add1f38a742331f6d578d9 *man/energy-defunct.Rd +4fa19e70fdf697ce866123f89a3619a5 *man/energy-defunct.Rd 97bdc56c7e04ba90c96f18ab4e9311b9 *man/energy-package.Rd -b918f4703f7ff7a298dce7ed3e324477 *man/energy.hclust.Rd -296495b40f99e3d4264f8285f2072ac4 *man/eqdist.etest.Rd +c333282f6d196ef9a1edd75b11e32e28 *man/energy.hclust.Rd +09240ed399879750d9fa116ed00f15e4 *man/eqdist.etest.Rd ab71ed8778965add0a0a215d3e6e177e *man/indep.test.Rd +805c2039f6e6b8614a07a02517b72f64 *man/kgroups.Rd f0a60791a303c425609a68651c549e26 *man/mvI.test.Rd ec44802e62875b25b1b167756d09075a *man/mvnorm-etest.Rd 6e62676733a8a8cdbaed67b8d0c00048 *man/pdcor.Rd ee92472463df0214c11d4e2d825987bc *man/poisson.mtest.Rd -727842fcced81550ab308552381b5b1b *src/ECl.cc -60f2661bf08b2295e345cf799b8bb28e *src/ECl.h -f93deb94d53b0be60435ac02150a8009 *src/Ecluster.cc -2a0d7e13abeedc9d58e8c8b438d93f50 *src/Eindep.c -ca127e0cff65055beff4b60d5fb5b8d3 *src/RcppExports.cpp +7a6ff6c66dd6fe89fa9ba80df4b45a26 *man/sortrank.Rd +f16c05d5e8c9d12f79e4fe6dbac512ce *src/B-tree.cpp +5f678a27d0f4cfcfe0ad6f8ba4418eb8 *src/Eindep.c +6795c9a42693c2b5c90ecd57de0d85e4 *src/RcppExports.cpp c560954ffd313a1b0aeb9ede7f73c4d1 *src/U-product.cpp 932a45c4fe940a27be100c2c5ca3468c *src/centering.cpp da0b2398098bc08ae54f647b29cf60b5 *src/dcov.c fe3940cdfcef45def8b3aec42b66c1cd *src/dcovU.cpp -aff0782a38beac801051d6f741566a1a *src/energy.c -345eebf91336a105c74f7c99a4c321a5 *src/energy_init.c +cc21c189df7b28475073e2fa09cceb07 *src/energy.c +9195849545ea4d3f01b6787800011c2a *src/energy_init.c +2d5c6923d19331f62df12d27ff957e59 *src/kgroups.cpp 663ee0e9767da39329a65a8bd86b7134 *src/mvnorm.cpp 6f5eba31cc00fe9daecaabfcfa182975 *src/partial-dcor.cpp 991223944feae331d405ae200899f71a *src/projection.cpp diff -Nru r-cran-energy-1.7-4/NAMESPACE r-cran-energy-1.7-5/NAMESPACE --- r-cran-energy-1.7-4/NAMESPACE 2018-05-27 19:19:13.000000000 +0000 +++ r-cran-energy-1.7-5/NAMESPACE 2018-08-11 12:51:58.000000000 +0000 @@ -10,12 +10,14 @@ D_center, Dcenter, dcor, + dcor2d, DCOR, dcor.t, dcor.test, dcor.ttest, dcov, dcov.test, + dcov2d, dcovU, dcovU_stats, disco, @@ -25,6 +27,7 @@ eqdist.e, eqdist.etest, indep.test, + kgroups, ksample.e, mvI, mvI.test, @@ -37,9 +40,12 @@ pdcov.test, poisson.m, poisson.mtest, + sortrank, U_center, U_product, Ucenter ) S3method(print, disco) +S3method(print, kgroups) +S3method(fitted, kgroups) diff -Nru r-cran-energy-1.7-4/NEWS.md r-cran-energy-1.7-5/NEWS.md --- r-cran-energy-1.7-4/NEWS.md 2018-05-27 19:29:32.000000000 +0000 +++ r-cran-energy-1.7-5/NEWS.md 2018-08-11 13:54:44.000000000 +0000 @@ -1,8 +1,24 @@ -energy package NEWS -=================== +# energy 1.7-5 -Version 1.7-4 ---- +* User level changes: + - kgroups: (new) implements energy clustering for a specified + number k classes by energy distance criterion, analogous to the k + classes of the k-means algorithm. + - dcov2d and dcor2d: (new) O(n log n) methods to compute the + squared U or V statistics for real x and y + - sortrank() function added (a utility) + +* Internal changes: + - B-tree.cpp: Btree_sum and other internal functions + implement binary tree search for faster O(n log n) + calculation of paired distances in dcov2d + - kgroups.cpp: Rcpp implementation of k-groups algorithm + - energy.hclust implementation: replaced C++ code with call + to stats::hclust; since R > 3.0.3 it is now equivalent for + alpha = 1 with method = "ward.D". Input and return value + unchanged except heights from hclust are half. + +# energy 1.7-4 * User level changes - disco: handle the case when the user argument x is dist with @@ -17,8 +33,8 @@ - BCDCOR: handle the cases when class of argument x or y conflicts with the distance argument -Version 1.7-2 ---- +# energy 1.7-2 + * User level changes - Provided new dcor.test function, similar to dcov.test but using the @@ -29,8 +45,8 @@ * Internal changes - energy_init.c added for registering routines -Version 1.7-0 ---- +# energy 1.7-0 + * Partial Distance Correlation statistics and tests added - pdcov, pdcor, pdcov.test, pdcor.test @@ -49,19 +65,19 @@ * default number of replicates R in tests: for all tests, R now defaults to 0 or R has no default value. -Version 1.6.2 ---- +# energy 1.6.2 + * inserted GetRNGstate() .. PutRNGState around repl. loop in dcov.c. -Version 1.6.1 ---- +# energy 1.6.1 + * replace Depends with Imports in DESCRIPTION file -Version 1.6.0 ---- +# energy 1.6.0 + * implementation of high-dim distance correlation t-test introduced in JMVA Volume 117, pp. 193-213 (2013). @@ -69,13 +85,13 @@ * minor changes to tidy other code in dcov.R * removed unused internal function .dcov.test -Version 1.5.0 ---- +# energy 1.5.0 + * NAMESPACE: insert UseDynLib; remove zzz.R, .First.Lib() -Version 1.4-0 ---- +# energy 1.4-0 + * NAMESPACE added. * (dcov.c, Eindep.c) Unused N was removed. @@ -95,8 +111,8 @@ changed from ek/B to (ek+1)/(B+1) as it should be for a permutation test, and unneeded int* n removed. -Version 1.3-0 ---- +# energy 1.3-0 + * In distance correlation, distance covariance functions (dcov, dcor, DCOR) and dcov.test, arguments x and y can now @@ -113,8 +129,8 @@ dst is logical (TRUE if distances) and R is number of replicates. For dCOV dims must be c(n, p, q, dst). -Version 1.2-0 ---- +# energy 1.2-0 + * disco (distance components) added for one-way layout. * A method argument was added to ksample.e, eqdist.e, and diff -Nru r-cran-energy-1.7-4/R/dcov2d.R r-cran-energy-1.7-5/R/dcov2d.R --- r-cran-energy-1.7-4/R/dcov2d.R 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-energy-1.7-5/R/dcov2d.R 2018-06-17 15:33:15.000000000 +0000 @@ -0,0 +1,162 @@ +dcor2d<- function(x, y, type = c("V", "U")) { + ## computes dcor^2 or bias-corrected dcor^2 by O(n log n) algorithm + ## bivariate data only: (x,y) in R^2 + ## should be faster than direct calc. for big n + type <- match.arg(type) + ## argument checking in dcov2d + stat <- dcov2d(x, y, type, all.stats=TRUE) + dvarX <- stat[2] + dvarY <- stat[3] + R2 <- 0.0 + if (abs(dvarX*dvarY > 10*.Machine$double.eps)) + R2 <- stat[1] / sqrt(dvarX*dvarY) + return (R2) +} + +dcov2d<- function(x, y, type=c("V", "U"), all.stats=FALSE) { + ## O(n log n) computation of dcovU or dcov^2 (V^2) for (x, y) in R^2 only + type <- match.arg(type) + if (!is.vector(x) || !is.vector(y)) { + if (NCOL(x) > 1 || NCOL(y) > 1) + stop("this method is only for univariate x and y") + } + x <- as.vector(x) + y <- as.vector(y) + n <- length(x) + if (n != length(y)) + stop("sample sizes must agree") + + Sums <- .dcovSums2d(x, y, all.sums=all.stats) + if (type =="V") { + d1 <- n^2 + d2 <- n^3 + d3 <- n^4 + } else { + d1 <- n * (n - 3) + d2 <- d1 * (n - 2) + d3 <- d2 * (n - 1) + } + dCov2d <- Sums$S1/d1 - 2*Sums$S2/d2 + Sums$S3/d3 + if (all.stats) { + dvarX <- Sums$S1a/d1 - 2*Sums$S2a/d2 + Sums$S3a/d3 + dvarY <- Sums$S1b/d1 - 2*Sums$S2b/d2 + Sums$S3b/d3 + } + rval <- ifelse(type=="V", c(V=dCov2d), c(U=dCov2d)) + if (all.stats) + rval <- c(rval, dvarX=dvarX, dvarY=dvarY) + return (rval) +} + +.dcovSums2d <- function(x, y, all.sums = FALSE) { + ## compute the sums S1, S2, S3 of distances for dcov^2 + ## dCov^2 <- S1/d1 - 2 * S2/d2 + S3/d3 + ## denominators differ for U-statistic, V-statisic + ## if all==TRUE, also return the sums needed for dVar + if (is.matrix(x) || is.matrix(y)) { + if (ncol(x) > 1 || ncol(y) > 1) + stop("Found multivariate (x,y) in .dcovSums2d, expecting bivariate") + } + n <- length(x) + SRx <- sortrank(x) + SRy <- sortrank(y) + ## compute the rowSums of the distance matrices + a. <- .rowSumsDist1(x, SRx) + b. <- .rowSumsDist1(y, SRy) + S2 <- sum(a. * b.) + a.. <- sum(a.) + b.. <- sum(b.) + S3 <- sum(a.) * sum(b.) + + ## also need order and rank for y[order(x)] in gamma1() + x1 <- SRx$x + y1 <- y[SRx$ix] + SRy1 <- sortrank(y1) + ones <- rep(1, n) + g_1 <- .gamma1(x1=x1, y1=y1, z1=ones, SRx=SRx, SRy1=SRy1) + g_x <- .gamma1(x1=x1, y1=y1, z1=x1, SRx=SRx, SRy1=SRy1) + g_y <- .gamma1(x1=x1, y1=y1, z1=y1, SRx=SRx, SRy1=SRy1) + g_xy <- .gamma1(x1=x1, y1=y1, z1=x1*y1, SRx=SRx, SRy1=SRy1) + S1 <- sum(x * y * g_1 + g_xy - x * g_y - y * g_x) + + L <- list(S1=S1, S2=S2, S3=S3, + S1a=NA, S1b=NA, S2a=NA, S2b=NA, S3a=NA, S3b=NA) + if (all.sums) { + L$S1a <- 2 * n * (n-1) * var(x) + L$S1b <- 2 * n * (n-1) * var(y) + L$S2a=sum(a.^2) + L$S2b=sum(b.^2) + L$S3a=a..^2 + L$S3b=b..^2 + } + return (L); +} + +.dvarU2 <- function(x, SRx = NULL) { + ## O(n log n) computation of dvarU for univariate x only + ## this is an internal function that will do a stand-alone dVar calc. + ## but it is not faster than dcovU2(x, x) unless we supply + ## the precomputed sort + rank results in SRx + n <- length(x) + ## compute the rowSums of the distance matrices + if (is.null(SRx)) + SRx <- sortrank(x) + a. <- .rowSumsDist1(x, SRx) + S2 <- sum(a. * a.) + S3 <- sum(a.)^2 + + ## also need order and rank for y[order(x)] in gamma1() + x1 <- SRx$x + x2 <- x1 + SRx1 <- sortrank(x1) + ones <- rep(1, n) + g_1 <- .gamma1(x1=x1, y1=x2, z1=ones, SRx, SRx1) + g_x <- .gamma1(x1=x1, y1=x2, z1=x1, SRx, SRx1) + g_xx <- .gamma1(x1=x1, y1=x2, z1=x1*x2, SRx, SRx1) + S1 <- sum(x^2 * g_1 + g_xx - 2 * x * g_x) + d1 <- n * (n - 3) + d2 <- d1 * (n - 2) + d3 <- d2 * (n - 1) + dVar <- S1/d1 - 2 * S2/d2 + S3/d3 + return(dVar) +} + +.gamma1 <- function(x1, y1, z1, SRx, SRy1) { + # computes the terms of the sum (ab) in dcovU + # original sample (x_i, y_i, z_i) + # triples (x1_i, y1_i, z1_i) are sorted by ix=order(x) + # SRx is the result of sortrank(x), original order + # SRy1 is the result of sortrank(y1), y1=y[order(x)] + # pre-compute SRx, SRy1 to avoid repeated sort and rank + # + n <- length(x1) + ix <- SRx$ix #order(x) + rankx <- SRx$r #ranks of original sample x + + ## ranks and order vector for this permutation of sample y1 + iy1 <- SRy1$ix #order(y1) + ranky1 <- SRy1$r #rank(y1) + + ## the partial sums in the formula g_1 + psumsy1 <- (cumsum(z1[iy1]) - z1[iy1])[ranky1] + psumsx1 <- cumsum(z1) - z1 + + gamma1 <- Btree_sum(y=ranky1, z=z1) #y1 replaced by rank(y1) + g <- sum(z1) - z1 - 2 * psumsx1 - 2 * psumsy1 + 4 * gamma1 + g <- g[rankx] +} + + +.rowSumsDist1 <- function(x, Sx = NULL) { + ## for univariate samples, equivalent to rowSums(as.matrix(dist(x))) + ## but much faster + ## Sx is a sortrank object usually pre-computed here + ## x is the data vector, Sx$x is sort(x) + if (is.null(Sx)) + Sx <- sortrank(x) + n <- length(x) + r <- Sx$r #ranks + z <- Sx$x #ordered sample x + psums1 <- (cumsum(z) - z)[r] + (2*(r-1)-n)*x + sum(x) - 2*psums1 +} + diff -Nru r-cran-energy-1.7-4/R/Ecluster.R r-cran-energy-1.7-5/R/Ecluster.R --- r-cran-energy-1.7-4/R/Ecluster.R 2018-05-27 19:19:13.000000000 +0000 +++ r-cran-energy-1.7-5/R/Ecluster.R 2018-07-09 17:23:35.000000000 +0000 @@ -12,27 +12,7 @@ stop("Cannot use negative exponent on distance.") d <- d^alpha } - labels <- attr(d, "Labels") - if (is.null(labels)) - labels <- paste(1:n) - merge <- integer(2 * (n - 1)) - height <- double(n - 1) - order <- integer(n) - ecl <- .C("Emin_hclust", - diss = as.double(d), - en = as.integer(n), - merge = as.integer(merge), - height = as.double(height), - order = as.integer(order), - PACKAGE = "energy") - merge <- matrix(ecl$merge, nrow = n - 1, ncol = 2) - e <- list(merge = merge, - height = ecl$height, - order = ecl$order, - labels = labels, - method = "e-distance", - call = match.call(), - dist.method = attr(dst, "method")) - class(e) <- "hclust" - e + ## heights of hclust are half of energy; otherwise equivalent + return(hclust(d, method = "ward.D")) } + diff -Nru r-cran-energy-1.7-4/R/kgroups.R r-cran-energy-1.7-5/R/kgroups.R --- r-cran-energy-1.7-4/R/kgroups.R 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-energy-1.7-5/R/kgroups.R 2018-08-11 13:21:13.000000000 +0000 @@ -0,0 +1,69 @@ + +kgroups <- function(x, k, iter.max = 10, nstart = 1, cluster = NULL) { + distance <- (class(x) == "dist") + x <- as.matrix(x) + if (!is.numeric(x)) + stop("x must be numeric") + n <- nrow(x) + if (is.null(cluster)) { + cluster <- sample(0:(k-1), size = n, replace = TRUE) + } else { + ## recode cluster as 0,1,...,k-1 + cluster <- factor(cluster) + if(length(levels(cluster)) != k) + stop("cluster vector does not have k clusters") + cluster <- as.integer(cluster) - 1 + if(length(cluster) != n) + stop("data and length of cluster vector must match") + } + value <- .kgroups_start(x, k, cluster, iter.max, distance = distance) + + if (nstart > 1) { + objective <- rep(0, nstart) + objective[1] <- value$W + values <- vector("list", nstart) + values[[1]] <- value + for (j in 2:nstart) { + ## random initialization of cluster labels + cluster <- sample(0:(k-1), size = n, replace = TRUE) + values[[j]] <- .kgroups_start(x, k, cluster, iter.max, distance = distance) + + objective[j] <- values[[j]]$W + } + best <- which.min(objective) + value <- values[[best]] + } + + obj <- structure(list( + call = match.call(), + cluster = value$cluster + 1, + sizes = value$sizes, + within = value$within, + W = sum(value$within), + count = value$count, + iterations = value$it, + k = k), + class = "kgroups") + return (obj) +} + + +print.kgroups <- function(x, ...) { + cat("\n"); print(x$call) + cat("\nK-groups cluster analysis\n") + cat(x$k, " groups of size ", x$sizes, "\n") + cat("Within cluster distances:\n", x$within) + cat("\nIterations: ", x$iterations, " Count: ", x$count, "\n") +} + +fitted.kgroups <- function(object, method = c("labels", "groups"), ...) { + method = match.arg(method) + if (method == "groups") { + k <- object$k + CList <- vector("list", k) + for (i in 1:k) + CList[[i]] <- which(object$cluster == i) + return (CList) + } + return (object$cluster) +} diff -Nru r-cran-energy-1.7-4/R/RcppExports.R r-cran-energy-1.7-5/R/RcppExports.R --- r-cran-energy-1.7-4/R/RcppExports.R 2018-05-27 19:19:13.000000000 +0000 +++ r-cran-energy-1.7-5/R/RcppExports.R 2018-08-11 13:15:52.000000000 +0000 @@ -1,6 +1,10 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 +Btree_sum <- function(y, z) { + .Call(`_energy_Btree_sum`, y, z) +} + D_center <- function(Dx) { .Call(`_energy_D_center`, Dx) } @@ -13,6 +17,10 @@ .Call(`_energy_dcovU_stats`, Dx, Dy) } +.kgroups_start <- function(x, k, clus, iter_max, distance) { + .Call(`_energy_kgroups_start`, x, k, clus, iter_max, distance) +} + mvnEstat <- function(y) { .Call(`_energy_mvnEstat`, y) } diff -Nru r-cran-energy-1.7-4/R/util.R r-cran-energy-1.7-5/R/util.R --- r-cran-energy-1.7-4/R/util.R 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-energy-1.7-5/R/util.R 2018-06-17 15:33:15.000000000 +0000 @@ -0,0 +1,18 @@ +## util.R +## +## miscellaneous utilities +## + + +sortrank <- function(x) { + ## sort and rank data with one call to order() + ## faster than calling sort and rank separately + ## returns an object identical to: + ## list(x=sort(x), ix=order(x), r=rank(x, ties.method = "first")) + o <- order(x) + n <- length(o) + N <- 1:n + N[o] <- N + return(list(x=x[o], ix=o, r=N)) +} + diff -Nru r-cran-energy-1.7-4/README.md r-cran-energy-1.7-5/README.md --- r-cran-energy-1.7-4/README.md 2018-05-27 19:19:13.000000000 +0000 +++ r-cran-energy-1.7-5/README.md 2018-06-17 15:33:15.000000000 +0000 @@ -10,3 +10,5 @@ energy is named based on the analogy with potential energy in physics. See the references in the manual for more details. + +[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/energy)]https://cran.r-project.org/package=energy) \ No newline at end of file diff -Nru r-cran-energy-1.7-4/src/B-tree.cpp r-cran-energy-1.7-5/src/B-tree.cpp --- r-cran-energy-1.7-4/src/B-tree.cpp 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-energy-1.7-5/src/B-tree.cpp 2018-08-11 17:00:17.000000000 +0000 @@ -0,0 +1,132 @@ +#include +using namespace Rcpp; + + +// compute partial sum using binary search algorithm like AVL +// pre-compute powers of two to save repeated calculations + + +IntegerVector containerNodes (int y, IntegerVector pwr2, IntegerVector psum); +NumericVector gamma1_direct(IntegerVector y, NumericVector z); +IntegerVector p2sum(IntegerVector pwr2); +IntegerVector powers2 (int L); +NumericVector rowsumsDist(NumericVector x, NumericVector sorted, IntegerVector ranks); +IntegerVector subNodes (int y, IntegerVector pwr2, IntegerVector psum); + + + +// [[Rcpp::export]] +NumericVector Btree_sum (IntegerVector y, NumericVector z) { + // + // y is a permutation of the integers 1:n + // z is a numeric vector of length n + // compute gamma1(i) = sum(j 0) + gamma1(i) += sums(node); + } + } + return gamma1; +} + +IntegerVector containerNodes (int y, IntegerVector pwr2, IntegerVector psum) { + /* + * get the indices of all nodes of binary tree whose closed + * intervals contain integer y + */ + int i, L = pwr2.length(); + IntegerVector nodes(L); + + nodes(0) = y; + for (i = 0; i < L-1; i++) { + nodes(i+1) = ceil((double) y / pwr2(i)) + psum(i); + } + return nodes; +} + + +IntegerVector subNodes (int y, IntegerVector pwr2, IntegerVector psum) { + /* + * get indices of nodes whose intervals disjoint union is 1:y + */ + int L = psum.length(); + int idx, k, level, p2; + IntegerVector nodes(L); + + std::fill(nodes.begin(), nodes.end(), -1L); + + k = y; + for (level = L - 1; level > 0; level --) { + p2 = pwr2(level - 1); + if (k >= p2) { + // at index of left node plus an offset + idx = psum(level - 1) + (y / p2); + nodes(L - level - 1) = idx; + k -= p2; + } + } + if (k > 0) + nodes(L - 1) = y; + return nodes; +} + + +IntegerVector powers2 (int L) { + // (2, 4, 8, ..., 2^L, 2^(L+1)) + int k; + IntegerVector pwr2(L); + + pwr2(0) = 2; + for (k = 1; k < L; k++) + pwr2(k) = pwr2(k-1) * 2; + return pwr2; +} + +IntegerVector p2sum(IntegerVector pwr2) { + // computes the cumsum of 2^L, 2^(L-1), ..., 2^2, 2 + int i, L = pwr2.length(); + IntegerVector psum(L); + + std::fill(psum.begin(), psum.end(), pwr2(L-1)); + for (i = 1; i < L; i++) + psum(i) = psum(i-1) + pwr2(L-i-1); + return psum; +} + + +NumericVector gamma1_direct(IntegerVector y, NumericVector z) { + // utility: direct computation of the sum gamm1 + // for the purpose of testing and benchmarks + + int n = y.length(); + int i, j; + NumericVector gamma1(n); + + for (i = 1; i < n; i++) { + for (j = 0; j < i; j++) { + if (y(j) < y(i)) { + gamma1(i) += z(j); + } + } + } + return gamma1; +} diff -Nru r-cran-energy-1.7-4/src/ECl.cc r-cran-energy-1.7-5/src/ECl.cc --- r-cran-energy-1.7-4/src/ECl.cc 2018-05-27 19:49:23.000000000 +0000 +++ r-cran-energy-1.7-5/src/ECl.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,415 +0,0 @@ -/* - ECl.cc: energy package - - Author: Maria Rizzo - Created: 12 Dec 2002 - Revised: 4 Jan 2004 for R-1.8.1 energy package - Revised: 28 Jan 2004 - -*/ - -#include "ECl.h" -#include - -extern "C" { - -//implementation of Cl and ECl classes - -Cl::~Cl() { //destructor - int i; - if (isinit==1) { - Free(size); - Free(step); - Free(height); - Free(w); - for (i=0; i 0) { - I = -m1[0]-1; - J = -m2[0]-1; - combine(I, J); - w[0] = J; - w[1] = I; - i = 1; - while (nclus > k) { - I = (m1[i]<0)? -m1[i]-1 : w[m1[i]]; - J = (m2[i]<0)? -m2[i]-1 : w[m2[i]]; - combine(I, J); - i++; - w[i] = I; - } - } - nclus = clusters(); - return nclus; -} - -int Cl::init(int m, int *G, int base) { - //initialize cluster with group membership vector G - int g, i; - - init(m); - if (base > 0) - for (i=0; i0) { - cl[k++] = i; - m+= size[i]; - } - if (k!=nclus) error("nclus error"); - if (m!=n) error("total size error"); - return nclus; -} - -int Cl::clusters() { - //count the number of non-empty clusters, and set nclus - int i, k=0; - for (i=0; i0) k++; - if (k>n || k<1) error("nclus error"); - nclus = k; - return nclus; -} - -int Cl::combine(int I, int J) { - //merge Jth row into Ith row - //w is preserved - int j, m; - - if (I==J) error("c:I==J"); - if (I<0 || J<0 || I>=n || J>=n) error("c:I,J error"); - if (size[I]<=0 || size[J]<=0) error("c:empty cluster"); - if (nclus < 2) error("c:1 cluster"); - - m = size[I]; - for (j=0; j 0) { - for (j=0; j 0) - for (i=0; i 0) - for (j=0; j 0) - for (i=0; i n) return -1; - return 0; -} - -int Cl::proximity(int **p) { - //p[i][j] is 1 if (i,j) in same cluster - //p[i][j] is 0 if (i,j) in different clusters - int a, b, i, j, k; - for (i=0; i height[J]) { - I=w[1]; J=w[0]; - } - height[I]=Ed[I][J]; - combine(I, J); - update_Edst(I, J, dst, Ed); - return 0.0; - } - - if (nclus == 1) error("last cluster"); - if (nclus < 1) error("nclus<1"); - - I=J=-1; - find_minEdst(Ed, &I, &J); - - if (I>=0) { - if (J < I) { - p=I; I=J; J=p; - } - hI = hJ = 0.0; - if (step[I] > 0) hI = height[I]; - if (step[J] > 0) hJ = height[J]; - if (hJ < hI) { - p=I; I=J; J=p; - } - - height[I] = Ed[I][J]; - d=combine(I,J); - if(!d) error("merge_best_pair error"); - pE = E; - E = update_Edst(I, J, dst, Ed); - } - return E; -} - - -} //end extern "C" diff -Nru r-cran-energy-1.7-4/src/ECl.h r-cran-energy-1.7-5/src/ECl.h --- r-cran-energy-1.7-4/src/ECl.h 2018-05-27 19:49:23.000000000 +0000 +++ r-cran-energy-1.7-5/src/ECl.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,85 +0,0 @@ -/* - ECl.h: energy package - - Author: Maria Rizzo - Created: 12 Dec 2002 - Revised: 4 Jan 2004 for R-1.8.1 energy package - Revised: 28 Jan 2004 -*/ - -#define DLLEXPORT - -#include -#include -#include -#include - -//declarations for class Cl for hierarchical cluster analysis -//and ECl for e-clustering - -#define EPS DBL_EPSILON*20.0 -#define ONE 1.0+DBL_EPSILON*20.0 - -class DLLEXPORT Cl { - //for cluster analysis -protected: - int n; //number of observations - int nclus; //number of clusters - int it; //number of changes to clusters - int pstep1; - int pstep2; - int psize1; - int psize2; - int r1; - int r2; - int c1; - int c2; - int temp; - int isinit; //is memory allocated for arrays - int *size; //sizes of clusters - int *step; //step when cluster formed - double *height; //distance between merging clusters - int *w; - int **clus; //indices of observations - -public: - Cl(){isinit=0;}; //no memory is allocated, call init(n) - ~Cl(); - int init(int m); - int init(int n, int *m1, int *m2, int k); - int init(int m, int *G, int base); - - int dim() {return n;} - int len() {return nclus;} - int len(int i) {return size[i];} - int obs(int i, int j) {return clus[i][j];} - double ht(int i) {return height[i];} - int next_cl(int p) {p++;while(p - Created: Created: 12 Dec 2002 - Revised: 4 Jan 2004 for R-1.8.1 energy package - Revised: 28 Jan 2004 -*/ - -#include "R.h" -#include "Rmath.h" -#include "ECl.h" - -extern "C" { - -double **alloc_matrix(int r, int c); -void free_matrix(double **matrix, int r, int c); -void Emin_hclust(double *diss, int *en, int *merge, double *height, int *order); -void lower2square(double **dst, double *diss, int n); - -void Emin_hclust(double *diss, int *en, int *merge, double *height, int *order) -{ - // performs hierarchical E-clustering by minimum cluster E-distance - // diss lower.tri of n by n distance matrix, column order - // en sample size n - // merge (n-1) by 2 array as a vector in col order; see hclust object - // height vector length n-1; see hclust object - // order vector length n; see hclust object - - int i, step, I, J; - int n = (*en); - double e; - double *E; - double **Edst; - double **dst; - int *m1, *m2; - ECl c; //clustering object - - c.init(n); - - dst = alloc_matrix(n, n); - Edst = alloc_matrix(n, n); //E dist between clusters - E = Calloc(n, double); - m1 = Calloc(n-1, int); - m2 = Calloc(n-1, int); - - // convert lower.tri in vector form to square matrix - lower2square(dst, diss, n); - - //E-hierarchical clustering - - E[0] = c.init_Edst(dst, Edst); - step = 0; - while (c.len() > 1) - { - e = c.merge_minEdst(dst, Edst); - c.last_pair(&I, &J); - height[step] = c.ht(I); - step = c.last_merge(m1+step, m2+step); - E[step] = e; - } - - //compute the return values for merge and order - - E[n-1] = 0.0; - for (i=0; i + Author: Maria Rizzo Created: June 15, 2004 (development) Last Modified: April 5, 2008 */ diff -Nru r-cran-energy-1.7-4/src/energy.c r-cran-energy-1.7-5/src/energy.c --- r-cran-energy-1.7-4/src/energy.c 2018-05-27 19:49:23.000000000 +0000 +++ r-cran-energy-1.7-5/src/energy.c 2018-08-11 17:00:17.000000000 +0000 @@ -1,7 +1,7 @@ /* energy.c: energy package - Author: Maria Rizzo + Author: Maria Rizzo Created: 4 Jan 2004 Updated: 2 April 2008 some functions moved to utilities.c Updated: 25 August 2016 mvnEstat converted to c++ in mvnorm.cpp diff -Nru r-cran-energy-1.7-4/src/energy_init.c r-cran-energy-1.7-5/src/energy_init.c --- r-cran-energy-1.7-4/src/energy_init.c 2018-05-27 19:49:23.000000000 +0000 +++ r-cran-energy-1.7-5/src/energy_init.c 2018-08-11 17:00:17.000000000 +0000 @@ -8,7 +8,6 @@ /* .C calls */ extern void dCOV(void *, void *, void *, void *, void *, void *, void *); extern void dCOVtest(void *, void *, void *, void *, void *, void *, void *, void *); -extern void Emin_hclust(void *, void *, void *, void *, void *); extern void indepE(void *, void *, void *, void *, void *); extern void indepEtest(void *, void *, void *, void *, void *, void *, void *); extern void ksampleEtest(void *, void *, void *, void *, void *, void *, void *, void *, void *); @@ -23,11 +22,12 @@ extern SEXP _energy_projection(SEXP, SEXP); extern SEXP _energy_U_center(SEXP); extern SEXP _energy_U_product(SEXP, SEXP); +extern SEXP _energy_Btree_sum(SEXP, SEXP); +extern SEXP _energy_kgroups_start(SEXP, SEXP, SEXP, SEXP, SEXP); static const R_CMethodDef CEntries[] = { {"dCOV", (DL_FUNC) &dCOV, 7}, {"dCOVtest", (DL_FUNC) &dCOVtest, 8}, - {"Emin_hclust", (DL_FUNC) &Emin_hclust, 5}, {"indepE", (DL_FUNC) &indepE, 5}, {"indepEtest", (DL_FUNC) &indepEtest, 7}, {"ksampleEtest", (DL_FUNC) &ksampleEtest, 9}, @@ -36,14 +36,16 @@ }; static const R_CallMethodDef CallEntries[] = { - {"_energy_D_center", (DL_FUNC) &_energy_D_center, 1}, - {"_energy_dcovU_stats", (DL_FUNC) &_energy_dcovU_stats, 2}, - {"_energy_mvnEstat", (DL_FUNC) &_energy_mvnEstat, 1}, - {"_energy_partial_dcor", (DL_FUNC) &_energy_partial_dcor, 3}, - {"_energy_partial_dcov", (DL_FUNC) &_energy_partial_dcov, 3}, - {"_energy_projection", (DL_FUNC) &_energy_projection, 2}, - {"_energy_U_center", (DL_FUNC) &_energy_U_center, 1}, - {"_energy_U_product", (DL_FUNC) &_energy_U_product, 2}, + {"_energy_D_center", (DL_FUNC) &_energy_D_center, 1}, + {"_energy_dcovU_stats", (DL_FUNC) &_energy_dcovU_stats, 2}, + {"_energy_mvnEstat", (DL_FUNC) &_energy_mvnEstat, 1}, + {"_energy_partial_dcor", (DL_FUNC) &_energy_partial_dcor, 3}, + {"_energy_partial_dcov", (DL_FUNC) &_energy_partial_dcov, 3}, + {"_energy_projection", (DL_FUNC) &_energy_projection, 2}, + {"_energy_U_center", (DL_FUNC) &_energy_U_center, 1}, + {"_energy_U_product", (DL_FUNC) &_energy_U_product, 2}, + {"_energy_Btree_sum", (DL_FUNC) &_energy_Btree_sum, 2}, + {"_energy_kgroups_start", (DL_FUNC) &_energy_kgroups_start, 5}, {NULL, NULL, 0} }; diff -Nru r-cran-energy-1.7-4/src/kgroups.cpp r-cran-energy-1.7-5/src/kgroups.cpp --- r-cran-energy-1.7-4/src/kgroups.cpp 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-energy-1.7-5/src/kgroups.cpp 2018-08-11 17:00:17.000000000 +0000 @@ -0,0 +1,126 @@ +#include +using namespace Rcpp; + +int kgroups_update(NumericMatrix x, int k, IntegerVector clus, + IntegerVector sizes, NumericVector within, bool distance); +List kgroups_start(NumericMatrix x, int k, IntegerVector clus, + int iter_max, bool distance); + +int kgroups_update(NumericMatrix x, int k, IntegerVector clus, + IntegerVector sizes, NumericVector w, bool distance) { + /* + * k-groups one pass through sample moving one point at a time + * x: data matrix or distance + * k: number of clusters + * clus: clustering vector clus(i)==j ==> x_i is in cluster j + * sizes: cluster sizes + * within: vector of within cluster dispersions + * distance: true if x is distance matrix + * update clus, sizes, and withins + * return count = number of points moved + */ + + int n = x.nrow(), d = x.ncol(); + int i, j, I, J, ix, nI, nJ; + NumericVector rowdst(k), e(k); + int best, count = 0; + double dsum, dif; + + for (ix = 0; ix < n; ix++) { + I = clus(ix); + nI = sizes(I); + if (nI > 1) { + // calculate the E-distances of this point to each cluster + rowdst.fill(0.0); + for (i = 0; i < n; i++) { + J = clus(i); + if (distance == true) { + rowdst(J) += x(ix, i); + } else { + dsum = 0.0; + for (j = 0; j < d; j++) { + dif = x(ix, j) - x(i, j); + dsum += dif * dif; + } + rowdst(J) += sqrt(dsum); + } + } + + for (J = 0; J < k; J++) { + nJ = sizes(J); + e(J) = (2.0 / (double) nJ) * (rowdst(J) - w(J)); + } + + best = Rcpp::which_min(e); + if (best != I) { + // move this point and update + nI = sizes(I); + nJ = sizes(best); + w(best) = (((double) nJ) * w(best) + rowdst(best)) / ((double) (nJ + 1)); + w(I) = (((double) nI) * w(I) - rowdst(I)) / ((double) (nI - 1)); + clus(ix) = best; + sizes(I) = nI - 1; + sizes(best) = nJ + 1; + count ++; // number of moves + } + } + } + + return count; +} + + + +// [[Rcpp::export(.kgroups_start)]] +List kgroups_start(NumericMatrix x, int k, IntegerVector clus, + int iter_max, bool distance) { + // k-groups clustering with initial clustering vector clus + // up to iter_max iterations of n possible moves each + // distance: true if x is distance matrix + NumericVector within(k, 0.0); + IntegerVector sizes(k, 0); + double dif, dsum; + int I, J, h, i, j; + int n = x.nrow(), d = x.ncol(); + + for (i = 0; i < n; i++) { + I = clus(i); + sizes(I)++; + for (j = 0; j < i; j++) { + J = clus(j); + if (I == J) { + if (distance == true) { + within(I) += x(i, j); + } else { + dsum = 0.0; + for (h = 0; h < d; h++) { + dif = x(i, h) - x(j, h); + dsum += dif * dif; + } + within(I) += sqrt(dsum); + } + } + } + } + for (I = 0; I < k; I++) + within(I) /= ((double) sizes(I)); + + int it = 1, count = 1; + count = kgroups_update(x, k, clus, sizes, within, distance); + + while (it < iter_max && count > 0) { + count = kgroups_update(x, k, clus, sizes, within, distance); + it++; + } + double W = Rcpp::sum(within); + + return List::create( + _["within"] = within, + _["W"] = W, + _["sizes"] = sizes, + _["cluster"] = clus, + _["iterations"] = it, + _["count"] = count); +} + + diff -Nru r-cran-energy-1.7-4/src/RcppExports.cpp r-cran-energy-1.7-5/src/RcppExports.cpp --- r-cran-energy-1.7-4/src/RcppExports.cpp 2018-05-27 19:49:23.000000000 +0000 +++ r-cran-energy-1.7-5/src/RcppExports.cpp 2018-08-11 17:00:17.000000000 +0000 @@ -5,6 +5,18 @@ using namespace Rcpp; +// Btree_sum +NumericVector Btree_sum(IntegerVector y, NumericVector z); +RcppExport SEXP _energy_Btree_sum(SEXP ySEXP, SEXP zSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< IntegerVector >::type y(ySEXP); + Rcpp::traits::input_parameter< NumericVector >::type z(zSEXP); + rcpp_result_gen = Rcpp::wrap(Btree_sum(y, z)); + return rcpp_result_gen; +END_RCPP +} // D_center NumericMatrix D_center(NumericMatrix Dx); RcppExport SEXP _energy_D_center(SEXP DxSEXP) { @@ -39,6 +51,21 @@ return rcpp_result_gen; END_RCPP } +// kgroups_start +List kgroups_start(NumericMatrix x, int k, IntegerVector clus, int iter_max, bool distance); +RcppExport SEXP _energy_kgroups_start(SEXP xSEXP, SEXP kSEXP, SEXP clusSEXP, SEXP iter_maxSEXP, SEXP distanceSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< NumericMatrix >::type x(xSEXP); + Rcpp::traits::input_parameter< int >::type k(kSEXP); + Rcpp::traits::input_parameter< IntegerVector >::type clus(clusSEXP); + Rcpp::traits::input_parameter< int >::type iter_max(iter_maxSEXP); + Rcpp::traits::input_parameter< bool >::type distance(distanceSEXP); + rcpp_result_gen = Rcpp::wrap(kgroups_start(x, k, clus, iter_max, distance)); + return rcpp_result_gen; +END_RCPP +} // mvnEstat double mvnEstat(NumericMatrix y); RcppExport SEXP _energy_mvnEstat(SEXP ySEXP) {