diff -Nru r-cran-formula-1.2-0/debian/changelog r-cran-formula-1.2-1/debian/changelog --- r-cran-formula-1.2-0/debian/changelog 2015-07-30 07:39:00.000000000 +0000 +++ r-cran-formula-1.2-1/debian/changelog 2015-07-30 07:39:00.000000000 +0000 @@ -1,3 +1,11 @@ +r-cran-formula (1.2-1-1) unstable; urgency=low + + * New upstream release + + * debian/control: Set Build-Depends: to current R version + + -- Dirk Eddelbuettel Wed, 08 Apr 2015 22:13:00 -0500 + r-cran-formula (1.2-0-1) unstable; urgency=low * New upstream release diff -Nru r-cran-formula-1.2-0/debian/control r-cran-formula-1.2-1/debian/control --- r-cran-formula-1.2-0/debian/control 2015-07-30 07:39:00.000000000 +0000 +++ r-cran-formula-1.2-1/debian/control 2015-07-30 07:39:00.000000000 +0000 @@ -2,7 +2,7 @@ Section: gnu-r Priority: optional Maintainer: Dirk Eddelbuettel -Build-Depends: debhelper (>= 7.0.0), r-base-dev (>= 3.1.2), cdbs +Build-Depends: debhelper (>= 7.0.0), r-base-dev (>= 3.1.3), cdbs Standards-Version: 3.9.6 Package: r-cran-formula diff -Nru r-cran-formula-1.2-0/DESCRIPTION r-cran-formula-1.2-1/DESCRIPTION --- r-cran-formula-1.2-0/DESCRIPTION 2015-01-20 00:17:15.000000000 +0000 +++ r-cran-formula-1.2-1/DESCRIPTION 2015-04-07 16:42:14.000000000 +0000 @@ -1,6 +1,6 @@ Package: Formula -Version: 1.2-0 -Date: 2015-01-20 +Version: 1.2-1 +Date: 2015-04-07 Title: Extended Model Formulas Description: Infrastructure for extended formulas with multiple parts on the right-hand side and/or multiple responses on the left-hand side. 
@@ -8,10 +8,10 @@ person(given = "Yves", family = "Croissant", role = "aut", email = "Yves.Croissant@univ-reunion.fr")) Depends: R (>= 2.0.0), stats License: GPL-2 | GPL-3 -Packaged: 2015-01-19 23:25:12 UTC; zeileis +Packaged: 2015-04-07 14:58:46 UTC; zeileis Author: Achim Zeileis [aut, cre], Yves Croissant [aut] Maintainer: Achim Zeileis NeedsCompilation: no Repository: CRAN -Date/Publication: 2015-01-20 01:17:15 +Date/Publication: 2015-04-07 18:42:14 Binary files /tmp/rqGwZJiGg_/r-cran-formula-1.2-0/inst/doc/Formula.pdf and /tmp/SdIeV7SkHh/r-cran-formula-1.2-1/inst/doc/Formula.pdf differ diff -Nru r-cran-formula-1.2-0/man/model.frame.Formula.Rd r-cran-formula-1.2-1/man/model.frame.Formula.Rd --- r-cran-formula-1.2-0/man/model.frame.Formula.Rd 2015-01-05 20:25:01.000000000 +0000 +++ r-cran-formula-1.2-1/man/model.frame.Formula.Rd 2015-04-07 11:05:44.000000000 +0000 @@ -24,7 +24,7 @@ model.part(object, \dots) \method{model.part}{Formula}(object, data, lhs = 0, rhs = 0, - drop = FALSE, terms = FALSE, dot = "separate", \dots) + drop = FALSE, terms = FALSE, dot = NULL, \dots) } \arguments{ @@ -71,6 +71,17 @@ (and optionally the \code{lhs}). Also, it returns either a \code{data.frame} with multiple columns or a single column (dropping the \code{data.frame} property) depending on whether multiple responses are employed or not. + + If the formula contains one or more dots (\code{.}), some care has to be + taken to process these correctly, especially if the LHS contains transformations + (such as \code{log}, \code{sqrt}, \code{cbind}, \code{Surv}, etc.). Calling the + \code{terms} method with the original data (untransformed, if any) resolves + all dots (by default separately for each part, otherwise sequentially) and also + includes the original and updated formula as part of the terms.
When calling + \code{model.part} either the original untransformed data should be provided + along with a \code{dot} specification or the transformed \code{model.frame} + from the same formula without another \code{dot} specification (in which + case the \code{dot} is inferred from the \code{terms} of the \code{model.frame}). } \references{ @@ -150,14 +161,19 @@ model.matrix(F3, data = mf3, lhs = 2) ## set up Formula with multiple '.' -F3 <- Formula(y1 | y2 | y3 ~ . - x3 - x4 | .) -mf3 <- model.frame(F3, data = dat) +F3 <- Formula(y1 | y2 | log(y3) ~ . - x3 - x4 | .) +## process both '.' separately (default) +mf3 <- model.frame(F3, data = dat, dot = "separate") +## only x1-x2 +model.part(F3, data = mf3, rhs = 1) +## all x1-x4 +model.part(F3, data = mf3, rhs = 2) +## process the '.' sequentially, i.e., the second RHS conditional on the first +mf3 <- model.frame(F3, data = dat, dot = "sequential") ## only x1-x2 model.part(F3, data = mf3, rhs = 1) -## all x1-x4 because '.' is processed separately (default) -model.part(F3, data = mf3, rhs = 2, dot = "separate") -## only x3-x4 because '.' 
is processed sequentally after first RHS -model.part(F3, data = mf3, rhs = 2, dot = "sequential") +## only x3-x4 +model.part(F3, data = mf3, rhs = 2) ############################## ## Process multiple offsets ## diff -Nru r-cran-formula-1.2-0/MD5 r-cran-formula-1.2-1/MD5 --- r-cran-formula-1.2-0/MD5 2015-01-20 00:17:15.000000000 +0000 +++ r-cran-formula-1.2-1/MD5 2015-04-07 16:42:14.000000000 +0000 @@ -1,15 +1,15 @@ -7df349ee461e357abdd9ece8d1f2c88c *DESCRIPTION +b685fef804c60e3271ee695d1a95bdd5 *DESCRIPTION d9799c7b3b68df94f6b84382ffecd5a5 *NAMESPACE -5725cbadb8a3dca72b4fefd23cdaf551 *NEWS -59a01bc315355085ee56539f445a9f79 *R/Formula.R +7d223bccb745d827e064c89124094484 *NEWS +a6841e85b8146b5e173682e10467e40c *R/Formula.R 089fe9adb437d8ce78f39c0c32c6b432 *build/vignette.rds cecc325558bfb9e8a1396e05dbcf9c23 *inst/CITATION ae4c4bf2ff5a0d0adbdfbb30bc0d23bf *inst/doc/Formula.R 0e62223f1646a8021221d03e42ea4a26 *inst/doc/Formula.Rnw -f8a456774142bcc88ca3415c0c80e603 *inst/doc/Formula.pdf +625e607ef5af96950395b102f60f4605 *inst/doc/Formula.pdf 30b0d76d32423f22294bf9aa4a221371 *man/Formula.Rd -d4da3acddb7d7f9e31cead87be51ab9a *man/model.frame.Formula.Rd -7615e266c31ca23538839bb10f7d1662 *tests/Examples/Formula-Ex.Rout.save +fb0a0637262cece095aad2cae419b646 *man/model.frame.Formula.Rd +7a95fab74f58d244533dbb6aa581c239 *tests/Examples/Formula-Ex.Rout.save 0e62223f1646a8021221d03e42ea4a26 *vignettes/Formula.Rnw e99ed6af2b803dc98ec4c6b2f6c042b4 *vignettes/Formula.Rout.save 4bab7df5607b24ff56c23a73e67c625e *vignettes/Formula.bib diff -Nru r-cran-formula-1.2-0/NEWS r-cran-formula-1.2-1/NEWS --- r-cran-formula-1.2-0/NEWS 2015-01-05 20:11:49.000000000 +0000 +++ r-cran-formula-1.2-1/NEWS 2015-04-07 10:47:48.000000000 +0000 @@ -1,3 +1,13 @@ +Changes in Version 1.2-1 + + o Bug fix for formulas with transformed variables on the left-hand + side (e.g., cbind(), log(), or Surv()) and one or more '.' on the + right-hand side. The terms() and hence the model.frame() now work + smoothly.
When using model.part() the same Formula (plus 'dot' + argument) has to be supplied when preparing the model.frame() and + the model.part(). + + Changes in Version 1.2-0 o Extended processing of formulas with one or more '.' on the diff -Nru r-cran-formula-1.2-0/R/Formula.R r-cran-formula-1.2-1/R/Formula.R --- r-cran-formula-1.2-0/R/Formula.R 2015-01-05 20:22:46.000000000 +0000 +++ r-cran-formula-1.2-1/R/Formula.R 2015-04-07 10:53:01.000000000 +0000 @@ -83,74 +83,42 @@ return(rval) } -terms.Formula <- function(x, ..., lhs = NULL, rhs = NULL, dot = "separate") { - - ## simplify a Formula to a formula that can be processed with - ## terms/model.frame etc. - simplify_to_formula <- function(Formula, lhs = NULL, rhs = NULL) { - - ## get desired subset as formula and Formula - form <- formula(Formula, lhs = lhs, rhs = rhs) - Form <- Formula(form) - - ## convenience functions for checking extended features - is_lhs_extended <- function(Formula) { - ## check for multiple parts - if(length(attr(Formula, "lhs")) > 1L) { - return(TRUE) - } else { - ## and multiple responses - if(length(attr(Formula, "lhs")) < 1L) return(FALSE) - return(length(attr(terms(paste_formula(NULL, - attr(Formula, "lhs"), rsep = "+")), "term.labels")) > 1L) - } - } - - is_rhs_extended <- function(Formula) { - ## check for muliple parts - length(attr(Formula, "rhs")) > 1L - } - - ## simplify (if necessary) - ext_lhs <- is_lhs_extended(Form) - if(ext_lhs | is_rhs_extended(Form)) { - form <- if(ext_lhs) { - if(length(attr(Form, "rhs")) == 1L & identical(attr(Form, "rhs")[[1L]], 0)) { - paste_formula(NULL, attr(Form, "lhs"), rsep = "+") - } else { - paste_formula(NULL, c(attr(Form, "lhs"), attr(Form, "rhs")), rsep = "+") - } - } else { - paste_formula(attr(Form, "lhs"), attr(Form, "rhs"), rsep = "+") - } - } - - ## re-attach original environment and return - environment(form) <- environment(Formula) - return(form) - } - - ## check whether formula has a dot - has_dot <- function(formula) 
inherits(try(terms(formula), silent = TRUE), "try-error") - +terms.Formula <- function(x, ..., lhs = NULL, rhs = NULL, dot = "separate") +{ ## simplify to standard formula form <- simplify_to_formula(x, lhs = lhs, rhs = rhs) ## if necessary try to expand/update/simplify formula parts with dot if(has_dot(form)) { + x_orig <- x dot <- match.arg(dot, c("separate", "sequential")) + + ## lhs and rhs calls ll <- formula(x, rhs = 0L, collapse = TRUE)[[2L]] rr <- attr(x, "rhs") + + ## update and simplify again for(i in seq_along(rr)) { if(dot == "sequential" && i > 1L) ll <- c_formula(ll, rr[[i - 1L]], sep = "+") - fi <- paste_formula(NULL, c_formula(rr[[i]], ll, sep = "-")) - attr(x, "rhs")[[i]] <- update(formula(terms(fi, ...)), . ~ .)[[3L]] + fi <- paste_formula(ll, rr[[i]]) #probably better than:# paste_formula(NULL, c_formula(rr[[i]], ll, sep = "-")) + rr[[i]] <- update(formula(terms(fi, ...)), . ~ .)[[3L]] } + attr(x, "rhs") <- rr form <- simplify_to_formula(x, lhs = lhs, rhs = rhs) + + ## call traditional terms() + mt <- terms(form, ...) + + ## store updating for future reference (e.g., in model.part) + attr(mt, "Formula_with_dot") <- x_orig + attr(mt, "Formula_without_dot") <- x + attr(mt, "dot") <- dot + } else { + ## call traditional terms() + mt <- terms(form, ...) } - ## call traditional terms() - terms(form, ...) + return(mt) } model.frame.Formula <- function(formula, data = NULL, ..., lhs = NULL, rhs = NULL, dot = "separate") @@ -174,7 +142,7 @@ NextMethod() } -model.part.Formula <- function(object, data, lhs = 0, rhs = 0, drop = FALSE, terms = FALSE, dot = "separate", ...) { +model.part.Formula <- function(object, data, lhs = 0, rhs = 0, drop = FALSE, terms = FALSE, dot = NULL, ...) 
{ ## *hs = NULL: keep all parts if(is.null(lhs)) lhs <- 1L:length(attr(object, "lhs")) @@ -184,6 +152,26 @@ isTRUE(all.equal(as.numeric(rhs), rep(0, length(rhs))))) stop("Either some 'lhs' or 'rhs' has to be selected.") + if(is.null(dot)) { + if(is.null(attr(attr(data, "terms"), "dot"))) { + dot <- "separate" + } else { + dot <- attr(attr(data, "terms"), "dot") + } + } else { + dot <- match.arg(dot, c("separate", "sequential")) + } + + ## + if(has_dot(object) && + !is.null(attr(data, "terms")) && + all(c("Formula_with_dot", "Formula_without_dot", "dot") %in% names(attributes(attr(data, "terms")))) && + dot == attr(attr(data, "terms"), "dot") && + simplify_to_formula(object, lhs = lhs, rhs = rhs) == simplify_to_formula(attr(attr(data, "terms"), "Formula_with_dot"), lhs = lhs, rhs = rhs) + ) { + object <- attr(attr(data, "terms"), "Formula_without_dot") + } + ## construct auxiliary terms object mt <- terms(object, lhs = lhs, rhs = rhs, dot = dot, data = data) @@ -367,3 +355,50 @@ c_formula(lval, rval, sep = "~") } +## simplify a Formula to a formula that can be processed with +## terms/model.frame etc. 
+simplify_to_formula <- function(Formula, lhs = NULL, rhs = NULL) { + + ## get desired subset as formula and Formula + form <- formula(Formula, lhs = lhs, rhs = rhs) + Form <- Formula(form) + + ## convenience functions for checking extended features + is_lhs_extended <- function(Formula) { + ## check for multiple parts + if(length(attr(Formula, "lhs")) > 1L) { + return(TRUE) + } else { + ## and multiple responses + if(length(attr(Formula, "lhs")) < 1L) return(FALSE) + return(length(attr(terms(paste_formula(NULL, + attr(Formula, "lhs"), rsep = "+")), "term.labels")) > 1L) + } + } + + is_rhs_extended <- function(Formula) { + ## check for muliple parts + length(attr(Formula, "rhs")) > 1L + } + + ## simplify (if necessary) + ext_lhs <- is_lhs_extended(Form) + if(ext_lhs | is_rhs_extended(Form)) { + form <- if(ext_lhs) { + if(length(attr(Form, "rhs")) == 1L & identical(attr(Form, "rhs")[[1L]], 0)) { + paste_formula(NULL, attr(Form, "lhs"), rsep = "+") + } else { + paste_formula(NULL, c(attr(Form, "lhs"), attr(Form, "rhs")), rsep = "+") + } + } else { + paste_formula(attr(Form, "lhs"), attr(Form, "rhs"), rsep = "+") + } + } + + ## re-attach original environment and return + environment(form) <- environment(Formula) + return(form) +} + +## check whether formula has a dot (FIXME: can other problems than just '.' occur?) +has_dot <- function(formula) inherits(try(terms(formula), silent = TRUE), "try-error") diff -Nru r-cran-formula-1.2-0/tests/Examples/Formula-Ex.Rout.save r-cran-formula-1.2-1/tests/Examples/Formula-Ex.Rout.save --- r-cran-formula-1.2-0/tests/Examples/Formula-Ex.Rout.save 2015-01-05 20:28:27.000000000 +0000 +++ r-cran-formula-1.2-1/tests/Examples/Formula-Ex.Rout.save 2015-04-07 11:13:34.000000000 +0000 @@ -263,20 +263,28 @@ > > ## set up Formula with multiple '.' -> F3 <- Formula(y1 | y2 | y3 ~ . - x3 - x4 | .) -> mf3 <- model.frame(F3, data = dat) +> F3 <- Formula(y1 | y2 | log(y3) ~ . - x3 - x4 | .) +> ## process both '.' 
separately (default) +> mf3 <- model.frame(F3, data = dat, dot = "separate") > ## only x1-x2 > model.part(F3, data = mf3, rhs = 1) x1 x2 2 0.26 0.46 3 0.03 0.37 -> ## all x1-x4 because '.' is processed separately (default) -> model.part(F3, data = mf3, rhs = 2, dot = "separate") +> ## all x1-x4 +> model.part(F3, data = mf3, rhs = 2) x1 x2 x3 x4 2 0.26 0.46 a a 3 0.03 0.37 b b -> ## only x3-x4 because '.' is processed sequentally after first RHS -> model.part(F3, data = mf3, rhs = 2, dot = "sequential") +> ## process the '.' sequentially, i.e., the second RHS conditional on the first +> mf3 <- model.frame(F3, data = dat, dot = "sequential") +> ## only x1-x2 +> model.part(F3, data = mf3, rhs = 1) + x1 x2 +2 0.26 0.46 +3 0.03 0.37 +> ## only x3-x4 +> model.part(F3, data = mf3, rhs = 2) x3 x4 2 a a 3 b b @@ -312,7 +320,7 @@ > ### > options(digits = 7L) > base::cat("Time elapsed: ", proc.time() - base::get("ptime", pos = 'CheckExEnv'),"\n") -Time elapsed: 0.252 0.008 0.261 0 0 +Time elapsed: 0.3 0.004 0.306 0 0 > grDevices::dev.off() null device 1