diff -Nru r-cran-msm-0.9.7/ChangeLog r-cran-msm-1.0/ChangeLog --- r-cran-msm-0.9.7/ChangeLog 2010-05-18 14:03:53.000000000 +0000 +++ r-cran-msm-1.0/ChangeLog 2010-11-24 15:57:59.000000000 +0000 @@ -1,3 +1,43 @@ +2010-11-24 Chris Jackson + + * R/outputs.R: Line types, colours and widths can be configured in + plotprog.msm, plot.survfit.msm and plot.prevalence.msm. + + * DESCRIPTION: Version 1.0 finally released to accompany the + forthcoming paper on msm in Journal of Statistical Software. + +2010-11-12 Chris Jackson + + * man/msm.Rd: Documentation for the return value of msm corrected + to state that the "logbaseline" components of result matrices are + evaluated with covariates set to zero if center=FALSE, and at + their means if center=TRUE. Previously it stated that + "logbaseline" was evaluated at the mean covariate values. + + * R/msm.R(msm), R/outputs.R(print.msm): Return the "baseline" and + "sojourn" components of result matrices at covariate values of 0 + if center=0. Thanks to Kenneth Gundersen for the report. + +2010-11-01 Chris Jackson + + * R/boot.R(bootdata.trans.msm): Fix for bootstrapping when some + levels of a factor do not appear in a bootstrapped dataset. + Thanks to Gale Bravener for the report. + +2010-10-29 Chris Jackson + + * R/msm.R(msm.check.times): Fixed a bug in comparing small floating + point numbers to zero, and added "may be" to warning about data + inconsistent with transition matrix, due to uncertainty about + these comparisons. + +2010-09-22 Chris Jackson + + * R/msm.R(msm.check.times): Added warning for multiple observations + at the same time on the same person with different states, which + leads to zero likelihood and "cannot be evaluated at initial + values" message. + 2010-05-18 Chris Jackson * DESCRIPTION: Version 0.9.7 released diff -Nru r-cran-msm-0.9.7/debian/changelog r-cran-msm-1.0/debian/changelog --- r-cran-msm-0.9.7/debian/changelog 2010-12-03 18:12:51.000000000 +0000 +++ r-cran-msm-1.0/debian/changelog 2010-12-01 08:43:19.000000000 +0000 @@ -1,3 +1,12 @@ +r-cran-msm (1.0-1) unstable; urgency=low + + * New upstream version + * Standards-Version: 3.9.1 + * debian/rules: use $(package) variable + * debian/source/format: 3.0 (quilt) + + -- Andreas Tille Wed, 01 Dec 2010 09:41:05 +0100 + r-cran-msm (0.9.7-1) unstable; urgency=low * New upstream version diff -Nru r-cran-msm-0.9.7/debian/control r-cran-msm-1.0/debian/control --- r-cran-msm-0.9.7/debian/control 2010-12-03 18:12:51.000000000 +0000 +++ r-cran-msm-1.0/debian/control 2010-12-01 08:45:41.000000000 +0000 @@ -5,7 +5,7 @@ Uploaders: Andreas Tille DM-Upload-Allowed: yes Build-Depends: debhelper (>= 7.0), cdbs, r-base-dev, r-cran-mvtnorm, r-cran-survival -Standards-Version: 3.8.4 +Standards-Version: 3.9.1 Homepage: http://cran.r-project.org/web/packages/msm/ Vcs-Browser: http://svn.debian.org/wsvn/debian-science/packages/R/r-cran-msm/trunk/?rev=0&sc=0 Vcs-Svn: svn://svn.debian.org/svn/debian-science/packages/R/r-cran-msm/trunk/ diff -Nru r-cran-msm-0.9.7/debian/README.Debian r-cran-msm-1.0/debian/README.Debian --- r-cran-msm-0.9.7/debian/README.Debian 2010-12-03 18:12:51.000000000 +0000 +++ r-cran-msm-1.0/debian/README.Debian 2010-12-01 08:44:34.000000000 +0000 @@ -1,10 +1,13 @@ R msm for Debian ---------------- -While this package was builded as precondition for R surveillance - -a R package which supports epidemiological research - the authors -are basically using msm also for this very purpose. Please see the -comment of Ross Boylan +This package can be tested by loading it into R with the command +‘library(msm)’ in order to confirm its integrity. + +This package was builded as precondition for R surveillance - a R +package which supports epidemiological research - the authors are +basically using msm also for this very purpose. Please see the comment +of Ross Boylan http://lists.debian.org/debian-science/2009/03/msg00011.html diff -Nru r-cran-msm-0.9.7/debian/rules r-cran-msm-1.0/debian/rules --- r-cran-msm-0.9.7/debian/rules 2010-12-03 18:12:51.000000000 +0000 +++ r-cran-msm-1.0/debian/rules 2010-12-01 08:42:21.000000000 +0000 @@ -5,15 +5,13 @@ include /usr/share/R/debian/r-cran.mk -pkg:=r-$(debRreposname)-$(cranName) - clean:: - echo "Clean up for package $(pkg)" + echo "Clean up for package $(package)" rm -f src/msm.so -install/$(pkg):: - chmod 644 debian/$(pkg)/usr/lib/R/site-library/msm/NEWS +install/$(package):: + chmod 644 debian/$(package)/usr/lib/R/site-library/msm/NEWS # Require a number equal or superior than the R version the package was built with. - echo "R:Depends=r-base-core (>= $(shell R --version | head -n1 | perl -ne 'print / +([0-9]\.[0-9]+\.[0-9])/'))" >> debian/$(pkg).substvars + echo "R:Depends=r-base-core (>= $(shell R --version | head -n1 | perl -ne 'print / +([0-9]\.[0-9]+\.[0-9])/'))" >> debian/$(package).substvars diff -Nru r-cran-msm-0.9.7/debian/source/format r-cran-msm-1.0/debian/source/format --- r-cran-msm-0.9.7/debian/source/format 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-msm-1.0/debian/source/format 2010-12-03 18:12:51.000000000 +0000 @@ -0,0 +1 @@ +3.0 (quilt) diff -Nru r-cran-msm-0.9.7/DESCRIPTION r-cran-msm-1.0/DESCRIPTION --- r-cran-msm-0.9.7/DESCRIPTION 2010-05-18 15:42:49.000000000 +0000 +++ r-cran-msm-1.0/DESCRIPTION 2010-11-25 08:22:47.000000000 +0000 @@ -1,18 +1,19 @@ Package: msm -Version: 0.9.7 -Date: 2010-05-18 +Version: 1.0 +Date: 2010-11-24 Title: Multi-state Markov and hidden Markov models in continuous time Author: Christopher Jackson Maintainer: Christopher Jackson Description: Functions for fitting general continuous-time Markov and hidden Markov multi-state models to longitudinal data. Both Markov transition rates and the hidden Markov output process - can be modelled in terms of covariates. A variety of - observation schemes are supported, including processes observed - at arbitrary times, completely-observed processes, and censored - states. + can be modelled in terms of covariates, which may be constant + or piecewise-constant in time. A variety of observation + schemes are supported, including processes observed at + arbitrary times (panel data), continuously-observed processes, + and censored states. License: GPL (>= 2) Imports: survival,mvtnorm -Packaged: 2010-05-18 14:22:01 UTC; chris +Packaged: 2010-11-24 16:38:55 UTC; chris Repository: CRAN -Date/Publication: 2010-05-18 15:42:49 +Date/Publication: 2010-11-25 08:22:47 diff -Nru r-cran-msm-0.9.7/inst/CITATION r-cran-msm-1.0/inst/CITATION --- r-cran-msm-0.9.7/inst/CITATION 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-msm-1.0/inst/CITATION 2010-11-17 21:36:50.000000000 +0000 @@ -0,0 +1,19 @@ +citHeader("To cite msm in publications use:") + +citEntry(entry = "Article", + title = "Multi-State Models for Panel Data: The {msm} Package for {R}", + author = personList(as.person("Christopher H. Jackson")), + journal = "Journal of Statistical Software", + year = "2011", + volume = "38", + number = "8", + pages = "1--29", + url = "http://www.jstatsoft.org/v38/i08/", + + textVersion = + paste("Christopher H. Jackson (2011).", + "Multi-State Models for Panel Data: The msm Package for R.", + "Journal of Statistical Software, 38(8), 1-29.", + "URL http://www.jstatsoft.org/v38/i08/.") +) + Binary files /tmp/0XeIaO5geq/r-cran-msm-0.9.7/inst/doc/msm-manual.pdf and /tmp/7KRXmZPqBW/r-cran-msm-1.0/inst/doc/msm-manual.pdf differ diff -Nru r-cran-msm-0.9.7/inst/NEWS r-cran-msm-1.0/inst/NEWS --- r-cran-msm-0.9.7/inst/NEWS 2010-05-18 14:03:53.000000000 +0000 +++ r-cran-msm-1.0/inst/NEWS 2010-11-24 15:57:59.000000000 +0000 @@ -3,6 +3,26 @@ USER-VISIBLE CHANGES (for detailed changes see ChangeLog in the source package) -------------------- +Version 1.0 (2010-11-24) +------------- + +o 1.0 release to accompany the forthcoming Journal of Statistical + Software paper about msm. + +o Line types, colours and widths can be configured in plotprog.msm, + plot.survfit.msm and plot.prevalence.msm. + +o Added warning for multiple observations at the same time on the same + person with different states, which leads to zero likelihood and + the dreaded "cannot be evaluated at initial values" message. + +o If center=FALSE, the $Qmatrices$baseline, $Ematrices$baseline and + $sojourn components of msm objects are evaluated with covariate + values of 0, for consistency with "logbaseline". Documentation and + printed output corrected accordingly. These issues caused problems + with viterbi.msm. Thanks to Kenneth Gundersen for the report. + + Version 0.9.7 (2010-05-18) ------------- @@ -13,8 +33,8 @@ Version 0.9.6 (2010-02-09) ------------- -o Bug fix which caused occasional wrong likelihood calculations for - models with "exacttimes". Thanks to Brian Tom for the report. +o Fix of a bug which caused occasional wrong likelihood calculations + for models with "exacttimes". Thanks to Brian Tom for the report. o Fix for "NA in probability vector" error in pearson.msm. Thanks to Wen-Wen Yang for the report. diff -Nru r-cran-msm-0.9.7/man/boot.msm.Rd r-cran-msm-1.0/man/boot.msm.Rd --- r-cran-msm-0.9.7/man/boot.msm.Rd 2008-03-11 18:16:16.000000000 +0000 +++ r-cran-msm-1.0/man/boot.msm.Rd 2010-11-19 18:28:31.000000000 +0000 @@ -23,10 +23,11 @@ \details{ The bootstrap datasets are computed by resampling independent transitions between pairs of states (for non-hidden models without - censoring), or independent patient series (for hidden models or + censoring), or independent individual series (for hidden models or models with censoring). Therefore this approach doesn't work if, for example, the data for a HMM consist of a series of observations - from just one individual. + from just one individual, and is inaccurate for small numbers of + independent transitions or individuals. Confidence intervals or standard errors for the corresponding statistic can be calculated by summarising the returned list of @@ -47,11 +48,11 @@ parameters. Bootstrapping should give a more accurate estimate of the uncertainty. - An alternative method which is faster than bootstrapping but more - accurate than the delta method, is to draw a sample from the - asymptotic multivariate normal distribution implied by the - maximum likelihood estimates (and covariance matrix), and summarise - the transformed bootstrapping. See \code{\link{pmatrix.msm}} + An alternative method which is less accurate though faster than + bootstrapping, but more accurate than the delta method, is to draw a + sample from the asymptotic multivariate normal distribution implied by + the maximum likelihood estimates (and covariance matrix), and + summarise the transformed estimates. See \code{\link{pmatrix.msm}}. All objects used in the original call to \code{\link{msm}} which produced \code{x}, such as the \code{qmatrix}, should be in the @@ -91,14 +92,19 @@ ## Psoriatic arthritis example data(psor) psor.q <- rbind(c(0,0.1,0,0),c(0,0,0.1,0),c(0,0,0,0.1),c(0,0,0,0)) - psor.msm <- msm(state ~ months, subject=ptnum, data=psor, qmatrix = psor.q, covariates = ~ollwsdrt+hieffusn, constraint = list(hieffusn=c(1,1,1),ollwsdrt=c(1,1,2)), control = list(REPORT=1,trace=2), method="BFGS") + psor.msm <- msm(state ~ months, subject=ptnum, data=psor, qmatrix = + psor.q, covariates = ~ollwsdrt+hieffusn, + constraint = list(hieffusn=c(1,1,1),ollwsdrt=c(1,1,2)), + control = list(REPORT=1,trace=2), method="BFGS") ## Bootstrap the baseline transition intensity matrix. This will take a long time. q.list <- boot.msm(psor.msm, function(x)x$Qmatrices$baseline) ## Manipulate the resulting list of matrices to calculate bootstrap standard errors. apply(array(unlist(q.list), dim=c(4,4,5)), c(1,2), sd) ## Similarly calculate a bootstrap 95\% confidence interval - apply(array(unlist(q.list), dim=c(4,4,5)), c(1,2), function(x)quantile(x, c(0.025, 0.975))) - ## Bootstrap standard errors are larger than the asymptotic standard errors calculated from the Hessian + apply(array(unlist(q.list), dim=c(4,4,5)), c(1,2), + function(x)quantile(x, c(0.025, 0.975))) + ## Bootstrap standard errors are larger than the asymptotic standard + ## errors calculated from the Hessian psor.msm$QmatricesSE$baseline } } diff -Nru r-cran-msm-0.9.7/man/deltamethod.Rd r-cran-msm-1.0/man/deltamethod.Rd --- r-cran-msm-0.9.7/man/deltamethod.Rd 2009-08-26 13:56:07.000000000 +0000 +++ r-cran-msm-1.0/man/deltamethod.Rd 2010-05-25 17:20:04.000000000 +0000 @@ -51,7 +51,7 @@ If you can spare the computational time, bootstrapping is a more accurate method of calculating confidence intervals or standard errors - of transformations of parameters. See \code{\link{boot.msm}}. + for transformations of parameters. See \code{\link{boot.msm}}. } \references{Oehlert, G. W. \emph{A note on the delta method}. American Statistician 46(1), 1992} diff -Nru r-cran-msm-0.9.7/man/ematrix.msm.Rd r-cran-msm-1.0/man/ematrix.msm.Rd --- r-cran-msm-0.9.7/man/ematrix.msm.Rd 2009-06-05 16:59:18.000000000 +0000 +++ r-cran-msm-1.0/man/ematrix.msm.Rd 2010-11-19 18:28:33.000000000 +0000 @@ -7,7 +7,8 @@ model at a given set of covariate values. } \usage{ -ematrix.msm(x, covariates="mean", ci=c("delta","normal","bootstrap","none"), cl=0.95, B=1000) +ematrix.msm(x, covariates="mean", ci=c("delta","normal","bootstrap","none"), + cl=0.95, B=1000) } \arguments{ @@ -81,7 +82,7 @@ the multinomial-logit scale by \code{\link{msm}}. A covariance matrix is estimated from the Hessian of the maximised log-likelihood. From these, the delta method can be used to obtain standard errors of the - probabilities on the naturalscale at arbitrary covariate values. + probabilities on the natural scale at arbitrary covariate values. Confidence intervals are estimated by assuming normality on the multinomial-logit scale. } diff -Nru r-cran-msm-0.9.7/man/hmm-dists.Rd r-cran-msm-1.0/man/hmm-dists.Rd --- r-cran-msm-0.9.7/man/hmm-dists.Rd 2009-04-23 16:07:58.000000000 +0000 +++ r-cran-msm-1.0/man/hmm-dists.Rd 2010-11-19 18:28:33.000000000 +0000 @@ -136,7 +136,7 @@ See the PDF manual \file{msm-manual.pdf} in the \file{doc} subdirectory for algebraic definitions of all these distributions. New hidden Markov model response distributions can be added to - \pkg{msm} by following the instructions in Section 2.18.1. + \pkg{msm} by following the instructions in Section 2.17.1. Parameters which can be modelled in terms of covariates, on the scale of a link function, are as follows. diff -Nru r-cran-msm-0.9.7/man/msm.Rd r-cran-msm-1.0/man/msm.Rd --- r-cran-msm-0.9.7/man/msm.Rd 2010-05-18 08:31:44.000000000 +0000 +++ r-cran-msm-1.0/man/msm.Rd 2010-11-19 18:28:30.000000000 +0000 @@ -28,9 +28,10 @@ \code{state ~ time} - Observed states should be in the set \code{1, \dots, n}, - where \code{n} is the number of states. - } + Observed states should be in the set \code{1, \dots, n}, where + \code{n} is the number of states. The times can indicate different + types of observation scheme, so be careful to choose the correct + \code{obstype}. } \item{subject}{Vector of subject identification numbers for the data specified by \code{formula}. If missing, then all observations @@ -67,8 +68,8 @@ } \item{gen.inits}{If \code{TRUE}, then initial values for the - transition intensities are estimated by assuming that the data - represent the exact transition times of the process. The non-zero + transition intensities are generated automatically using the method + in \code{\link{crudeinits.msm}}. The non-zero entries of the supplied \code{qmatrix} are assumed to indicate the allowed transitions of the model.} @@ -133,8 +134,7 @@ A misclassification model, that is, a hidden Markov model where the outcomes are misclassified observations of the underlying states, can either be specified using a list of \code{\link{hmmCat}} - objects, or by using an \code{ematrix} as in previous versions of - \pkg{msm}. + objects, or by using an \code{ematrix}. For example, \cr @@ -164,7 +164,8 @@ \describe{ \item{1}{An observation of the process at an arbitrary time (a "snapshot" - of the process, or "panel-observed" data)} + of the process, or "panel-observed" data). The states are + unknown between observation times.} \item{2}{An exact transition time, with the state at the previous observation retained until the current observation.} \item{3}{An exact transition time, but the state at the instant before @@ -425,7 +426,7 @@ censoring. Censoring means that the observed state is known only to be one of a particular set of states. For example, \code{censor=999} indicates that all observations of \code{999} in the vector of observed - states denote censoring times. By default, this means that the true + states are censored states. By default, this means that the true state could have been anything other than an absorbing state. To specify corresponding true states explicitly, use a \code{censor.states} argument. @@ -443,9 +444,9 @@ facility. If the true state is unknown on occasions when a piecewise constant covariate is known to change, then censored states can be inserted in the data on those occasions. - The covariate may represent time itself, or some other - time-dependent variable. The \code{pci} option to msm can be used - to perform this trick automatically. + The covariate may represent time itself, in which case the \code{pci} option to msm can be used + to perform this trick automatically, or some other time-dependent + variable. } \item{censor.states}{ @@ -471,7 +472,8 @@ specifies that the intensity changes at time points 5 and 10. This will automatically construct a model with a categorical - (factor) covariate called \code{timeperiod}, with levels \code{"timeperiod[-Inf,5)"}, + (factor) covariate called \code{timeperiod}, with levels + \code{"timeperiod[-Inf,5)"}, \code{"timeperiod[5,10)"} and \code{"timeperiod[10,Inf)"}, where the first level is the baseline. This covariate defines the time period in @@ -497,7 +499,7 @@ \code{qmatrix.msm(example.msm, covariates=list(timeperiod="[5,Inf)"))} This facility does not support interactions between time and - other covariates. Such models need to be specified by hand, using + other covariates. Such models need to be specified "by hand", using a state variable with censored observations inserted. Note that the \code{data} component of the \code{msm} object returned from a call to \code{msm} with \code{pci} supplied contains the @@ -522,11 +524,12 @@ fixed at their initial values during the optimisation. These are given in the order: transition intensities (reading across rows of the transition matrix), covariates on intensities (ordered by - intensities within covariates), hidden Markov model parameters - (ordered by parameters within states), hidden Markov model covariate - parameters (ordered by covariates within parameters within states), - initial state occupancy probabilities (excluding the first - probability, which is fixed at one minus the sum of the others). + intensities within covariates), hidden Markov model parameters, + including misclassification probabilities (ordered by parameters + within states), hidden Markov model covariate parameters (ordered by + covariates within parameters within states), initial state occupancy + probabilities (excluding the first probability, which is fixed at one + minus the sum of the others). If there are equality constraints on certain parameters, then \code{fixedpars} indexes the set of unique parameters, excluding @@ -540,9 +543,10 @@ models stage by stage. To fix all parameters, specify \code{fixedpars = TRUE}. } - \item{center}{If \code{TRUE} (the default) then covariates are centered at - their means during the maximum likelihood estimation. This usually - improves stability of the numerical optimisation. } + \item{center}{If \code{TRUE} (the default, unless + \code{fixedpars=TRUE}) then covariates are centered at their means + during the maximum likelihood estimation. This usually improves + stability of the numerical optimisation. } \item{opt.method}{Quoted name of the R function to perform minimisation of the minus twice log likelihood. Either "optim" or @@ -583,13 +587,25 @@ } } \value{ - A list of class \code{msm}, with components: + To obtain summary information from models fitted by the + \code{\link{msm}} function, it is recommended to use extractor + functions such as \code{\link{qmatrix.msm}}, + \code{\link{pmatrix.msm}}, \code{\link{sojourn.msm}}. These provide + estimates and confidence intervals for quantities such as transition + probabilities for given covariate values. + + For advanced use, it may be necessary to directly use information + stored in the object returned by the \code{msm} function. This is a + list of class \code{msm}, with components: \item{call}{The original call to \code{msm}.} \item{Qmatrices}{A list of matrices. The first component, labelled \code{logbaseline}, is a matrix containing the estimated transition intensities on the log scale with any covariates fixed at - their means in the data. Each remaining component is a matrix giving the linear + their means in the data (or at zero, if \code{center=FALSE}). The + component labelled \code{baseline} is the equivalent on the + untransformed scale. Each + remaining component is a matrix giving the linear effects of the labelled covariate on the matrix of log intensities. To extract an estimated intensity matrix on the natural scale, at an arbitrary combination of covariate values, use the @@ -605,7 +621,10 @@ \item{Ematrices}{A list of matrices. The first component, labelled \code{logitbaseline}, is the estimated misclassification probability matrix (expressed as as log odds relative to the probability of the - true state) with any covariates fixed at their means in the data. Each + true state) with any covariates fixed at their means in the data (or + at zero, if \code{center=FALSE}). The + component labelled \code{baseline} is the equivalent on the + untransformed scale. Each remaining component is a matrix giving the linear effects of the labelled covariate on the matrix of logit misclassification probabilities. To extract an estimated misclassification @@ -620,7 +639,8 @@ A list with components: \code{mean} = estimated mean sojourn times in the transient states, - with covariates fixed at their means. + with covariates fixed at their means (if center=TRUE) or at zero + (if center=FALSE). \code{se} = corresponding standard errors. } @@ -672,21 +692,16 @@ \code{hmodel} component is also formatted and printed. This includes estimates and confidence intervals for each parameter. - - To extract summary information from the fitted model, it is - recommended to use the more flexible extractor functions, such as - \code{\link{qmatrix.msm}}, \code{\link{pmatrix.msm}}, - \code{\link{sojourn.msm}}, instead of directly reading from list - components of \code{msm} objects. - } \details{ For full details about the methodology behind the \pkg{msm} package, refer to the PDF manual \file{msm-manual.pdf} in the \file{doc} subdirectory of the package. This includes a tutorial in the typical - use of \pkg{msm}. + use of \pkg{msm}. The paper by Jackson (2011) in Journal of + Statistical Software presents the material in this manual in a more + concise form. - Note that \pkg{msm} is for fitting \emph{continuous-time} Markov + \pkg{msm} was designed for fitting \emph{continuous-time} Markov models, processes where transitions can occur at any time. These models are defined by \emph{intensities}, which govern both the time spent in the current state and the probabilities of the next @@ -694,9 +709,14 @@ advance to only occur at multiples of some time unit, and the model is purely governed by the probability distributions of the state at the next time point, conditionally on the state at the current time. These - are more appropriately fitted using multinomial logistic regression, - for example, using \code{multinom} from the - R package \pkg{nnet} (Venables and Ripley, 2002). + can also be fitted in \pkg{msm}, assuming that there is a + continuous-time process underlying the data. Then the fitted + transition probability matrix over one time period, as + returned by \code{pmatrix.msm(...,t=1)} is equivalent to the + matrix that governs the discrete-time model. However, these can be + fitted more efficiently using multinomial logistic regression, for + example, using \code{multinom} from the R package \pkg{nnet} (Venables + and Ripley, 2002). For simple continuous-time multi-state Markov models, the likelihood is calculated in terms of the transition intensity @@ -731,19 +751,15 @@ also be important. For flat likelihoods, 'informative' initial values will often be required. - Users upgrading from versions of \pkg{msm} less than 0.5 will need to - change some of their model fitting syntax. In particular, initial - values are now specified in the \code{qmatrix} and \code{covinits} - arguments instead of \code{inits}, and \code{qmatrix} is no longer a - matrix of \code{0/1} indicators. See the appendix to the PDF manual - or the NEWS file in the top-level installation directory for a full - list of changes. - } \references{ + Jackson, C. H. (2011). Multi-State Models for Panel Data: The msm Package + for R., Journal of Statistical Software, 38(8), 1-29. URL + http://www.jstatsoft.org/v38/i08/. + Kalbfleisch, J., Lawless, J.F., The analysis of panel data under a Markov assumption \emph{Journal of the Americal Statistical - Association} (1985) 80(392): 863--871. + Association} (1985) 80(392): 863--871. Kay, R. A Markov model for analysing cancer markers and disease states in survival studies. \emph{Biometrics} (1986) 42: 855--865. diff -Nru r-cran-msm-0.9.7/man/pearson.msm.Rd r-cran-msm-1.0/man/pearson.msm.Rd --- r-cran-msm-0.9.7/man/pearson.msm.Rd 2009-06-09 12:02:48.000000000 +0000 +++ r-cran-msm-1.0/man/pearson.msm.Rd 2010-11-19 18:28:30.000000000 +0000 @@ -68,7 +68,7 @@ \code{groups} at the first observation for each subject are ignored. } - \item{boot}{Estimate an exact p-value using a parametric bootstrap. + \item{boot}{Estimate an "exact" p-value using a parametric bootstrap. All objects used in the original call to \code{\link{msm}} which produced \code{x}, such as the \code{qmatrix}, should be in the @@ -233,7 +233,7 @@ # A random effects model might accommodate such fast progressors. } \author{ -Andrew Titman (\email{andrew.titman@mrc-bsu.cam.ac.uk}), -Chris Jackson (\email{chris.jackson@mrc-bsu.cam.ac.uk}) + Andrew Titman (\email{a.titman@lancaster.ac.uk}), + Chris Jackson (\email{chris.jackson@mrc-bsu.cam.ac.uk}) } \keyword{models} diff -Nru r-cran-msm-0.9.7/man/pexp.Rd r-cran-msm-1.0/man/pexp.Rd --- r-cran-msm-0.9.7/man/pexp.Rd 2008-02-19 09:25:13.000000000 +0000 +++ r-cran-msm-1.0/man/pexp.Rd 2010-11-19 18:28:32.000000000 +0000 @@ -65,10 +65,14 @@ x <- seq(0.1, 50, by=0.1) rate <- c(0.1, 0.2, 0.05, 0.3) t <- c(0, 10, 20, 30) -plot(x, dexp(x, 0.1), type="l") ## standard exponential distribution -lines(x, dpexp(x, rate, t), type="l", lty=2) ## distribution with piecewise constant rate -plot(x, pexp(x, 0.1), type="l") ## standard exponential distribution -lines(x, ppexp(x, rate, t), type="l", lty=2) ## distribution with piecewise constant rate +## standard exponential distribution +plot(x, dexp(x, 0.1), type="l") +## distribution with piecewise constant rate +lines(x, dpexp(x, rate, t), type="l", lty=2) +## standard exponential distribution +plot(x, pexp(x, 0.1), type="l") +## distribution with piecewise constant rate +lines(x, ppexp(x, rate, t), type="l", lty=2) } \author{C. H. Jackson \email{chris.jackson@mrc-bsu.cam.ac.uk}} \keyword{distribution} diff -Nru r-cran-msm-0.9.7/man/plot.prevalence.msm.Rd r-cran-msm-1.0/man/plot.prevalence.msm.Rd --- r-cran-msm-0.9.7/man/plot.prevalence.msm.Rd 2008-04-04 22:50:18.000000000 +0000 +++ r-cran-msm-1.0/man/plot.prevalence.msm.Rd 2010-11-24 15:54:23.000000000 +0000 @@ -13,7 +13,9 @@ initstates=NULL, interp=c("start","midpoint"), covariates="mean", misccovariates="mean", piecewise.times=NULL, piecewise.covariates=NULL, - xlab="Times",ylab="Prevalence (\%)", lwd=1, legend.pos=NULL, + xlab="Times",ylab="Prevalence (\%)", lwd.obs=1, + lwd.exp=1, lty.obs=1, lty.exp=2, col.obs="blue", + col.exp="red", legend.pos=NULL, ...) } \arguments{ @@ -46,7 +48,12 @@ to specify this.} \item{xlab}{x axis label.} \item{ylab}{y axis label.} - \item{lwd}{Line width. See \code{\link{par}}.} + \item{lwd.obs}{Line width for observed prevalences. See \code{\link{par}}.} + \item{lwd.exp}{Line width for expected prevalences. See \code{\link{par}}.} + \item{lty.obs}{Line type for observed prevalences. See \code{\link{par}}.} + \item{lty.exp}{Line type for expected prevalences. See \code{\link{par}}.} + \item{col.obs}{Line colour for observed prevalences. See \code{\link{par}}.} + \item{col.exp}{Line colour for expected prevalences. See \code{\link{par}}.} \item{legend.pos}{Vector of the \eqn{x} and \eqn{y} position, respectively, of the legend.} \item{...}{Further arguments to be passed to the generic \code{\link{plot}} diff -Nru r-cran-msm-0.9.7/man/plotprog.msm.Rd r-cran-msm-1.0/man/plotprog.msm.Rd --- r-cran-msm-0.9.7/man/plotprog.msm.Rd 2008-07-25 14:22:41.000000000 +0000 +++ r-cran-msm-1.0/man/plotprog.msm.Rd 2010-11-17 18:52:45.000000000 +0000 @@ -7,7 +7,8 @@ } \usage{ plotprog.msm(formula, subject, data, legend.pos=NULL, xlab="Time", - ylab="1 - incidence probability", lwd=1, xlim=NULL, ...) + ylab="1 - incidence probability", lwd=1, xlim=NULL, + mark.time=TRUE, ...) } \arguments{ \item{formula}{A formula giving the vectors containing the observed @@ -30,9 +31,11 @@ \item{ylab}{y axis label.} \item{lwd}{Line width. See \code{\link{par}}.} \item{xlim}{x axis limits, e.g. c(0,10) for an axis ranging from 0 to 10. Default is the range of observation times.} + \item{mark.time}{Mark the empirical survival curve at each censoring + point, see \code{\link{lines.survfit}}.} \item{...}{Other arguments to be passed to the - \code{\link[survival]{plot.survfit}} and \code{\link[survival]{lines.survfit}} functions.} + \code{\link{plot}} and \code{\link[survival]{lines.survfit}} functions.} } \details{ If the data represent observations of the process at arbitrary times, then diff -Nru r-cran-msm-0.9.7/man/plot.survfit.msm.Rd r-cran-msm-1.0/man/plot.survfit.msm.Rd --- r-cran-msm-0.9.7/man/plot.survfit.msm.Rd 2009-04-29 09:38:49.000000000 +0000 +++ r-cran-msm-1.0/man/plot.survfit.msm.Rd 2010-11-17 18:52:45.000000000 +0000 @@ -8,7 +8,10 @@ \usage{ plot.survfit.msm(x, from=1, to=NULL, range=NULL, covariates="mean", interp=c("start","midpoint"), ci=c("none","normal","bootstrap"), B=100, - legend.pos=NULL, xlab="Time", ylab="Survival probability", lwd=1, ...) + legend.pos=NULL, xlab="Time", ylab="Survival probability", + lty=1, lwd=1, col="red", lty.ci=2, lwd.ci=1, col.ci="red", + mark.time=TRUE, col.surv="blue", lty.surv=2, lwd.surv=1, + ...) } \arguments{ @@ -66,9 +69,20 @@ respectively, of the legend.} \item{xlab}{x axis label.} \item{ylab}{y axis label.} - \item{lwd}{Line width. See \code{\link{par}}.} + \item{lty}{Line type for the fitted curve. See \code{\link{par}}.} + \item{lwd}{Line width for the fitted curve. See \code{\link{par}}.} + \item{col}{Colour for the fitted curve. See \code{\link{par}}.} + \item{lty.ci}{Line type for the fitted curve confidence limits. See \code{\link{par}}.} + \item{lwd.ci}{Line width for the fitted curve confidence limits. See \code{\link{par}}.} + \item{col.ci}{Colour for the fitted curve confidence limits. See \code{\link{par}}.} + \item{mark.time}{Mark the empirical survival curve at each censoring + point, see \code{\link[survival]{lines.survfit}}.} + \item{col.surv}{Colour for the empirical survival curve, passed to \code{\link[survival]{lines.survfit}}. See \code{\link{par}}.} + \item{lty.surv}{Line type for the empirical survival curve, passed to \code{\link[survival]{lines.survfit}}. See \code{\link{par}}.} + \item{lwd.surv}{Line width for the empirical survival curve, passed to \code{\link[survival]{lines.survfit}}. See \code{\link{par}}.} \item{...}{Other arguments to be passed to the - \code{\link{plot.survfit}} and \code{\link{lines.survfit}} functions.} + \code{\link{plot}} function which draws the fitted curve, or the \code{\link[survival]{lines.survfit}} + function which draws the empirical curve. } } \details{ If the data represent observations of the process at arbitrary times, then @@ -79,5 +93,5 @@ This currently only handles time-homogeneous models. } -\seealso{\code{\link{survfit}}, \code{\link{plot.survfit}}, \code{\link{plot.prevalence.msm}}} +\seealso{\code{\link[survival]{survfit}}, \code{\link[survival]{plot.survfit}}, \code{\link{plot.prevalence.msm}}} \keyword{models} diff -Nru r-cran-msm-0.9.7/man/pmatrix.piecewise.msm.Rd r-cran-msm-1.0/man/pmatrix.piecewise.msm.Rd --- r-cran-msm-0.9.7/man/pmatrix.piecewise.msm.Rd 2010-01-14 17:45:32.000000000 +0000 +++ r-cran-msm-1.0/man/pmatrix.piecewise.msm.Rd 2010-11-19 18:28:32.000000000 +0000 @@ -1,6 +1,7 @@ \name{pmatrix.piecewise.msm} \alias{pmatrix.piecewise.msm} -\title{Transition probability matrix for processes with piecewise-constant intensities} +\title{Transition probability matrix for processes with + piecewise-constant intensities} \description{ Extract the estimated transition probability matrix from a fitted non-time-homogeneous multi-state model for a given time interval. diff -Nru r-cran-msm-0.9.7/man/scoreresid.msm.Rd r-cran-msm-1.0/man/scoreresid.msm.Rd --- r-cran-msm-0.9.7/man/scoreresid.msm.Rd 2008-03-18 19:02:03.000000000 +0000 +++ r-cran-msm-1.0/man/scoreresid.msm.Rd 2010-11-19 18:28:33.000000000 +0000 @@ -32,6 +32,6 @@ will have higher score residuals. } -\author{Andrew Titman \email{andrew.titman@mrc-bsu.cam.ac.uk} (theory), +\author{Andrew Titman \email{a.titman@lancaster.ac.uk} (theory), Chris Jackson \email{chris.jackson@mrc-bsu.cam.ac.uk} (code)} \keyword{models} \ No newline at end of file diff -Nru r-cran-msm-0.9.7/man/sojourn.msm.Rd r-cran-msm-1.0/man/sojourn.msm.Rd --- r-cran-msm-0.9.7/man/sojourn.msm.Rd 2007-11-08 17:13:13.000000000 +0000 +++ r-cran-msm-1.0/man/sojourn.msm.Rd 2010-11-19 18:28:33.000000000 +0000 @@ -6,7 +6,8 @@ multi-state model and their confidence limits. } \usage{ -sojourn.msm(x, covariates="mean", ci=c("delta","normal","bootstrap","none"), cl=0.95, B=1000) +sojourn.msm(x, covariates="mean", ci=c("delta","normal","bootstrap","none"), + cl=0.95, B=1000) } \arguments{ diff -Nru r-cran-msm-0.9.7/man/surface.msm.Rd r-cran-msm-1.0/man/surface.msm.Rd --- r-cran-msm-0.9.7/man/surface.msm.Rd 2009-03-16 14:48:38.000000000 +0000 +++ r-cran-msm-1.0/man/surface.msm.Rd 2010-11-19 18:28:34.000000000 +0000 @@ -23,11 +23,11 @@ surface. If 100 likelihood function evaluations is slow, then reduce this. } \item{type}{Character string specifying the type of plot to produce. \tabular{ll}{ - "contour" \tab Contour plot, using the R function + \code{"contour"} \tab Contour plot, using the R function \code{\link{contour}}. \cr - "filled.contour" \tab Solid-color contour plot, using the R function \code{\link{filled.contour}}. \cr - "persp" \tab Perspective plot, using the R function \code{\link{persp}}. \cr - "image" \tab Grid color plot, using the R function \code{\link{image}}. \cr + \code{"filled.contour"} \tab Solid-color contour plot, using the R function \code{\link{filled.contour}}. \cr + \code{"persp"} \tab Perspective plot, using the R function \code{\link{persp}}. \cr + \code{"image"} \tab Grid color plot, using the R function \code{\link{image}}. \cr } } \item{point}{Vector of length \code{n}, where \code{n} is the number diff -Nru r-cran-msm-0.9.7/man/tnorm.Rd r-cran-msm-1.0/man/tnorm.Rd --- r-cran-msm-0.9.7/man/tnorm.Rd 2010-04-01 16:34:59.000000000 +0000 +++ r-cran-msm-1.0/man/tnorm.Rd 2010-11-19 18:28:34.000000000 +0000 @@ -13,8 +13,10 @@ } \usage{ dtnorm(x, mean=0, sd=1, lower=-Inf, upper=Inf, log = FALSE) - ptnorm(q, mean=0, sd=1, lower=-Inf, upper=Inf, lower.tail = TRUE, log.p = FALSE) - qtnorm(p, mean=0, sd=1, lower=-Inf, upper=Inf, lower.tail = TRUE, log.p = FALSE) + ptnorm(q, mean=0, sd=1, lower=-Inf, upper=Inf, + lower.tail = TRUE, log.p = FALSE) + qtnorm(p, mean=0, sd=1, lower=-Inf, upper=Inf, + lower.tail = TRUE, log.p = FALSE) rtnorm(n, mean=0, sd=1, lower=-Inf, upper=Inf) } \arguments{ diff -Nru r-cran-msm-0.9.7/man/viterbi.msm.Rd r-cran-msm-1.0/man/viterbi.msm.Rd --- r-cran-msm-0.9.7/man/viterbi.msm.Rd 2008-03-04 18:40:17.000000000 +0000 +++ r-cran-msm-1.0/man/viterbi.msm.Rd 2010-05-25 17:20:04.000000000 +0000 @@ -12,7 +12,8 @@ viterbi.msm(x) } \arguments{ - \item{x}{A fitted hidden Markov multi-state model, as produced by \code{\link{msm}}} + \item{x}{A fitted hidden Markov multi-state model, or a model with + censored state observations, as produced by \code{\link{msm}}} } \value{ A data frame with columns: diff -Nru r-cran-msm-0.9.7/R/boot.R r-cran-msm-1.0/R/boot.R --- r-cran-msm-0.9.7/R/boot.R 2010-05-07 10:50:41.000000000 +0000 +++ r-cran-msm-1.0/R/boot.R 2010-11-17 17:38:01.000000000 +0000 @@ -21,8 +21,9 @@ for (j in dat$covlabels.orig) { frominds <- seq(1, 2*length(inds)-1, 2) data.boot[frominds, j] <- data.boot[frominds+1,j] <- dat$cov.orig[inds, j] - if (is.factor(dat$cov.orig[, j])) - data.boot[, j] <- factor(data.boot[,j], labels=levels(dat$cov.orig[, j])) + if (is.factor(dat$cov.orig[, j])) { + data.boot[, j] <- factor(data.boot[,j], labels=sort(unique(dat$cov.orig[inds, j]))) + } } colnames(data.boot) <- gsub("factor\\((.+)\\)", "\\1", colnames(data.boot)) data.boot @@ -55,7 +56,7 @@ for (j in dat$covlabels.orig) { data.boot[, j] <- dat$cov.orig[inds, j] if (is.factor(dat$cov.orig[, j])) - data.boot[, j] <- factor(data.boot[,j], labels=levels(dat$cov.orig[, j])) + data.boot[, j] <- factor(data.boot[,j], labels=sort(unique(dat$cov.orig[, j]))) } colnames(data.boot) <- gsub("factor\\((.+)\\)", "\\1", colnames(data.boot)) data.boot diff -Nru r-cran-msm-0.9.7/R/msm.R r-cran-msm-1.0/R/msm.R --- r-cran-msm-0.9.7/R/msm.R 2010-05-07 14:29:11.000000000 +0000 +++ r-cran-msm-1.0/R/msm.R 2010-11-16 19:24:02.000000000 +0000 @@ -33,7 +33,7 @@ pci = NULL, cl = 0.95, # width of confidence intervals fixedpars = NULL, # specify which parameters to fix. TRUE for all parameters - center = TRUE, # center covariates at their means + center = TRUE, # center covariates at their means during optimisation opt.method = c("optim","nlm"), hessian = TRUE, use.deriv = TRUE, @@ -46,7 +46,7 @@ obstype <- if (missing(obstype)) NULL else eval(substitute(obstype), data, parent.frame()) obstrue <- if (missing(obstrue)) NULL else eval(substitute(obstrue), data, parent.frame()) if (missing(data)) data <- environment(formula) - + ### MODEL FOR TRANSITION INTENSITIES qmodel <- msm.form.qmodel(qmatrix, qconstraint, exacttimes, gen.inits, formula, subject, data, censor, censor.states, analyticp) @@ -79,7 +79,7 @@ ### CENSORING MODEL cmodel <- msm.form.cmodel(censor, censor.states, qmodel$qmatrix) - + msmdata.obs <- msm.form.data(formula, subject, obstype, obstrue, covariates, data, hcovariates, misccovariates, initcovariates, qmodel, emodel, hmodel, cmodel, dmodel, exacttimes, center) @@ -210,7 +210,11 @@ p$params[p$hmmpars] <- msm.mninvlogit.transform(p$params[p$hmmpars], hmodel$plabs, hmodel$parstate) p$params.uniq <- p$params[!duplicated(abs(p$constr))] p$params <- p$params[!duplicated(abs(p$constr))][abs(p$constr)]*sign(p$constr) - if (hessian && all(eigen(opt$hessian)$values > 0)) { + + if (hessian && + all(!is.na(opt$hessian)) && all(!is.nan(opt$hessian)) && all(is.finite(opt$hessian)) && + all(eigen(opt$hessian)$values > 0)) + { p$foundse <- TRUE p$covmat <- matrix(0, nrow=p$npars, ncol=p$npars) p$covmat[p$optpars,p$optpars] <- solve(0.5 * opt$hessian) @@ -300,7 +304,7 @@ ) attr(msmobject, "fixed") <- p$fixed class(msmobject) <- "msm" - q <- qmatrix.msm(msmobject) # intensity matrix with centered covariates + q <- qmatrix.msm(msmobject, covariates=(if(center) "mean" else 0)) # intensity matrix on natural scale msmobject$Qmatrices$baseline <- q$estimates msmobject$QmatricesSE$baseline <- q$SE msmobject$QmatricesL$baseline <- q$L @@ -310,14 +314,14 @@ msmobject$EmatricesSE <- EmatricesSE msmobject$EmatricesL <- EmatricesL msmobject$EmatricesU <- EmatricesU - e <- ematrix.msm(msmobject) # misc matrix with centered covariates + e <- ematrix.msm(msmobject, covariates=(if(center) "mean" else 0)) # misc matrix on natural scale msmobject$Ematrices$baseline <- e$estimates msmobject$EmatricesSE$baseline <- e$SE msmobject$EmatricesL$baseline <- e$L msmobject$EmatricesU$baseline <- e$U } - ## Calculate mean sojourn times with centered covariates - msmobject$sojourn <- sojourn.msm(msmobject) + ## Calculate mean sojourn times at baseline + msmobject$sojourn <- sojourn.msm(msmobject, covariates=(if(center) "mean" else 0)) msmobject } @@ -349,7 +353,6 @@ msm.form.qmodel <- function(qmatrix, qconstraint=NULL, exacttimes=FALSE, gen.inits=FALSE, formula, subject, data, censor, censor.states, analyticp) { -### INTENSITY MATRIX (INPUT: qmatrix, qconstraint; OUTPUT: nstates, nintens, qmatrix, qvector, baseconstr, nintenseffs) if (gen.inits) qmatrix <- crudeinits.msm(formula, subject, qmatrix, data, censor, censor.states) msm.check.qmatrix(qmatrix) @@ -521,8 +524,8 @@ final.rows <- intersect(final.rows, icovdata$covrows.kept) subject <- subject[final.rows] time <- subset(time, statetimerows.kept %in% final.rows) - msm.check.times(time, subject) state <- subset(state, statetimerows.kept %in% final.rows) + msm.check.times(time, subject, state) obstype <- subset(obstype, statetimerows.kept %in% final.rows) obstrue <- subset(obstrue, statetimerows.kept %in% final.rows) covmat <- covmat.orig <- numeric() @@ -584,7 +587,7 @@ invisible() } -msm.check.times <- function(time, subject) +msm.check.times <- function(time, subject, state=NULL) { ### Check if any individuals have only one observation (after excluding missing data) subj.num <- match(subject,unique(subject)) # avoid problems with factor subjects with empty levels @@ -615,6 +618,16 @@ plural <- if (length(badsubjs)==1) "" else "s" stop ("Observations within subject", plural, " ", badlist, " are not ordered by time") } +### Check if any consecutive observations are made at the same time, but with different states + if (!is.null(state)){ + prevsubj <- c(-Inf, subj.num[1:length(subj.num)-1]) + prevtime <- c(-Inf, time[1:length(time)-1]) + prevstate <- c(-Inf, state[1:length(state)-1]) + sametime <- (subj.num==prevsubj & prevtime==time & prevstate!=state) + badlist <- paste(paste(which(sametime)-1, which(sametime), sep=" and "), collapse=", ") + if (any(sametime)) + warning("Different states observed at the same time on the same subject at observations ", badlist) + } invisible() } @@ -691,6 +704,7 @@ msm.check.model <- function(fromstate, tostate, obs, subject, obstype=NULL, qmatrix, cmodel) { n <- length(fromstate) + qmatrix <- qmatrix / mean(qmatrix[qmatrix>0]) # rescale to avoid false warnings with small rates Pmat <- MatrixExp(qmatrix) Pmat[Pmat < 1e-16] <- 0 imputed <- msm.impute.censored(fromstate, tostate, Pmat, cmodel) @@ -700,14 +714,14 @@ if (identical(all.equal(min(unitprob, na.rm=TRUE), 0), TRUE)) { - badobs <- min (obs[unitprob==0], na.rm = TRUE) - warning ("Data inconsistent with transition matrix for model without misclassification:\n", + badobs <- which.min(unitprob) + warning ("Data may be inconsistent with transition matrix for model without misclassification:\n", "individual ", if(is.null(subject)) "" else subject[obs==badobs], " moves from state ", fromstate[obs==badobs], " to state ", tostate[obs==badobs], " at observation ", badobs, "\n") } if (any(qunit[obstype==2]==0)) { badobs <- min (obs[qunit==0 & obstype==2], na.rm = TRUE) - warning ("Data inconsistent with intensity matrix for observations with exact transition times and no misclassification:\n", + warning ("Data may be inconsistent with intensity matrix for observations with exact transition times and no misclassification:\n", "individual ", if(is.null(subject)) "" else subject[obs==badobs], " moves from state ", fromstate[obs==badobs], " to state ", tostate[obs==badobs], " at observation ", badobs) } diff -Nru r-cran-msm-0.9.7/R/outputs.R r-cran-msm-1.0/R/outputs.R --- r-cran-msm-0.9.7/R/outputs.R 2010-05-07 10:50:41.000000000 +0000 +++ r-cran-msm-1.0/R/outputs.R 2010-11-24 16:01:18.000000000 +0000 @@ -6,7 +6,9 @@ if (!attr(x,"fixed")) { cat ("Maximum likelihood estimates: \n") - covmessage <- if (x$qcmodel$ncovs == 0) "" else "with covariates set to their means" + covmessage <- + if (x$qcmodel$ncovs == 0) "" + else paste("with covariates set to ", (if (x$center) "their means" else "0")) for (i in c("baseline", x$qcmodel$covlabels)) { title <- if (i == "baseline") paste("Transition intensity matrix",covmessage,"\n") @@ -136,7 +138,8 @@ ### Plot KM estimate of time to first occurrence of each state -plotprog.msm <- function(formula, subject, data, legend.pos=NULL, xlab="Time", ylab="1 - incidence probability", lwd=1, xlim=NULL, ...) { +plotprog.msm <- function(formula, subject, data, legend.pos=NULL, xlab="Time", ylab="1 - incidence probability", lwd=1, xlim=NULL, + mark.time=TRUE, ...) { data <- na.omit(data) mf <- model.frame(formula, data=data) state <- mf[,1] @@ -146,7 +149,7 @@ subject <- match(subject, unique(subject)) rg <- range(time) if (is.null(xlim)) xlim=rg - plot(0, xlim=xlim, ylim=c(0,1), type="n", xlab=xlab, ylab=ylab, lwd=lwd, ...) + plot(0, xlim=xlim, ylim=c(0,1), type="n", xlab=xlab, ylab=ylab, ...) states <- sort(unique(state))[-1] cols <- rainbow(length(states)) for (i in states) { @@ -158,7 +161,8 @@ mintime = if(any(x[,"state"]>=i)) min(x[x[,"state"] >= i, "time"]) else max(x[,"time"])) } ))) # slow - lines(survfit(Surv(st$mintime,st$anystate) ~ 1),col=cols[i-1],lty=i-1, lwd=lwd,...) + lines(survfit(Surv(st$mintime,st$anystate) ~ 1), + col=cols[i-1], lty=i-1, lwd=lwd, mark.time=mark.time, ...) } timediff <- (rg[2] - rg[1]) / 50 if (!is.numeric(legend.pos) || length(legend.pos) != 2) @@ -768,9 +772,9 @@ { if (!is.numeric(t) || (t < 0)) stop("t must be a positive number") if (is.null(x$pci)) { - q <- qmatrix.msm(x, covariates) - p <- MatrixExp(q$estimates, t, ...) - colnames(p) <- rownames(p) <- rownames(q$estimates) + q <- qmatrix.msm(x, covariates, ci="none") + p <- MatrixExp(q, t, ...) + colnames(p) <- rownames(p) <- rownames(q) ci <- match.arg(ci) p.ci <- switch(ci, bootstrap = pmatrix.ci.msm(x=x, t=t, t1=t1, covariates=covariates, cl=cl, B=B), @@ -1241,7 +1245,8 @@ plot.prevalence.msm <- function(x, mintime=NULL, maxtime=NULL, timezero=NULL, initstates=NULL, interp=c("start","midpoint"), covariates="mean", misccovariates="mean", piecewise.times=NULL, piecewise.covariates=NULL, xlab="Times",ylab="Prevalence (%)", - lwd=1, legend.pos=NULL,...){ + lwd.obs=1, lwd.exp=1, lty.obs=1, lty.exp=2, + col.obs="blue", col.exp="red", legend.pos=NULL,...){ time <- x$data$time if (is.null(mintime)) mintime <- min(time) if (is.null(maxtime)) maxtime <- max(time) @@ -1255,20 +1260,23 @@ nrows <- if (floor(sqrt(S))^2 < S && S <= floor(sqrt(S))*ceiling(sqrt(S))) floor(sqrt(S)) else ceiling(sqrt(S)) par(mfrow=c(nrows, ncols)) for (i in states) { - plot(t, obs$obsperc[,i], type="l", lty=1, ylim=c(0, 100), xlab=xlab, ylab=ylab, lwd=lwd, + plot(t, obs$obsperc[,i], type="l", ylim=c(0, 100), xlab=xlab, ylab=ylab, lwd=lwd.obs, lty=lty.obs, col=col.obs, main=rownames(x$qmodel$qmatrix)[i],...) - lines(t, expec[,i], lty=2, lwd=lwd) + lines(t, expec[,i], lwd=lwd.exp, lty=lty.exp, col=col.exp) } if (!is.numeric(legend.pos) || length(legend.pos) != 2) legend.pos <- c(0.4*maxtime, 40) - legend(x=legend.pos[1], y=legend.pos[2], legend=c("Observed","Expected"), lty=1:2, lwd=lwd) + legend(x=legend.pos[1], y=legend.pos[2], legend=c("Observed","Expected"), lty=c(lty.obs,lty.exp), lwd=c(lwd.obs,lwd.exp), col=c(col.obs,col.exp)) invisible() } ### Empirical versus fitted survival curve plot.survfit.msm <- function(x, from=1, to=NULL, range=NULL, covariates="mean", interp=c("start","midpoint"), ci=c("none","normal","bootstrap"), B=100, - legend.pos=NULL, xlab="Time", ylab="Survival probability", lwd=1, ...) { + legend.pos=NULL, xlab="Time", ylab="Survival probability", + lty=1, lwd=1, col="red", lty.ci=2, lwd.ci=1, col.ci="red", + mark.time=TRUE, col.surv="blue", lty.surv=2, lwd.surv=1, + ...) { if (!inherits(x, "msm")) stop("expected x to be a msm model") if (is.null(to)) to <- max(absorbing.msm(x)) @@ -1298,10 +1306,10 @@ else pr <- c(pr, P[from, to]) } plot(times, 1 - pr, type="l", xlab=xlab, ylab=ylab, lwd=lwd, - ylim=c(0,1), lty = 1, col="red",...) + ylim=c(0,1), lty = lty, col=col,...) if (ci != "none") { - lines(times, 1 - lower, lty=3, col="red", lwd=lwd) - lines(times, 1 - upper, lty=3, col="red", lwd=lwd) + lines(times, 1 - lower, lty=lty.ci, col=col.ci, lwd=lwd.ci) + lines(times, 1 - upper, lty=lty.ci, col=col.ci, lwd=lwd.ci) } dat <- as.data.frame(x$data[c("subject", "time", "state")]) st <- as.data.frame( @@ -1314,14 +1322,14 @@ c(anystate = as.numeric(any(x[,"state"]==to)), mintime = mintime) } ))) # slow - lines(survfit(Surv(st$mintime,st$anystate) ~ 1),col="blue",lty=2, lwd=lwd,...) + lines(survfit(Surv(st$mintime,st$anystate) ~ 1), mark.time=mark.time, col=col.surv, lty=lty.surv, lwd=lwd.surv,...) timediff <- (rg[2] - rg[1]) / 50 if (!is.numeric(legend.pos) || length(legend.pos) != 2) legend.pos <- c(max(x$data$time) - 25*timediff, 1) if (ci=="none") - legend(legend.pos[1], legend.pos[2], lty=c(1,2), lwd=lwd, col=c("red","blue"), + legend(legend.pos[1], legend.pos[2], lty=c(lty, lty.surv), lwd=c(lwd, lwd.surv), col=c(col, col.surv), legend=c("Fitted","Empirical")) - else legend(legend.pos[1], legend.pos[2], lty=c(1,3,2), lwd=lwd, col=c("red","red","blue"), + else legend(legend.pos[1], legend.pos[2], lty=c(lty, lty.ci, lty.surv), lwd=c(lwd,lwd.ci, lwd.surv), col=c(col ,col.ci, col.surv), legend=c("Fitted","Fitted (confidence interval)", "Empirical")) invisible() diff -Nru r-cran-msm-0.9.7/R/utils.R r-cran-msm-1.0/R/utils.R --- r-cran-msm-0.9.7/R/utils.R 2008-09-11 17:33:25.000000000 +0000 +++ r-cran-msm-1.0/R/utils.R 2010-10-27 16:01:30.000000000 +0000 @@ -175,7 +175,6 @@ (-upper + sqrt(upper^2 + 4)) * exp((upper*2 - -upper*sqrt(upper^2 + 4)) / 4)), 2, # rejection sampling with exponential proposal. Use if upper << mean. 3)))) # rejection sampling with uniform proposal. Use if bounds are narrow and central. - ind.nan <- ind[alg==-1]; ind.no <- ind[alg==0]; ind.expl <- ind[alg==1]; ind.expu <- ind[alg==2]; ind.u <- ind[alg==3] ret[ind.nan] <- NaN diff -Nru r-cran-msm-0.9.7/src/lik.c r-cran-msm-1.0/src/lik.c --- r-cran-msm-0.9.7/src/lik.c 2010-01-29 14:58:14.000000000 +0000 +++ r-cran-msm-1.0/src/lik.c 2010-11-24 16:38:55.000000000 +0000 @@ -12,6 +12,7 @@ #include "hmm.h" #include #define NODEBUG +#define NODEBUG2 #define NOVITDEBUG linkfn LINKFNS[3][2] = { @@ -207,7 +208,9 @@ LINKFNS[hm->links[i]][0], LINKFNS[hm->links[i]][1]); } GetOutcomeProb(pout, curr, nc, newpars, hm, qm, d->obstrue[obsno]); -/* printf("pout: %4.3f, %4.3f, %4.3f, %4.3f\n", pout[0], pout[1], pout[2], pout[3]); */ +#ifdef DEBUG2 + printf("pout: %4.3f, %4.3f, %4.3f, %4.3f\n", pout[0], pout[1], pout[2], pout[3]); +#endif /* calculate the transition probability (P) matrix for the time interval dt */ Pmat(pmat, d->time[obsno] - d->time[obsno-1], newintens, qm->npars, qm->ivector, qm->nst, (d->obstype[obsno] == OBS_EXACT), qm->analyticp, qm->iso, qm->perm, qm->qperm, 0); @@ -231,10 +234,15 @@ } if (T[MI(i,j,qm->nst)] < 0) T[MI(i,j,qm->nst)] = 0; newp[j] = newp[j] + cump[i]*T[MI(i,j,qm->nst)]; -/* printf("%4.3f, ", pmat[MI(i,j,qm->nst)]); */ +#ifdef DEBUG2 + printf("%4.3f, ", pmat[MI(i,j,qm->nst)]); +#endif } -/* printf("\n"); */ - +#ifdef DEBUG2 + printf("\n"); + printf("newp: "); + printf("%lf, ", newp[j]); +#endif } /* re-scale the likelihood at each step to prevent it getting too small and underflowing */ /* while cumulatively recording the log scale factor */ @@ -288,8 +296,18 @@ GetCensored((double)d->obs[obsno], cm, &nc, &curr); update_likhidden(curr, nc, obsno, d, qm, qcm, hm, cump, newp, &lweight); } - for (i = 0, lik = 0; i < qm->nst; ++i) +#ifdef DEBUG2 + printf("cump: "); +#endif + for (i = 0, lik = 0; i < qm->nst; ++i) { +#ifdef DEBUG2 + printf("%lf, ", cump[i]); +#endif lik = lik + cump[i]; + } +#ifdef DEBUG2 + printf("\n"); +#endif Free(curr); Free(cump); Free(newp); Free(pout); Free(newpars); Free(newinitp); /* Transform the likelihood back to the proper scale */ return -2*(log(lik) - lweight); @@ -488,6 +506,13 @@ GetOutcomeProb(pout, curr, nc, newpars, hm, qm, d->obstrue[i]); #ifdef VITDEBUG for (tru=0;trunpars; ++k) printf("intens[%d] = %f", k, qm->intens[k]); printf("\n"); + for (k=0; kncovs[0]; ++k) printf("cov[%d] = %f", k, d->cov[MI(i-1,k,d->nobs)]); printf("\n"); + for (k=0; knpars; ++k) printf("newintens[%d] = %f", k, newintens[k]); printf("\n"); + for (j = 0; j < qm->nst; ++j) { + for (k=0; knpars[j]; ++k) printf("hpar[%d] = %f", k, hm->pars[k]); + printf("\n"); + } #endif Pmat(pmat, dt, newintens, qm->npars, qm->ivector, qm->nst, (d->obstype[i] == OBS_EXACT), qm->analyticp, qm->iso, qm->perm, qm->qperm, 0); diff -Nru r-cran-msm-0.9.7/tests/boot.R r-cran-msm-1.0/tests/boot.R --- r-cran-msm-0.9.7/tests/boot.R 2009-11-06 18:02:49.000000000 +0000 +++ r-cran-msm-1.0/tests/boot.R 2010-11-12 18:24:08.000000000 +0000 @@ -40,23 +40,23 @@ ## multicore test - boot.msm <- function(x, stat=pmatrix.msm, B=1000, file=NULL, cores=1){ - boot.list <- vector(B, mode="list") - if (!is.null(x$call$subject)) x$call$subject <- substitute(subject.name) - if (!is.null(x$call$obstype)) x$call$obstype <- substitute(obstype.name) - if (!is.null(x$call$obstrue)) x$call$obstrue <- substitute(obstrue.name) - boot.fn <- function(dummy){ - boot.data <- if (x$hmodel$hidden || x$cmodel$ncens) bootdata.subject.msm(x) else bootdata.trans.msm(x) - x$call$data <- substitute(boot.data) - res <- try(eval(x$call)) - if (!is.null(stat)) - res <- stat(res) - res - } - if (require(multicore) && cores>1) - boot.list <- mclapply(1:B, boot.fn, mc.cores=cores) - else boot.list <- lapply(1:B, boot.fn) - } +## boot.msm <- function(x, stat=pmatrix.msm, B=1000, file=NULL, cores=1){ +## boot.list <- vector(B, mode="list") +## if (!is.null(x$call$subject)) x$call$subject <- substitute(subject.name) +## if (!is.null(x$call$obstype)) x$call$obstype <- substitute(obstype.name) +## if (!is.null(x$call$obstrue)) x$call$obstrue <- substitute(obstrue.name) +## boot.fn <- function(dummy){ +## boot.data <- if (x$hmodel$hidden || x$cmodel$ncens) bootdata.subject.msm(x) else bootdata.trans.msm(x) +## x$call$data <- substitute(boot.data) +## res <- try(eval(x$call)) +## if (!is.null(stat)) +## res <- stat(res) +## res +## } +## if (require(multicore) && cores>1) +## boot.list <- mclapply(1:B, boot.fn, mc.cores=cores) +## else boot.list <- lapply(1:B, boot.fn) +## } psor.msm <- msm(state ~ months, subject=ptnum, data=psor, qmatrix = psor.q, covariates = ~ollwsdrt+hieffusn, constraint = diff -Nru r-cran-msm-0.9.7/tests/miscnew.R r-cran-msm-1.0/tests/miscnew.R --- r-cran-msm-0.9.7/tests/miscnew.R 2008-03-17 18:48:33.000000000 +0000 +++ r-cran-msm-1.0/tests/miscnew.R 2010-11-12 18:24:08.000000000 +0000 @@ -34,8 +34,14 @@ ## Covs on misc probs, new way, with hmodel +## HUH FIXME Why does centering change the likelihood? +## Different intial values! Go through and fix all these. +## Or is it sensible to have different initial liks for same pars if fixedpars=TRUE/FALSE? +## any need for this change? just allows the subset trick in viterbi +## could just document that covariates will be changed, unless center=FALSE. + misccovnew.msm <- msm(state ~ years, subject = PTNUM, data = cav, - qmatrix = oneway4.q, death = 4, fixedpars=TRUE, + qmatrix = oneway4.q, death = 4, fixedpars=TRUE, center=TRUE, hmodel=list( hmmCat(prob=c(0.9, 0.1, 0, 0)), hmmCat(prob=c(0.1, 0.8, 0.1, 0)), diff -Nru r-cran-msm-0.9.7/tests/misc.R r-cran-msm-1.0/tests/misc.R --- r-cran-msm-0.9.7/tests/misc.R 2008-07-25 14:22:41.000000000 +0000 +++ r-cran-msm-1.0/tests/misc.R 2010-11-09 18:40:44.000000000 +0000 @@ -101,6 +101,18 @@ vit <- viterbi.msm(misc.msm)[viterbi.msm(misc.msm)$subject==100063,] stopifnot(isTRUE(all.equal(c(1, 1, 1, 1, 2, 2, 2, 2, 2, 2), vit$fitted, tol=1e-06))) +### TEST VITERBI ON SUBSET WITH FIXEDPARS BUG REPORT + + pt <- 100046 + for (pt in unique(cav$PTNUM)){ + subs <- cav[cav$PTNUM==pt,] + x <- viterbi.msm(misc.msm); x <- x[x$subject==pt,] + miscfix.msm <- msm(state ~ years, subject = PTNUM, data = subs, qmatrix = qmatrix.msm(misc.msm)$estimates, ematrix=ematrix.msm(misc.msm)$estimates, death = 4, fixedpars=TRUE) + y <- viterbi.msm(miscfix.msm) + print(pt) + stopifnot(all(x$fitted==y$fitted)) + } + odds <- odds.msm(misccov.msm) # stopifnot(isTRUE(all.equal(0.924920277547759, odds$dage[1,2], tol=1e-06))) # stopifnot(isTRUE(all.equal(0.9350691888108385, odds$dage[2,2], tol=1e-06))) diff -Nru r-cran-msm-0.9.7/tests/simple.R r-cran-msm-1.0/tests/simple.R --- r-cran-msm-0.9.7/tests/simple.R 2010-01-14 18:12:12.000000000 +0000 +++ r-cran-msm-1.0/tests/simple.R 2010-11-24 16:01:18.000000000 +0000 @@ -110,6 +110,19 @@ stopifnot(isTRUE(all.equal(1114.89946121717, psor.msm$minus2loglik, tol=1e-06))) stopifnot(isTRUE(all.equal(0.0953882330391683, qmatrix.msm(psor.msm)$estimates[1,2], tol=1e-03))) +## center=FALSE +psor.nocen.msm <- msm(state ~ months, subject=ptnum, data=psor, + qmatrix = psor.q, covariates = ~ollwsdrt+hieffusn, + constraint = list(hieffusn=c(1,1,1),ollwsdrt=c(1,1,2)), + fixedpars=FALSE, center=FALSE, control = list(REPORT=1,trace=2), method="BFGS") +## $baseline and component should obey center +stopifnot(isTRUE(all.equal(psor.nocen.msm$Qmatrices$baseline, qmatrix.msm(psor.nocen.msm, covariates=0, ci="none")))) +stopifnot(isTRUE(all.equal(psor.msm$Qmatrices$baseline, qmatrix.msm(psor.msm, covariates="mean", ci="none")))) +stopifnot(isTRUE(all.equal(psor.nocen.msm$sojourn, sojourn.msm(psor.nocen.msm, covariates=0)))) +stopifnot(isTRUE(all.equal(psor.msm$sojourn, sojourn.msm(psor.msm, covariates="mean")))) +stopifnot(isTRUE(all.equal(exp(psor.nocen.msm$Qmatrices$logbaseline[c(5,10,15)]), psor.nocen.msm$Qmatrices$baseline[c(5,10,15)]))) +stopifnot(isTRUE(all.equal(exp(psor.msm$Qmatrices$logbaseline[c(5,10,15)]), psor.msm$Qmatrices$baseline[c(5,10,15)]))) + ## crudeinits/geninits with missing values for state / time psor2 <- psor; @@ -330,10 +343,15 @@ persp(psor.msm, np=5) image(psor.msm) plot.prevalence.msm(psor.msm) + plot.prevalence.msm(psor.msm, col.obs="green", col.exp="brown", lty.obs=2, lty.exp=3, lwd.obs=2, lwd.exp=4, cex.axis=2) ## plotprog.msm(state ~ months, subject=ptnum, data=psor, legend.pos=c(20,0.99), lwd=3, xlab="Months") + plotprog.msm(state ~ months, subject=ptnum, data=psor, legend.pos=c(20,0.99), lwd=1, mark.time=FALSE, xlab="Months") plot.survfit.msm(psor.msm, lwd=3, xlab="Months") + plot.survfit.msm(psor.msm, lwd=3, lwd.surv=3, mark.time=FALSE, col.surv="green", lty.surv=4, xlab="Months") + plot.survfit.msm(psor.msm, lwd=3, lwd.surv=3, ci="normal", B=4, col.surv="green", lty.surv=4, xlab="Months", + lty.ci=1, lwd.ci=2, col.ci="purple") } @@ -450,7 +468,10 @@ qmatrix = psor.q, covariates = ~factor(ollwsdrt) + hieffusn, # covinits=list(hieffusn = c(0.5, 0.1, 0), ollwsdrt=c(0.2, 0.1, -0.1)), constraint = list(hieffusn=c(1,1,1)), # censor=99, censor.states=c(3,4), fixedpars=FALSE, control = list(REPORT=1,trace=2), method="BFGS")) - pmatrix.msm(psor2.msm, ci="boot", B=5, covariates=list(hieffusn=0, "factor(ollwsdrt)"="foo")) + pmatrix.msm(psor2.msm, ci="boot", B=50, covariates=list(hieffusn=0, "factor(ollwsdrt)"="foo")) + pmatrix.msm(psor2.msm, ci="boot", B=3) + boot.msm(psor2.msm, stat=function(x)qmatrix.msm(x)$estimates[1,2], B=3) + qmatrix.msm(psor2.msm, covariates=list(hieffusn=0, "factor(ollwsdrt)"="foo")) qmatrix.msm(psor2.msm, ci="normal", B=50, covariates=list(hieffusn=0, "factor(ollwsdrt)"="foo")) qmatrix.msm(psor2.msm, ci="boot", B=3, covariates=list(hieffusn=0, "factor(ollwsdrt)"="foo"))