diff -Nru r-cran-epi-2.32/CHANGES r-cran-epi-2.37/CHANGES --- r-cran-epi-2.32/CHANGES 2018-08-19 10:44:37.000000000 +0000 +++ r-cran-epi-2.37/CHANGES 2019-05-23 06:58:06.000000000 +0000 @@ -1,13 +1,85 @@ +Changes in 2.37 + +o ci.surv added to facilitate calculation of survival function from a + smooth parametrtic model + +o Bug in apc.LCa - wrong labeling of resulting list element has been + rectified. + +Changes in 2.36 + +o Lexis gains an 'notes' argument allowing to silence notes + +o Documentation of bootLexis upadated and groomed + +o Description of units of measurment in diet dataset corrected + +o Problems with default reference period/cohort in apc.fit fixed + +o apc.lines gains an argument shade= enabling shaded confidence limits + of estimated curves. + +o Scaling of the trend internally in projection.ip to avoid numerical + problems in solve() + +o Default vertical scaling of the box size in boxes.MS increased a bit + +Changes in 2.35 + +o Major re-write of vignette of follow-up data + +o glm.Lexis example updated + +o Lexis now gives a note and not a warning when dropping persons with + no follow-up time + +o Epi.Rd included as man page, allowing easy access to the index via ?Epi. + +o simLexis updated to cope with poisreg family in modeling input + +o timeSince function added to tell which time scales are defined as + time since entry to a state. Allows for a slightly more compact + output from summary.Lexis, when timeScales=TRUE. + +o such timescales are NA before entry. In modeling we may want these to + be 0, hence the model tsNA20 (timescale NAs to zero), that does this + for such time scales. And optionally for all. + +o improved readbility of error message from ci.lin + +Changes in 2.34 + +o cleanup of ci.lin to exploit the (post 3.5.0) behaviour of coef/vcov + which are now in sync + +o apc.fit nomenclature for drift extraction has been changed from + "weighted" to "Y" in concert with BxCs recent paper. + +Changes in 2.33 + +o Fixed bug in gen.exp causing a crash when purchase dates were + indestinguishable from FU-dates by match() but not by %in%. + +o Previously, apc.fit with dist="bin" returned odds in the Age + component. Now returns probabilities. + +o Argument names and -order have been changed for glm.Lexis, gam.Lexis + and coxph.Lexis, they are now x, from, to and formula. + +o Utility functions to describe state characteristics and + relationships have been added: absorbing(), transient(), + preceding(), succeeding(), before() and after() + Changes in 2.32 o gen.exp: time since drug cessation was only computed for latest cessation, not for intermittent ones. Rectified and expanded set of - result variables keeping track of patients' status are added. + result variables that keep track of patients' status are added. gen.exp also gains an argument, rm.dose, indicating whether doses purchased should be counted if time-allocation of purchases are limited by the push.max argument. Several disjoint FU-periods for - persone are now allowed. + persons are now allowed. o The changed functionality of vcov() returning 0s for aliased parameters (instead of omitting them) is now used and hence ci.lin Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/data/DMepi.rda and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/data/DMepi.rda differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/data/pr.rda and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/data/pr.rda differ diff -Nru r-cran-epi-2.32/debian/changelog r-cran-epi-2.37/debian/changelog --- r-cran-epi-2.32/debian/changelog 2018-10-07 17:46:08.000000000 +0000 +++ r-cran-epi-2.37/debian/changelog 2019-08-10 12:59:38.000000000 +0000 @@ -1,3 +1,18 @@ +r-cran-epi (2.37-1ubuntu1) eoan; urgency=medium + + * Skip vignettes/flup.rnw and vignettes/simLexis.rnw that + fail on Ubuntu autopkgtest infrastructure, see #882382 + + -- Graham Inggs Sat, 10 Aug 2019 12:59:38 +0000 + +r-cran-epi (2.37-1) unstable; urgency=medium + + * New upstream version + * debhelper 12 + * Standards-Version: 4.4.0 + + -- Andreas Tille Wed, 17 Jul 2019 08:16:02 +0200 + r-cran-epi (2.32-2) unstable; urgency=medium * Test-Depends: r-cran-popepi diff -Nru r-cran-epi-2.32/debian/compat r-cran-epi-2.37/debian/compat --- r-cran-epi-2.32/debian/compat 2018-10-07 17:46:08.000000000 +0000 +++ r-cran-epi-2.37/debian/compat 2019-07-17 06:16:02.000000000 +0000 @@ -1 +1 @@ -11 +12 diff -Nru r-cran-epi-2.32/debian/control r-cran-epi-2.37/debian/control --- r-cran-epi-2.32/debian/control 2018-10-07 17:46:08.000000000 +0000 +++ r-cran-epi-2.37/debian/control 2019-08-10 11:51:43.000000000 +0000 @@ -1,9 +1,10 @@ Source: r-cran-epi -Maintainer: Debian R Packages Maintainers +Maintainer: Ubuntu Developers +XSBC-Original-Maintainer: Debian R Packages Maintainers Uploaders: Andreas Tille Section: gnu-r Priority: optional -Build-Depends: debhelper (>= 11~), +Build-Depends: debhelper (>= 12~), dh-r, r-base-dev, r-cran-cmprsk, @@ -16,7 +17,7 @@ r-cran-data.table, r-cran-zoo, r-cran-mgcv -Standards-Version: 4.2.0 +Standards-Version: 4.4.0 Vcs-Browser: https://salsa.debian.org/r-pkg-team/r-cran-epi Vcs-Git: https://salsa.debian.org/r-pkg-team/r-cran-epi.git Homepage: https://cran.r-project.org/package=Epi diff -Nru r-cran-epi-2.32/debian/docs r-cran-epi-2.37/debian/docs --- r-cran-epi-2.32/debian/docs 2018-10-07 17:46:08.000000000 +0000 +++ r-cran-epi-2.37/debian/docs 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -debian/README.test -debian/tests/run-unit-test diff -Nru r-cran-epi-2.32/debian/examples r-cran-epi-2.37/debian/examples --- r-cran-epi-2.32/debian/examples 2018-10-07 17:46:08.000000000 +0000 +++ r-cran-epi-2.37/debian/examples 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -vignettes diff -Nru r-cran-epi-2.32/debian/README.test r-cran-epi-2.37/debian/README.test --- r-cran-epi-2.32/debian/README.test 2018-10-07 17:46:08.000000000 +0000 +++ r-cran-epi-2.37/debian/README.test 1970-01-01 00:00:00.000000000 +0000 @@ -1,8 +0,0 @@ -Notes on how this package can be tested. -──────────────────────────────────────── - -To run the unit tests provided by the package you can do - - sh run-unit-test - -in this directory. diff -Nru r-cran-epi-2.32/debian/tests/control r-cran-epi-2.37/debian/tests/control --- r-cran-epi-2.32/debian/tests/control 2018-10-07 17:46:08.000000000 +0000 +++ r-cran-epi-2.37/debian/tests/control 2019-07-17 06:16:02.000000000 +0000 @@ -1,3 +1,3 @@ -Tests: run-unit-test +Tests: vignette Depends: @, r-cran-survival, r-cran-popepi Restrictions: allow-stderr diff -Nru r-cran-epi-2.32/debian/tests/run-unit-test r-cran-epi-2.37/debian/tests/run-unit-test --- r-cran-epi-2.32/debian/tests/run-unit-test 2018-10-07 17:46:08.000000000 +0000 +++ r-cran-epi-2.37/debian/tests/run-unit-test 1970-01-01 00:00:00.000000000 +0000 @@ -1,25 +0,0 @@ -#!/bin/sh -e -oname=epi -pkg=r-cran-`echo $oname | tr '[A-Z]' '[a-z]'` - -if [ "$AUTOPKGTEST_TMP" = "" ] ; then - AUTOPKGTEST_TMP=`mktemp -d /tmp/${pkg}-test.XXXXXX` -fi -cd $AUTOPKGTEST_TMP -cp -a /usr/share/doc/$pkg/examples/vignettes . -gunzip vignettes/*.gz -mkdir R -cp /usr/lib/R/site-library/Epi/doc/simLexis.R R/. -cd vignettes -for rnw in `ls *.[rR]nw` ; do -rfile=`echo $rnw | sed 's/\.[rR]nw/.R/'` -if [ "$rfile" = "simLexis.R" -o "$rfile" = "yll.R" ] ; then - >&2 echo "Vignete $rnw runs infinite loop - ignoring this for the test" - continue -fi -R --no-save <= 3.0.0), utils +Depends: R (>= 3.5.0), utils Imports: cmprsk, etm, splines, MASS, survival, plyr, Matrix, numDeriv, data.table, zoo, mgcv Suggests: mstate, nlme, lme4, popEpi @@ -24,11 +24,11 @@ License: GPL-2 URL: http://BendixCarstensen.com/Epi/ NeedsCompilation: yes -Packaged: 2018-08-23 04:16:08 UTC; bendix +Packaged: 2019-05-23 08:26:35 UTC; bendix Author: Bendix Carstensen [aut, cre], Martyn Plummer [aut], Esa Laara [ctb], Michael Hills [ctb] Maintainer: Bendix Carstensen Repository: CRAN -Date/Publication: 2018-08-23 05:44:26 UTC +Date/Publication: 2019-05-23 17:30:03 UTC Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/inst/doc/etm.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/inst/doc/etm.pdf differ diff -Nru r-cran-epi-2.32/inst/doc/etm.R r-cran-epi-2.37/inst/doc/etm.R --- r-cran-epi-2.32/inst/doc/etm.R 2018-07-03 19:33:52.000000000 +0000 +++ r-cran-epi-2.37/inst/doc/etm.R 1970-01-01 00:00:00.000000000 +0000 @@ -1,684 +0,0 @@ -### R code from vignette source 'etm' -### Encoding: UTF-8 - -################################################### -### code chunk number 1: etm.rnw:42-52 -################################################### -options( width=90 ) -library( Epi ) -library( etm ) -library( survival ) -library( splines ) -print( sessionInfo(), l=F ) -# load( url("https://www.jstatsoft.org/index.php/jss/article/downloadSuppFile/v038i04/dli.data.rda") ) -# save( dli.data, file="./dli.Rda" ) -load( file="./dli.Rda" ) -str( dli.data ) - - -################################################### -### code chunk number 2: etm.rnw:55-56 -################################################### -subset( dli.data, id %in% c(2,5,388,511,531,600) ) - - -################################################### -### code chunk number 3: etm.rnw:61-62 -################################################### -subset( dli.data, c(TRUE,diff(time)<0 & diff(id)==0 ) ) - - -################################################### -### code chunk number 4: etm.rnw:65-66 -################################################### -dli.data[dli.data$id==531 & dli.data$from==2,"time"] <- 0.45 - - -################################################### -### code chunk number 5: etm.rnw:69-70 -################################################### -with( dli.data, table( from, to ) ) - - -################################################### -### code chunk number 6: etm.rnw:81-87 -################################################### -dli <- transform( dli.data, - to = as.numeric(to), - ti = ave( time, id, FUN=function( x ) c(0,x[-length(x)]) ) ) -dli$to <- ifelse( is.na(dli$to), dli$from, dli$to ) -subset( dli, id %in% c(5,388,511,600) ) -with( dli, table( from, to ) ) - - -################################################### -### code chunk number 7: etm.rnw:102-108 -################################################### -tmp <- subset(dli,to==2 & from!=to)[,c("id","time")] -names(tmp)[2] <- "tr" -dli <- merge( dli, tmp, all.x=TRUE ) -dli$tr <- with( dli, ifelse( ti-tr>=0, ti-tr, NA ) ) -subset(dli,id %in% c(600,603,608) ) -str( dli ) - - -################################################### -### code chunk number 8: etm.rnw:111-122 -################################################### -# DLI -tmp <- subset(dli,to==4 & from!=to)[,c("id","time")] -names(tmp)[2] <- "tD" -dli <- merge( dli, tmp, all.x=TRUE ) -dli$tD <- with( dli, ifelse( ti-tD>=0, ti-tD, NA ) ) -# Rm2 -tmp <- subset(dli,to==6 & from!=to)[,c("id","time")] -names(tmp)[2] <- "tR" -dli <- merge( dli, tmp, all.x=TRUE ) -dli$tR <- with( dli, ifelse( ti-tR>=0, ti-tR, NA ) ) -subset(dli,id %in% c(600,603,608) ) - - -################################################### -### code chunk number 9: etm.rnw:129-142 -################################################### -state.names <- c("Rem" , "D/Rem", - "Rel" , "D/Rel", - "DLI" , "D/DLI", - "Rem2", "D/Rem2", - "Rel2") -dli <- Lexis( entry = list( tfi=ti, tfr=tr, tfD=tD, tfR=tR ), - entry.status = factor( from, levels=0:8, labels=state.names ), - exit = list( tfi=time ), - exit.status = factor( to, levels=0:8, labels=state.names ), - id = id, - data = dli ) -print.data.frame( -subset( dli, id %in% c(600,603,608) )[,1:13], digits=3) - - -################################################### -### code chunk number 10: etm.rnw:157-158 -################################################### -summary( dli ) - - -################################################### -### code chunk number 11: etm.rnw:164-165 (eval = FALSE) -################################################### -## boxes( dli ) - - -################################################### -### code chunk number 12: boxes -################################################### -n.st <- nlevels( dli$lex.Cst ) -# Colors for stages reflecting severity -st.col <- rep(c("limegreen","darkorange","yellow3","forestgreen","red"),each=2)[-10] -st.col[1:4*2] <- rgb( t(col2rgb(st.col[1:4*2])*0.5 + 255*0.5), max=255 ) -boxes( dli, wmult=1.1, hmult=1.2, lwd=4, - boxpos=list(x=c(10,30,30,50,50,70,70,90,90), - y=c(25, 8,42,25,59,42,76,59,93)), - scale.R=100, show.BE=TRUE, DR.sep=c(" (",")"), - col.bg=st.col, col.txt=rep(c("white","black"),5)[-10], - col.border=c("white","black")[c(1,2,1,2,1,2,1,2,1)] ) - - -################################################### -### code chunk number 13: stack -################################################### -st.dli <- stack( dli ) -str( st.dli ) -round( -cbind( "Original"=with( dli, tapply( lex.dur, lex.Cst, sum ) ), - "Stacked" =with( st.dli, tapply( lex.dur, lex.Cst, sum ) ) ), 1 ) -round( xtabs( cbind( lex.Fail, lex.dur ) ~ lex.Tr, data = st.dli ), 1 ) - - -################################################### -### code chunk number 14: Subset-mort -################################################### -dd.dli <- subset( st.dli, lex.Tr %in% levels(lex.Tr)[c(1,3,5,7)] ) -table( dd.dli$lex.Tr ) - - -################################################### -### code chunk number 15: etm.rnw:222-224 -################################################### -dd.dli$lex.Tr <- factor( dd.dli$lex.Tr ) -round( xtabs( cbind( lex.Fail, lex.dur ) ~ lex.Tr, data=dd.dli ), 1 ) - - -################################################### -### code chunk number 16: Cox-0 -################################################### -str( dd.dli ) -c0 <- coxph( Surv(tfi,tfi+lex.dur,lex.Fail) ~ lex.Tr, data=dd.dli ) -summary( c0 ) - - -################################################### -### code chunk number 17: Cox-1 -################################################### -dd.dli$Rst <- Relevel( dd.dli$lex.Tr, list(Remis=c(1,4),Relapse=2:3) ) -with( dd.dli, table( lex.Tr, Rst ) ) -c1 <- coxph( Surv(tfi,tfi+lex.dur,lex.Fail) ~ Rst, data=dd.dli ) -summary( c1 ) -anova( c0, c1, test="Chisq" ) - - -################################################### -### code chunk number 18: split -################################################### -sp.dli <- splitLexis( dli, breaks=seq(0,50,1/10) ) -print.data.frame( subset( sp.dli, id==603 )[,1:13], digits=3 ) -summary( sp.dli ) - - -################################################### -### code chunk number 19: stack-1 -################################################### -ss.dli <- stack( sp.dli ) -m.dli <- subset( ss.dli, lex.Tr %in% levels(lex.Tr)[c(1,3,5,7)] ) -m.dli$lex.Tr <- factor( m.dli$lex.Tr ) -m.dli$Rst <- Relevel( m.dli$lex.Tr, list(Remis=c(1,4),Relapse=2:3) ) -with( m.dli, ftable( Rst, lex.Tr, lex.Fail, col.vars=3 ) ) - - -################################################### -### code chunk number 20: tab-split-stack -################################################### -YDtab <- xtabs( cbind(lex.dur,lex.Fail) ~ I(floor(tfi*10)/10) + Rst, - data=subset(m.dli,tfi<2.1) ) -dnam <- dimnames(YDtab) -dnam[[3]] <- c("Y","D","rate") -YDrate <- array( NA, dimnames=dnam, dim=sapply(dnam,length) ) -YDrate[,,1:2] <- YDtab -YDrate[,,3] <- YDrate[,,2]/YDrate[,,1]*1000 -round( ftable( YDrate, row.vars=1 ), 1 ) - - -################################################### -### code chunk number 21: m-knots -################################################### -( m.kn <- with( subset( m.dli, lex.Fail ), - c(0,quantile( tfi+lex.dur, probs=1:4/5 )) ) ) - - -################################################### -### code chunk number 22: Poisson-0 -################################################### -m0 <- glm( lex.Fail ~ Ns( tfi, knots=m.kn ) + lex.Tr, - family = poisson, offset=log(lex.dur), data=m.dli ) -summary( m0 ) -round(ci.lin( m0, E=T ),3) - - -################################################### -### code chunk number 23: Poisson-Cox-comp -################################################### -round(cbind( ci.lin( m0, subset="->" )[,1:2], - ci.lin( c0 )[,1:2] ), 4 ) -round(cbind( ci.lin( m0, subset="->" )[,1:2]/ - ci.lin( c0 )[,1:2] ), 4 ) - - -################################################### -### code chunk number 24: Poisson-1 -################################################### -m1 <- update( m0, . ~ . - lex.Tr + Rst ) -anova( m0, m1, test="Chisq" ) - - -################################################### -### code chunk number 25: time-interaction -################################################### -m0i <- update( m0, . ~ . + Ns( tfi, knots=m.kn ):lex.Tr ) -m1i <- update( m1, . ~ . + Ns( tfi, knots=m.kn ):Rst ) -anova( m0, m0i, test="Chisq" ) -anova( m1, m1i, test="Chisq" ) - - -################################################### -### code chunk number 26: Pairwise -################################################### -anova( m0i, m1i, test="Chisq" ) - - -################################################### -### code chunk number 27: i-parms -################################################### -ci.lin( m1i )[,1:2] - - -################################################### -### code chunk number 28: pred-mort -################################################### -pr.pt <- seq(0,10,0.02) - n.pt <- length( pr.pt ) -CM <- cbind( 1, Ns( pr.pt, knots=m.kn ) ) - - -################################################### -### code chunk number 29: mortality-rates -################################################### -Rem.Dead <- ci.exp( m1i, ctr.mat=cbind(CM,CM*0) ) -Rel.Dead <- ci.exp( m1i, ctr.mat=cbind(CM,CM ) ) - - -################################################### -### code chunk number 30: rates -################################################### -par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6 ) -matplot( pr.pt, cbind( Rem.Dead, Rel.Dead )*100, log="y", ylim=c(1,400), - type="l", lty=1, lwd=c(3,1,1), las=1, - col=rep(clr<-c("limegreen","red"),each=3), - ylab="Mortality rates per 100 PY", xlab="Time since entry" ) -text( c(10,10), 400*c(0.8,1), c("Remission","Relapse"), col=clr, - adj=c(1,1) ) - - -################################################### -### code chunk number 31: withRR -################################################### -RR.RmRl <- ci.exp( m1i, ctr.mat=cbind(CM*0,CM) ) -par( mar=c(3,4,1,4), mgp=c(3,1,0)/1.6 ) -matplot( pr.pt, cbind( Rem.Dead, Rel.Dead )*100, log="y", ylim=c(1,400), - type="l", lty=1, lwd=c(3,1,1), - col=rep(c("limegreen","red"),each=3), - ylab="Rate per 100 PY", xlab="Time since entry", yaxt="n" ) -abline( h=10 ) -text( c(10,10), 400*c(0.8,1), - c("Remission","Relapse"), col=c("limegreen","red"), - adj=c(1,1), font=2 ) -matlines( pr.pt, RR.RmRl*10, type="l", lty=1, lwd=c(3,1,1), col="blue" ) -yt <- outer(c(1,2,5),c(1,10,100,1000),"*") -axis( side=2, at=yt, labels=yt , las=1 ) -axis( side=4, at=yt, labels=yt/10, las=1 ) -mtext( "Rate ratio", side=4, line=2, col="blue" ) - - -################################################### -### code chunk number 32: events -################################################### -str( ss.dli ) -with( subset( ss.dli, lex.Fail & lex.Tr %in% levels(lex.Tr)[-c(1,3,5,7)] ), - dotchart( tfi+lex.dur, groups=factor(lex.Tr), pch=16, cex=0.8) ) - - -################################################### -### code chunk number 33: progress-subset -################################################### -p.dli <- subset( ss.dli, lex.Tr %in% levels(lex.Tr)[-c(1,3,5,7)] ) -p.dli$lex.Tr <- factor( p.dli$lex.Tr ) -( p.kn <-with( subset( p.dli, lex.Fail ), - c(0,quantile( tfi+lex.dur, probs=1:3/4 )) ) ) - - -################################################### -### code chunk number 34: etm.rnw:452-453 -################################################### -p.dli$lex.Tr <- Relevel( p.dli$lex.Tr, list("Rem->Rel"=c(1,4), 2, 3 ) ) - - -################################################### -### code chunk number 35: pr-modl -################################################### -p2 <- glm( lex.Fail ~ Ns( tfi, knots=p.kn, intercept=TRUE ):lex.Tr -1 , - family = poisson, offset=log(lex.dur), data=p.dli ) -summary( p2 ) -round(ci.exp( p2, Exp=FALSE ), 2 ) - - -################################################### -### code chunk number 36: pr-ctr-mat -################################################### -CP <- Ns( pr.pt, knots=p.kn, intercept=TRUE ) -Rem.Rel <- ci.exp( p2, subset="TrRem", ctr.mat=CP ) -Rel.DLI <- ci.exp( p2, subset="TrRel", ctr.mat=CP ) -DLI.Rem <- ci.exp( p2, subset="TrDLI", ctr.mat=CP ) - - -################################################### -### code chunk number 37: pr-rates -################################################### -par( mar=c(3,4,1,4), mgp=c(3,1,0)/1.6 ) -matplot( pr.pt, cbind(Rem.Rel,Rel.DLI,DLI.Rem)*100, log="y", ylim=c(1,500), - type="l", lty=1, lwd=c(3,1,1), - col=rep(c("limegreen","red","blue"),each=3), - ylab="Progression rate per 100 PY", xlab="Time since entry" ) -par(font=2) -legend( "topright", legend=c("Remission -> Relapse", - "Relapse -> DLI", - "DLI -> Remission"), bty="n", - text.col=c("limegreen","red","blue"), xjust=1 ) - - -################################################### -### code chunk number 38: pr-pt-interval -################################################### -pr.pt[1:5] -unique( diff(pr.pt) ) -( il <- mean( diff(pr.pt) ) ) - - -################################################### -### code chunk number 39: tr-mat -################################################### -states <- levels( dli$lex.Cst ) -dnam <- list(From=states, To=states, time=pr.pt ) -AR <- array( 0, dimnames=dnam, dim=sapply(dnam,length) ) -str( AR ) - - -################################################### -### code chunk number 40: fill-tr -################################################### -AR["Rem" ,"D/Rem" ,] <- Rem.Dead[,1] -AR["Rem2","D/Rem2",] <- Rem.Dead[,1] -AR["Rel" ,"D/Rel" ,] <- Rel.Dead[,1] -AR["DLI" ,"D/DLI" ,] <- Rel.Dead[,1] -AR["Rem" ,"Rel" ,] <- Rem.Rel[,1] -AR["Rem2","Rel2" ,] <- Rem.Rel[,1] -AR["Rel" ,"DLI" ,] <- Rel.DLI[,1] -AR["DLI" ,"Rem2" ,] <- DLI.Rem[,1] - - -################################################### -### code chunk number 41: diag-tr -################################################### -SI <- apply(AR,c(1,3),sum) - - -################################################### -### code chunk number 42: tp-mat -################################################### -AP <- AR -for( i in 1:(dim(AP)[3]) ) - { - AP[,,i] <- AR[,,i]/SI[,i] * (1-exp(-SI[,i]*il)) - diag(AP[,,i]) <- exp(-SI[,i]*il) - } -AP[is.na(AP)] <- 0 -round( SI[,1], 4 ) -round( ftable( AR[,,50+1:2], row.vars=c(3,1)), 4 ) -round( ftable( AP[,,50+1:2], row.vars=c(3,1)), 4 ) - - -################################################### -### code chunk number 43: state-occ -################################################### -pi0 <- c(1,rep(0,8)) -ST <- SI*0 -ST[,1] <- pi0 %*% AP[,,1] -for( i in 2:dim(ST)[2] ) ST[,i] <- ST[,i-1] %*% AP[,,i] -str( ST ) -round(t(ST[,1:10]),3) - - -################################################### -### code chunk number 44: state-plot -################################################### -matplot( pr.pt, t(ST)[,-1], type="l", ylim=c(0,0.5), las=1, - lty=1, lwd=2, col=rainbow(8), - xlab="Time since 1st remission", ylab="Fraction of patients" ) -legend( "topleft", legend=rownames(ST)[-1], - bty="n", lty=1, lwd=2, col=rainbow(8), - ncol=1 ) - - -################################################### -### code chunk number 45: cum-states -################################################### -cST <- apply(ST,2,cumsum) - - -################################################### -### code chunk number 46: etm.rnw:612-613 (eval = FALSE) -################################################### -## matplot( pr.pt, t(cST), type="l", lty=1, lwd=2 ) - - -################################################### -### code chunk number 47: state-occ-1 -################################################### -perm <- c(1,3,5,7,9,2,4,6,8) -dimnames(ST)[[1]][perm] -matplot( pr.pt, t(apply(ST[perm,],2,cumsum)), type="l", lty=1, lwd=2 ) - - -################################################### -### code chunk number 48: state-occ-2 -################################################### -endpos <- cumsum(ST[perm,n.pt]) - ST[perm,n.pt]/2 -matplot( pr.pt, t(apply(ST[perm,],2,cumsum)), - type="l", lty=1, lwd=2, col=gray(c(0.6,0)[c(1,1,1,1,2,1,1,1,1)]), - xlim=c(0,11.5), ylim=c(0,1), yaxs="i" ) -text( 10.05, endpos, dimnames(ST)[[1]][perm], font=2, adj=0 ) - - -################################################### -### code chunk number 49: state-occ-fun -################################################### -state.occ <- -function( perm=1:n.st, line=NULL ) -{ -clr <- st.col[perm] - -mindist <- 1/40 -endpos <- cumsum(ST[perm,n.pt]) - ST[perm,n.pt]/2 -minpos <- (1:n.st-0.5)*mindist -maxpos <- 1-rev(minpos) -endpos <- pmin( pmax( endpos, minpos ), maxpos ) - -stkcrv <- t(apply(ST[perm,],2,cumsum)) -matplot( pr.pt, stkcrv, - type="l", lty=1, lwd=1, col="transparent", - xlim=c(0,11.5), ylim=c(0,1), yaxs="i", xaxs="i", bty="n", - xlab="Time since 1st remission", ylab="Fraction of patients" ) -text( 10.05, endpos, dimnames(ST)[[1]][perm], font=2, adj=0, col=clr ) -for( i in 9:1 ) -polygon( c( pr.pt, rev(pr.pt) ), - c( stkcrv[,i], if(i>1) rev(stkcrv[,i-1]) else rep(0,n.pt) ), - col=clr[i], border=clr[i] ) -if( !is.null(line) ) -matlines( pr.pt, stkcrv[,line], type="l", lty=1, lwd=3, col="black" ) -} - - -################################################### -### code chunk number 50: state-occ-col1 -################################################### -state.occ( perm=1:n.st ) - - -################################################### -### code chunk number 51: state-occ-col2 -################################################### -state.occ( perm=c(1,3,5,7,9,2,4,6,8), line=5 ) - - -################################################### -### code chunk number 52: etm.rnw:719-744 -################################################### -get.rates <- function( N=10 ) -{ -Rem.Dead <- ci.lin( m1i, ctr.mat=cbind(CM,CM*0), sample=N ) -Rel.Dead <- ci.lin( m1i, ctr.mat=cbind(CM,CM ), sample=N ) -Rem.Rel <- ci.lin( p2 , subset="TrRem", ctr.mat=CP , sample=N ) -Rel.DLI <- ci.lin( p2 , subset="TrRel", ctr.mat=CP , sample=N ) -DLI.Rem <- ci.lin( p2 , subset="TrDLI", ctr.mat=CP , sample=N ) -states <- levels( dli$lex.Cst ) -dnam <- list( From = states, - To = states, - time = pr.pt, - sample = 1:N ) -AR <- AP <- array( 0, dimnames=dnam, dim=sapply(dnam,length) ) -AR["Rem" ,"D/Rem" ,,] <- exp(Rem.Dead) -AR["Rem2","D/Rem2",,] <- exp(Rem.Dead) -AR["Rel" ,"D/Rel" ,,] <- exp(Rel.Dead) -AR["DLI" ,"D/DLI" ,,] <- exp(Rel.Dead) -AR["Rem" ,"Rel" ,,] <- exp(Rem.Rel ) -AR["Rem2","Rel2" ,,] <- exp(Rem.Rel ) -AR["Rel" ,"DLI" ,,] <- exp(Rel.DLI ) -AR["DLI" ,"Rem2" ,,] <- exp(DLI.Rem ) -AR -} -system.time( AR <- get.rates(1000) ) -str( AR ) - - -################################################### -### code chunk number 53: AR-AP -################################################### -trans.prob <- -function( AR ) -{ -# A matrix for transition probabilities: -AP <- AR * 0 -# Compute the interval length for the give rates -il <- mean( diff( as.numeric( dimnames(AR)[[3]] ) ) ) -# Sum of the Intensities out of each state -SI <- apply(AR,c(1,3,4),sum) -for( i in 1:dim(AR)[3] ) # Loop over times -for( j in 1:dim(AR)[4] ) # Loop over samples - { - AP[,,i,j] <- AR[,,i,j]/SI[,i,j] * (1-exp(-SI[,i,j]*il)) - diag(AP[,,i,j]) <- exp(-SI[,i,j]*il) - } -AP[is.na(AP)] <- 0 -invisible( AP ) -} -system.time( AP <- trans.prob( AR ) ) - - -################################################### -### code chunk number 54: AP-ST -################################################### -pi0 <- rep(1:0,c(1,n.st-1)) -ST <- AP[1,,,]*0 -names( dimnames(ST) )[1] <- "State" -str( ST ) -system.time( -for( j in 1:dim(ST)[3] ) - { - ST[,1,j] <- pi0 %*% AP[,,1,j] - for( i in 2:dim(ST)[2] ) ST[,i,j] <- ST[,i-1,j] %*% AP[,,i,j] - } - ) -str( ST ) - - -################################################### -### code chunk number 55: def-st-oc-sim -################################################### -state.occ.sim <- -function( perm = 1:n.st, - pct = c(5,95), - cicol = rgb(1/9,1/9,1/9,1/9), - line = NULL ) -{ -clr <- st.col[perm] -cST <- apply(ST[perm,,],2:3,cumsum) -cST <- apply(cST,1:2,quantile,probs=c(pct/100,0.5) ) -mindist <- 1/40 -endpos <- cST["50%",,n.pt] - diff(c(0,cST["50%",,n.pt]))/2 -minpos <- (1:n.st-0.5)*mindist -maxpos <- 1-rev(minpos) -endpos <- pmin( pmax( endpos, minpos ), maxpos ) - -matplot( pr.pt, t(cST["50%",,]), - type="n", # lty=1, lwd=2, col=gray(c(0.6,0)[c(1,2,1,2,1,2,1,2,1)]), - xlim=c(0,11.5), ylim=c(0,1), yaxs="i", xaxs="i", bty="n", - xlab="Time since 1st remission", ylab="Fraction of patients" ) -text( 10.05, endpos, dimnames(ST)[[1]][perm], font=2, adj=0, col=clr ) -for( i in n.st:1 ) -polygon( c( pr.pt, rev(pr.pt) ), - c( cST["50%",i,], if(i>1) rev(cST["50%",i-1,]) else rep(0,dim(cST)[3]) ), - col=clr[i], border=clr[i] ) -for( i in n.st:1 ) -polygon( c( pr.pt, rev(pr.pt) ), - c( cST[1,i,], rev(cST[2,i,]) ), - col=cicol, border=cicol ) -if( !is.null(line) ) matlines( pr.pt, t(cST["50%",c(line,NA),]), - type="l", lty=1, lwd=3, col="black" ) -} - - -################################################### -### code chunk number 56: states-ci1 -################################################### -par(mar=c(3,3,1,1),mgp=c(3,1,0)/1.6,las=1) -state.occ.sim( pct=c(5,95) ) - - -################################################### -### code chunk number 57: states-ci2 -################################################### -par(mar=c(3,3,1,1),mgp=c(3,1,0)/1.6,las=1) -state.occ.sim( perm=c(1,3,5,7,9,2,4,6,8), pct=c(5,95), line=5 ) - - -################################################### -### code chunk number 58: states-cirem -################################################### -par(mar=c(3,3,1,1),mgp=c(3,1,0)/1.6,las=1) -state.occ.sim( perm=c(1,7,3,5,9,4,6,8,2), pct=c(5,95), line=c(2,5) ) - - -################################################### -### code chunk number 59: parm-CLFS -################################################### -CLFS <- apply( ST["Rem",,]+ST["Rem2",,], 1, quantile, probs=c(500,25,975)/1000 ) -str( CLFS ) - - -################################################### -### code chunk number 60: CLFS -################################################### -par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6 ) -matplot( pr.pt, t(CLFS), - type="l", lty=1,lwd=c(3,1,1), col="black", - ylim=c(0,1), - ylab="P(CLFS)", xlab="Time since first remission" ) - - -################################################### -### code chunk number 61: etm-ex -################################################### -tra <- matrix(FALSE, 9, 9, - dimnames = list(as.character(0:8), as.character(0:8))) -tra[1, 2:3] <- TRUE -tra[3, 4:5] <- TRUE -tra[5, 6:7] <- TRUE -tra[7, 8:9] <- TRUE -### computation of the transition probabilities -dli.etm <- etm::etm(dli.data, as.character(0:8), tra, "cens", s = 0) -str(dli.etm) -### Computation of the clfs + var clfs -clfs <- dli.etm$est["0", "0", ] + - dli.etm$est["0", "6", ] -var.clfs <- dli.etm$cov["0 0", "0 0", ] + - dli.etm$cov["0 6", "0 6", ] + - 2 * dli.etm$cov["0 0", "0 6", ] -## computation of the 95% CIs + plot -ciplus <- clfs + qnorm(0.975) * sqrt(var.clfs) -cimoins <- clfs - qnorm(0.975) * sqrt(var.clfs) - -plot(dli.etm$time, clfs, type = "s", lwd=3, - bty = "n", ylim = c(0, 1), yaxs="i", las=1, - xlab = "Time since 1st remission (years)", - ylab = "P(CLFS)" ) -lines(dli.etm$time, cimoins, lty = 3, type = "s") -lines(dli.etm$time, ciplus , lty = 3, type = "s") -matlines( pr.pt, t(CLFS), lty=c(1,3,3), lwd=c(3,1,1), col="red" ) - - -################################################### -### code chunk number 62: etm-ex2 -################################################### -xdli.etm <- etm( dli ) -str( xdli.etm ) -plot( xdli.etm, col=rainbow(15), lty=1, lwd=3, - legend.pos="topright", bty="n", yaxs="i" ) - - Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/inst/doc/flup.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/inst/doc/flup.pdf differ diff -Nru r-cran-epi-2.32/inst/doc/flup.R r-cran-epi-2.37/inst/doc/flup.R --- r-cran-epi-2.32/inst/doc/flup.R 2018-05-03 14:34:59.000000000 +0000 +++ r-cran-epi-2.37/inst/doc/flup.R 2019-05-23 08:21:05.000000000 +0000 @@ -2,7 +2,7 @@ ### Encoding: UTF-8 ################################################### -### code chunk number 1: flup.rnw:5-8 +### code chunk number 1: flup.rnw:22-25 ################################################### options( width=90, SweaveHooks=list( fig=function() @@ -10,315 +10,547 @@ ################################################### -### code chunk number 2: flup.rnw:101-103 +### code chunk number 2: flup.rnw:128-130 ################################################### library(Epi) print( sessionInfo(), l=F ) ################################################### -### code chunk number 3: flup.rnw:109-116 +### code chunk number 3: flup.rnw:139-148 ################################################### -data( nickel ) -nicL <- Lexis( entry = list( per=agein+dob, - age=agein, - tfh=agein-age1st ), - exit = list( age=ageout ), - exit.status = ( icd %in% c(162,163) )*1, - data = nickel ) +data( DMlate ) +head( DMlate ) +dmL <- Lexis( entry = list( per=dodm, + age=dodm-dobth, + tfD=0 ), + exit = list( per=dox ), + exit.status = factor( !is.na(dodth), labels=c("DM","Dead") ), + data = DMlate ) +timeScales(dmL) ################################################### -### code chunk number 4: flup.rnw:126-129 +### code chunk number 4: flup.rnw:171-173 ################################################### -str( nickel ) -str( nicL ) -head( nicL ) +str( dmL ) +head( dmL )[,1:10] ################################################### -### code chunk number 5: flup.rnw:138-139 +### code chunk number 5: flup.rnw:189-190 ################################################### -summary( nicL ) +summary.Lexis( dmL, timeScales=TRUE ) ################################################### -### code chunk number 6: nicL1 +### code chunk number 6: dmL1 ################################################### -plot( nicL ) +plot( dmL ) ################################################### -### code chunk number 7: nicL2 +### code chunk number 7: dmL2 ################################################### par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6 ) -plot( nicL, 1:2, lwd=1, col=c("blue","red")[(nicL$exp>0)+1], +plot( dmL, 1:2, lwd=1, col=c("blue","red")[dmL$sex], grid=TRUE, lty.grid=1, col.grid=gray(0.7), - xlim=1900+c(0,90), xaxs="i", - ylim= 10+c(0,90), yaxs="i", las=1 ) -points( nicL, 1:2, pch=c(NA,3)[nicL$lex.Xst+1], - col="lightgray", lwd=3, cex=1.5 ) -points( nicL, 1:2, pch=c(NA,3)[nicL$lex.Xst+1], - col=c("blue","red")[(nicL$exp>0)+1], lwd=1, cex=1.5 ) + xlim=1960+c(0,60), xaxs="i", + ylim= 40+c(0,60), yaxs="i", las=1 ) +points( dmL, 1:2, pch=c(NA,3)[dmL$lex.Xst], + col="lightgray", lwd=3, cex=0.3 ) +points( dmL, 1:2, pch=c(NA,3)[dmL$lex.Xst], + col=c("blue","red")[dmL$sex], lwd=1, cex=0.3 ) +box(bty='o') ################################################### -### code chunk number 8: flup.rnw:193-196 +### code chunk number 8: flup.rnw:246-249 ################################################### -nicS1 <- splitLexis( nicL, "age", breaks=seq(0,100,10) ) -summary( nicL ) -summary( nicS1 ) +dmS1 <- splitLexis( dmL, "age", breaks=seq(0,100,5) ) +summary( dmL ) +summary( dmS1 ) ################################################### -### code chunk number 9: flup.rnw:204-205 +### code chunk number 9: flup.rnw:259-262 ################################################### -round( subset( nicS1, id %in% 8:10 ), 2 ) +wh.id <- c(9,27,52,484) +subset( dmL , lex.id %in% wh.id )[,1:10] +subset( dmS1, lex.id %in% wh.id )[,1:10] ################################################### -### code chunk number 10: flup.rnw:211-213 +### code chunk number 10: flup.rnw:268-270 ################################################### -nicS2 <- splitLexis( nicS1, "tfh", breaks=c(0,1,5,10,20,30,100) ) -round( subset( nicS2, id %in% 8:10 ), 2 ) +dmS2 <- splitLexis( dmS1, "tfD", breaks=c(0,1,2,5,10,20,30,40) ) +subset( dmS2, lex.id %in% wh.id )[,1:10] ################################################### -### code chunk number 11: flup.rnw:218-223 +### code chunk number 11: flup.rnw:275-281 ################################################### library( popEpi ) -nicM <- splitMulti( nicL, age = seq(0,100,10), - tfh = c(0,1,5,10,20,30,100) ) -summary( nicS2 ) -summary( nicM ) +dmM <- splitMulti( dmL, age = seq(0,100,5), + tfD = c(0,1,2,5,10,20,30,40), + drop = FALSE ) +summary( dmS2 ) +summary( dmM ) ################################################### -### code chunk number 12: flup.rnw:227-230 +### code chunk number 12: flup.rnw:292-295 ################################################### -identical( nicS2, nicM ) -class( nicS2 ) -class( nicM ) +identical( dmS2, dmM ) +class( dmS2 ) +class( dmM ) ################################################### -### code chunk number 13: flup.rnw:250-258 +### code chunk number 13: flup.rnw:325-335 ################################################### -timeBand( nicM, "age", "middle" )[1:20] -# For nice printing and column labelling use the data.frame() function: -data.frame( nicS2[,c("lex.id","per","age","tfh","lex.dur")], - mid.age=timeBand( nicS2, "age", "middle" ), - mid.t=timeBand( nicS2, "tfh", "middle" ), - left.t=timeBand( nicS2, "tfh", "left" ), - right.t=timeBand( nicS2, "tfh", "right" ), - fact.t=timeBand( nicS2, "tfh", "factor" ) )[1:20,] +whc <- c(names(dmL)[1:7],"dodm","doins") # WHich Columns do we want to see? +subset( dmL, lex.id %in% wh.id )[,whc] +dmC <- cutLexis( data = dmL, + cut = dmL$doins, + timescale = "per", + new.state = "Ins", + new.scale = "tfI", + precursor.states = "DM" ) +whc <- c(names(dmL)[1:8],"doins") # WHich Columns do we want to see? +subset( dmC, lex.id %in% wh.id )[,whc] ################################################### -### code chunk number 14: flup.rnw:278-279 +### code chunk number 14: flup.rnw:353-354 ################################################### -summary( (nicS2$age-nicS2$tfh) - nicS2$age1st ) +timeSince( dmC ) ################################################### -### code chunk number 15: flup.rnw:284-286 +### code chunk number 15: flup.rnw:363-370 ################################################### -summary( timeBand( nicS2, "age", "middle" ) - - timeBand( nicS2, "tfh", "middle" ) - nicS2$age1st ) +dmS2C <- cutLexis( data = dmS2, + cut = dmS2$doins, + timescale = "per", + new.state = "Ins", + new.scale = "tfI", + precursor.states = "DM" ) +subset( dmS2C, lex.id %in% wh.id )[,whc] ################################################### -### code chunk number 16: flup.rnw:308-316 +### code chunk number 16: flup.rnw:394-395 ################################################### -subset( nicL, id %in% 8:10 ) -agehi <- nicL$age1st + 50 / nicL$exposure -nicC <- cutLexis( data = nicL, - cut = agehi, - timescale = "age", - new.state = 2, - precursor.states = 0 ) -subset( nicC, id %in% 8:10 ) +summary( dmS2C, timeScales=TRUE ) ################################################### -### code chunk number 17: flup.rnw:323-331 +### code chunk number 17: box1 ################################################### -subset( nicS2, id %in% 8:10 ) -agehi <- nicS2$age1st + 50 / nicS2$exposure -nicS2C <- cutLexis( data = nicS2, - cut = agehi, - timescale = "age", - new.state = 2, - precursor.states = 0 ) -subset( nicS2C, id %in% 8:10 ) +boxes( dmC, boxpos=TRUE, scale.R=1000, show.BE=TRUE ) ################################################### -### code chunk number 18: flup.rnw:390-392 +### code chunk number 18: flup.rnw:438-446 ################################################### -( a.kn <- with( subset( nicM, lex.Xst==1 ), quantile( age+lex.dur, (1:5-0.5)/5 ) ) ) -( t.kn <- with( subset( nicM, lex.Xst==1 ), quantile( tfh+lex.dur, (1:5-0.5)/5 ) ) ) +timeBand( dmS2C, "age", "middle" )[1:10] +# For nice printing and column labelling we use the data.frame() function: +data.frame( dmS2C[,c("per","age","tfD","lex.dur")], + mid.age=timeBand( dmS2C, "age", "middle" ), + mid.t=timeBand( dmS2C, "tfD", "middle" ), + left.t=timeBand( dmS2C, "tfD", "left" ), + right.t=timeBand( dmS2C, "tfD", "right" ), + fact.t=timeBand( dmS2C, "tfD", "factor" ) )[1:15,] ################################################### -### code chunk number 19: flup.rnw:405-410 +### code chunk number 19: flup.rnw:481-482 ################################################### -ma <- glm( (lex.Xst==1) ~ Ns(age,knots=a.kn), - family = poisson, - offset = log(lex.dur), - data = nicM ) +summary( (dmS2$age-dmS2$tfD) - (dmS2$dodm-dmS2$dobth) ) + + +################################################### +### code chunk number 20: flup.rnw:487-489 +################################################### +summary( timeBand( dmS2, "age", "middle" ) - + timeBand( dmS2, "tfD", "middle" ) - (dmS2$dodm-dmS2$dobth) ) + + +################################################### +### code chunk number 21: flup.rnw:594-596 +################################################### +dmCs <- splitMulti( dmC, age = seq(0,110,1/4) ) +summary( dmCs, t=T ) + + +################################################### +### code chunk number 22: flup.rnw:618-623 +################################################### +( a.kn <- with( subset( dmCs, lex.Xst=="Dead" ), + quantile( age+lex.dur, (1:5-0.5)/5 ) ) ) +( i.kn <- c( 0, + with( subset( dmCs, lex.Xst=="Dead" & lex.Cst=="Ins" ), + quantile( tfI+lex.dur, (1:4)/5 ) ) ) ) + + +################################################### +### code chunk number 23: flup.rnw:639-644 +################################################### +ma <- glm( (lex.Xst=="Dead") ~ Ns(age,knots=a.kn), + family = poisson, + offset = log(lex.dur), + data = dmCs ) summary( ma ) ################################################### -### code chunk number 20: pr-a +### code chunk number 24: flup.rnw:663-666 +################################################### +Ma <- glm( cbind(lex.Xst=="Dead",lex.dur) ~ Ns(age,knots=a.kn), + family = poisreg, data = dmCs ) +summary( Ma ) + + +################################################### +### code chunk number 25: flup.rnw:674-676 +################################################### +Xa <- glm.Lexis( dmCs, from="DM", to="Dead", + formula = ~ Ns(age,knots=a.kn) ) + + +################################################### +### code chunk number 26: flup.rnw:679-680 +################################################### +attr( Xa, "Lexis" ) + + +################################################### +### code chunk number 27: flup.rnw:689-690 +################################################### +xa <- glm.Lexis( dmCs, formula = ~ Ns(age,knots=a.kn) ) + + +################################################### +### code chunk number 28: flup.rnw:693-694 +################################################### +c( deviance(ma), deviance(Ma), deviance(Xa), deviance(xa) ) + + +################################################### +### code chunk number 29: pr-a ################################################### nd <- data.frame( age=40:85, lex.dur=1000 ) -pr.a <- ci.pred( ma, newdata = nd ) -matplot( nd$age, pr.a, - type="l", lty=1, col=1, lwd=c(3,1,1), - log="y", xlab="Age (years)", - ylab="Lunng cancer mortality per 1000 PY") +pr.0 <- ci.pred( ma, newdata = nd ) # mortality per 100 PY +pr.a <- ci.pred( Ma, newdata = nd )*1000 # mortality per 100 PY +summary(pr.0/pr.a) +matshade( nd$age, pr.a, plot=TRUE, + type="l", lty=1, + log="y", xlab="Age (years)", + ylab="DM mortality per 1000 PY") + + +################################################### +### code chunk number 30: flup.rnw:740-744 +################################################### +pm <- glm( cbind(lex.Xst=="Dead",lex.dur) ~ Ns(age,knots=a.kn) + + lex.Cst + sex, + family=poisreg, data = dmCs ) +round( ci.exp( pm ), 3 ) + + +################################################### +### code chunk number 31: flup.rnw:758-762 +################################################### +pm <- glm( cbind(lex.Xst=="Dead",lex.dur) ~ Ns(age,knots=a.kn) + + Ns(tfI,knots=i.kn) + + lex.Cst + sex, + family=poisreg, data = tsNA20(dmCs) ) + + +################################################### +### code chunk number 32: flup.rnw:768-774 +################################################### +Pm <- glm.Lexis( tsNA20(dmCs), + form = ~ Ns(age,knots=a.kn) + + Ns(tfI,knots=i.kn) + + lex.Cst + sex ) +c( deviance(Pm), deviance(pm) ) +identical( model.matrix(Pm), model.matrix(pm) ) + + +################################################### +### code chunk number 33: flup.rnw:780-781 +################################################### +round( ci.exp( Pm, subset="ex" ), 3 ) + + +################################################### +### code chunk number 34: ins-time +################################################### +ndI <- data.frame( expand.grid( tfI=c(NA,seq(0,15,0.1)), + ai=seq(40,80,10) ), + sex="M", + lex.Cst="Ins" ) +ndI <- transform( ndI, age=ai+tfI ) +head( ndI ) +ndA <- data.frame( age= seq(40,100,0.1), tfI=0, lex.Cst="DM", sex="M" ) +pri <- ci.pred( Pm, ndI ) * 1000 +pra <- ci.pred( Pm, ndA ) * 1000 +matshade( ndI$age, pri, plot=TRUE, las=1, + xlab="Age (years)", ylab="DM mortality per 1000 PY", + log="y", lty=1, col="blue" ) +matshade( ndA$age, pra ) + + +################################################### +### code chunk number 35: flup.rnw:818-822 +################################################### +library( survival ) +cm <- coxph( Surv(age,age+lex.dur,lex.Xst=="Dead") ~ + Ns(tfI,knots=i.kn) + lex.Cst + sex, + data = tsNA20(dmCs) ) + + +################################################### +### code chunk number 36: flup.rnw:826-829 +################################################### +Cm <- coxph.Lexis( tsNA20(dmCs), + form= age ~ Ns(tfI,knots=i.kn) + lex.Cst + sex ) +cbind( ci.exp( cm ), ci.exp( Cm ) ) + + +################################################### +### code chunk number 37: flup.rnw:838-841 +################################################### +round( cbind( ci.exp( Pm ), + rbind( matrix(NA,5,3), + ci.exp( cm )[-6,] ) ), 3 ) + + +################################################### +### code chunk number 38: Ieff +################################################### +nd <- data.frame( tfI=seq(0,15,,151), lex.Cst="Ins", sex="M" ) +nr <- data.frame( tfI= 2 , lex.Cst="Ins", sex="M" ) +ppr <- ci.exp( pm, list(nd,nr), xvars="age" ) +cpr <- ci.exp( cm, list(nd,nr) ) +par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +matshade( nd$tfI, cbind(ppr,cpr), plot=T, + lty=c(1,2), log="y", + xlab="Time since insulin (years)", ylab="Rate ratio") +abline( h=1, lty=3 ) + + +################################################### +### code chunk number 39: IeffR +################################################### +nd <- data.frame( tfI=seq(0,15,,151), lex.Cst="Ins", sex="M" ) +nr <- data.frame( tfI= 0 , lex.Cst="DM" , sex="M" ) +ppr <- ci.exp( pm, list(nd,nr), xvars="age" ) +cpr <- ci.exp( cm, list(nd,nr) ) +par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +matshade( nd$tfI, cbind(ppr,cpr), + xlab="Time since insulin (years)", + ylab="Rate ratio relative to non-Insulin", + lty=c(1,2), log="y", plot=T ) + + +################################################### +### code chunk number 40: flup.rnw:946-951 +################################################### +imx <- glm.Lexis( tsNA20(dmCs), + formula = ~ Ns(age ,knots=a.kn) + + Ns( tfI,knots=i.kn) + + Ns(age-tfI,knots=a.kn) + + lex.Cst + sex ) + + +################################################### +### code chunk number 41: flup.rnw:961-971 +################################################### +Im <- glm.Lexis( tsNA20(dmCs), + formula = ~ Ns(age ,knots=a.kn) + + Ns( tfI,knots=i.kn) + + Ns((age-tfI)*(lex.Cst=="Ins"),knots=a.kn) + + lex.Cst + sex ) +im <- glm.Lexis( tsNA20(dmCs), + formula = ~ Ns(age ,knots=a.kn) + + Ns( tfI,knots=i.kn) + + lex.Cst:Ns(age-tfI,knots=a.kn) + + lex.Cst + sex ) + + +################################################### +### code chunk number 42: flup.rnw:986-987 +################################################### +anova( imx, Im, im, test='Chisq') ################################################### -### code chunk number 21: flup.rnw:445-447 +### code chunk number 43: dur-int ################################################### -mat <- update( ma, . ~ . + Ns(tfh,knots=t.kn) ) -summary( mat ) +pxi <- ci.pred( imx, ndI ) +pxa <- ci.pred( imx, ndA ) +pIi <- ci.pred( Im , ndI ) +pIa <- ci.pred( Im , ndA ) +pii <- ci.pred( im , ndI ) +pia <- ci.pred( im , ndA ) +par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +matshade( ndI$age, cbind( pxi, pIi, pii)*1000, plot=T, log="y", + xlab="Age", ylab="Mortality per 1000 PY", + lty=1, lwd=2, col=c("blue","forestgreen","red"), alpha=0.1 ) +matshade( ndA$age, cbind( pxa, pIa, pia)*1000, + lty=1, lwd=2, col=c("blue","forestgreen","red"), alpha=0.1 ) ################################################### -### code chunk number 22: flup.rnw:457-458 +### code chunk number 44: dur-int-RR ################################################### -summary( nickel$age1st ) +ndR <- transform( ndI, tfI=0, lex.Cst="DM" ) +cbind( head(ndI), head(ndR) ) +Rxi <- ci.exp( imx, list(ndI,ndR) ) +Rii <- ci.exp( im , list(ndI,ndR) ) +RIi <- ci.exp( Im , list(ndI,ndR) ) +par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +matshade( ndI$age, cbind( Rxi, RIi, Rii), plot=T, log="y", + xlab="Age (years)", ylab="Rate ratio vs, non-Insulin", + lty=1, lwd=2, col=c("blue","forestgreen","red"), alpha=0.1 ) +abline( h=1 ) +abline( h=ci.exp(imx,subset="lex.Cst")[,1], lty="25", col="blue" ) ################################################### -### code chunk number 23: flup.rnw:462-468 +### code chunk number 45: splint ################################################### -nd <- data.frame( expand.grid( age=c(20:90,NA), age1st=seq(15,45,10) ) ) -nd <- transform( nd, tfh = ifelse( age > age1st, age-age1st, NA ), - lex.dur = 1000 ) -# makes no sense to have age < age1st -nd <- transform( nd, age = ifelse( age > age1st, age, NA ) ) -head( nd ) +gm <- glm.Lexis( tsNA20(dmCs), + formula = ~ Ns(age,knots=a.kn) + + Ns(tfI,knots=i.kn) + + lex.Cst:Ns(age,knots=a.kn):Ns(tfI,knots=i.kn) + + lex.Cst + sex ) +pgi <- ci.pred( gm, ndI ) +pga <- ci.pred( gm, ndA ) +par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +matshade( ndI$age, cbind( pgi, pii )*1000, plot=T, + lty=c("solid","21"), lend="butt", lwd=2, log="y", + xlab="Age (years)", ylab="Mortality rates per 1000 PY", + alpha=c(0.2,0.1), col=c("black","red") ) +matshade( ndA$age, cbind( pga, pia )*1000, + lty=c("solid","21"), lend="butt", lwd=2, + alpha=c(0.2,0.1), col=c("black","red") ) ################################################### -### code chunk number 24: pr-at +### code chunk number 46: RR-int ################################################### -pr.at <- ci.pred( mat, newdata = nd ) -matplot( nd$age, pr.at, - type="l", lty=1, col=1, lwd=c(3,1,1), - log="y", xlab="Age (years)", - ylab="Lunng cancer mortality per 1000 PY") +ndR <- transform( ndI, lex.Cst="DM", tfI=0 ) +iRR <- ci.exp( im, ctr.mat=list(ndI,ndR) ) +gRR <- ci.exp( gm, ctr.mat=list(ndI,ndR) ) +par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +matshade( ndI$age, cbind(gRR,iRR), lty=1, log="y", plot=TRUE, + xlab="Age (years)", ylab="Rate ratio: Ins vs. non-Ins", + col=c("black","red") ) +abline( h=1 ) ################################################### -### code chunk number 25: flup.rnw:492-493 +### code chunk number 47: flup.rnw:1110-1123 ################################################### -anova( ma, mat, test="Chisq" ) +dmd <- glm.Lexis( dmCs, + from="DM", to="Dead", + formula = ~ Ns(age,knots=a.kn) + + sex ) +ind <- glm.Lexis( dmCs, + from="Ins", to="Dead", + formula = ~ Ns(age,knots=a.kn) + + Ns(tfI,knots=i.kn) + + Ns(age-tfI,knots=a.kn) + + sex ) +ini <- ci.pred( ind, ndI ) +dmi <- ci.pred( dmd, ndI ) +dma <- ci.pred( dmd, ndA ) ################################################### -### code chunk number 26: flup.rnw:504-508 +### code chunk number 48: sep-mort ################################################### -( f.kn <- with( subset( nicM, lex.Xst==1 ), quantile( age1st, (1:5-0.5)/5 ) ) ) -maf <- update( ma, . ~ . + Ns(age1st,knots=f.kn) ) -summary( maf ) -anova( maf, ma, mat, test="Chisq" ) +par(mar=c(3,3,1,1),mgp=c(3,1,0)/1.6,las=1,bty="n") +matshade( ndI$age, ini*1000, plot=TRUE, log="y", + xlab="Age (years)", ylab="Mortality rates per 1000 PY", + lwd=2, col="red" ) +matshade( ndA$age, dma*1000, + lwd=2, col="black" ) ################################################### -### code chunk number 27: pr-at-af +### code chunk number 49: sep-HR ################################################### -pr.af <- ci.pred( maf, newdata = nd ) -matplot( nd$age, pr.at, - type="l", lty=1, col=1, lwd=c(3,1,1), - log="y", xlab="Age (years)", - ylab="Lunng cancer mortality per 1000 PY") -matlines( nd$age, pr.af, - type="l", lty=1, col=2, lwd=c(3,0,0) ) +par(mar=c(3,3,1,1),mgp=c(3,1,0)/1.6,las=1,bty="n") +matshade( ndI$age, ci.ratio(ini,dmi), plot=TRUE, log="y", + xlab="Age (years)", ylab="RR insulin vs. no insulin", + lwd=2, col="red" ) +abline( h=1 ) ################################################### -### code chunk number 28: flup.rnw:536-547 +### code chunk number 50: flup.rnw:1169-1177 ################################################### -maft <- update( mat, . ~ . + Ns(age1st,knots=f.kn) ) -summary( maft ) -mft <- update( maft, . ~ . - Ns(age,knots=a.kn) ) -mf <- update( maf , . ~ . - Ns(age,knots=a.kn) ) -mt <- update( mat , . ~ . - Ns(age,knots=a.kn) ) -allp <- anova( maft, mat, ma, maf, mf, mft, mt, mat, - maf, maft, mft, - test="Chisq" ) -mall <- as.matrix( allp ) -cbind( mod = c("maft","mat","ma","maf","mf","mft","mt","mat","maf","maft","mft"), - round( allp[,1:5], 3 ) ) +dmCs <- cutLexis( data = dmS2, + cut = dmS2$doins, + timescale = "per", + new.state = "Ins", + new.scale = "tfI", + precursor.states = "DM", + split.states = TRUE ) +summary( dmCs ) ################################################### -### code chunk number 29: flup.rnw:562-571 +### code chunk number 51: box4 ################################################### -data( nickel ) -nicL <- Lexis( entry = list( per = agein+dob, - age = agein, - tfh = agein-age1st ), - exit = list( age = ageout ), - exit.status = ( icd > 0 ) + ( icd %in% c(162,163) ), - data = nickel ) -summary( nicL ) -subset( nicL, id %in% 8:10 ) +boxes( dmCs, boxpos=list(x=c(15,15,85,85), + y=c(85,15,85,15)), + scale.R=1000, show.BE=TRUE ) ################################################### -### code chunk number 30: flup.rnw:576-585 +### code chunk number 52: flup.rnw:1207-1215 ################################################### -nicL <- Lexis( entry = list( per = agein+dob, - age = agein, - tfh = agein-age1st ), - exit = list( age = ageout ), - exit.status = ( icd > 0 ) + ( icd %in% c(162,163) ), - data = nickel, - states = c("Alive","D.oth","D.lung") ) -summary( nicL ) -str( nicL ) +dmM <- mcutLexis( dmL, + timescale = "per", + wh = c("doins","dooad"), + new.states = c("Ins","OAD"), + new.scales = c("tfI","tfO"), + precursor.states = "DM", + ties.resolve = TRUE ) +summary( dmM, t=T ) ################################################### -### code chunk number 31: flup.rnw:597-605 +### code chunk number 53: flup.rnw:1219-1225 ################################################### -nicL$agehi <- nicL$age1st + 50 / nicL$exposure -nicC <- cutLexis( data = nicL, - cut = nicL$agehi, - timescale = "age", - new.state = "HiExp", - precursor.states = "Alive" ) -subset( nicC, id %in% 8:10 ) -summary( nicC, scale=1000 ) +wh <- c(subset(dmM,lex.Cst=="Ins-OAD")$lex.id[1:2], + subset(dmM,lex.Cst=="OAD-Ins")$lex.id[1:2]) +options( width=110 ) +print( subset( dmM, lex.id %in% wh )[,c('lex.id',names(dmM[1:8]),c("doins","dooad"))], + digits=6, row.names=FALSE ) +summary( dmM, t=T ) ################################################### -### code chunk number 32: flup.rnw:624-633 +### code chunk number 54: mbox ################################################### -nicC <- cutLexis( data = nicL, - cut = nicL$agehi, - timescale = "age", - new.state = "HiExp", - new.scale = "tfe", - split.states = TRUE, - precursor.states = "Alive" ) -subset( nicC, id %in% 8:10 ) -summary( nicC, scale=1000, timeScales=TRUE ) +boxes( dmM, boxpos=list(x=c(15,80,40,40,85,85), + y=c(50,50,90,10,90,10)), + scale.R=1000, show.BE=TRUE ) ################################################### -### code chunk number 33: nic-box +### code chunk number 55: mboxr ################################################### -boxes( nicC, boxpos = list(x=c(10,10,80,80,80,80), - y=c(75,25,87,63,13,37)), - scale.Y = 1000, - show.BE = TRUE ) +summary( dmMr <- Relevel( dmM, list('OAD+Ins'=5:6), first=FALSE) ) +boxes( dmMr, boxpos=list(x=c(15,50,15,85,85), + y=c(85,50,15,85,15)), + scale.R=1000, show.BE=TRUE ) diff -Nru r-cran-epi-2.32/inst/doc/index.html r-cran-epi-2.37/inst/doc/index.html --- r-cran-epi-2.32/inst/doc/index.html 2018-03-11 15:34:29.000000000 +0000 +++ r-cran-epi-2.37/inst/doc/index.html 2019-02-17 15:20:33.000000000 +0000 @@ -5,7 +5,7 @@

Vignettes for the Epi package

-Here is the website for the -Epi package. -
-A list of published papers using -the Lexis macinery is -here. +

Other Epi package related stuff

+
    +
  • \pkg{Epi} has grown out of the course 'Statistical Practise in Epidemiology + with R', abbreviated SPE. +
  • The website for the Epi package. +
  • A list of reports and published papers using + the Lexis machinery is + here. +
Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/inst/doc/simLexis.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/inst/doc/simLexis.pdf differ diff -Nru r-cran-epi-2.32/inst/doc/simLexis.R r-cran-epi-2.37/inst/doc/simLexis.R --- r-cran-epi-2.32/inst/doc/simLexis.R 2018-05-03 14:36:26.000000000 +0000 +++ r-cran-epi-2.37/inst/doc/simLexis.R 2019-05-23 08:22:24.000000000 +0000 @@ -2,7 +2,7 @@ ### Encoding: UTF-8 ################################################### -### code chunk number 1: simLexis.rnw:24-27 +### code chunk number 1: simLexis.rnw:23-26 ################################################### options( width=90, SweaveHooks=list( fig=function() @@ -35,8 +35,7 @@ new.state = "Ins", new.scale = "t.Ins", split.states = TRUE ) -summary( dmi ) -str(dmi) +summary( dmi, timeScales=T ) ################################################### @@ -50,8 +49,8 @@ ################################################### ### code chunk number 6: split ################################################### -Si <- splitLexis( dmi, 0:30/2, "DMdur" ) -dim( Si ) +Si <- splitLexis( dmi, seq(0,20,1/4), "DMdur" ) +summary( Si ) print( subset( Si, lex.id==97 )[,1:10], digits=6 ) @@ -80,35 +79,48 @@ I(Per-2000) + sex, family=poisson, offset=log(lex.dur), data = subset(Si,lex.Cst=="DM") ) -DM.Dead <- glm( (lex.Xst=="Dead") ~ Ns( Age , knots=ad.kn ) + +ci.exp( DM.Ins ) +class( DM.Ins ) + + +################################################### +### code chunk number 9: simLexis.rnw:282-288 +################################################### +DM.Ins <- glm.Lexis( Si, from = "DM", to = "Ins", + formula = ~ Ns( Age , knots=ai.kn ) + + Ns( DMdur, knots=di.kn ) + + I(Per-2000) + sex ) +ci.exp( DM.Ins ) +class( DM.Ins ) + + +################################################### +### code chunk number 10: simLexis.rnw:293-302 +################################################### +DM.Dead <- glm.Lexis( Si, from = "DM", to = "Dead", + formula = ~ Ns( Age , knots=ad.kn ) + + Ns( DMdur, knots=dd.kn ) + + I(Per-2000) + sex ) +Ins.Dead <- glm.Lexis( Si, from = "Ins", + formula = ~ Ns( Age , knots=ad.kn ) + Ns( DMdur, knots=dd.kn ) + - I(Per-2000) + sex, - family=poisson, offset=log(lex.dur), - data = subset(Si,lex.Cst=="DM") ) -Ins.Dead <- glm( (lex.Xst=="Dead(Ins)") ~ Ns( Age , knots=ad.kn ) + - Ns( DMdur, knots=dd.kn ) + - Ns( t.Ins, knots=ti.kn ) + - I(Per-2000) + sex, - family=poisson, offset=log(lex.dur), - data = subset(Si,lex.Cst=="Ins") ) + Ns( t.Ins, knots=ti.kn ) + + I(Per-2000) + sex ) ################################################### -### code chunk number 9: prop-haz +### code chunk number 11: prop-haz ################################################### -with( Si, table(lex.Cst) ) -All.Dead <- glm( (lex.Xst %in% c("Dead(Ins)","Dead")) ~ - Ns( Age , knots=ad.kn ) + - Ns( DMdur, knots=dd.kn ) + - lex.Cst + - I(Per-2000) + sex, - family=poisson, offset=log(lex.dur), - data = Si ) +All.Dead <- glm.Lexis( Si, to = c("Dead(Ins)","Dead"), + formula = ~ Ns( Age , knots=ad.kn ) + + Ns( DMdur, knots=dd.kn ) + + lex.Cst + + I(Per-2000) + sex ) round( ci.exp( All.Dead ), 3 ) ################################################### -### code chunk number 10: get-dev +### code chunk number 12: get-dev ################################################### what <- c("null.deviance","df.null","deviance","df.residual") ( rD <- unlist( DM.Dead[what] ) ) @@ -118,7 +130,7 @@ ################################################### -### code chunk number 11: pr-array +### code chunk number 13: pr-array ################################################### pr.rates <- NArray( list( DMdur = seq(0,12,0.1), DMage = 4:7*10, @@ -129,19 +141,17 @@ ################################################### -### code chunk number 12: simLexis.rnw:382-383 +### code chunk number 14: mknd ################################################### -ci.pred +nd <- data.frame( DMdur = as.numeric( dimnames(pr.rates)[[1]] ), + lex.Cst = factor( 1, levels=1:4, + labels=levels(Si$lex.Cst) ), + sex = factor( 1, levels=1:2, labels=c("M","F")) ) ################################################### -### code chunk number 13: make-pred +### code chunk number 15: make-pred ################################################### -nd <- data.frame( DMdur = as.numeric( dimnames(pr.rates)[[1]] ), - lex.Cst = factor( 1, levels=1:4, - labels=levels(Si$lex.Cst) ), - sex = factor( 1, levels=1:2, labels=c("M","F")), - lex.dur = 1000 ) for( ia in dimnames(pr.rates)[[2]] ) { dnew <- transform( nd, Age = as.numeric(ia)+DMdur, @@ -161,22 +171,22 @@ ################################################### -### code chunk number 14: mort-int +### code chunk number 16: mort-int ################################################### par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1 ) plot( NA, xlim=c(40,82), ylim=c(5,300), bty="n", log="y", xlab="Age", ylab="Mortality rate per 1000 PY" ) abline( v=seq(40,80,5), h=outer(1:9,10^(0:2),"*"), col=gray(0.8) ) for( aa in 4:7*10 ) for( ii in 1:4 ) - matlines( aa+as.numeric(dimnames(pr.rates)[[1]]), - cbind( pr.rates[,paste(aa),ii,"DM/Ins",], - pr.rates[,paste(aa),ii,"All" ,] ), - type="l", lty=1, lwd=c(3,1,1), - col=rep(c("red","limegreen"),each=3) ) + matshade( aa+as.numeric(dimnames(pr.rates)[[1]]), + cbind( pr.rates[,paste(aa),ii,"DM/Ins",], + pr.rates[,paste(aa),ii,"All" ,] )*1000, + type="l", lty=1, lwd=2, + col=c("red","limegreen") ) ################################################### -### code chunk number 15: Tr +### code chunk number 17: Tr ################################################### Tr <- list( "DM" = list( "Ins" = DM.Ins, "Dead" = DM.Dead ), @@ -184,17 +194,13 @@ ################################################### -### code chunk number 16: make-ini +### code chunk number 18: make-ini ################################################### -str( Si[NULL,1:9] ) -ini <- subset(Si,FALSE,select=1:9) -str( ini ) -ini <- subset(Si,select=1:9)[NULL,] -str( ini ) +str( ini <- Si[NULL,1:9] ) ################################################### -### code chunk number 17: ini-fill +### code chunk number 19: ini-fill ################################################### ini[1:2,"lex.id"] <- 1:2 ini[1:2,"lex.Cst"] <- "DM" @@ -206,7 +212,7 @@ ################################################### -### code chunk number 18: simL +### code chunk number 20: simL ################################################### set.seed( 52381764 ) Nsim <- 5000 @@ -217,13 +223,13 @@ ################################################### -### code chunk number 19: sum-simL +### code chunk number 21: sum-simL ################################################### summary( simL, by="sex" ) ################################################### -### code chunk number 20: Tr.p-simP +### code chunk number 22: Tr.p-simP ################################################### Tr.p <- list( "DM" = list( "Ins" = DM.Ins, "Dead" = All.Dead ), @@ -236,7 +242,7 @@ ################################################### -### code chunk number 21: Cox-dur +### code chunk number 23: Cox-dur ################################################### library( survival ) Cox.Dead <- coxph( Surv( DMdur, DMdur+lex.dur, @@ -246,11 +252,10 @@ I(Per-2000) + sex, data = Si ) round( ci.exp( Cox.Dead ), 3 ) -round( ci.exp( All.Dead ), 3 ) ################################################### -### code chunk number 22: TR.c +### code chunk number 24: TR.c ################################################### Tr.c <- list( "DM" = list( "Ins" = Tr$DM$Ins, "Dead" = Cox.Dead ), @@ -263,7 +268,7 @@ ################################################### -### code chunk number 23: nState +### code chunk number 25: nState ################################################### system.time( nSt <- nState( subset(simL,sex=="M"), @@ -272,7 +277,7 @@ ################################################### -### code chunk number 24: pstate0 +### code chunk number 26: pstate0 ################################################### pM <- pState( nSt, perm=c(1,2,4,3) ) head( pM ) @@ -287,14 +292,14 @@ ################################################### -### code chunk number 25: pstatex +### code chunk number 27: pstatex ################################################### clr <- c("limegreen","orange") # expand with a lighter version of the two chosen colors clx <- c( clr, rgb( t( col2rgb( clr[2:1] )*2 + rep(255,3) ) / 3, max=255 ) ) par( mfrow=c(1,2), las=1, mar=c(3,3,4,2), mgp=c(3,1,0)/1.6 ) # Men -plot( pM, col=clx ) +plot( pM, col=clx, xlab="Date of FU" ) lines( as.numeric(rownames(pM)), pM[,2], lwd=3 ) mtext( "60 year old male, diagnosed 1990, aged 55", side=3, line=2.5, adj=0, col=gray(0.6) ) mtext( "Survival curve", side=3, line=1.5, adj=0 ) @@ -309,19 +314,19 @@ from=1995, time.scale="Per" ), perm=c(1,2,4,3) ) -plot( pF, col=clx ) +plot( pF, col=clx, xlab="Date of FU" ) lines( as.numeric(rownames(pF)), pF[,2], lwd=3 ) mtext( "60 year old female, diagnosed 1990, aged 55", side=3, line=2.5, adj=0, col=gray(0.6) ) mtext( "Survival curve", side=3, line=1.5, adj=0 ) -mtext( "DM, no insulin DM, Insulin", side=3, line=0.5, adj=0, col=clr[1] ) -mtext( "DM, no insulin", side=3, line=0.5, adj=0, col=clr[2] ) +mtext( "DM, no insulin DM, Insulin", side=3, line=0.5, adj=0, col=clr[2] ) +mtext( "DM, no insulin", side=3, line=0.5, adj=0, col=clr[1] ) axis( side=4 ) axis( side=4, at=1:19/20, labels=FALSE ) axis( side=4, at=1:99/100, labels=FALSE, tcl=-0.3 ) ################################################### -### code chunk number 26: pstatey +### code chunk number 28: pstatey ################################################### par( mfrow=c(1,2), las=1, mar=c(3,3,4,2), mgp=c(3,1,0)/1.6 ) # Men @@ -350,8 +355,8 @@ lines( as.numeric(rownames(pF)), pF[,2], lwd=3 ) mtext( "60 year old female, diagnosed 1990, aged 55", side=3, line=2.5, adj=0, col=gray(0.6) ) mtext( "Survival curve", side=3, line=1.5, adj=0 ) -mtext( "DM, no insulin DM, Insulin", side=3, line=0.5, adj=0, col=clr[1] ) -mtext( "DM, no insulin", side=3, line=0.5, adj=0, col=clr[2] ) +mtext( "DM, no insulin DM, Insulin", side=3, line=0.5, adj=0, col=clr[2] ) +mtext( "DM, no insulin", side=3, line=0.5, adj=0, col=clr[1] ) axis( side=4 ) axis( side=4, at=1:9/10, labels=FALSE ) axis( side=4, at=1:19/20, labels=FALSE, tcl=-0.4 ) @@ -359,7 +364,7 @@ ################################################### -### code chunk number 27: comp-0 +### code chunk number 29: comp-0 ################################################### PrM <- pState( nState( subset(simP,sex=="M"), at=seq(0,11,0.2), @@ -387,23 +392,23 @@ lines( PrM, border="blue" , col="transparent", lwd=3 ) lines( CoxM, border="red" , col="transparent", lwd=3 ) text( 60.5, 0.05, "M" ) -box( lwd=3 ) +box( lwd=5, col="white" ) ; box( lwd=2, col="black" ) plot( pF, border="black", col="transparent", lwd=3 ) lines( PrF, border="blue" , col="transparent", lwd=3 ) lines( CoxF, border="red" , col="transparent", lwd=3 ) text( 60.5, 0.05, "F" ) -box( lwd=3 ) +box( lwd=5, col="white" ) ; box( lwd=2, col="black" ) ################################################### -### code chunk number 28: CHANGE1 (eval = FALSE) +### code chunk number 30: CHANGE1 (eval = FALSE) ################################################### ## source( "../R/simLexis.R", keep.source=TRUE ) ################################################### -### code chunk number 29: CHANGE2 +### code chunk number 31: CHANGE2 ################################################### simX <- Epi:::simX sim1 <- Epi:::sim1 @@ -413,63 +418,63 @@ ################################################### -### code chunk number 30: simLexis.rnw:934-937 +### code chunk number 32: simLexis.rnw:972-975 ################################################### cbind( -attr( ini, "time.scale" ), +attr( ini, "time.scales" ), attr( ini, "time.since" ) ) ################################################### -### code chunk number 31: simLexis.rnw:962-963 +### code chunk number 33: simLexis.rnw:1000-1001 ################################################### simLexis ################################################### -### code chunk number 32: simLexis.rnw:980-981 +### code chunk number 34: simLexis.rnw:1018-1019 ################################################### simX ################################################### -### code chunk number 33: simLexis.rnw:993-994 +### code chunk number 35: simLexis.rnw:1031-1032 ################################################### sim1 ################################################### -### code chunk number 34: simLexis.rnw:1006-1007 +### code chunk number 36: simLexis.rnw:1044-1045 ################################################### lint ################################################### -### code chunk number 35: simLexis.rnw:1017-1018 +### code chunk number 37: simLexis.rnw:1055-1056 ################################################### get.next ################################################### -### code chunk number 36: simLexis.rnw:1027-1028 +### code chunk number 38: simLexis.rnw:1065-1066 ################################################### chop.lex ################################################### -### code chunk number 37: simLexis.rnw:1045-1046 +### code chunk number 39: simLexis.rnw:1083-1084 ################################################### nState ################################################### -### code chunk number 38: simLexis.rnw:1055-1056 +### code chunk number 40: simLexis.rnw:1093-1094 ################################################### pState ################################################### -### code chunk number 39: simLexis.rnw:1060-1062 +### code chunk number 41: simLexis.rnw:1098-1100 ################################################### plot.pState lines.pState Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/inst/doc/yll.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/inst/doc/yll.pdf differ diff -Nru r-cran-epi-2.32/man/apc.fit.Rd r-cran-epi-2.37/man/apc.fit.Rd --- r-cran-epi-2.32/man/apc.fit.Rd 2018-05-03 12:43:43.000000000 +0000 +++ r-cran-epi-2.37/man/apc.fit.Rd 2019-04-07 09:56:48.000000000 +0000 @@ -21,7 +21,7 @@ ref.p, dist = c("poisson","binomial"), model = c("ns","bs","ls","factor"), - dr.extr = "weighted", + dr.extr = "Y", parm = c("ACP","APC","AdCP","AdPC","Ad-P-C","Ad-C-P","AC-P","AP-C"), npar = c( A=5, P=5, C=5 ), scale = 1, @@ -48,8 +48,9 @@ \item{dist}{Distribution (or more precisely: Likelihood) used for modeling. if a binomial model us used, \code{Y} is assumed to be the denominator; \code{"binomial"} gives a binomial model with logit - link.} - \item{model}{Type of model fitted: + link. The Age-effects returned are converted to the + probability scale, Period and Cohort effects are still odds-ratios.} + \item{model}{Type of model (covariate effects) fitted: \itemize{ \item \code{ns} fits a model with natural splines for each of the terms, with \code{npar} parameters for the terms. @@ -57,8 +58,8 @@ the terms, with \code{npar} parameters for the terms. \item \code{ls} fits a model with linear splines. \item \code{factor} fits a factor model with one parameter - per value of \code{A}, \code{P} and \code{C}. \code{npar} - is ignored in this case. + per value of \code{A}, \code{P} and \code{P-A}. \code{npar} + is ignored in this case. } } \item{dr.extr}{Character or numeric. @@ -67,21 +68,33 @@ definition of orthogonality of the period / cohort effects to the linear effects --- in terms of a diagonal matrix. - \code{"weighted"} (or "t") (default) uses the no. cases, \code{D}, - corresponding to the observed information about the log-rate - (usually termed "theta", hence the "\code{t}"). \code{"r"} or \code{"l"} uses - \code{Y*Y/D} corresponding to the observed information about the - rate (usually termed "lambda", hence the "\code{l}"). \code{"y"} - uses the person-years as the weight in the inner product. If given - "\code{n}" (Naive) (well, in fact any other character value) will + \code{"Y"} (default) uses the no. person-time, \code{Y}, + corresponding to the observed information about the square root of + the rate. + + \code{"R"} or \code{"L"} uses \code{Y*Y/D} corresponding to the + observed information about the rate (usually termed "lambda", hence + the "\code{L}"). + + \code{"D"} or \code{"T"} uses the no. events as the weight in the + inner product, corresponding to the information about the log-rate + (usually termed "theta", hence the "\code{T}"). + + If given \code{"n"} (naive) (well, in fact any other character value) will induce the use of the standard inner product putting equal weight on all units in the dataset. - If given as a numeric vector this is used as the diagonal of the - matrix inducing the inner product. + If \code{dr.extr} is a numeric vector this is used as the diagonal + of the matrix inducing the inner product. + If \code{dr.extr} is a numeric scalar, \code{D + dr.extr*Y} is used + as the diagonal of the matrix inducing the inner product. This + family of inner products are the only ones that meet the + split-observation invariance criterion. + The setting of this parameter has no effect on the fit of the model, - only on the parametrization. + it only influences the parametrization returned in the \code{Age}, + \code{Per} and \code{Coh} elements of the resulting list. } \item{parm}{Character. Indicates the parametrization of the effects. The first four refer to the ML-fit of the Age-Period-Cohort model, @@ -129,9 +142,10 @@ \item{npar}{The number of parameters/knots to use for each of the terms in the model. If it is vector of length 3, the numbers are taken as the no. of knots for Age, Period and Cohort, respectively. Unless it has - a names attribute with vales "A", "P" and "C" in which case these + a names attribute with values "A", "P" and "C" in which case these will be used. The knots chosen are the quantiles - \code{(1:nk-0.5)/nk} of the events (i.e. of \code{rep(A,D)}) + \code{(1:nk-0.5)/nk} of the events (i.e. of \code{rep(A,D)} and + similarly for \code{P} and \code{C}). \code{npar} may also be a named list of three numerical vectors with names "A", "P" and "C", in which case these taken as the knots for @@ -146,7 +160,7 @@ \value{ An object of class "apc" (recognized by \code{\link{apc.plot}} and \code{\link{apc.lines}}) --- a list with components: - \item{Type}{Text describing the model and parametrization returned} + \item{Type}{Text describing the model and parametrization returned.} \item{Model}{The model object(s) on which the parametrization is based.} \item{Age}{Matrix with 4 columns: \code{A.pt} with the ages (equals \code{unique(A)}) and three columns giving the estimated rates with @@ -165,18 +179,17 @@ latter is given.} \item{Ref}{Numerical vector of length 2 with reference period and cohort. If ref.p or ref.c was not supplied the corresponding element is NA.} - \item{AOV}{Analysis of deviance table comparing the five classical + \item{Anova}{Analysis of deviance table comparing the five classical models.} - \item{Type}{Character string explaining the model and the parametrization.} \item{Knots}{If \code{model} is one of \code{"ns"} or \code{"bs"}, a list with three components: \code{Age}, \code{Per}, \code{Coh}, each one a - vector of knots. The max and the min are the boundary knots.} + vector of knots. The max and the min of the vectors are the boundary knots.} } \details{ Each record in the input data frame represents a subset of a Lexis diagram. The subsets need not be of equal length on the age and period axes, in fact there are no restrictions on the shape of - these; they could be Lexis triangels for example. The requirement is + these; they could be Lexis triangles for example. The requirement is that \code{A} and \code{P} are coded with the mean age and calendar time of observation in the subset. This is essential since \code{A} and \code{P} are used as quantitative variables in the models. @@ -184,7 +197,9 @@ This approach is different from to the vast majority of the uses of APC-models in the literature where a factor model is used for age, period and cohort effects. The latter can be obtained by using - \code{model="factor"}. + \code{model="factor"}. Note however that the cohort factor is defined + from \code{A} and \code{P}, so that it is not possible in this + framework to replicate the Boyle-Robertson fallacy. } \references{ The considerations behind the parametrizations used in this function @@ -209,26 +224,30 @@ library( Epi ) data(lungDK) -# Taylor a dataframe that meets the requirements +# Taylor a dataframe that meets the requirements for variable names exd <- lungDK[,c("Ax","Px","D","Y")] names(exd)[1:2] <- c("A","P") -# Two different ways of parametrizing the APC-model, ML -ex.H <- apc.fit( exd, npar=7, model="ns", dr.extr="Holford", parm="ACP", scale=10^5 ) -ex.W <- apc.fit( exd, npar=7, model="ns", dr.extr="weighted", parm="ACP", scale=10^5 ) +# Three different ways of parametrizing the APC-model, ML +ex.1 <- apc.fit( exd, npar=7, model="ns", dr.extr="1", parm="ACP", scale=10^5 ) +ex.D <- apc.fit( exd, npar=7, model="ns", dr.extr="D", parm="ACP", scale=10^5 ) +ex.Y <- apc.fit( exd, npar=7, model="ns", dr.extr="Y", parm="ACP", scale=10^5 ) # Sequential fit, first AC, then P given AC. ex.S <- apc.fit( exd, npar=7, model="ns", parm="AC-P", scale=10^5 ) # Show the estimated drifts -ex.H[["Drift"]] -ex.W[["Drift"]] +ex.1[["Drift"]] +ex.D[["Drift"]] +ex.Y[["Drift"]] ex.S[["Drift"]] # Plot the effects -fp <- apc.plot( ex.H ) -apc.lines( ex.W, frame.par=fp, col="red" ) -apc.lines( ex.S, frame.par=fp, col="blue" ) +lt <- c("solid","22")[c(1,1,2)] +apc.plot( ex.1, lty=c(1,1,3) ) +apc.lines( ex.D, col="red", lty=c(1,1,3) ) +apc.lines( ex.Y, col="limegreen", lty=c(1,1,3) ) +apc.lines( ex.S, col="blue", lty=c(1,1,3) ) } \keyword{models} -\keyword{regression} \ No newline at end of file +\keyword{regression} diff -Nru r-cran-epi-2.32/man/apc.LCa.Rd r-cran-epi-2.37/man/apc.LCa.Rd --- r-cran-epi-2.32/man/apc.LCa.Rd 2018-04-02 11:22:16.000000000 +0000 +++ r-cran-epi-2.37/man/apc.LCa.Rd 2019-05-22 22:16:45.000000000 +0000 @@ -55,7 +55,7 @@ } \examples{ library( Epi ) - +clear() # Danish lung cancer incidence in 5x5x5 Lexis triangles data( lungDK ) lc <- subset( lungDK, Ax>40 )[,c("Ax","Px","D","Y")] @@ -63,10 +63,7 @@ head( lc ) al <- apc.LCa( lc, npar=c(9,6,6,6,10), keep.models=TRUE, maxit=500, eps=10e-3 ) -show.apc.LCa( al, dev=FALSE ) -show.apc.LCa( al, top="AP" ) -show.apc.LCa( al, top="APa" ) -show.apc.LCa( al, top="ACa" ) +show.apc.LCa( al, dev=TRUE ) # Danish mortality data \dontrun{ diff -Nru r-cran-epi-2.32/man/bootLexis.Rd r-cran-epi-2.37/man/bootLexis.Rd --- r-cran-epi-2.32/man/bootLexis.Rd 2018-03-08 14:09:04.000000000 +0000 +++ r-cran-epi-2.37/man/bootLexis.Rd 2019-04-05 12:45:04.000000000 +0000 @@ -3,10 +3,10 @@ \alias{nid.Lexis} \alias{bootLexis} \title{ -Create a bootstrap sample of persons (\code{lex.id}) from a Lexis object +Create a bootstrap sample of persons (as identified by \code{lex.id}) from a Lexis object } \description{ -\code{lex.id} is the identifier of persons in a \code{\link{Lexis}} +\code{lex.id} is the person identifier in a \code{\link{Lexis}} object. This is used to sample persons from a Lexis object. If a person is sampled, all records from this persons is transported to the bootstrap sample. @@ -14,7 +14,7 @@ \usage{ nid( Lx, \dots ) \method{nid}{Lexis}( Lx, by=NULL, \dots ) -bootLexis( Lx, size = NULL, by = NULL ) +bootLexis( Lx, size = NULL, by = NULL, replace=TRUE ) } \arguments{ \item{Lx}{A \code{Lexis} object.} @@ -22,9 +22,9 @@ \item{size}{Numeric. How many persons should be sampled from the \code{Lexis} object. Defaults to the number of persons in the \code{Lx}, or, if \code{by} is given, to the number of persons in - each level of \code{by}. If by is given \code{size} can have length - \code{nid(Lx)}, to indicate how many are sampled from each level of - \code{by}.} + each level of \code{by}. If \code{by} is given, \code{size} can have length + \code{length(unique(by))}, to indicate how many are sampled from + each level of \code{by}.} \item{by}{Character. Name of a variable (converted to factor) in the \code{Lexis} object. @@ -32,33 +32,34 @@ Calculation of the number of persons (\code{lex.id}) is done within each level of \code{by}, and a vector returned. - } -} -\value{ - A Lexis object of the same structure as the input, with \emph{persons} - bootstrapped. The variable \code{lex.id} has values - 1,2,...,\code{nid(Lx)}. + } + \item{replace}{Should persons be sampled by replacement? Default is + \code{TRUE}. Setting \code{replace} to \code{FALSE} enables + selecting a random subset of persons from the Lexis object.} +} +\value{ + \code{bootLexis} returns a Lexis object of the same structure as the + input, with \emph{persons} bootstrapped. The variable \code{lex.id} + in the resulting \code{Lexis} object has values 1,2,... The original + values of \code{lex.id} from \code{Lx} are stored in the variable + \code{old.id}. \code{nid} counts the number of persons in a Lexis object, possibly by \code{by}. If \code{by} is given, a named vector is returned. } \author{Bendix Carstensen, \url{BendixCarstensen.com}.} -\seealso{\code{\link{Relevel.Lexis}}} +\seealso{\code{\link{Relevel.Lexis}},\code{\link{subset.Lexis}}} \examples{ # A small bogus cohort xcoh <- data.frame( id = c("A", "B", "C"), - birth = c("14/07/1952", "01/04/1954", "10/06/1987"), - entry = c("04/08/1965", "08/09/1972", "23/12/1991"), - exit = c("27/06/1997", "23/05/1995", "24/07/1998"), - fail = c(1, 0, 1), - sex = c("M","F","M") ) -xcoh <- data.frame( id = c("A", "B", "C"), - birth = as.Date(c("14/07/1952","01/04/1954","10/06/1987"),format="\%d/\%m/\%Y"), - entry = as.Date(c("04/08/1965","08/09/1972","23/12/1991"),format="\%d/\%m/\%Y"), - exit = as.Date(c("27/06/1997","23/05/1995","24/07/1998"),format="\%d/\%m/\%Y"), + birth = c("1952-07-14", "1954-04-01", "1987-06-10"), + entry = c("1965-08-04", "1972-09-08", "1991-12-23"), + exit = c("1997-06-27", "1995-05-23", "1998-07-24"), fail = c(1, 0, 1), sex = c("M","F","M") ) -xcoh <- cal.yr( xcoh ) +# Convert to calendar years +for( i in 2:4 ) xcoh[,i] <- cal.yr(xcoh[,i]) +xcoh Lcoh <- Lexis( entry = list( per=entry ), exit = list( per=exit, age=exit-birth ), exit.status = fail, @@ -70,10 +71,14 @@ Lx nid( Lx ) nid( Lx, by="sex" ) -bootLexis( Lx ) +Lb <- bootLexis( Lx ) +head( Lb ) nid( bootLexis( Lx, size=7 ) ) Li <- bootLexis( Lx, by="id" ) # superfluous +summary( Lx ) +summary( Li ) L2 <- bootLexis( Lx, by="sex", size=c(2,5) ) nid( L2, by="sex" ) +summary( L2 ) } \keyword{manip} diff -Nru r-cran-epi-2.32/man/boxes.MS.Rd r-cran-epi-2.37/man/boxes.MS.Rd --- r-cran-epi-2.32/man/boxes.MS.Rd 2016-11-10 13:29:18.000000000 +0000 +++ r-cran-epi-2.37/man/boxes.MS.Rd 2019-04-01 09:14:35.000000000 +0000 @@ -32,9 +32,9 @@ boxarr( b1, b2, offset=FALSE, pos=0.45, ... ) \method{boxes}{Lexis}( obj, boxpos = FALSE, - wmult = 1.15, - hmult = 1.15, - cex = 1.45, + wmult = 1.20, + hmult = 1.20 + 0.85*(!show.Y), + cex = 1.40, show = inherits( obj, "Lexis" ), show.Y = show, scale.Y = 1, @@ -44,7 +44,7 @@ show.D = show, scale.D = FALSE, digits.D = as.numeric(as.logical(scale.D)), - show.R = is.numeric(scale.R), + show.R = show & is.numeric(scale.R), scale.R = 1, digits.R = as.numeric(as.logical(scale.R)), DR.sep = if( show.D ) c("\n(",")") else c("",""), diff -Nru r-cran-epi-2.32/man/ci.cum.Rd r-cran-epi-2.37/man/ci.cum.Rd --- r-cran-epi-2.32/man/ci.cum.Rd 2017-04-16 07:17:55.000000000 +0000 +++ r-cran-epi-2.37/man/ci.cum.Rd 2019-05-23 07:34:11.000000000 +0000 @@ -1,10 +1,11 @@ \name{ci.cum} \alias{ci.cum} +\alias{ci.surv} \title{ Compute cumulative sum of estimates. } \description{ Computes the cumulative sum of parameter functions and the -standard error of it. Optionally the exponential is applied to the -parameter functions before it is cumulated. +standard error of it. Used for computing the cumulative rate, or the +survival function based on a \code{glm} with parametric baseline. } \usage{ ci.cum( obj, @@ -15,17 +16,26 @@ Exp = TRUE, ci.Exp = FALSE, sample = FALSE ) +ci.surv( obj, + ctr.mat = NULL, + subset = NULL, + intl = 1, + alpha = 0.05, + Exp = TRUE, + sample = FALSE ) } \arguments{ \item{obj}{A model object (of class - \code{lm}, \code{glm}, - \code{coxph}, \code{survreg}, - \code{lme},\code{mer},\code{nls},\code{gnlm}, - \code{MIresult} - or \code{polr}). } - \item{ctr.mat}{ Contrast matrix defining the parameter functions from - the parameters of the model. } - \item{subset}{ Subset of the parameters of the model to which + \code{lm}, \code{glm}. } + \item{ctr.mat}{Matrix or data frame. + + If \code{ctr.mat} is a matrix, it should be a contrast matrix to be + multiplied to the parameter vector, i.e. the desired linear + function of the parameters. + + If it is a data frame it should have columns corresponding to a + prediction data frame for \code{obj}, see details for \code{\link{ci.lin}}.} + \item{subset}{ Subset of the parameters of the model to which a matrix \code{ctr.mat} should be applied. } \item{intl}{ Interval length for the cumulation. Either a constant or a numerical vector of length \code{nrow(ctr.mat)}. } @@ -41,30 +51,40 @@ \details{ The purpose of this function is to the compute cumulative rate (integrated intensity) at a set of points based on a model for the - rates. - \code{ctr.mat} is a matrix which, when premultiplied to the parameters - of the model reurn the (log)rates at a set of increasing time points. If log-rates are - returned from the model, the they should be exponentiated before - cumulated, and the variances computed accordingly. Since the primary - use is for log-linear Poisson models the \code{Exp} parameter defaults - to TRUE. - + rates. \code{ctr.mat} is a matrix which, when premultiplied to the + parameters of the model returns the (log)rates at a set of increasing + time points. If log-rates are returned from the model, the they should + be exponentiated before cumulated, and the variances computed + accordingly. Since the primary use is for log-linear Poisson models + the \code{Exp} parameter defaults to TRUE. + + Each row in the object supplied via \code{ctr.mat} is assumed to + represent a midpoint in an interval. \code{ci.cum} will then return + the cumulative rates at the \emph{end} of these + intervals. \code{ci.surv} will return the survival probability at the + \emph{start} of each of these intervals, assuming the the first + interval starts at 0 - the first row of the result is \code{c(1,1,1)}. + The \code{ci.Exp} argument ensures that the confidence intervals for - the cumulaive rates are alwys positive, so that exp(-cum.rate) is + the cumulative rates are always positive, so that exp(-cum.rate) is always in [0,1]. } \value{ - A matrix with 4 columns: Estimate, lower and upper c.i. and standard - error. If \code{sample} is TRUE, a sampled vector is reurned, if + A matrix with 3 columns: Estimate, lower and upper c.i. and standard + error, unless \code{se=TRUE}, in which case the standard error is + returned too. If \code{sample} is TRUE, a sampled vector is returned, if \code{sample} is numeric a matrix with \code{sample} columns is returned, each column a cumulative rate based on a random sample from the distribution of the parameter estimates. + + \code{ci.surv} returns a 3 column matrix with estimate, lower and + upper confidence interval. } \author{ Bendix Carstensen, \url{http://BendixCarstensen.com} } -\seealso{ See also \code{\link{ci.lin}} } +\seealso{ See also \code{\link{ci.lin}}, \code{\link{ci.pred}} } \examples{ # Packages required for this example library( splines ) @@ -76,41 +96,42 @@ plot( survfit( Surv( time, status==2 ) ~ 1, data=lung ) ) # Declare data as Lexis -lungL <- Lexis( exit=list("tfd"=time), - exit.status=(status==2)*1, data=lung ) +lungL <- Lexis( exit=list(tfd=time), + exit.status=(status==2)*1, + data=lung ) summary( lungL ) -# Cut the follow-up every 10 days +# Split the follow-up every 10 days sL <- splitLexis( lungL, "tfd", breaks=seq(0,1100,10) ) -str( sL ) summary( sL ) -# Fit a Poisson model with a natural spline for the effect of time. -# Extract the variables needed -D <- status(sL, "exit") -Y <- dur(sL) -tB <- timeBand( sL, "tfd", "left" ) -MM <- ns( tB, knots=c(50,100,200,400,700), intercept=TRUE ) -mp <- glm( D ~ MM - 1 + offset(log(Y)), - family=poisson, eps=10^-8, maxit=25 ) - -# mp is now a model for the rates along the time scale tB - -# Contrast matrix to extract effects, i.e. matrix to multiply with the -# coefficients to produce the log-rates: unique rows of MM, in time order. -T.pt <- sort( unique( tB ) ) -T.wh <- match( T.pt, tB ) - -# ctr.mat=MM[T.wh,] selects the rates as evaluated at times T.pt: -Lambda <- ci.cum( mp, ctr.mat=MM[T.wh,], intl=diff(c(0,T.pt)) ) +# Fit a Poisson model with a natural spline for the effect of time (left +# end points of intervals are used as covariate) +mp <- glm( cbind(lex.Xst==1,lex.dur) ~ Ns(tfd,knots=c(0,50,100,200,400,700)), + family=poisreg, data=sL ) + +# mp is now a model for the rates along the time scale tfd +# prediction data frame for select time points on this time scale +nd <- data.frame( tfd = seq(5,995,10) ) # *midpoints* of intervals +Lambda <- ci.cum ( mp, nd, intl=10 ) +surv <- ci.surv( mp, nd, intl=10 ) # Put the estimated survival function on top of the KM-estimator -matlines( c(0,T.pt[-1]), exp(-Lambda[,1:3]), lwd=c(3,1,1), lty=1, col="Red" ) +# recall the ci.surv return the survival at *start* of intervals +matshade( nd$tfd-5, surv, col="Red", alpha=0.15 ) # Extract and plot the fitted intensity function -lambda <- ci.lin( mp, ctr.mat=MM[T.wh,], Exp=TRUE ) -matplot( T.pt, lambda[,5:7]*10^3, type="l", lwd=c(3,1,1), col="black", lty=1, - log="y", ylim=c(0.2,20) ) +lambda <- ci.pred( mp, nd )*365.25 # mortality +matshade( nd$tfd, lambda, log="y", ylim=c(0.2,5), plot=TRUE, + xlab="Time since diagnosis", ylab="Mortality per year" ) + +# same thing works with gam from mgcv +library( mgcv ) +mg <- gam( cbind(lex.Xst==1,lex.dur) ~ s(tfd), family=poisreg, data=sL ) +matshade( nd$tfd-5, ci.surv( mg, nd, intl=10 ), plot=TRUE, + xlab="Days since diagnosis", ylab="P(survival)" ) +matshade( nd$tfd , ci.pred( mg, nd )*365.25 , plot=TRUE, log="y", + xlab="Days since diagnosis", ylab="Mortality per 1 py" ) } \keyword{models} \keyword{regression} diff -Nru r-cran-epi-2.32/man/ci.lin.Rd r-cran-epi-2.37/man/ci.lin.Rd --- r-cran-epi-2.32/man/ci.lin.Rd 2018-08-20 21:05:56.000000000 +0000 +++ r-cran-epi-2.37/man/ci.lin.Rd 2019-02-11 11:01:48.000000000 +0000 @@ -7,7 +7,7 @@ \alias{Wald} \title{ Compute linear functions of parameters with standard errors and - confidence limits + confidence limits, optionally transforming to a different scale. } \description{ For a given model object the function computes a linear function of @@ -61,15 +61,16 @@ } \item{ctr.mat}{Matrix, data frame or list (of two data frames). - If it is a matrix, it should be a contrast matrix to be multiplied - to the parameter vector, i.e. the desired linear function of the - parameters. + If \code{ctr.mat} is a matrix, it should be a contrast matrix to be + multiplied to the parameter vector, i.e. the desired linear function + of the parameters. - If it is a data.frame it should have columns corresponding to a + If it is a data frame it should have columns corresponding to a prediction frame, see details. If it is a list, it must contain two data frames that are (possibly - partial) prediction frames for \code{obj}, see \code{xvars} and details.} + partial) prediction frames for \code{obj}, see argument \code{xvars} + and details.} \item{xvars}{Character vector. If variables in the model are omitted from data frames supplied in a list to \code{ctr.mat}, they should be listed here. Omitted factors need not be mentioned here.} @@ -83,7 +84,8 @@ select a subset of parameters and only the \code{int}ersection of these is returned.} \item{diffs}{If TRUE, all differences between parameters - in the subset are computed. \code{ctr.mat} is ignored. If \code{obj} + in the subset are computed, and the \code{subset} argumnt is + required. The argument \code{ctr.mat} is ignored. If \code{obj} inherits from \code{lm}, and \code{subset} is given as a string \code{subset} is used to search among the factors in the model and differences of all factor levels for the first match are shown. @@ -91,14 +93,14 @@ pairwise differences between parameters matching are returned.} \item{fnam}{Should the common part of the parameter names be included with the annotation of contrasts? Ignored if \code{diffs==T}. If a - sting is supplied this will be prefixed to the labels.} + string is supplied this will be prefixed to the labels.} \item{vcov}{Should the covariance matrix of the set of parameters be returned? If this is set, \code{Exp} is ignored. See details.} \item{alpha}{Significance level for the confidence intervals.} \item{df}{Integer. Number of degrees of freedom in the t-distribution used to compute the quantiles used to construct the confidence intervals.} \item{Exp}{For \code{ci.lin}, if \code{TRUE} columns 5:6 are replaced - with exp( columns 1,5,6 ). For \code{ci.exp} of \code{FALSE}, the + with exp( columns 1,5,6 ). For \code{ci.exp}, if \code{FALSE}, the untransformed parameters are returned. For \code{ci.pred} it indicates whether the predictions should be exponentiated - the default (\code{Exp=NULL}) is to make a prediction with a Wald CI on @@ -144,13 +146,16 @@ exponentiated, use \code{ci.exp(...,Exp=FALSE)}. If \code{ctr.mat} is a data frame, the model matrix corresponding to - this is constructed and supplied, so the default behaviour will be to - produce the same as \code{ci.pred}, apparently superfluous. The purpose - of this is to allow the use of the arguments \code{vcov} that produces - the variance-covariance matrix of the predictions, and \code{sample} - that produces a sample of predictions using sampling from the - multivariate normal with mean equal to parameters and variance equal - to the hessian. + this is constructed and supplied. This is only supported for objects + of class \code{lm}, \code{glm}, \code{gam} and \code{coxph}. + + So the default behaviour will be to produce the same as + \code{ci.pred}, apparently superfluous. The purpose of this is to + allow the use of the arguments \code{vcov} that produces the + variance-covariance matrix of the predictions, and \code{sample} that + produces a sample of predictions using sampling from the multivariate + normal with mean equal to parameters and variance equal to the + hessian. If \code{ctr.mat} is a list of two data frames, the difference of the predictions from using the first versus the last as newdata arguments @@ -173,12 +178,12 @@ data frame or a list of two data frames. You can leave out variables (columns) from the two data frames that - would be constant and identical, basically variables not relevant for - the calculation of the contrast. In many cases \code{ci.lin} (really + would be identical, basically variables not relevant for the + calculation of the contrast. In many cases \code{ci.lin} (really \code{Epi:::ci.dfr}) can figure out the names of the omitted columns, but occasionally you will have to supply the names of the omitted variables in the \code{xvars} argument. Factors omitted need not be - listed in \code{xvars}, though no harm is done doing so. + listed in \code{xvars}, although no harm is done doing so. \code{Wald} computes a Wald test for a subset of (possibly linear combinations of) parameters being equal to the vector of null @@ -247,11 +252,11 @@ ci.lin( mi, list(nda,ndb) ) # Same result if f column is omitted because "f" columns are identical ci.lin( mi, list(nda[,-3],ndb[,-3]) ) -# However, Crashes if knots in spline is supplied, and non-factor omitted +# However, crashes if knots in spline is supplied, and non-factor omitted xk <- -1:1 xi <- c(-0.5,0.5) ww <- rnorm(200) -mi <- update( mm, . ~ . -x +ww + Ns(x,knots=xk) + g:Ns(x,knots=xi) ) +mi <- update( mm, . ~ . -x + ww + Ns(x,knots=xk) + g:Ns(x,knots=xi) ) # Will crash try( cbind( nda$x, ci.lin( mi, list(nda,ndb) ) ) ) # Must specify num vars (not factors) omitted from nda, ndb diff -Nru r-cran-epi-2.32/man/cutLexis.Rd r-cran-epi-2.37/man/cutLexis.Rd --- r-cran-epi-2.32/man/cutLexis.Rd 2017-10-23 10:45:22.000000000 +0000 +++ r-cran-epi-2.37/man/cutLexis.Rd 2019-02-12 13:08:45.000000000 +0000 @@ -52,6 +52,10 @@ and one after the cutpoint. An extra time-scale is added; the time since the event at \code{cut}. This is \code{NA} for any follow-up prior to the intermediate event. + + The function \code{tsNA20} will replace all missing values in + timescales with 0. This is commonly meeded when timescales defined as + time since entry into an inermediate state are used in modeling. } \note{ The \code{cutLexis} function superficially resembles the @@ -125,6 +129,7 @@ \code{\link{splitLexis}}, \code{\link{Lexis}}, \code{\link{summary.Lexis}}, + \code{\link{timeSince}}, \code{\link{boxes.Lexis}} } \examples{ diff -Nru r-cran-epi-2.32/man/detrend.Rd r-cran-epi-2.37/man/detrend.Rd --- r-cran-epi-2.32/man/detrend.Rd 2018-05-13 18:41:13.000000000 +0000 +++ r-cran-epi-2.37/man/detrend.Rd 2018-11-23 14:57:55.000000000 +0000 @@ -1,39 +1,42 @@ \name{detrend} \alias{detrend} \alias{decurve} -\title{ Projection of a model matrix on to the orthogonal +\title{ Projection of a model matrix on the orthogonal complement of a trend or curvature.} \description{ The columns of a model matrix \code{M} is projected on the orthogonal complement to the matrix \code{(1,t)}, resp. \code{(1,t,t^2)}. - Orthogonality is defined w.r.t. an inner product defined by the - matrix \code{diag(weight)}. + Orthogonality is w.r.t. an inner product defined by the positive + definite matrix matrix \code{diag(weight)}. Non-diagonal matrices + defining the inner product is not supported. } \usage{ detrend( M, t, weight = rep(1, nrow(M)) ) decurve( M, t, weight = rep(1, nrow(M)) ) } \arguments{ - \item{M}{A model matrix. } + \item{M}{A model matrix.} \item{t}{The trend defining a subspace. A numerical vector of length - \code{nrow(M)} } + \code{nrow(M)}.} \item{weight}{ Weights defining the inner product of vectors \code{x} and \code{y} as \code{sum(x*w*y)}. A numerical vector of length \code{nrow(M)}, defaults to a vector of \code{1}s. Must be all non-negative.} } \details{ - The functions are intended to be used in parametrization of - age-period-cohort models. + The functions are intended to be used in construction of particular + parametrizations of age-period-cohort models. } \value{ - A full-rank matrix with columns orthogonal to \code{(1,t)}, for - \code{decurv}, \code{(1,t,t^2)}. + \code{detrend} returns full-rank matrix with columns orthogonal to + \code{(1,t)}; + \code{decurve} returns full-rank matrix with columns orthogonal to + \code{(1,t,t^2)}. } \author{ - Bendix Carstensen, Steno Diabetes Center, + Bendix Carstensen, Steno Diabetes Center Copenhagen, \url{http://BendixCarstensen.com}, with essential help from Peter Dalgaard. } \seealso{ \code{\link{projection.ip}} } diff -Nru r-cran-epi-2.32/man/diet.Rd r-cran-epi-2.37/man/diet.Rd --- r-cran-epi-2.32/man/diet.Rd 2016-11-10 13:29:18.000000000 +0000 +++ r-cran-epi-2.37/man/diet.Rd 2019-03-28 13:48:07.000000000 +0000 @@ -23,20 +23,20 @@ \code{\link{Date}} variable. \cr \code{dob}: \tab date of birth, a \code{\link{Date}} variable. \cr -\code{y}: \tab - number of years at risk, a numeric vector. \cr -\code{fail}: \tab status on exit, a numeric vector (codes 1, 3, 11, and +\code{y}: \tab number of years at risk, a numeric vector. \cr +\code{fail}: \tab status on exit, a numeric vector (codes 1, 3 and 13 represent CHD events) \cr \code{job}: \tab occupation, a factor with levels \code{Driver} \code{Conductor} \code{Bank worker} \cr \code{month}: \tab month of dietary survey, a numeric vector \cr -\code{energy}: \tab total energy intake (KCal per day/100), a numeric +\code{energy}: \tab total energy intake (kCal per day/100), a numeric vector \cr \code{height}: \tab (cm), a numeric vector \cr \code{weight}: \tab (kg), a numeric vector \cr -\code{fat}: \tab fat intake (g/day), a numeric vector \cr -\code{fibre}: \tab dietary fibre intake (g/day), a numeric vector \cr +\code{fat}: \tab fat intake (10 g/day), a numeric vector \cr +\code{fibre}: \tab dietary fibre intake (10 g/day), a numeric vector \cr \code{energy.grp}: \tab high daily energy intake, a factor with levels \code{<=2750 KCal} \code{>2750 KCal} \cr diff -Nru r-cran-epi-2.32/man/DMepi.Rd r-cran-epi-2.37/man/DMepi.Rd --- r-cran-epi-2.32/man/DMepi.Rd 2018-03-05 00:07:37.000000000 +0000 +++ r-cran-epi-2.37/man/DMepi.Rd 2018-10-23 12:38:51.000000000 +0000 @@ -1,18 +1,18 @@ \name{DMepi} \alias{DMepi} \docType{data} -\title{Epidmiological rates for diabetes in Denmark 1996--2015} -\description{Register based counts and person-uears for incidece of +\title{Epidemiological rates for diabetes in Denmark 1996--2015} +\description{Register based counts and person-years for incidence of diabetes and mortality with and without diabetes. } \usage{data("DMepi")} \format{ - A data frame with 4000 observations on the following 8 variables. + A data frame with 4200 observations on the following 8 variables. \describe{ - \item{\code{sex}}{a factor with levels \code{M} \code{F}} - \item{\code{A}}{Age glass 0 -- 99} - \item{\code{P}}{Calendar year, 1996-2015} - \item{\code{X}}{Number of new diagnoses of diabetes} + \item{\code{sex}}{a factor with levels \code{M}, \code{F}} + \item{\code{A}}{Age class, 0--99} + \item{\code{P}}{Calendar year, 1996--2016} + \item{\code{X}}{Number of new diagnoses of diabetes among persons without diabetes} \item{\code{D.nD}}{Number of deaths among persons without diabetes} \item{\code{Y.nD}}{Person-years among persons without diabetes} \item{\code{D.DM}}{Number of deaths among persons with diabetes} @@ -20,7 +20,8 @@ } } \details{Based on registers of the Danish population. Only included for - illustrative purposes. Cannot be used as scientifically validaed data. + illustrative purposes. Cannot be used as scientifically validated + data, since small numbers are randomly permuted between units. } \examples{ data(DMepi) diff -Nru r-cran-epi-2.32/man/effx.Rd r-cran-epi-2.37/man/effx.Rd --- r-cran-epi-2.32/man/effx.Rd 2016-11-10 13:29:18.000000000 +0000 +++ r-cran-epi-2.37/man/effx.Rd 2019-02-24 20:29:33.000000000 +0000 @@ -19,14 +19,13 @@ digits = 3, data = NULL ) } -%- maybe also 'usage' for other objects documented here. \arguments{ \item{response}{The \code{response} variable - must be numeric or logical. If logical, \code{TRUE} is considered the outcome.} \item{type}{The type of response\code{type} - must be one of "metric", "binary", "failure", or "count"} \item{fup}{The \code{fup} variable contains the follow-up time for a - failure response}. This must be numeric. + failure response. This must be numeric.} \item{exposure}{The \code{exposure} variable can be numeric or a factor} \item{strata}{The \code{strata} stratifying variable - must be a factor} \item{control}{The \code{control} variable(s) (confounders) - these are @@ -80,6 +79,8 @@ effx(bweight,exposure=hyp,strata=sex,data=births) # effect of hypertension on birth weight controlled for sex effx(bweight,exposure=hyp,control=sex,data=births) + +print( options('na.action') ) # effect of gestation time on birth weight effx(bweight,exposure=gestwks,data=births) # effect of gestation time on birth weight stratified by sex diff -Nru r-cran-epi-2.32/man/Epi.Rd r-cran-epi-2.37/man/Epi.Rd --- r-cran-epi-2.32/man/Epi.Rd 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-epi-2.37/man/Epi.Rd 2019-02-17 15:21:48.000000000 +0000 @@ -0,0 +1,18 @@ +\docType{package} +\name{Epi} +\alias{Epi} +\alias{Epi-package} +\title{Epi: Functions for manipulation and statistical analysis of epidemiological data} +\description{ +\pkg{Epi} has grown out of the course 'Statistical Practise in +Epidemiology with R' \url{http://bendixcarstensen.com/SPE/}. + +The major contributions from this course have been the +\code{\link{stat.table}} function for advanced tabulation and summary, +and the functions for representation and the \code{\link{Lexis}} +function(s) for manipulation of multistate data with multiple time +scales. +} +\details{Click on the \code{Index} link below the line to access + vignettes (tutorial documents) and an alphabetic list of the functions +in \code{Epi}.} \ No newline at end of file diff -Nru r-cran-epi-2.32/man/in.span.Rd r-cran-epi-2.37/man/in.span.Rd --- r-cran-epi-2.32/man/in.span.Rd 2018-07-13 09:58:39.000000000 +0000 +++ r-cran-epi-2.37/man/in.span.Rd 2019-02-16 10:29:21.000000000 +0000 @@ -25,9 +25,9 @@ idSpan( A, B, tol=1e-08 ) thinCol( A, tol = 1e-06, col.num = FALSE ) } -\details{\code{\link{thinCol}} is mainly a workhorse in \code{in.span} - and \code{\link{detrend}}, but made available because of its general - usefulness. +\details{\code{\link{thinCol}} is mainly a workhorse in + \code{\link{detrend}}, but made available because of its general + usefulness. \code{in.span} and \code{inSpan} are just different names for the same to accommodate different naming schools. @@ -36,17 +36,18 @@ parametrizations of a model are identical in the sense of spanning the same linear space. Equivalent to checking whether fitted values under different parametrizations are identical, but has the further use of - checking if subspaces of models are equivalent. Relies on - \code{\link[MASS]{ginv}} to compute generalized inverse. The function - simply checks if $A A^{-} x$ is equal to $x$. + checking if subspaces of models are equivalent. The function + simply checks if the regression of (columns of) \code{x} on the + columns of \code{A} produces residuals that are all 0. - \code{id.span} checks whether two matrices have the same column span. + \code{id.span} (equivalent to \code{idSpan}) checks whether two + matrices have the same column span. } \arguments{ \item{A}{A matrix.} \item{B}{A matrix.} -\item{x}{A vector or matrix. Must have \code{length(x)} (or - \code{nrow(x)}) equal to \code{nrow(A)}. } +\item{x}{A vector or matrix. \code{length(x)} (or \code{nrow(x)}) must + be equal to \code{nrow(A)}.} \item{coef}{Logical. Should the coefficient matrix (\code{k}) be returned, so that \code{Ak=x}?} \item{tol}{Tolerance for identity of matrices in check @@ -64,8 +65,8 @@ \code{thinCol} returns a matrix of full rank, formed from \code{A} by deleting columns linearly dependent on other. If \code{col.num=TRUE} - positions of columns forming a full rank basis for the column space of - \code{A} is returned. + (one possible set of) positions of columns forming a full rank basis + for the column space of \code{A} is returned. } \author{Bendix Carstensen, \url{BendixCarstensen.com} with essential help from Lars Jorge Diaz and Peter Dalgaard. diff -Nru r-cran-epi-2.32/man/Lexis.Rd r-cran-epi-2.37/man/Lexis.Rd --- r-cran-epi-2.32/man/Lexis.Rd 2018-08-18 05:14:21.000000000 +0000 +++ r-cran-epi-2.37/man/Lexis.Rd 2019-05-05 12:06:11.000000000 +0000 @@ -7,7 +7,7 @@ } \usage{ Lexis( entry, exit, duration, entry.status = 0, exit.status = 0, id, data, - merge=TRUE, states, tol=.Machine$double.eps^0.5, keep.dropped=FALSE ) + merge=TRUE, states, notes=TRUE, tol=.Machine$double.eps^0.5, keep.dropped=FALSE ) } \arguments{ \item{entry}{a named list of entry times. Each element of the list is @@ -33,6 +33,7 @@ \item{states}{A vector of labels for the states. If given, the state variables \code{lex.Cst} and \code{lex.Xst} are returned as factors with identical levels attributes equal to \code{states}.} + \item{notes}{Logical. Should notes on entry states and time be given.} \item{tol}{Numerical tolerance for follow-up time. Rows with duration less than this value are automatically dropped.} \item{keep.dropped}{Logical. Should dropped rows from \code{data} be diff -Nru r-cran-epi-2.32/man/lines.apc.Rd r-cran-epi-2.37/man/lines.apc.Rd --- r-cran-epi-2.32/man/lines.apc.Rd 2018-05-13 19:15:11.000000000 +0000 +++ r-cran-epi-2.37/man/lines.apc.Rd 2019-03-17 09:39:26.000000000 +0000 @@ -24,6 +24,7 @@ col = "black", type = "l", knots = FALSE, + shade = FALSE, ... ) apc.lines( x, P, C, scale = c("log","ln","rates","inc","RR"), @@ -38,6 +39,7 @@ col = "black", type = "l", knots = FALSE, + shade = FALSE, ... ) } \arguments{ @@ -77,6 +79,8 @@ \item{col}{Colours for the three effects.} \item{type}{What type of lines / points should be used.} \item{knots}{Should knots from the model be shown?} + \item{shade}{Should confidence intervals be plotted as shaded areas? + If true, the setting of \code{ci} is ignored.} \item{...}{Further parameters to be transmitted to \code{points} \code{lines}, \code{matpoints} or \code{matlines} used for plotting the three sets of curves.} diff -Nru r-cran-epi-2.32/man/mcutLexis.Rd r-cran-epi-2.37/man/mcutLexis.Rd --- r-cran-epi-2.32/man/mcutLexis.Rd 2017-10-23 13:04:45.000000000 +0000 +++ r-cran-epi-2.37/man/mcutLexis.Rd 2018-10-08 09:12:24.000000000 +0000 @@ -43,9 +43,9 @@ not accept that two events occur at the same time for a person (ties). If \code{TRUE} a random quantity in the range \code{c(-1,1)/100} will be added to all event times in all records - with at least one tie. If numeric a random quantity in the range - \code{c(-1,1)*ties.resolve} will be added to all event times in all - records with at least one tie.} + with at least one tie. If \code{ties.resolve} is numeric a random + quantity in the range \code{c(-1,1)*ties.resolve} will be added to + all event times in all records with at least one tie.} } \value{A \code{\link{Lexis}} object with extra states created by occurrence of a number of intermediate events. diff -Nru r-cran-epi-2.32/man/mod.Lexis.Rd r-cran-epi-2.37/man/mod.Lexis.Rd --- r-cran-epi-2.32/man/mod.Lexis.Rd 2018-08-23 04:15:50.000000000 +0000 +++ r-cran-epi-2.37/man/mod.Lexis.Rd 2019-05-23 07:54:52.000000000 +0000 @@ -9,61 +9,75 @@ Lexis objects where the events and risk time have predefined representations. This allows a simpler syntax than the traditional explicit modeling using \code{\link{glm}}, \code{\link{gam}} -and \code{\link{coxph}}. But it is only a set of wrappers fro +and \code{\link{coxph}}. Requires that \code{lex.Cst} and \code{lex.Xst} +are defined as factors. + +But it is just a set of wrappers fro \code{glm}, \code{gam} and \code{coxph}. } \usage{ glm.Lexis( Lx, # Lexis object - resp, # 'to' states + from = preceding(Lx,to), # 'from' states + to = absorbing(Lx) , # 'to' states formula, # ~ model - xpos, # 'from' states + paired = FALSE, # only the pairwise link = "log", # link function scale = 1, # scaling of PY verbose = TRUE, # report what is done? - ... ) + \dots ) # further arguments to glm gam.Lexis( Lx, # Lexis object - resp, # 'to' states + from = preceding(Lx,to), # 'from' states + to = absorbing(Lx) , # 'to' states formula, # ~ model - xpos, # 'from' states + paired = FALSE, # only the pairwise link = "log", # link function scale = 1, # scaling of PY verbose = TRUE, # report what is done? - ... ) -coxph.Lexis( Lx, # Lexis object - resp, # 'to' states - formula, # timescale ~ model - xpos, # 'from' states + \dots ) # further arguments to glm +coxph.Lexis( Lx, # Lexis object + from = preceding(Lx,to), # 'from' states + to = absorbing(Lx) , # 'to' states + formula, # timescale ~ model + paired = FALSE, # only the pairwise verbose = TRUE, # report what is done? - ... ) + \dots ) # further arguments to glm } \arguments{ \item{Lx}{A \code{\link{Lexis}} object representing cohort follow-up. } - \item{resp}{Character vector of states \bold{to} which a transition is + \item{from}{Character vector of states \bold{from} which transitions + are considered. May also be an integer vector in which case the + reference will be to the position of levels of + \code{lex.Cst}. Defaults to the collection of transient states + immediately preceding the absorbing states. +} + \item{to}{Character vector of states \bold{to} which a transition is considered an event. May also be an integer vector in which case the - reference will be to the position of levels of \code{lex.Xst}. + reference will be to the position of levels of \code{lex.Xst}. + Defaults to the set of absorbing states. } \item{formula}{Model formula describing the model for the - intensity. For \code{glm} and \code{gam}, the formula should be + intensity(-ies). For \code{glm} and \code{gam}, the formula should be one-sided; for \code{coxph} the formula should be two-sided and have the name of the time-scale used for baseline as the l.h.s. } -\item{xpos}{Character vector of states \bold{from} which transitions - are considered. May also be an integer vector in which case the - reference will be to the position of levels of \code{lex.Cst}. If - missing (that is not supplied), the entire \code{Lexis} object is - used in the modeling. -} -\item{link}{Link function used, allowed values are \code{log} (the - default), \code{identity} and \code{sqrt}, see the family - \code{\link{poisreg}}. + \item{paired}{Logical. Should the states mentioned in \code{to}, + rep. \code{from} be taken as pairs, indicating the only transitions + modeled. If \code{FALSE} all transitions from any of the states in + \code{from} to any states in \code{to} are modeled. +} + \item{link}{Character; name of the link function used, allowed values + are \code{'log'} (the default), \code{'identity'} and \code{'sqrt'}, + see the family \code{\link{poisreg}}. } \item{scale}{Scalar. \code{lex.dur} is divided by this number before analysis, so that you can get resulting rates on a scale of your wish. } - \item{verbose}{Print information on the states modeled? +\item{verbose}{Print information on the states modeled? +} +\item{\dots}{Further arguments passed on to \code{glm}, \code{glm} or + \code{coxph} } - \item{\dots}{Arguments passed on to the methods. } } \details{ @@ -75,30 +89,56 @@ \code{lex.dur} (and would ignore this) as variable in the \code{newdata}. \code{ci.pred} returns the estimated rates in units of the \code{lex.dur} in the \code{Lexis} object, scaled by - \code{scale}. + \code{scale}, which has a default value of 1. + The default is to model all transitions into any absorbing state by + the same model (how wise is that??). If only \code{from} is given, + \code{to} is set to all states reachable from \code{from}, which may + be a really goofy model and if so a warning is issued. If only + \code{to} is given, \code{from} is set to the collection of states + from which \code{to} can be reached directly --- see + \code{\link{preceding}} and its cousins. This convention means that if + you have a \code{\link{Lexis}} object representing a simple survival + analysis, with states, say, "alive" and "dead", you can dispense with + the \code{from} and \code{to} arguments. + + Occasionally you only want to model a subset of the possible + transitions from states in \code{from} to states in \code{to}, in + which case you specify \code{from} and \code{to} as character vectors + of the same length and set \code{paired=TRUE}. Then only transitions + \code{from[i]} to \code{to[i]}, \code{i}=1,2,... will be modeled. + + There is no working \code{update} functions for these objects (yet). + Strictly speaking, it is a bit counter-intuitive to have the time-scale on the l.h.s. of the formula for the \code{coxph} since the time scale is also a predictor of the occurrence rate. On the other hand, calling \code{coxph} directly would also entail having the name of the time scale in the \code{Surv} object on the l.h.s. of the formula. So the inconsistency is merely carried over from \code{coxph}. - - The functions are slightly experimental so far. Argument names and - ordering may change in the future. The \code{update} methods do not - always work. } -\value{\code{glm} returns a \code{\link{glm}} object, \code{gam} returns - a \code{\link[mgcv]{gam}} object and \code{coxph} returns a - \code{\link[survival]{coxph}} object. The returned objects all have an - extra attribute, \code{Lexis}; a list with names \code{Exposure} and - \code{Events}; character vectors of state names from which transitions - are modeled and that are considered events, respectively. The - \code{glm} object also has a \code{scale} element in the list, an scalar - indicating the scaling of \code{lex.dur} before modeling. The - \code{coxph} object also has a \code{Timescale} element in the list, a - character indicating the underlying timescale variable. +\value{\code{glm.Lexis} returns a \code{\link{glm}} object, which is + also of class \code{glm.lex}, + \code{gam.Lexis} returns a \code{\link[mgcv]{gam}} object, which is + also of class \code{gam.lex}, and + \code{coxph.Lexis} returns a \code{\link[survival]{coxph}} object, which is + also of class \code{coxph.lex}. These extra class attributes are meant + to facilitate the (still pending) implementation of an \code{update} function. + + The returned objects all have an extra attribute, \code{Lexis} which + is a list with entries + \code{data}, the name of the \code{Lexis} object modeled (note that it + is \emph{not} the object, only the name of it, which may not be portable); + \code{trans}, a character vector of transitions modeled; + \code{formula}, the model formula; and + \code{scale}, the scaling applied to \code{lex.dur} before modeling. + + Only the \code{glm} and \code{gam} objects have the \code{scale} element + in the list; a scalar indicating the scaling of \code{lex.dur} before + modeling. Note that the formula component if the \code{Lexis} + attribute of a \code{coxph} object, the \code{formula} component is a + two-sided formula with the baseline time scale as the l.h.s. } \author{ @@ -106,7 +146,12 @@ } \seealso{ -\code{\link{Lexis}}, \code{\link{cutLexis}} + \code{\link{Lexis}}, + \code{\link{cutLexis}}, + \code{\link{mcutLexis}}, + \code{\link{addCov.Lexis}}, + \code{\link{absorbing}}, + \code{\link{transient}} } \examples{ @@ -128,13 +173,15 @@ # Split follow-up on age-axis system.time( sL <- splitLexis( cL, breaks=0:25*4, time.scale="age") ) +# ( consider splitMulti from the popEpi package ) summary( sL ) # glm models for rates based on the time-split dataset by insulin and sex -# proportional hazards model with insulin as time-dependent variable -mt <- glm.Lexis( sL, resp="Dead", - ~ sex + lex.Cst + Ns(age,knots=c(15,3:8*10)) ) +# Proportional hazards model with insulin as time-dependent variable +# - uses the defaul of modeling all transitions from both transient +# states ("Alive" and "Ins") to the absorbing state ("Dead"). +mt <- glm.Lexis( sL, formula= ~ sex + lex.Cst + Ns(age,knots=c(15,3:8*10)) ) # prediction of mortality rates from "Alive" with and without PH assumption nA <- data.frame( age=40:70, sex="M", lex.Cst="Alive" ) @@ -144,13 +191,13 @@ lwd=3, lty=1, log="y", col=c("black","blue","red"), xlab="Age", ylab="Mortality per 1000 PY" ) -# gam models takes quite some time so we leave it out +# gam models may take some time to run so we leave it out \dontrun{ -mt.gam <- gam.Lexis( sL, "Dead", ~ sex + lex.Cst + s(age), scale=1000 ) +mt.gam <- gam.Lexis( sL, to="Dead", formula=~ sex + lex.Cst + s(age), scale=1000 ) } # Fit a Cox model with age as baseline time scale and insulin as time-dependent -mt.cox <- coxph.Lexis( sL, "Dead", age ~ sex + lex.Cst ) +mt.cox <- coxph.Lexis( sL, c("Alive","Ins"), "Dead", age ~ sex + lex.Cst ) # Pretty much the same results for regression paramters as the glm: ci.exp( mt , subset="ex" ) diff -Nru r-cran-epi-2.32/man/poisreg.Rd r-cran-epi-2.37/man/poisreg.Rd --- r-cran-epi-2.32/man/poisreg.Rd 2018-07-10 05:41:13.000000000 +0000 +++ r-cran-epi-2.37/man/poisreg.Rd 2019-02-09 13:33:31.000000000 +0000 @@ -57,7 +57,7 @@ ## age and sex data(DMepi) DMepi$agegrp <- cut(DMepi$A, seq(from=0, to=100, by=5)) - inc.diab <- glm(cbind(X, Y.nD) ~ 0 + agegrp + sex, family=poisreg, + inc.diab <- glm(cbind(X, Y.nD) ~ -1 + agegrp + sex, family=poisreg, data=DMepi) ## The coefficients for agegrp are log incidence rates for men in each ## age group. The coefficient for sex is the log of the female:male @@ -70,9 +70,11 @@ gam.diab <- gam( cbind(X, Y.nD) ~ s(A,by=sex) + sex, family=poisreg, data=DMepi) + ## There is no need/use for Y.nD in prediction data frames: nM <- data.frame( A=20:90, sex="M" ) nF <- data.frame( A=20:90, sex="F" ) + ## Rates are returned in units of (1 year)^-1, so we must scale the ## rates by hand: matshade( nM$A, cbind( ci.pred(gam.diab, nM )*1000, diff -Nru r-cran-epi-2.32/man/pr.Rd r-cran-epi-2.37/man/pr.Rd --- r-cran-epi-2.32/man/pr.Rd 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-epi-2.37/man/pr.Rd 2018-09-25 20:01:51.000000000 +0000 @@ -0,0 +1,23 @@ +\name{pr} +\alias{pr} +\docType{data} +\title{Diabetes prevance as of 2010-01-01 in Denmark +} +\description{ +Diabetes prevalence as of 2010-01-01 in Denmark in 1-year age classes by sex. +} +\usage{data("pr")} +\format{ + A data frame with 200 observations on the following 4 variables. + \describe{ + \item{\code{A}}{Numeric, age, 0-99} + \item{\code{sex}}{Sex, a factor with levels \code{M} \code{F}} + \item{\code{X}}{Number of diabetes patients} + \item{\code{N}}{Population size} + } +} +\examples{ +data(pr) +str(pr) +} +\keyword{datasets} diff -Nru r-cran-epi-2.32/man/simLexis.Rd r-cran-epi-2.37/man/simLexis.Rd --- r-cran-epi-2.32/man/simLexis.Rd 2016-11-10 13:29:18.000000000 +0000 +++ r-cran-epi-2.37/man/simLexis.Rd 2019-01-27 10:24:52.000000000 +0000 @@ -44,7 +44,7 @@ list. Elements of the intter lists represent transitions. See details.} \item{init}{A (pre-)\code{\link{Lexis}} object representing the initial state of the persons whose trajectories through the multiple states - we want to simulate. Must have an attribute "time.since" --- see + we want to simulate. Must have attributes "time.scales" and "time.since" --- see details. Duplicate values of \code{lex.id} are not sensible and not accepted.} \item{N}{Numeric. How many persons should be simulated. \code{N} @@ -218,11 +218,9 @@ "Ins" = list( "Dead(Ins)" = Ins.Dead ) ) lapply( Tr, names ) -# Define an initial object - note the combination of "select=" and NULL -# which ensures that the relevant attributes from the Lexis object 'Si' -# are carried over to 'ini' ( Si[NULL,1:9] will lose essential -# attributes ): -ini <- subset(Si,select=1:9)[NULL,] +# Define an initial object - note the subsetting that ensures that +# all attributes are carried over +ini <- Si[1,1:9][-1,] ini[1:2,"lex.Cst"] <- "DM" ini[1:2,"Per"] <- 1995 ini[1:2,"Age"] <- 60 diff -Nru r-cran-epi-2.32/man/start.Lexis.Rd r-cran-epi-2.37/man/start.Lexis.Rd --- r-cran-epi-2.32/man/start.Lexis.Rd 2016-11-10 13:29:18.000000000 +0000 +++ r-cran-epi-2.37/man/start.Lexis.Rd 2019-02-17 15:12:40.000000000 +0000 @@ -1,41 +1,74 @@ -\name{start.Lexis} +\name{entry.Lexis} \alias{entry} \alias{exit} \alias{status} \alias{dur} -\title{Time series methods for Lexis objects} +\alias{transient} +\alias{absorbing} +\alias{before} +\alias{preceding} +\alias{after} +\alias{succeeding} +\title{Time series and other methods for Lexis objects} \description{ Extract the entry time, exit time, status or duration of follow-up from a - \code{Lexis} object. + \code{Lexis} object. Classify states. } \usage{ - entry(x, time.scale = NULL, by.id=FALSE) - exit(x, time.scale = NULL, by.id=FALSE) -status(x, at="exit" , by.id=FALSE) - dur(x, by.id=FALSE) + entry( x, time.scale = NULL, by.id=FALSE ) + exit( x, time.scale = NULL, by.id=FALSE ) + status( x, at="exit" , by.id=FALSE ) + dur( x, by.id=FALSE ) + transient( x ) + absorbing( x ) + preceding( x, states ) + before( x, states ) +succeeding( x, states ) + after( x, states ) } \arguments{ \item{x}{an object of class \code{Lexis}.} \item{time.scale}{a string or integer indicating the time scale. If omitted, all times scales are used.} \item{by.id}{Logical, if \code{TRUE}, only one record per unique value - of \code{lex.id} is returned; either the first, the last or for + of \code{lex.id} is returned; either the first, the last, or for \code{dur}, the sum of \code{lex.dur}. If \code{TRUE}, the returned - object have the \code{lex.id} as (row)nmes attribute.} + object have the \code{lex.id} as (row)names attribute.} \item{at}{string indicating the time point(s) at which status is to be - measured.} + measured. Possible values are "exit" or "entry".} + \item{states}{Character vector of states.} } \value{ The \code{entry} and \code{exit} functions return a vector of entry times and exit times, respectively, on the requested time - scale. If multiple time scales are requested, then a matrix is + scale. If multiple time scales are requested, a matrix is returned. - The \code{status} function returns a vector giving the status - at entry or exit and \code{dur} returns a vector with the lengths - of the follow-up intervals. + The \code{status} function returns a vector giving the status at + "\code{at}" (either '\code{entry}' or '\code{exit}') and \code{dur} + returns a vector with the lengths of the follow-up intervals. + + \code{entry}, \code{exit}, \code{status} and \code{dur} return vectors + of length \code{nrow(x)} if \code{by.id=FALSE}; if \code{by.id=TRUE} a + vector of length \code{length(unique(lex.id))}. + + The functions \code{transient} and \code{absorbing} return character + vectors of the transient, resp. absorbing states in \code{x}. These + are necessariy disjoint but the union may be a proper subset of + \code{levels(x)}, since the latter may have levels that are never + assumed by either \code{lex.Cst} or \code{lex.Xst}. + + \code{preceding} returns a character vector with names of the states + of the Lexis object \code{x} from which one of the states in + \code{states} can be reached directly - the preceding + states. \code{before} is just a synonym for \code{preceding}. + + \code{succeeding} returns a character vector with names of the states + of the Lexis object \code{x} that can be reached directly from one of + the states in \code{states}. \code{after} is just a synonym for + \code{succeeding}. } -\author{Martyn Plummer} +\author{Martyn Plummer & Bendix Carstensen} \seealso{\code{\link{Lexis}}} \keyword{survival} \keyword{ts} diff -Nru r-cran-epi-2.32/man/subset.Lexis.Rd r-cran-epi-2.37/man/subset.Lexis.Rd --- r-cran-epi-2.32/man/subset.Lexis.Rd 2016-11-10 13:29:18.000000000 +0000 +++ r-cran-epi-2.37/man/subset.Lexis.Rd 2018-08-31 08:01:57.000000000 +0000 @@ -32,5 +32,5 @@ A \code{Lexis} object with selected rows and columns. } \author{Martyn Plummer} -\seealso{\code{\link{Lexis}}, \code{\link{merge.Lexis}}} +\seealso{\code{\link{Lexis}}, \code{\link{merge.Lexis}}, \code{\link{bootLexis}}} \keyword{manip} diff -Nru r-cran-epi-2.32/man/time.scales.Rd r-cran-epi-2.37/man/time.scales.Rd --- r-cran-epi-2.32/man/time.scales.Rd 2016-11-10 13:29:18.000000000 +0000 +++ r-cran-epi-2.37/man/time.scales.Rd 2019-02-12 12:47:15.000000000 +0000 @@ -1,20 +1,36 @@ \name{timeScales} \alias{timeScales} +\alias{timeSince} +\alias{tsNA20} \title{The time scales of a Lexis object} \description{ - Function to get the names of the time scales of a \code{Lexis} object. + Functions to get the names and type of the time scales of a + \code{Lexis} object. } \usage{ timeScales(x) +timeSince(x) +tsNA20( x, all.scales=FALSE ) } \arguments{ \item{x}{an object of class \code{Lexis}.} + \item{all.scales}{Should NAs in all timescales be replaced by 0? If + \code{FALSE} (the default) only timescales defined as time since + entry to a state get \code{NA}s replaced by 0s} } \value{ - A character vector containing the names of the variables in \code{x} - that represent the time scales. Extracted from the \code{time.scales} - attribute of the object. + \code{timeScales} returns a character vector containing the names of + the variables in \code{x} that represent the time scales. Extracted + from the \code{time.scales} attribute of the object. + + \code{timeSince} returns a named character vector, the names being the + names of the timescales and the content being the names of the states + to which the corresponding timescale is defined as time since + entry. For those time scales that are not defined as such an empty + string is used. Hence, if none of the timescales are defined as time + since entry to a state \code{timeSince} will return a vector of empty + strings. } -\author{Martyn Plummer} +\author{Martyn Plummer, Bendix Carstensen} \seealso{\code{\link{Lexis}}, \code{\link{splitLexis}}} \keyword{attribute} diff -Nru r-cran-epi-2.32/man/transform.Lexis.Rd r-cran-epi-2.37/man/transform.Lexis.Rd --- r-cran-epi-2.32/man/transform.Lexis.Rd 2017-11-27 15:50:08.000000000 +0000 +++ r-cran-epi-2.37/man/transform.Lexis.Rd 2019-03-15 09:40:58.000000000 +0000 @@ -5,9 +5,9 @@ \alias{factorize} \alias{factorize.Lexis} \alias{levels.Lexis} -\alias{order.Lexis} -\alias{sort.Lexis} -\title{Transform a Lexis (or stacked.Lexis) objects} +% \alias{order.Lexis} +% \alias{sort.Lexis} +\title{Transform a Lexis (or stacked.Lexis) object} \description{ Modify a Lexis object. } @@ -16,9 +16,9 @@ \method{Relevel}{Lexis}( x, states, print = TRUE, \dots ) \method{levels}{Lexis}( x ) \method{factorize}{Lexis}( x, states, print = TRUE, \dots ) -%\method{order}{Lexis}( x, \dots ) -%\method{sort}{Lexis}( x, \dots ) \method{transform}{stacked.Lexis}( `_data`, \dots) +% order.Lexis( x, \dots ) +% sort.Lexis( x, \dots ) } \arguments{ \item{_data}{an object of class \code{Lexis}.} @@ -29,7 +29,8 @@ sanity check for the latter operation is undertaken.} \item{print}{Should a conversion between old and new levels be printed?} \item{\dots}{Additional arguments to be passed to - \code{\link{transform.data.frame}} or \code{\link{Relevel}}.} + \code{\link{transform.data.frame}}, \code{\link{Relevel}} + \code{order} or \code{sort}.} } \details{ The transform method for \code{Lexis} objects works exactly as the @@ -49,16 +50,24 @@ matrix as a second argument - the number of levels of \code{lex.Cst} is rarely (if ever) large. + Note that if \code{states} is an integer vector, the levels of + \code{lex.Cst} and \code{lex.Xst} are permuted. If \code{states} is a + list of numbers or strings, the levels of \code{lex.Cst} and + \code{lex.Xst} will be permuted. But if \code{states} is a character + vector it must have length \code{nlevels(lex.Cst)}, and the result + will be a \emph{renaming} of the levels. + If \code{states} is \code{NULL}, as when for example the argument is not passed to the function, the returned object have levels of \code{lex.Cst}, \code{lex.Xst} (and for \code{stacked.Lexis} objects - \code{lex.Tr}) shaved down to the actually occurring values. + \code{lex.Tr}) shaved down to the actually occurring values; that is, + empty levels are discarded. % \code{order} returns the order of the rows in a Lexis object to sort % it by (\code{lex.id},\code{timeScales[x]}). % \code{sort} returns the Lexis object sorted by -% (\code{lex.id},\code{timeScales[x]}). +% (\code{lex.id},\code{timeScales[x]}). } \value{ A transformed \code{Lexis} object. @@ -71,7 +80,9 @@ \code{\link{merge.Lexis}}, \code{\link{subset.Lexis}}, \code{\link{subset.stacked.Lexis}}, - \code{\link{Relevel}}} + \code{\link{Relevel}}, + \code{\link{transient}}, + \code{\link{absorbing}}} \examples{ data( nickel ) nic <- Lexis( data = nickel, diff -Nru r-cran-epi-2.32/MD5 r-cran-epi-2.37/MD5 --- r-cran-epi-2.32/MD5 2018-08-23 05:44:27.000000000 +0000 +++ r-cran-epi-2.37/MD5 2019-05-23 17:30:04.000000000 +0000 @@ -1,6 +1,6 @@ -8716d8df90528ccfd7de3e9bc1137ccc *CHANGES -46729466a0b85be593c1e97d98d3515a *DESCRIPTION -fe0cadcce9043148b616e0352c698d63 *NAMESPACE +d486ce5c04d548077eb33546829e1d06 *CHANGES +967f058c2e2aacda47ddb200ceeab790 *DESCRIPTION +87e418bd7c73d91953978b6a10897da9 *NAMESPACE fac3c7f01ab0bd6930cf3cbea20e6bcc *R/Aplot.R a031997be7388601dd1df6a28fc55328 *R/Cplot.R 6a1c4f4cbbf509d1f3c40b9f0adb6399 *R/Icens.R @@ -17,17 +17,17 @@ 0e3e3d23b8ab79dcf3046828190462b2 *R/Termplot.R 3f30311f13dff23a36857e49a320536f *R/Wald.R 171324300a75a19f01fab28c1eb4ab44 *R/addCov.Lexis.R -b1f0000430d6e15f59fe949ed8f1beb1 *R/apc.LCa.R -052d938c7863f1a2e0b6d0688d379620 *R/apc.fit.R +3f1b3d03d4d91baa3625e6112d9bcb55 *R/apc.LCa.R +0f9a88d6d781d2fa1e5292454fd64421 *R/apc.fit.R 3c53b3d667196c204fab811d2231ccd5 *R/apc.frame.R -bd898be838955d2fd5e2b2af764fc273 *R/apc.plot.R +47465f1ff476493433d2ae5634ce3b12 *R/apc.plot.R 8ce5a4c989947cb9d93becdecfde642c *R/as.Date.cal.yr.R -7d5600ddbfd17861ed882617b2312da8 *R/bootLexis.R -f4e2a039dda786e4cad0b2961e3dd599 *R/boxes.MS.R +81ce578a7e479b90d62726d8fc9b2fd5 *R/bootLexis.R +2079a64747e13aea8c038437d2e679f5 *R/boxes.MS.R 44e5f888817c94b8a190eae4aa964a2c *R/cal.yr.R 6988e76eb64f004a8c5029c23f8603af *R/ccwc.R -05ab9917d6d7d1e99b5cdd336652ad8b *R/ci.cum.R -b153700b6cfcd42fb4bf33b7f111cdc4 *R/ci.lin.R +b078c41ae066c38ad950c169e6095493 *R/ci.cum.R +26596bc18fa84c144885d32eab734767 *R/ci.lin.R 6f80c7b2b2120068cd16c043fbc29db3 *R/ci.mat.R 886f87cbd08266fa281e2ed7b3b3c253 *R/ci.pd.R 7637da11fdfb3612df5d73a64ee044e4 *R/clear.R @@ -37,8 +37,8 @@ 8064804ee2b1cfa0cea2b44a58106b0a *R/contr.diff.R 8ffc3eabb90b6b9b710109015779d853 *R/contr.orth.R efb8f1a3b97b41244275534855707b8b *R/crr.Lexis.r -ddffd14a0d695b6de25c997b2aeb4440 *R/cutLexis.R -ee27ae6d60db9c8055796daf62500891 *R/detrend.R +ea30373f10400d562226afd2cd115106 *R/cutLexis.R +b0d625f846ebf87e6aa7dfc188f4d7c1 *R/detrend.R a86250cce524f9fa2215c0c0cac4c6e7 *R/effx.match.r 79292fb5601cdd6ead350751f4c7ae59 *R/effx.r abd3fdeb059a214f253e2f3e4658a302 *R/erl.R @@ -50,13 +50,13 @@ 7c7059bf47c6421d6412a4bc53fc5950 *R/float.R 8ff6ab0fcba0c2d962e51e757fbe417c *R/foreign.R da7b5d41fe7876ef6bab01839b613939 *R/ftrend.R -724d7003147bd4ac3283d5ba0677f867 *R/gen.exp.R -e109f0b87726345007d5e1485295b11c *R/lexis.R +fa3fd08784cc9ba4731ad8b5b974be50 *R/gen.exp.R +97af0a7669de5ab5e9824f7c32552d18 *R/lexis.R f8eba91de17a8207c6303ffb7b34f0a7 *R/lls.R 09fd4a9dfc1bfe97d4b45d02003c7755 *R/matshade.R 2cd7a2e56034371e12392aa9fa1ec9de *R/mcutLexis.R 79d6e0b02a2c4e98fbbfcd936b21d377 *R/mh.R -9b5534b3d2e80c70cd61596c551169f8 *R/mod.Lexis.R +a1dd4f3e84a00a460f9176cde67f8dcf *R/mod.Lexis.R 9e48e47769874a34f63a2e2d4e6ffffc *R/ncut.r 2a44a66ac4fe7eccab191f94957265e2 *R/pctab.R ff45a8401b6862061389134357b610cc *R/plotCIF.R @@ -66,21 +66,22 @@ c0229c2819d977a8c557a22dea1dd5fa *R/print.Icens.r cca51283f637927702fbe217367011eb *R/print.floated.R 961858aa39320463dffa6154c70c87f6 *R/projection.ip.r +879a24bc57311a2e9a0ababbdbac3a4a *R/qspr.R 0c2a2e493c190c4e74587b5127e9bef6 *R/rateplot.R 251d9ec19ef0ca158582f78d70255c23 *R/rm.tr.R -804e11d9d564a860fce275258f40db3d *R/simLexis.R +1f3397f4132776aadb6742688f3b75df *R/simLexis.R 358b6fa60093d8bbee969a76a88797c0 *R/splitLexis.R e08261ab194f4aca8502ee249afd309e *R/stack.Lexis.R 8796d2af48a79385ad22b03ed0f4059c *R/stackedCIF.R fb143d532001643d3f74fbd2e114e976 *R/stattable.R 889282ea37a5a9d9bf7155d790cd9036 *R/summary.Icens.r -8ceccd0ffa9694670f38c34b65d5be10 *R/summary.Lexis.r +b8638a405b03eaf8357a7c6f501aa75e *R/summary.Lexis.r 88983ba8314a37910993d469a982d800 *R/twoby2.R 53904292629435d35ec42f11329b2c60 *R/xgrep.R b9d3112271b9545896c98ec6406f5f02 *data/B.dk.rda 713014c7934c558deeba7accf8fd8e6c *data/BrCa.rda 666e336a1aefb9c4298abb83a81a3300 *data/DMconv.rda -ea83ed46a5581ea8489646abd3d1c0ae *data/DMepi.rda +074ff34399f500115f0cf77f22d215c7 *data/DMepi.rda e200a06eee9305b2fa2c59af5982d62b *data/DMlate.rda e8f6f529b77cb3a860b9d53f5f65c3bb *data/DMrand.rda ba4f4ad111ab2f488d1373c1cfcb7398 *data/M.dk.rda @@ -101,26 +102,26 @@ cb139c31c1fb0421232caf88181ec5ac *data/mortDK.rda deb94384c7380312f9b33f75cf6d8559 *data/nickel.rda 4640a31484934cccc1d626104af1365f *data/occup.rda +e5d644438c62b0a219c98a9bb64f7e8d *data/pr.rda 404cc2c826ccdfb5b5edbe9834561e02 *data/testisDK.rda 1953ddd7d750051c5682fa08b11cb777 *data/thoro.rda 9bd39178a387f935acc54519109c9f4a *inst/CITATION -f981b8fde8b6349a0716edfe58684262 *inst/doc/etm.R -edd5671d5a51db16bdefb30c3533971c *inst/doc/etm.pdf -79f9f6f2e09e51ae2a51369afe860840 *inst/doc/flup.R -a3f34682ae33cb1854a686ba290d1726 *inst/doc/flup.pdf -45c955bf379daab5d2d8b0fea4432249 *inst/doc/index.html -f89cf850bd20072be74838eb4137ee09 *inst/doc/simLexis.R -5dfc49b898f0125b15e3e471a90caf42 *inst/doc/simLexis.pdf +8c30a8997e51bbc63d39f355699d153a *inst/doc/flup.R +b796753578ca2f2da400e540f1d6bcd9 *inst/doc/flup.pdf +946025af2611cfeaa4a6aa91ea5e09d3 *inst/doc/index.html +f067c3b6790b9a799c39d9dfbff62bc7 *inst/doc/simLexis.R +bab9c80735a2ff8dc8ad4564df061288 *inst/doc/simLexis.pdf a80a260dbf7d640a43c921d042aae6e2 *inst/doc/yll.R -247c9b4a1b95e00c254520eb85592866 *inst/doc/yll.pdf +99a30706a045beb8794f31f9126c4481 *inst/doc/yll.pdf 317bb6ebeaec5e7376b609d7e382c166 *man/B.dk.Rd ffe380e7252698c820a448b587714b49 *man/BrCa.Rd 3abc5c20e62c874d70dcb2688ad2cce5 *man/DMconv.Rd -ae0088f5434c66484668b6d085145889 *man/DMepi.Rd +abe65ad85f5ed8ee4a4efbe865dc4c13 *man/DMepi.Rd 362200b221355a7bcbcd1c7c8df5243f *man/DMlate.Rd +69b5075e6ad38e535c6251ad11c0ddb9 *man/Epi.Rd c87e6a0d0a5c461ecf4df07c2c96f1c1 *man/Icens.Rd c94a1f15f90406b35c24ff9e3e6b9147 *man/LCa.fit.Rd -b2b72f05c096c2a8cf397a45170a4272 *man/Lexis.Rd +66e6c62f92cd073f6f414489f0e26394 *man/Lexis.Rd c97dabafce248e4d47c5b18b6f04efe0 *man/Lexis.diagram.Rd 2e71ca58213788c8a358708e776e2112 *man/Lexis.lines.Rd b4b2ca144feacece204f86bc6640d51a *man/Life.lines.Rd @@ -135,28 +136,28 @@ 9351888a37017068f2c9ac8b843c846d *man/Termplot.Rd b4879df831de32e9e298b7b3d90389a9 *man/Y.dk.Rd 89a0abc6ca87f561fc48e0f24f268622 *man/addCov.Lexis.Rd -5d347f8d93ad0ca09b8992fa7aa2d241 *man/apc.LCa.Rd -14636e67b2485cba556cfcb832f8c950 *man/apc.fit.Rd +d0c12ce329066c797e9abcb470aeb977 *man/apc.LCa.Rd +0559f791e735daec0e605ab961157ad7 *man/apc.fit.Rd 345568f28bad999578b39277080a7283 *man/apc.frame.Rd 92b663829915395cdabf3e5d37cdd2b4 *man/bdendo.Rd 968ba8cef65153e9c9c521dd1541123e *man/bdendo11.Rd d74a4f6e66ce91c73cd2817bb690446a *man/births.Rd 8dc11840302a388e4f26892335d5d888 *man/blcaIT.Rd -46d07584ef468f0ff58338f5379a5b0e *man/bootLexis.Rd -6b9f6bc7ca862808b89f6439914b8a0a *man/boxes.MS.Rd +3220d0fca428fa6925336d4adb50704f *man/bootLexis.Rd +1db75b537dd111cd44968eead45d429d *man/boxes.MS.Rd 18d942d375c5d5f6899291bf9cd1fe11 *man/brv.Rd a2bedd44df3606b6585fd1da897de7ec *man/cal.yr.Rd effba2c17ade00f0afd10184a073d34c *man/ccwc.Rd -90337e7772c67e4e140e21165c30f079 *man/ci.cum.Rd -ba805bd44ae980ebffbf77ae74f49a43 *man/ci.lin.Rd +cd53424f8e828d3cf6e04427fa1f3921 *man/ci.cum.Rd +644a163ab1054c1308ba3a9caa5a3051 *man/ci.lin.Rd e7477402aa4746d653a0c72b2239d185 *man/ci.pd.Rd 128c0b1d00a69e41d131a6f74a1bae23 *man/clogistic.Rd cf5e5a6e18e07b7bb8d70dc0417e6166 *man/contr.cum.Rd 24343adc886e90c75f7af178201bd6cd *man/crr.Lexis.rd -7257b81a897faca909e48c6e065efd25 *man/cutLexis.Rd -fb67a9ca34c612618927beeab4097ae8 *man/detrend.Rd -386d0a4aa3515cf754ef458e1f32089c *man/diet.Rd -64dd6599bb830e98f4717b1a34ee12ab *man/effx.Rd +1a0a552b45d98ed5839be37d891c946a *man/cutLexis.Rd +5ae9f2331451715f3c1216ab6330c986 *man/detrend.Rd +8c51a1104754ab84c116f7e4f281ea9c *man/diet.Rd +b7ba0b42c46340a5fda9923a37d6b6d1 *man/effx.Rd 3c3cc3b1ecabece6b7709c1b147bab4e *man/effx.match.Rd f281dede5c1a091f9dca30199fb41bec *man/erl.Rd 0dc62e726cdff6694774fab66595ee81 *man/ewrates.Rd @@ -170,17 +171,17 @@ c5cc45fada3c644af88bc65a4f8c576b *man/gen.exp.Rd cbbc2a23f902d83ba2527b27b5ed3adb *man/gmortDK.Rd d9f1d31e109d6058a2160ca0aa49bf81 *man/hivDK.Rd -1fa4f1261c5c3cbf2760a29a4ca55574 *man/in.span.Rd +17ff9bd8b2de1f1066eb7b4d64fe0789 *man/in.span.Rd 9c3b059921d728298ccff73298b4dd31 *man/lep.Rd 470bf279897b9c9ee2600126127d8ada *man/lgrep.Rd -e14c71b1f693c680db3824ab97b0da49 *man/lines.apc.Rd +b49ccfbb53813015c2cdab43b46dfe41 *man/lines.apc.Rd 78125ba49d54905fb727765dc8997796 *man/lls.Rd 661ec298ffcd0f39cf55c5d033e04fd1 *man/lungDK.Rd f6fcbbdf12f30dd434f00938705cb4d1 *man/matshade.Rd -a378ebdcb3944f3844508f04775f59b9 *man/mcutLexis.Rd +b0c31103cfae36db577a8e9faf838589 *man/mcutLexis.Rd 3c462c4ce7ca1ce4b8c6fef50b3c7623 *man/merge.Lexis.Rd 34c530c36a3a1a7447ff8eea58f42084 *man/mh.Rd -cd966607c4e22c840472d5d298344a10 *man/mod.Lexis.Rd +1768d627477878408c79dd5552a8ff34 *man/mod.Lexis.Rd 2ecfec2d86e80b9bbcc2c3a253348edd *man/mortDK.Rd 9a531412399076898e3e39c1c20eb69a *man/ncut.Rd 880466737da2c1037259c3f712a7e9fe *man/nice.Rd @@ -193,66 +194,78 @@ 423b5c3961d8656bc8aa37c96bfca101 *man/plotCIF.Rd 5e3bda25091001df87ded934e0fd929f *man/plotEst.Rd b0047a7bab6a20b153485db9bd338394 *man/plotevent.rd -618319a63516359b95e1e24d2e957e31 *man/poisreg.Rd +5ec37fecb0bf62364a9ccbcff2126a19 *man/poisreg.Rd +e67824a4cb4bb5e2f545801b49e09aae *man/pr.Rd feb2228ef38635fcbf9083253c2f6d21 *man/projection.ip.rd a04f5841de6a7a9cf9a21e5caa8e54a5 *man/rateplot.Rd 78a9dbec1798f1b4e5735c71756f0ec6 *man/rbind.Lexis.Rd c5df7f725aed8ec3520ebd9735deeccb *man/rm.tr.Rd -9fee82808de1916808de45242ebafd78 *man/simLexis.Rd +86a4329f999e1c03193bb2ac81fcb6f6 *man/simLexis.Rd a22aa8901f1f717b6d3a7ed086bc8e02 *man/splitLexis.Rd 00fd0080aeb0551b2af69b6d918265e2 *man/stack.Lexis.Rd -eaad8ae22bd68a18750a94352f65f9c3 *man/start.Lexis.Rd +a1276119b4a28fd73149101911b6051b *man/start.Lexis.Rd 4f43436fc863747dad4fcdd566948039 *man/stattable.Rd 73cf4597fcb4a9d019bda47431b095be *man/stattable.funs.Rd -f10fc89dadf571e07f3b13923660cff6 *man/subset.Lexis.Rd +7deeb9aa7ac3eaf219335cfdc4d3a1c1 *man/subset.Lexis.Rd 75b79b094aa4e94218df58cd5bec86d9 *man/summary.Lexis.rd 14711d23ae0b89d9d22b9f3b07211093 *man/testisDK.Rd e3446824b09ea4ad8e8a0ddfc1987eef *man/thoro.Rd 6ea9cf64475187fa2f402275c40573a7 *man/time.band.Rd -2edb93539fa7b9f47a3083b624443f9d *man/time.scales.Rd -ca5a17eba0753419b75dcbe4e78ab26d *man/transform.Lexis.Rd +120a43f3c8f4bff2fd46ef1c4137a3a6 *man/time.scales.Rd +c9682642d61eee87fb35db0e7c91667c *man/transform.Lexis.Rd 02fc311541fae16a5d3a580a04925050 *man/twoby2.Rd bcc0556f8f0a158637592731928cf835 *src/chinv2.c 3e3a51847b5767274f0945350e947726 *src/cholesky2.c 09bf10fed80e805e7e58db9794e5edcc *src/chsolve2.c 872ef5e7ef0c1941c17d7dd612f6b1c3 *src/clogit.c d77bd820f6ce7dc7883c9e2813a44c89 *src/init.c -c2720c2f2634c041d8e0060d5e5d28ba *vignettes/fixall -3979bd89a72b00b468c2729b49ab8966 *vignettes/flup-nic-box.pdf -dbc159e5816e0edc098649fed8cc2f02 *vignettes/flup-nicL1.pdf -e59dc88157dfd364041097d769e03123 *vignettes/flup-nicL2.pdf -f04332278df5c7ba7414d28b0d585284 *vignettes/flup-pr-a.pdf -15f4cb80dac3cbfd568a247404322a44 *vignettes/flup-pr-at-af.pdf -dbf3d42b2e53c8eebaed3bb68f816ced *vignettes/flup-pr-at.pdf -79f9f6f2e09e51ae2a51369afe860840 *vignettes/flup.R -a3f34682ae33cb1854a686ba290d1726 *vignettes/flup.pdf -e571e2d2c713944c9e4a2e29572b4404 *vignettes/flup.rnw -6b18a4a5c5a1b4b921d24ee0ffe9ef7b *vignettes/flup.rwl -0f5a474691708d55b8c25654f672837d *vignettes/flup.tex -ad3c144cc840415436d0ca57199ef523 *vignettes/int-test.Rda -e92a0bc28e9f4f59a1557ce6d8cfe2e2 *vignettes/pr.Rda -5dfc49b898f0125b15e3e471a90caf42 *vignettes/sL.pdf -53297bb8f3d8d5f236a57a74094b99d6 *vignettes/simLexis-boxes.pdf -c1adf5ea9bfcb5730826815b3d9d603e *vignettes/simLexis-comp-0.pdf -c302f562af43a525ba4071f70e0832a4 *vignettes/simLexis-mort-int.pdf -93275a9279f994f5fb4164f5844ee002 *vignettes/simLexis-pstate0.pdf -29955ab2796c724d25b50685ec212b1f *vignettes/simLexis-pstatex.pdf -2cf807f77cb0b8e9f08e6cedf3593967 *vignettes/simLexis-pstatey.pdf -f89cf850bd20072be74838eb4137ee09 *vignettes/simLexis.R -8f77aa11a639cf50b07e16a71550dd77 *vignettes/simLexis.pdf -2754bf3a1b46be9334d0f2024a7caeaf *vignettes/simLexis.rnw -d1a872adecbee48f573d1579de5a709e *vignettes/simLexis.rwl -ad055b0b30f9a327d41570db28bfac96 *vignettes/simLexis.tex +c1908cc6ea84950e5718ffde4162a278 *vignettes/auto/flup.el +508d07459358369c4c7258f79a1fdfee *vignettes/fixall +40ec221bb0d068062ccbbbdfabd2cabb *vignettes/flup-Ieff.pdf +fbaadb8867e2eeb787f9d596e1e4e74e *vignettes/flup-IeffR.pdf +9c8b10fbea2796504ca87f68b94fd99b *vignettes/flup-RR-int.pdf +b3a50510a686e173ca86d959562010bc *vignettes/flup-box1.pdf +f14e32e5d668e6b8a33b766337975826 *vignettes/flup-box4.pdf +85b43ec0906dbe9d8fa5fe0a61cefaba *vignettes/flup-dmL1.pdf +e4024cef70b568a915315a7214ce0683 *vignettes/flup-dmL2.pdf +be8359647916d081ed7fec3f2e0f9061 *vignettes/flup-dur-int-RR.pdf +68319b5813b4cf25b3eadad738262386 *vignettes/flup-dur-int.pdf +82456a385ff932b61bda8f8e3dd6a609 *vignettes/flup-ins-time.pdf +de30a0b54da477bf94a95a0ef1123034 *vignettes/flup-mbox.pdf +3e0550ddb3dc1db4adc9c0ce9abc3989 *vignettes/flup-mboxr.pdf +58db58354dc934ef98501361f57f126a *vignettes/flup-pr-a.pdf +9d00a56ab201384b8a6942c410fac266 *vignettes/flup-sep-HR.pdf +958b9192ecde705c958ab56208730667 *vignettes/flup-sep-mort.pdf +cdf9e3fd29f0420d0286b0560567fbdf *vignettes/flup-splint.pdf +8c30a8997e51bbc63d39f355699d153a *vignettes/flup.R +b796753578ca2f2da400e540f1d6bcd9 *vignettes/flup.pdf +52d6b27e6fab89b7f822649295eea239 *vignettes/flup.rnw +663a290f9ac0442e19108907ac7d2f71 *vignettes/flup.rwl +4038ab9f53f94f7634afc8aaa7816ee2 *vignettes/flup.tex +bab9c80735a2ff8dc8ad4564df061288 *vignettes/sL.pdf +81c62dcf74258ecdba46b54058dbe14b *vignettes/sL.tex +74be950af4b296e6013f7e215de5f588 *vignettes/simLexis-boxes.pdf +d0365d66cc024dbe39e9d8a50267a019 *vignettes/simLexis-comp-0.pdf +0064d73fae0354a88baa165f7fe07957 *vignettes/simLexis-mort-int.pdf +bfc4b303cdd60cb40a7eec7083071645 *vignettes/simLexis-pstate0.pdf +26df4eca79e765ae240539cf1ef0c392 *vignettes/simLexis-pstatex.pdf +9780a51bfee61496400e69d24075ffb2 *vignettes/simLexis-pstatey.pdf +f067c3b6790b9a799c39d9dfbff62bc7 *vignettes/simLexis.R +3282ef26ea3cf5d24b522c1af561e717 *vignettes/simLexis.pdf +684a5c00a5b1bc49f85af075f8862bda *vignettes/simLexis.rnw +6d64f2aa510952f1b809e45abeecd529 *vignettes/simLexis.rwl +25863b7b1f53f2371166b5fc81799ec1 *vignettes/simLexis.tex e18d102389aa23a92012542bb87debdd *vignettes/toparticle.tex b4b55559db3e7a6c0a8eb3632ead0d8e *vignettes/topreport.tex -0ee2e7b027912753d8b3b92b46ed524a *vignettes/useful.tex -247c9b4a1b95e00c254520eb85592866 *vignettes/yl.pdf -c3958eb51451ee4673e9483a22d4f9d9 *vignettes/yll-imm.pdf -3997496b6fd79696e08282f3b54fa7b1 *vignettes/yll-states.pdf -6ee3aee63eb25042373adede269dbb7d *vignettes/yll-sus.pdf -43a542c2046b737a9b37ba7f3ccfaaea *vignettes/yll-tot.pdf +4ca326289be7225df127914243ab35fb *vignettes/useful.tex +99a30706a045beb8794f31f9126c4481 *vignettes/yl.pdf +012d4d0eda02ef12a710a499cfdb5dfd *vignettes/yl.tex +51fd19e7d3b0545697875c5c6bdd80d3 *vignettes/yll-imm.pdf +24a6f0d76c98f1e4eb168c873df452d1 *vignettes/yll-states.pdf +e7a1b502ae8c3e48be26b2280a971063 *vignettes/yll-sus.pdf +ddab10f320ac0a9b214adebf100c055b *vignettes/yll-tot.pdf a80a260dbf7d640a43c921d042aae6e2 *vignettes/yll.R -4a254f0768f19e3c8288a052896cfd6e *vignettes/yll.pdf +26cd613e042387d0bc8ea188b6c4b764 *vignettes/yll.pdf 3fbca640fc202dcc4736ac53a89a32cf *vignettes/yll.rnw -c6221b49df645e31fa1567fe0e75331f *vignettes/yll.rwl -bf1bea3b9bebd8bab7155eefb9d515e4 *vignettes/yll.tex +b913f4e2ef69c1acd9f815dc92eb853d *vignettes/yll.rwl +5141e0be29f81ac6f3d52fa42e57c2f4 *vignettes/yll.tex diff -Nru r-cran-epi-2.32/NAMESPACE r-cran-epi-2.37/NAMESPACE --- r-cran-epi-2.32/NAMESPACE 2018-07-13 09:52:11.000000000 +0000 +++ r-cran-epi-2.37/NAMESPACE 2019-05-22 21:55:53.000000000 +0000 @@ -24,6 +24,7 @@ ci.lin, ci.exp, ci.cum, + ci.surv, ci.pred, ci.ratio, ci.mat, @@ -108,7 +109,15 @@ status, timeBand, timeScales, + timeSince, + tsNA20, breaks, + absorbing, + transient, + preceding, + before, + succeeding, + after, tbox, dbox, fillarr, diff -Nru r-cran-epi-2.32/R/apc.fit.R r-cran-epi-2.37/R/apc.fit.R --- r-cran-epi-2.32/R/apc.fit.R 2018-07-19 07:07:55.000000000 +0000 +++ r-cran-epi-2.37/R/apc.fit.R 2019-04-06 09:09:01.000000000 +0000 @@ -8,7 +8,7 @@ ref.p, dist = c("poisson","binomial"), model = c("ns","bs","ls","factor"), - dr.extr = "weighted", + dr.extr = "Y", parm = c("ACP","APC","AdCP","AdPC","Ad-P-C","Ad-C-P","AC-P","AP-C"), npar = c( A=5, P=5, C=5 ), scale = 1, @@ -19,7 +19,12 @@ model <- match.arg(model) drtyp <- deparse(substitute(dr.extr)) parm <- toupper(match.arg(parm)) -if(!missing(data)) + +has.data <- !missing( data ) +has.pref <- !missing( ref.p ) +has.cref <- !missing( ref.c ) + +if(has.data) { if (length(match(c("A", "P", "D", "Y"), names(data))) != 4) stop("Data frame ", deparse(substitute(data)), @@ -49,51 +54,36 @@ stop( "\nLengths of variables (", paste(paste(names(lv), lv, sep = ":"), collapse = ", "), ") are not the same." ) } + +# Utility to compute where the median amount y is on the x scale med <- function(x, y) { -# Computes where the median number of ys is on the x scale o <- order(x) a <- y[o] names(a) <- x[o] return( as.numeric(names(a[cumsum(a)/sum(a) > 0.5][1])) ) } -p0 <- ifelse(missing(ref.p), med(P, D), ref.p) -c0 <- ifelse(missing(ref.c), med(P - A, D), ref.c) -ref.p <- !missing(ref.p) -ref.c <- !missing(ref.c) +# Set the reference points on the period and cohort scales +p0 <- ifelse( has.pref, ref.p, med(P , D) ) +c0 <- ifelse( has.cref, ref.c, med(P-A, D) ) + +# Number of parameters in the spline modeling if( is.list(npar) & length(npar)<3 ) - stop("npar as a list should have length 3! \n") + stop("npar given as a list - should have length 3! \n") if( !is.list(npar) & length(npar)!=3 ) { npar <- rep(npar, 3)[1:3] names(npar) = c("A","P","C") - cat("NOTE: npar is specified as:") - print( npar ) + cat("NOTE: npar is specified as:\n") ; print( npar ) } + if( is.null(names(npar)) ) names(npar) <- c("A", "P", "C") -lu <- paste(formatC(c(alpha/2, 1 - alpha/2) * 100, format = "f", - digits = 1), "%", sep = "") -proj.ip <- function(X, M, orth = FALSE, weight = rep(1, nrow(X))) { - if (nrow(X) != length(weight)) - stop("Dimension of space and length of i.p. weights differ!") - if (nrow(X) != nrow(M)) - stop("Dimension of space and rownumber of model matrix differ!") - Pp <- solve(crossprod(X * sqrt(weight)), t(X * weight)) %*% - M - PM <- X %*% Pp - if (orth) - PM <- M - PM - else PM -} -Thin.col <- function(X, tol = 1e-06) { - QR <- qr(X, tol = tol, LAPACK = FALSE) - X[, QR$pivot[seq(length = QR$rank)], drop = FALSE] -} -detrend <- function(M, t, weight = rep(1, nrow(M))) { - Thin.col(proj.ip(cbind(1, t), M, orth = TRUE, weight = weight)) -} -if (is.list(model)) { +# Labeling of confidence intervals +lu <- paste(formatC( c(alpha/2, 1 - alpha/2) * 100, + format = "f", digits = 1), "%", sep = "") + +if( is.list(model) ) { if (!all(sapply(model, is.function))) stop("'model' is a list, but not all elements are functions as they should be.") if ((lmod <- length(model)) < 3) @@ -102,7 +92,7 @@ names(model) <- c("A", "P", "C") MA <- model[["A"]](A) MP <- model[["P"]](P) - MC <- model[["C"]](P - A) + MC <- model[["C"]](P-A) Rp <- model[["P"]](p0) Rc <- model[["C"]](c0) } @@ -123,15 +113,17 @@ if( is.null(names(npar)) ) names(npar) <- c("A","P","C") # if names too long or wrong case, rectify names( npar ) <- toupper( substr(names(npar),1,1) ) - MA <- if (knl) Ns( A, knots = npar[["A"]] ) - else Ns( A, knots = quantile( rep(A,D), - probs=(1:npar["A"]-0.5)/npar["A"] ) ) - MP <- if (knl) Ns(P , knots = npar[["P"]] ) - else Ns(P , knots = quantile( rep(P,D), - probs=(1:npar["P"]-0.5)/npar["P"] ) ) - MC <- if (knl) Ns(P-A, knots = npar[["C"]] ) - else Ns(P-A, knots = quantile( rep(P-A,D), - probs=(1:npar["C"]-0.5)/npar["C"] ) ) + # if not a list make it one with the correct knots + if( !knl ){ + nkn <- npar + eqp <- function(n) (1:n-0.5)/n + npar <- list( A = quantile( rep( A,D), probs=eqp(nkn["A"]) ), + P = quantile( rep(P ,D), probs=eqp(nkn["P"]) ), + C = quantile( rep(P-A,D), probs=eqp(nkn["C"]) ) ) + } + MA <- Ns( A, knots = npar[["A"]] ) + MP <- Ns(P , knots = npar[["P"]] ) + MC <- Ns(P-A, knots = npar[["C"]] ) Rp <- ns(p0, knots = attr(MP,"knots"), Boundary.knots = attr(MP,"Boundary.knots")) Rc <- ns(c0, knots = attr(MC,"knots"), @@ -168,13 +160,14 @@ } } if (tolower(substr(dist, 1, 2)) == "po") { - m.APC <- glm(D ~ MA + I(P - p0) + MP + MC, offset = log(Y), - family = poisson) + m.APC <- glm(D ~ MA + I(P - p0) + MP + MC, + offset = log(Y), family = poisson) Dist <- "Poisson with log(Y) offset" } -if (tolower(substr(dist, 1, 3)) %in% c("bin")) { - m.APC <- glm(cbind(D, Y - D) ~ MA + I(P - p0) + MP + - MC, family = binomial) +is.bin <- FALSE +if (is.bin <- tolower(substr(dist, 1, 3)) %in% c("bin")) { + m.APC <- glm(cbind(D, Y - D) ~ MA + I(P - p0) + MP + MC, + family = binomial) Dist <- "Binomial regression (logistic) of D/Y" } m.AP <- update(m.APC, . ~ . - MC) @@ -183,9 +176,22 @@ m.A <- update(m.Ad, . ~ . - I(P - p0)) m.0 <- update(m.A, . ~ . - MA) AOV <- anova(m.A, m.Ad, m.AC, m.APC, m.AP, m.Ad, test = "Chisq") -attr(AOV, "heading") <- "\nAnalysis of deviance for Age-Period-Cohort model\n" -attr(AOV, "row.names") <- c("Age", "Age-drift", "Age-Cohort", - "Age-Period-Cohort", "Age-Period", "Age-drift") +colnames(AOV)[1:4] <- c("Mod. df.","Mod. dev.", + "Test df.","Test dev.") +AOV <- abs(AOV) +AOV <- cbind( Model = c("Age", + "Age-drift", + "Age-Cohort", + "Age-Period-Cohort", + "Age-Period", + "Age-drift"), + AOV, + 'Test dev/df' = AOV[,"Test dev."]/AOV[,"Test df."], + 'H0 ' = c("","zero drift ", + "Coh eff|dr.", + "Per eff|Coh", + "Coh eff|Per", + "Per eff|dr.") ) A.pt <- unique(A) A.pos <- match(A.pt, A) P.pt <- unique(P) @@ -193,8 +199,11 @@ C.pt <- unique(P - A) C.pos <- match(C.pt, P - A) MA <- cbind(1, MA) + +# Determine the inner product (diagonal) for projection if (!mode(dr.extr) %in% c("character", "numeric")) stop("\"dr.extr\" must be of mode \"character\" or \"numeric\".\n") + if (is.character(dr.extr)) { wt <- rep(1, length(D) ) @@ -207,19 +216,25 @@ drtyp <- "Y^2/D-weights" } else if( toupper(substr(dr.extr, 1, 1)) %in% c("Y") ) { wt <- Y - drtyp <- "Y-weights" } else - if( dr.extr %in% names(data) ) - { wt <- data[,dr.extr] - drtyp <- paste( dr.extr, "weights" ) } - } -if ( is.numeric(dr.extr) ) wt <- dr.extr + drtyp <- "Y-weights" } + } +if ( is.numeric(dr.extr) ) + { + if( length(dr.extr)==1 ) + { wt <- D + dr.extr*Y + drtyp <- paste("D+",dr.extr,"*Y weights",sep="") } + if( length(dr.extr)==nrow(data) ) + { wt <- dr.extr + if( any(wt<0) ) stop("dr.extr must be non-negative") + drtyp <- "extn-weights" } + } Rp <- matrix(Rp, nrow = 1) Rc <- matrix(Rc, nrow = 1) -xP <- detrend(rbind(Rp, MP), c(p0, P), weight = c(0, wt)) -xC <- detrend(rbind(Rc, MC), c(c0, P - A), weight = c(0, - wt)) -MPr <- xP[-1,,drop=FALSE] - ref.p * xP[rep(1, nrow(MP)),,drop=FALSE] -MCr <- xC[-1,,drop=FALSE] - ref.c * xC[rep(1, nrow(MC)),,drop=FALSE] +xP <- Epi::detrend(rbind(Rp, MP), c(p0, P ), weight = c(0, wt)) +xC <- Epi::detrend(rbind(Rc, MC), c(c0, P-A), weight = c(0, wt)) + +MPr <- xP[-1,,drop=FALSE] - has.pref * xP[rep(1, nrow(MP)),,drop=FALSE] +MCr <- xC[-1,,drop=FALSE] - has.cref * xC[rep(1, nrow(MC)),,drop=FALSE] if (length(grep("-", parm)) == 0) { if (parm %in% c("ADPC", "ADCP", "APC", "ACP")) m.APC <- update(m.0, . ~ . - 1 + MA + I(P - p0) + MPr + MCr) @@ -297,6 +312,10 @@ colnames(Coh)[-1] <- c("C.eff", lu) Type <- paste("Sequential modelling", Dist, ": (", parm, "):\n") } +# If the model was binomial we convert to probabilities +o2p <- function(o) o/(1+o) +if( is.bin ) Age[,-1] <- o2p(Age[,-1]) + res <- list(Type = Type, Model = Model, Age = Age, @@ -317,13 +336,13 @@ print(res$Anova) } # Print warnings about reference points: -if( !ref.p & parm %in% c("APC","ADPC") ) - cat( "No reference period given:\n", - "Reference period for age-effects is chosen as\n", +if( !has.pref & parm %in% c("APC","ADPC") ) + cat( "No reference period given; ", + "reference period for age-effects is chosen as\n", "the median date of event: ", p0, ".\n" ) -if( !ref.c & parm %in% c("ACP","ADCP") ) - cat( "No reference period given:\n", - "Reference period for age-effects is chosen as\n", +if( !has.cref & parm %in% c("ACP","ADCP") ) + cat( "No reference cohort given; ", + "reference cohort for age-effects is chosen as\n", "the median date of birth for persons with event: ", c0, ".\n" ) class(res) <- "apc" invisible(res) diff -Nru r-cran-epi-2.32/R/apc.LCa.R r-cran-epi-2.37/R/apc.LCa.R --- r-cran-epi-2.32/R/apc.LCa.R 2016-05-25 07:19:03.000000000 +0000 +++ r-cran-epi-2.37/R/apc.LCa.R 2019-05-22 21:47:17.000000000 +0000 @@ -14,13 +14,16 @@ APC <- apc.fit( data, npar = list( A=LCa.list[[2]]$knots$a.kn, P=LCa.list[[1]]$knots$p.kn, C=LCa.list[[1]]$knots$c.kn ) ) -dev <- c( APC$Anova[c(2,5,3,4),2], +dev <- c( APC$Anova[c(2,5,3,4),"Mod. dev."], sapply( LCa.list, function(x) x$deviance ) ) -df <- c( APC$Anova[c(2,5,3,4),1], +df <- c( APC$Anova[c(2,5,3,4),"Mod. df."], sapply( LCa.list, function(x) x$df ) ) names(dev)[1:4] <- names(df)[1:4] <- -gsub( "rift","", gsub("eriod","", gsub("ohort","", gsub("-","", -gsub( "ge", "", rownames(APC$Anova)[c(2,5,3,4)]))))) +gsub( "rift","", +gsub("eriod","", +gsub("ohort","", +gsub( "-","", +gsub( "ge", "", APC$Anova[c(2,5,3,4),"Model"]))))) if( keep.models ) return( list( dev = cbind( dev, df ), apc = APC, LCa = LCa.list ) ) @@ -30,7 +33,8 @@ show.apc.LCa <- function( x, dev.scale=TRUE, top="Ad", ... ) { -if( is.list(x) ) x <- x[[1]] +if( is.list(x) ) x <- x[[1]] +print(x) TM <- matrix(NA,9,9) rownames( TM ) <- colnames( TM ) <- paste( rownames(x), "\n", formatC(x[,1],format="f",digits=1) ) @@ -41,12 +45,14 @@ TM[5,7] <- TM[6,8] <- TM[c(7,8),9] <- 1 -TM bp <- list( x=c(50,30,70,50,10,90,30,70,50), y=c(90,70,70,50,50,50,30,30,10) ) if( !dev.scale ) boxes.matrix( TM, boxpos=bp, hm=1.5, wm=1.5, ... ) else { + print( top ) + print( str(x) ) + print( rownames(x) ) bp$y <- 5+90*(pmin(x[top,1],x[,1])-x[9,1])/(x[top,1]-x[9,1]) boxes.matrix( TM, boxpos=bp, hm=1.5, wm=1.5, ... ) } diff -Nru r-cran-epi-2.32/R/apc.plot.R r-cran-epi-2.37/R/apc.plot.R --- r-cran-epi-2.32/R/apc.plot.R 2018-05-01 18:02:24.000000000 +0000 +++ r-cran-epi-2.37/R/apc.plot.R 2019-03-17 09:33:04.000000000 +0000 @@ -56,6 +56,7 @@ col = "black", type = "l", knots = FALSE, + shade = FALSE, ... ) { @@ -116,12 +117,23 @@ exp( mean( A[,2] ) ), exp( mean( log( A[,2] ) ) ) ) ) # Now we can plot the lines - matlines( A[,1], A[,ifelse( ci[1], -1, 2)], +if( !shade ) + { + matlines( A[,1] , A[,ifelse( ci[1], -1, 2)], col=col[1], lwd=lwd, lty=lty[1], type=type[1], ... ) matlines( P[,1] - frame.par[1], P[,ifelse( ci[2], -1, 2)] * frame.par[2], col=col[2], lwd=lwd, lty=lty[2], type=type[2], ... ) matlines( C[,1] - frame.par[1], C[,ifelse( ci[3], -1, 2)] * frame.par[2], col=col[3], lwd=lwd, lty=lty[3], type=type[3], ... ) + } else { + matshade( A[,1] , A[,-1], + col=col[1], lwd=lwd, lty=lty[1], ... ) + matshade( P[,1] - frame.par[1], P[,-1] * frame.par[2], + col=col[2], lwd=lwd, lty=lty[2], ... ) + matshade( C[,1] - frame.par[1], C[,-1] * frame.par[2], + col=col[3], lwd=lwd, lty=lty[3], ... ) + } + points( obj$Ref - frame.par[1], frame.par[c(2,2)], pch=16, col="white" ) points( obj$Ref - frame.par[1], frame.par[c(2,2)], pch=1, lwd=2, col=col[2:3] ) if( knots & inherits( obj, "apc" ) ) diff -Nru r-cran-epi-2.32/R/bootLexis.R r-cran-epi-2.37/R/bootLexis.R --- r-cran-epi-2.32/R/bootLexis.R 2018-03-08 12:02:08.000000000 +0000 +++ r-cran-epi-2.37/R/bootLexis.R 2018-08-26 09:10:11.000000000 +0000 @@ -21,7 +21,8 @@ bootLexis <- function( Lx, size = NULL, - by = NULL ) + by = NULL, + replace = TRUE ) { if( !inherits( Lx, "Lexis" ) ) stop("Only meaningful for Lexis objects.") @@ -32,7 +33,7 @@ if( is.null( size ) ) size <- nid.Lexis( Lx, by = by ) # allowing for a length 1 x-vector -REsample <- function(x,sz) x[sample.int(length(x),size=sz,replace=TRUE)] +REsample <- function(x,sz) x[sample.int(length(x),size=sz,replace=replace)] if( is.null(by) ) { # Simple bootstrap bLx <- subid.Lexis( Lx, REsample( unique(Lx$lex.id), size ) ) diff -Nru r-cran-epi-2.32/R/boxes.MS.R r-cran-epi-2.37/R/boxes.MS.R --- r-cran-epi-2.32/R/boxes.MS.R 2016-06-24 06:13:59.000000000 +0000 +++ r-cran-epi-2.37/R/boxes.MS.R 2019-04-01 09:13:03.000000000 +0000 @@ -104,9 +104,9 @@ boxes.Lexis <- function( obj, boxpos = FALSE, - wmult = 1.15, - hmult = 1.15, - cex = 1.45, + wmult = 1.20, + hmult = 1.20 + 0.85*(!show.Y), + cex = 1.40, show = inherits( obj, "Lexis" ), show.Y = show, scale.Y = 1, @@ -116,7 +116,7 @@ show.D = show, scale.D = FALSE, digits.D = as.numeric(as.logical(scale.D)), - show.R = is.numeric(scale.R), + show.R = show & is.numeric(scale.R), scale.R = 1, digits.R = as.numeric(as.logical(scale.R)), DR.sep = if( show.D ) c("\n(",")") else c("",""), @@ -196,7 +196,7 @@ # Compute the rates - vectors are automatically expanded to matrices columnwise R <- D / Y * ifelse(scale.R,scale.R,1) -# If no person-years available anywhere, they or rates cannot be shown +# If no person-years available anywhere, neither they nor rates can be shown if( all(is.na(Y)) ) show.Y <- show.R <- FALSE # Derive state names, no. states and no. transitions @@ -235,9 +235,9 @@ # Recycling of box-arguments if( !missing(ht) ) -if( length(ht )0 ) + for( ts in names(tsc) ) + { + x[,ts] <- ifelse( is.na(x[,ts]), 0, x[,ts] ) + } + x + } diff -Nru r-cran-epi-2.32/R/detrend.R r-cran-epi-2.37/R/detrend.R --- r-cran-epi-2.32/R/detrend.R 2018-07-13 10:00:50.000000000 +0000 +++ r-cran-epi-2.37/R/detrend.R 2019-03-09 17:10:43.000000000 +0000 @@ -39,6 +39,8 @@ function( M, t, weight=rep(1,nrow(M)) ) { # Detrend the matrix using a weighted inner product. +# Numerically unstable if too large t, so scaled +t <- scale(t) thinCol( projection.ip( cbind( 1, t ), M , orth = TRUE, weight = weight ) ) } @@ -46,5 +48,6 @@ function( M, t, weight=rep(1,nrow(M)) ) { # De-trend and -curve the matrix using a weighted inner product. +t <- scale(t) thinCol( projection.ip( cbind( 1, t, t^2 ), M , orth = TRUE, weight = weight ) ) } diff -Nru r-cran-epi-2.32/R/gen.exp.R r-cran-epi-2.37/R/gen.exp.R --- r-cran-epi-2.32/R/gen.exp.R 2018-08-19 11:48:30.000000000 +0000 +++ r-cran-epi-2.37/R/gen.exp.R 2018-12-11 17:17:15.000000000 +0000 @@ -139,11 +139,14 @@ # indicator being off drug (set$off) and # the date a person goes off drug, doff (set to NA for remaining records) # with b) the break dates (which is where we want things computed) - # Note we merge on the variable dof. - dfr <- merge( data.frame( set[,c("id","dof","off")], - doff = ifelse(set$off,set$dof,NA) ), - data.frame( id = set$id[1], + # Note we merge on the variable dof and have the data frame with + # dof=xval as the >first< so that values of xval will be in the + # resulting dfr$dof even if dof-values in set are almost equal til + # an xval value. Ensures that sum(dfr$dof %in% xval)==length(xval) + dfr <- merge( data.frame( id = set$id[1], dof = xval ), + data.frame( set[,c("id","dof","off")], + doff = ifelse(set$off,set$dof,NA) ), all=TRUE ) # carry the off drug indicator forward to all break dates dfr$off <- zoo::na.locf( dfr$off, na.rm=FALSE ) @@ -157,6 +160,7 @@ dfr$tfi <- pmax( 0, dfr$dof-doi ) # restrict to the desired timepoints dfr <- subset( dfr, dof %in% xval ) + dfr <- dfr[!duplicated(dfr$dof),] # linear interpolation of the cumulative dose and time from the # purchase data (set) dfr$cdos <- approx( set$dof, set$cum.amt, xout=xval, rule=2 )$y diff -Nru r-cran-epi-2.32/R/lexis.R r-cran-epi-2.37/R/lexis.R --- r-cran-epi-2.32/R/lexis.R 2018-08-15 15:52:20.000000000 +0000 +++ r-cran-epi-2.37/R/lexis.R 2019-05-05 12:02:42.000000000 +0000 @@ -1,6 +1,6 @@ Lexis <- function(entry, exit, duration, entry.status=0, exit.status=0, id, data, - merge=TRUE, states, tol=.Machine$double.eps^0.5, + merge=TRUE, states, notes=TRUE, tol=.Machine$double.eps^0.5, keep.dropped=FALSE ) { nmissing <- missing(entry) + missing(exit) + missing(duration) @@ -42,7 +42,7 @@ if( is.logical( exit.status ) ) { entry.status <- FALSE - cat("NOTE: entry.status has been set to FALSE for all.\n" ) + if( notes ) cat("NOTE: entry.status has been set to FALSE for all.\n" ) } if( is.character( exit.status ) ) { @@ -54,13 +54,13 @@ length(exit.status)), levels=levels(exit.status), labels=levels(exit.status) ) - cat("NOTE: entry.status has been set to", + if( notes ) cat("NOTE: entry.status has been set to", paste( '"', levels(exit.status)[1], '"', sep='' ), "for all.\n" ) } if( is.numeric( exit.status ) ) { - entry.status <- rep( 0, length( exit.status ) ) + if( notes ) entry.status <- rep( 0, length( exit.status ) ) cat("NOTE: entry.status has been set to 0 for all.\n" ) } } @@ -76,7 +76,7 @@ all.levels = union(levels(entry.status),levels(exit.status)) entry.status <- factor( entry.status, levels=all.levels ) exit.status <- factor( exit.status, levels=all.levels ) - cat("Incompatible factor levels in entry.status and exit.status:\n", + if( notes ) cat("Incompatible factor levels in entry.status and exit.status:\n", "both lex.Cst and lex.Xst now have levels:\n", all.levels, "\n") } } @@ -101,7 +101,7 @@ { entry <- exit entry[[1]] <- 0*entry[[1]] - cat( "NOTE: entry is assumed to be 0 on the",names(exit),"timescale.\n") + if( notes ) cat( "NOTE: entry is assumed to be 0 on the",names(exit),"timescale.\n") } } else @@ -112,7 +112,7 @@ { entry <- duration entry[[1]] <- 0*entry[[1]] - cat( "NOTE: entry is assumed to be 0 on the",names(duration),"timescale.\n") + if( notes ) cat( "NOTE: entry is assumed to be 0 on the",names(duration),"timescale.\n") } } else @@ -229,12 +229,12 @@ } lex <- cbind(lex, data) } - + ## Drop rows with short or negative duration for consistency with splitLexis short.dur <- lex$lex.dur <= tol if ( any(short.dur) ) { - warning("Dropping ", sum(short.dur), - " rows with duration of follow up < tol\n", + if( notes ) cat("NOTE: Dropping ", sum(short.dur), + " rows with duration of follow up < tol\n", if( keep.dropped ) " The dropped rows are in the attribute 'dropped'\n", if( keep.dropped ) " To see them type attr(Obj,'dropped'),\n", if( keep.dropped ) " to get rid of them type: attr(Obj,'dropped') <- NULL\n", @@ -702,11 +702,46 @@ res } +transient <- +function( x ) +{ +if( !is.Lexis(x) ) stop( "Not a Lexis object" ) +tc <- tapply( x$lex.dur, x$lex.Cst, sum ) +nt <- names( tc[tc>0] ) +nt[!is.na(nt)] +} + +absorbing <- +function( x ) +{ +if( !is.Lexis(x) ) stop( "Not a Lexis object" ) +tc <- table( x$lex.Xst ) +setdiff( names( tc[tc>0] ), transient(x) ) +} + +updn <- +function( x, tt, states ) +{ +if( any(is.na(match(states,levels(x)))) ) + stop( "'states' must be among states: ", paste(levels(x),sep=',') ) +tt <- tt[,states,drop=FALSE] +setdiff( rownames( tt[apply(tt,1,sum)>0,,drop=FALSE] ), states ) +} +before <- preceding <- function( x, states ) updn( x, table(x$lex.Cst,x$lex.Xst), states ) +after <- succeeding <- function( x, states ) updn( x, table(x$lex.Xst,x$lex.Cst), states ) + timeScales <- function(x) { return (attr(x,"time.scales")) } +timeSince <- function(x) +{ + tt <- attr(x,"time.since") + names(tt) <- attr(x,"time.scales") + return( tt ) +} + timeBand <- function(lex, time.scale, type="integer") { time.scale <- check.time.scale(lex, time.scale)[1] diff -Nru r-cran-epi-2.32/R/mod.Lexis.R r-cran-epi-2.37/R/mod.Lexis.R --- r-cran-epi-2.32/R/mod.Lexis.R 2018-08-18 06:25:39.000000000 +0000 +++ r-cran-epi-2.37/R/mod.Lexis.R 2019-05-22 08:35:40.000000000 +0000 @@ -1,40 +1,70 @@ modLexis <- -function( Lx, resp, formula, xpos, link="log", scale, verbose=TRUE, ..., model ) +function( Lx, nameLx, + from = preceding(Lx,to), + to = absorbing(Lx), + formula, + paired = FALSE, + link = "log", scale = 1, verbose = TRUE, + model, ... ) { -# A common wrapper for glm and gam modeling of Lexis FU - +# a common wrapper for glm and gam modeling of Lexis FU # is this a Lexis object ? -if( !inherits(Lx,"Lexis") ) stop( "The first argument must be a Lexis object.\n") -nameLx <- deparse(substitute(Lx)) -# Beginning of a new feature with a countmultiplier of the transitions considered -if( !("lex.N" %in% names(Lx)) ) Lx$lex.N <- 1 +if( !inherits(Lx,"Lexis") ) stop("The first argument must be a Lexis object.\n") # check that events are actual levels of lex.Xst -if( is.numeric(resp) ) resp <- levels( Lx$lex.Xst )[resp] -wh <- match( resp, levels(Lx$lex.Xst) ) -if( any(is.na(wh)) ) stop("'resp' must be a subset of: '", +if( is.numeric(to) ) to <- levels( Lx$lex.Xst )[to] +wh <- match( to, levels(Lx$lex.Xst) ) +if( any(is.na(wh)) ) stop("'to' must be a subset of: '", paste(levels(Lx$lex.Xst), collapse="','", sep=""), "'\n" ) -# is xpos supplied? -if( missing(xpos) ) { - xpos <- levels( factor(Lx$lex.Cst) ) - } else { -# check that xpos are actual levels of lex.Cst -if( is.numeric(xpos) ) xpos <- levels( Lx$lex.Cst )[xpos] -wh <- match( xpos, levels(Lx$lex.Cst) ) -if( any(is.na(wh)) ) stop("'xpos' must be a subset of: '", +# check that from are actual levels of lex.Cst +if( is.numeric(from) ) from <- levels( Lx$lex.Cst )[from] +wh <- match( from, levels(Lx$lex.Cst) ) +if( any(is.na(wh)) ) stop("'from' must be a subset of: '", paste(levels(Lx$lex.Cst), collapse="','", sep=""), "'\n" ) -Lx <- Lx[Lx$lex.Cst %in% xpos,] - } - -# construct the model formula - note that we want the possibility of -# transitions to transient states, hence the lex.Xst != lex.Cst -if( length(formula) != 2 ) stop( "formula must be a one-sided formula") -form <- cbind( (Lx$lex.Xst %in% resp & - Lx$lex.Xst != Lx$lex.Cst)*Lx$lex.N, +Lx <- Lx[Lx$lex.Cst %in% from,] + +# work out which transitions are modeled +# first a small utility (transition as text) +trt <- function( f, t ) paste( f, "->", t, sep="" ) +if( paired ) + { +if( length(from) != length(to) ) + stop("If 'paired' is TRUE, from and to must have same length!\n") +if( any(from==to) ) + stop("If 'paired' is TRUE, entries in from and to must be different within pairs\n") +trnam <- trt( from, to ) + } else { +tm <- tmat( Lx )[from,to,drop=FALSE] +trnam <- outer( rownames(tm), colnames(tm), trt )[tm>0] +trnam <- trnam[!is.na(trnam)] + } +# just for formatting the explanatory text +onetr <- length( trnam )==1 +trprn <- paste( trnam, collapse=", " ) + +# warn if a potentially silly model is defined +if( any( (ts<-table(sapply( strsplit(trnam,"->"), function(x) x[1] )))>1 ) ) warning( + "NOTE:\nMultiple transitions *from* state '",names(ts[ts>1]),"' - are you sure?", + "\nThe analysis requested is effectively merging outcome states.", + "\nYou may want analyses using a *stacked* dataset - see ?stack.Lexis\n" ) + +# Beginning of a new feature with a countmultiplier of the transitions +# allowing tabular records to be merged to a Lexis object +# --- not used subsequently in this function (yet) +# if( !("lex.N" %in% names(Lx)) ) Lx$lex.N <- 1 + +# construct the model formula - note that we already made sure that +# from and to are pairwise different +if( length(formula) != 2 ) stop("formula must be a one-sided formula") + +form <- cbind( trt(Lx$lex.Cst,Lx$lex.Xst) %in% trnam, #*Lx$lex.N, Lx$lex.dur ) ~ 1 +## form <- cbind( (Lx$lex.Xst %in% to & +## Lx$lex.Xst != Lx$lex.Cst), #*Lx$lex.N, +## Lx$lex.dur ) ~ 1 form[3] <- formula[2] -xpos <- levels( factor(Lx$lex.Cst) ) # only levels present in lex.Cst +from <- levels( factor(Lx$lex.Cst) ) # only levels present in lex.Cst # Scaling Lx$lex.dur <- Lx$lex.dur/scale @@ -43,62 +73,118 @@ if( verbose ){ cat( deparse(substitute(model)), " Poisson analysis of Lexis object ", nameLx, " with ", link, " link", - ":\n Transition rates from '", paste( xpos, collapse="','"), - "' to '", paste( resp, collapse="','"), "'", - if( scale!=1 ) paste(" scaled by", scale ), "\n", sep="" ) + ":\nRates for", if( onetr ) " the", " transition", + if( !onetr ) "s", ": ", trprn, + if( scale!=1 ) paste(", PY scaled by", scale ), "\n", sep="" ) } # Fit the model mod <- model( form, family = poisreg(link=link), data = Lx, ... ) - -# An explanatory attribute -attr( mod, "Lexis" ) <- list( Exposure=xpos, Events=resp, scale=scale ) + +# Add an explanatory attribute +attr( mod, "Lexis" ) <- list( data=nameLx, + trans=trnam, + formula=form[-2], + scale=scale ) mod } -# Here are the actual functions of interest - +# Here are the actual functions of interest: +# the glm function glm.Lexis <- -function( Lx, resp, formula, xpos, link="log", scale=1 , verbose=TRUE , ... ) { -modLexis( Lx, resp, formula, xpos, link=link , scale=scale, verbose=verbose, ..., model=stats::glm ) } +function( Lx, from = preceding(Lx,to), + to = absorbing(Lx), + formula, + paired = FALSE, + link = "log", + scale = 1, + verbose = TRUE, + ... ) +{ +# name of the supplied object +nameLx <- deparse(substitute(Lx)) +# sensible defaults if one of to and from is missing +if( missing(from) & !missing(to) ) from <- preceding (Lx,to ) +if( !missing(from) & missing(to) ) to <- succeeding(Lx,from) +xx <- modLexis( Lx, nameLx, + from, to, formula, + paired = paired, link = link, scale = scale, verbose = verbose, + model = stats::glm, ... ) +class( xx ) <- c( "glm.lex", class(xx) ) +xx +} +# the gam function gam.Lexis <- -function( Lx, resp, formula, xpos, link="log", scale=1 , verbose=TRUE , ... ) { -modLexis( Lx, resp, formula, xpos, link=link , scale=scale, verbose=verbose, ..., model= mgcv::gam ) } +function( Lx, from = preceding(Lx,to), + to = absorbing(Lx), + formula, + paired = FALSE, + link = "log", + scale = 1, + verbose = TRUE, + ... ) +{ +# name of the supplied object +nameLx <- deparse(substitute(Lx)) -# and here the coxph counterpart: +# sensible defaults if one of the two is missing +if( missing(from) & !missing(to) ) from <- preceding (Lx,to ) +if( !missing(from) & missing(to) ) to <- succeeding(Lx,from) +xx <- modLexis( Lx, nameLx, + from, to, formula, + paired = paired, link = link, scale = scale, verbose = verbose, + model = mgcv::gam, ... ) +class( xx ) <- c( "gam.lex", class(xx) ) +xx +} +# And here is the coxph counterpart: coxph.Lexis <- function( Lx, # Lexis object - resp, # Events ('to' states) + from = preceding(Lx,to), # Exposure ('from' states) + to = absorbing(Lx) , # Events ('to' states) formula, # timescale ~ model - xpos, # exposure states ('from' states) + paired = FALSE, verbose = TRUE, ... ) { # Lexis object ? if( !inherits(Lx,"Lexis") ) stop( "The first argument must be a Lexis object.\n") -nameLx <- deparse(substitute(Lx)) -# check levels -if( is.numeric(resp) ) resp <- levels( Lx$lex.Xst )[resp] -wh <- match(resp,levels(Lx$lex.Xst)) -if( any(is.na(wh)) ) stop("'resp' must be a subset of: '", - paste(levels(Lx$lex.Xst), collapse="','", sep=""), "'\n" ) +# sensible defaults if only one of to and from is missing +if( missing(from) & !missing(to) ) from <- preceding (Lx,to ) +if( !missing(from) & missing(to) ) to <- succeeding(Lx,from) + +# name of the dataset +nameLx <- deparse(substitute(Lx)) -# is xpos supplied? -if( !missing(xpos) ) +# work out which transitions are modeled +# first a small utility +trt <- function( f, t ) paste( f, "->", t, sep="" ) +if( paired ) { -# check that xpos are actual levels of lex.Cst -if( is.numeric(xpos) ) xpos <- levels( Lx$lex.Cst )[xpos] -wh <- match( xpos, levels(Lx$lex.Cst) ) -if( any(is.na(wh)) ) stop("'xpos' must be a subset of: '", - paste(levels(Lx$lex.Cst), collapse="', '", sep=""), "'\n" ) -Lx <- Lx[Lx$lex.Cst %in% xpos,] - } +if( length(from) != length(to) ) stop("If 'paired' is TRUE, from and to must have same length!\n") +if( any(from==to) ) stop("If 'paired' is TRUE, entries in from and to must be pairwise different\n") +trnam <- trt( from, to ) + } else { +tm <- tmat( Lx )[from,to,drop=FALSE] +trnam <- outer( rownames(tm), colnames(tm), trt )[tm>0] +trnam <- trnam[!is.na(trnam)] + } +# just for formatting explanatory text +onetr <- length( trnam )==1 +trprn <- paste( trnam, collapse=", " ) + +# warn if a potentially silly model is defined +if( any( ts<-table(sapply( strsplit(trprn,"->"), function(x) x[1] ))>1 ) ) warning( + "NOTE:\nMultiple transitions *from* state '",names(ts[ts>1]),"' - are you sure?", + "\nThe analysis requested is effectively merging outcome states.", + "\nYou may want analyses using a *stacked* dataset - see ?stack.Lexis\n" ) # Correct formula? -if( length(formula) != 3 ) stop("'formula' must be a 2-sided formula, with the l.h.s. the timescale") +if( length(formula) != 3 ) + stop("'formula' must be a 2-sided formula, with the l.h.s. the timescale") # Is the l.h.s. a timescale? ts <- as.character( formula[2] ) @@ -106,25 +192,33 @@ stop( "l.h.s. of formula must be a timescale; one of:\n", tms, "\n" ) # What are the 'from' states -xpos <- levels( factor(Lx$lex.Cst) ) +from <- levels( factor(Lx$lex.Cst) ) # construct a Surv response object, and note that we want the possibility # of transitions to transient states, hence the lex.Xst != lex.Cst Sobj <- Surv( Lx[,ts], Lx[,ts]+Lx$lex.dur, - Lx$lex.Xst %in% resp & - Lx$lex.Xst != Lx$lex.Cst ) + trt( Lx$lex.Cst, Lx$lex.Xst ) %in% trnam ) +# Lx$lex.Xst %in% to & +# Lx$lex.Xst != Lx$lex.Cst ) # Tell what we intend to and then do it -cat( "survival::coxph analysis of Lexis object ", nameLx, " using timescale ", ts, - ":\nTransition rates from '", paste( xpos, collapse="','"), - "' to '", paste( resp, collapse="','"), "'\n", sep="" ) +if( verbose ){ +cat( deparse(substitute(model)), + " survival::coxph analysis of Lexis object ", nameLx, + ":\nRates for", if( onetr ) " the", " transition", + if( !onetr ) "s", " ", trprn, "\n", sep="" ) + } + mod <- coxph( as.formula( paste( "Sobj", as.character(formula[3]), sep="~") ), data = Lx, ... ) -# An explanatory attribute -attr( mod, "Lexis" ) <- list( Exposure=xpos, Events=resp, Timescale=ts ) +# Add an explanatory attribute +attr( mod, "Lexis" ) <- list( data=nameLx, + trans=trnam, + formula=formula ) +class( mod ) <- c( "coxph.lex", class(mod) ) mod } diff -Nru r-cran-epi-2.32/R/qspr.R r-cran-epi-2.37/R/qspr.R --- r-cran-epi-2.32/R/qspr.R 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-epi-2.37/R/qspr.R 2019-04-24 18:35:27.000000000 +0000 @@ -0,0 +1,32 @@ +spread <- +function( dd, tt, rand=TRUE ) +{ +dd <- tapply(dd,tt,sum,na.rm=TRUE) +tt <- as.numeric(names(dd)) +oo <- order(tt) +dd <- dd[oo] +tt <- tt[oo] +# now d is the number of items scored with value t +dt <- diff(tt) +nt <- length(tt) +# here are the nt+1 boundaries of the nt intervala we shall use +tx <- tt[c(1,1:nt)] + c(-dt[1],dt[c(1:(nt-1),(nt-1))])/2 +# the vector to hold the spread out values of dd +dx <- numeric(sum(dd)) +cd <- c(0,cumsum(dd)) +for( i in 1:nt ) dx[(cd[i]+1):cd[i+1]] <- + if( rand ) runif(dd[i],tx[i],tx[i+1]) + else seq(tx[i],tx[i+1],,nn<-dd[i]+2)[-c(1,nn)] +return( sort(dx) ) +} +dd <- c(5,4,6,2,10) +tt <- c(1,3,4,7,11) + +( dr <- spread(dd,tt) ) +( ds <- spread(dd,tt,rand=FALSE) ) +plot(tt,dd,type="h",lwd=4,xlim=c(0,15),ylim=c(0,max(dd))) +abline(v=quantile(rep(tt,dd),1:5/6),lty=3) +points( ds, rep(1.5,length(ds)), pch=16, col="red" ) +abline(v=quantile(ds,1:5/6),col="red") +points( dr, rep(0.5,length(ds)), pch=16, col="blue" ) +abline(v=quantile(dr,1:5/6),col="blue") diff -Nru r-cran-epi-2.32/R/simLexis.R r-cran-epi-2.37/R/simLexis.R --- r-cran-epi-2.32/R/simLexis.R 2016-01-02 11:43:10.000000000 +0000 +++ r-cran-epi-2.37/R/simLexis.R 2019-04-05 13:42:04.000000000 +0000 @@ -8,7 +8,7 @@ # Makes a linear interpolation, but does not crash if all ci values are # identical, but requires that both ci and tt are non-decreasing. # ci plays the role of cumulative intensity, tt of time -if( any( diff(ci)<0 ) | any( diff(tt)<0 ) ) stop("Non-icreasing arguments") +if( any( diff(ci)<0 ) | any( diff(tt)<0 ) ) stop("Non-increasing arguments") c.u <- min( c( ci[ci>u], max(ci) ) ) c.l <- max( c( ci[ciu], max(tt) ) ) @@ -62,8 +62,11 @@ prfrm[,tS] <- prfrm[,tS] + rep(time.pts,nr) prfrm$lex.dur <- il <- min( diff(time.pts) ) # Poisson-models should use the estimated rate at the midpoint of the -# intervals: +# intervals, and have risk time equal to 1 in order to accommodate +# both poisson and poisreg families - they only produce identical +# predictions if lex.dur is 1 (i.e. offset is 0), scaling is after prediction prfrp <- prfrm +prfrp[,"lex.dur"] <- 1 prfrp[,tS] <- prfrp[,tS]+il/2 # Make a data frame with predicted rates for each of the transitions @@ -74,7 +77,7 @@ if( inherits( Tr[[cst]][[i]], "glm" ) ) rt <- cbind( rt, predict( Tr[[cst]][[i]], type="response", - newdata=prfrp ) ) + newdata=prfrp ) * il ) # scaled to interval else if( inherits( Tr[[cst]][[i]], "coxph" ) ) rt <- cbind( rt, predict( Tr[[cst]][[i]], diff -Nru r-cran-epi-2.32/R/summary.Lexis.r r-cran-epi-2.37/R/summary.Lexis.r --- r-cran-epi-2.32/R/summary.Lexis.r 2016-10-02 15:29:32.000000000 +0000 +++ r-cran-epi-2.37/R/summary.Lexis.r 2019-02-12 12:13:55.000000000 +0000 @@ -70,9 +70,8 @@ trans <- trans[1,,drop = FALSE] res <- list( Transitions = trans, Rates = rates[-nrow(rates),,drop=FALSE], - timeScales = data.frame( "time scale" = attr( object, "time.scales" ), - "time since" = attr( object, "time.since" ) ) -) + timeScales = timeSince(object) ) + if( !timeScales ) res <- res[-3] if( !Rates ) res <- res[-2] class( res ) <- "summary.Lexis" diff -Nru r-cran-epi-2.32/vignettes/auto/flup.el r-cran-epi-2.37/vignettes/auto/flup.el --- r-cran-epi-2.32/vignettes/auto/flup.el 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-epi-2.37/vignettes/auto/flup.el 2019-03-01 07:58:51.000000000 +0000 @@ -0,0 +1,23 @@ +(TeX-add-style-hook + "flup" + (lambda () + (TeX-add-to-alist 'LaTeX-provided-class-options + '(("report" "a4paper" "dvipsnames" "twoside" "12pt"))) + (TeX-run-style-hooks + "latex2e" + "topreport" + "report" + "rep12") + (TeX-add-symbols + "Title" + "Tit" + "Version" + "Dates" + "Where" + "Homepage" + "Faculty") + (LaTeX-add-labels + "fig:fu2" + "fig:Lexis-diagram" + "fig:Ins-noIns"))) + diff -Nru r-cran-epi-2.32/vignettes/fixall r-cran-epi-2.37/vignettes/fixall --- r-cran-epi-2.32/vignettes/fixall 2018-05-01 13:15:57.000000000 +0000 +++ r-cran-epi-2.37/vignettes/fixall 2019-02-15 10:04:56.000000000 +0000 @@ -2,7 +2,7 @@ # 1) run rw on simLexis and yll with eval=TRUE in the lines with # CHANGE1 in order to get a version with properly formatted # code. -# 2) Move the .pdfs to sL.pdf and yl.pdf, respectively. +# 2) Move the .tex files to sL.tex and yl.tex, and compile. # 3) Swap TRUE and FALSE in the CHANGE1/2 lines in the rnw files. # Then we are ready for the following: Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup-box1.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup-box1.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup-box4.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup-box4.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup-dmL1.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup-dmL1.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup-dmL2.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup-dmL2.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup-dur-int.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup-dur-int.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup-dur-int-RR.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup-dur-int-RR.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup-Ieff.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup-Ieff.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup-IeffR.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup-IeffR.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup-ins-time.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup-ins-time.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup-mbox.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup-mbox.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup-mboxr.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup-mboxr.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup-nic-box.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup-nic-box.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup-nicL1.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup-nicL1.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup-nicL2.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup-nicL2.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup-pr-a.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup-pr-a.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup-pr-at-af.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup-pr-at-af.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup-pr-at.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup-pr-at.pdf differ diff -Nru r-cran-epi-2.32/vignettes/flup.R r-cran-epi-2.37/vignettes/flup.R --- r-cran-epi-2.32/vignettes/flup.R 2018-05-03 14:34:57.000000000 +0000 +++ r-cran-epi-2.37/vignettes/flup.R 2019-05-23 08:21:03.000000000 +0000 @@ -2,7 +2,7 @@ ### Encoding: UTF-8 ################################################### -### code chunk number 1: flup.rnw:5-8 +### code chunk number 1: flup.rnw:22-25 ################################################### options( width=90, SweaveHooks=list( fig=function() @@ -10,315 +10,547 @@ ################################################### -### code chunk number 2: flup.rnw:101-103 +### code chunk number 2: flup.rnw:128-130 ################################################### library(Epi) print( sessionInfo(), l=F ) ################################################### -### code chunk number 3: flup.rnw:109-116 +### code chunk number 3: flup.rnw:139-148 ################################################### -data( nickel ) -nicL <- Lexis( entry = list( per=agein+dob, - age=agein, - tfh=agein-age1st ), - exit = list( age=ageout ), - exit.status = ( icd %in% c(162,163) )*1, - data = nickel ) +data( DMlate ) +head( DMlate ) +dmL <- Lexis( entry = list( per=dodm, + age=dodm-dobth, + tfD=0 ), + exit = list( per=dox ), + exit.status = factor( !is.na(dodth), labels=c("DM","Dead") ), + data = DMlate ) +timeScales(dmL) ################################################### -### code chunk number 4: flup.rnw:126-129 +### code chunk number 4: flup.rnw:171-173 ################################################### -str( nickel ) -str( nicL ) -head( nicL ) +str( dmL ) +head( dmL )[,1:10] ################################################### -### code chunk number 5: flup.rnw:138-139 +### code chunk number 5: flup.rnw:189-190 ################################################### -summary( nicL ) +summary.Lexis( dmL, timeScales=TRUE ) ################################################### -### code chunk number 6: nicL1 +### code chunk number 6: dmL1 ################################################### -plot( nicL ) +plot( dmL ) ################################################### -### code chunk number 7: nicL2 +### code chunk number 7: dmL2 ################################################### par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6 ) -plot( nicL, 1:2, lwd=1, col=c("blue","red")[(nicL$exp>0)+1], +plot( dmL, 1:2, lwd=1, col=c("blue","red")[dmL$sex], grid=TRUE, lty.grid=1, col.grid=gray(0.7), - xlim=1900+c(0,90), xaxs="i", - ylim= 10+c(0,90), yaxs="i", las=1 ) -points( nicL, 1:2, pch=c(NA,3)[nicL$lex.Xst+1], - col="lightgray", lwd=3, cex=1.5 ) -points( nicL, 1:2, pch=c(NA,3)[nicL$lex.Xst+1], - col=c("blue","red")[(nicL$exp>0)+1], lwd=1, cex=1.5 ) + xlim=1960+c(0,60), xaxs="i", + ylim= 40+c(0,60), yaxs="i", las=1 ) +points( dmL, 1:2, pch=c(NA,3)[dmL$lex.Xst], + col="lightgray", lwd=3, cex=0.3 ) +points( dmL, 1:2, pch=c(NA,3)[dmL$lex.Xst], + col=c("blue","red")[dmL$sex], lwd=1, cex=0.3 ) +box(bty='o') ################################################### -### code chunk number 8: flup.rnw:193-196 +### code chunk number 8: flup.rnw:246-249 ################################################### -nicS1 <- splitLexis( nicL, "age", breaks=seq(0,100,10) ) -summary( nicL ) -summary( nicS1 ) +dmS1 <- splitLexis( dmL, "age", breaks=seq(0,100,5) ) +summary( dmL ) +summary( dmS1 ) ################################################### -### code chunk number 9: flup.rnw:204-205 +### code chunk number 9: flup.rnw:259-262 ################################################### -round( subset( nicS1, id %in% 8:10 ), 2 ) +wh.id <- c(9,27,52,484) +subset( dmL , lex.id %in% wh.id )[,1:10] +subset( dmS1, lex.id %in% wh.id )[,1:10] ################################################### -### code chunk number 10: flup.rnw:211-213 +### code chunk number 10: flup.rnw:268-270 ################################################### -nicS2 <- splitLexis( nicS1, "tfh", breaks=c(0,1,5,10,20,30,100) ) -round( subset( nicS2, id %in% 8:10 ), 2 ) +dmS2 <- splitLexis( dmS1, "tfD", breaks=c(0,1,2,5,10,20,30,40) ) +subset( dmS2, lex.id %in% wh.id )[,1:10] ################################################### -### code chunk number 11: flup.rnw:218-223 +### code chunk number 11: flup.rnw:275-281 ################################################### library( popEpi ) -nicM <- splitMulti( nicL, age = seq(0,100,10), - tfh = c(0,1,5,10,20,30,100) ) -summary( nicS2 ) -summary( nicM ) +dmM <- splitMulti( dmL, age = seq(0,100,5), + tfD = c(0,1,2,5,10,20,30,40), + drop = FALSE ) +summary( dmS2 ) +summary( dmM ) ################################################### -### code chunk number 12: flup.rnw:227-230 +### code chunk number 12: flup.rnw:292-295 ################################################### -identical( nicS2, nicM ) -class( nicS2 ) -class( nicM ) +identical( dmS2, dmM ) +class( dmS2 ) +class( dmM ) ################################################### -### code chunk number 13: flup.rnw:250-258 +### code chunk number 13: flup.rnw:325-335 ################################################### -timeBand( nicM, "age", "middle" )[1:20] -# For nice printing and column labelling use the data.frame() function: -data.frame( nicS2[,c("lex.id","per","age","tfh","lex.dur")], - mid.age=timeBand( nicS2, "age", "middle" ), - mid.t=timeBand( nicS2, "tfh", "middle" ), - left.t=timeBand( nicS2, "tfh", "left" ), - right.t=timeBand( nicS2, "tfh", "right" ), - fact.t=timeBand( nicS2, "tfh", "factor" ) )[1:20,] +whc <- c(names(dmL)[1:7],"dodm","doins") # WHich Columns do we want to see? +subset( dmL, lex.id %in% wh.id )[,whc] +dmC <- cutLexis( data = dmL, + cut = dmL$doins, + timescale = "per", + new.state = "Ins", + new.scale = "tfI", + precursor.states = "DM" ) +whc <- c(names(dmL)[1:8],"doins") # WHich Columns do we want to see? +subset( dmC, lex.id %in% wh.id )[,whc] ################################################### -### code chunk number 14: flup.rnw:278-279 +### code chunk number 14: flup.rnw:353-354 ################################################### -summary( (nicS2$age-nicS2$tfh) - nicS2$age1st ) +timeSince( dmC ) ################################################### -### code chunk number 15: flup.rnw:284-286 +### code chunk number 15: flup.rnw:363-370 ################################################### -summary( timeBand( nicS2, "age", "middle" ) - - timeBand( nicS2, "tfh", "middle" ) - nicS2$age1st ) +dmS2C <- cutLexis( data = dmS2, + cut = dmS2$doins, + timescale = "per", + new.state = "Ins", + new.scale = "tfI", + precursor.states = "DM" ) +subset( dmS2C, lex.id %in% wh.id )[,whc] ################################################### -### code chunk number 16: flup.rnw:308-316 +### code chunk number 16: flup.rnw:394-395 ################################################### -subset( nicL, id %in% 8:10 ) -agehi <- nicL$age1st + 50 / nicL$exposure -nicC <- cutLexis( data = nicL, - cut = agehi, - timescale = "age", - new.state = 2, - precursor.states = 0 ) -subset( nicC, id %in% 8:10 ) +summary( dmS2C, timeScales=TRUE ) ################################################### -### code chunk number 17: flup.rnw:323-331 +### code chunk number 17: box1 ################################################### -subset( nicS2, id %in% 8:10 ) -agehi <- nicS2$age1st + 50 / nicS2$exposure -nicS2C <- cutLexis( data = nicS2, - cut = agehi, - timescale = "age", - new.state = 2, - precursor.states = 0 ) -subset( nicS2C, id %in% 8:10 ) +boxes( dmC, boxpos=TRUE, scale.R=1000, show.BE=TRUE ) ################################################### -### code chunk number 18: flup.rnw:390-392 +### code chunk number 18: flup.rnw:438-446 ################################################### -( a.kn <- with( subset( nicM, lex.Xst==1 ), quantile( age+lex.dur, (1:5-0.5)/5 ) ) ) -( t.kn <- with( subset( nicM, lex.Xst==1 ), quantile( tfh+lex.dur, (1:5-0.5)/5 ) ) ) +timeBand( dmS2C, "age", "middle" )[1:10] +# For nice printing and column labelling we use the data.frame() function: +data.frame( dmS2C[,c("per","age","tfD","lex.dur")], + mid.age=timeBand( dmS2C, "age", "middle" ), + mid.t=timeBand( dmS2C, "tfD", "middle" ), + left.t=timeBand( dmS2C, "tfD", "left" ), + right.t=timeBand( dmS2C, "tfD", "right" ), + fact.t=timeBand( dmS2C, "tfD", "factor" ) )[1:15,] ################################################### -### code chunk number 19: flup.rnw:405-410 +### code chunk number 19: flup.rnw:481-482 ################################################### -ma <- glm( (lex.Xst==1) ~ Ns(age,knots=a.kn), - family = poisson, - offset = log(lex.dur), - data = nicM ) +summary( (dmS2$age-dmS2$tfD) - (dmS2$dodm-dmS2$dobth) ) + + +################################################### +### code chunk number 20: flup.rnw:487-489 +################################################### +summary( timeBand( dmS2, "age", "middle" ) - + timeBand( dmS2, "tfD", "middle" ) - (dmS2$dodm-dmS2$dobth) ) + + +################################################### +### code chunk number 21: flup.rnw:594-596 +################################################### +dmCs <- splitMulti( dmC, age = seq(0,110,1/4) ) +summary( dmCs, t=T ) + + +################################################### +### code chunk number 22: flup.rnw:618-623 +################################################### +( a.kn <- with( subset( dmCs, lex.Xst=="Dead" ), + quantile( age+lex.dur, (1:5-0.5)/5 ) ) ) +( i.kn <- c( 0, + with( subset( dmCs, lex.Xst=="Dead" & lex.Cst=="Ins" ), + quantile( tfI+lex.dur, (1:4)/5 ) ) ) ) + + +################################################### +### code chunk number 23: flup.rnw:639-644 +################################################### +ma <- glm( (lex.Xst=="Dead") ~ Ns(age,knots=a.kn), + family = poisson, + offset = log(lex.dur), + data = dmCs ) summary( ma ) ################################################### -### code chunk number 20: pr-a +### code chunk number 24: flup.rnw:663-666 +################################################### +Ma <- glm( cbind(lex.Xst=="Dead",lex.dur) ~ Ns(age,knots=a.kn), + family = poisreg, data = dmCs ) +summary( Ma ) + + +################################################### +### code chunk number 25: flup.rnw:674-676 +################################################### +Xa <- glm.Lexis( dmCs, from="DM", to="Dead", + formula = ~ Ns(age,knots=a.kn) ) + + +################################################### +### code chunk number 26: flup.rnw:679-680 +################################################### +attr( Xa, "Lexis" ) + + +################################################### +### code chunk number 27: flup.rnw:689-690 +################################################### +xa <- glm.Lexis( dmCs, formula = ~ Ns(age,knots=a.kn) ) + + +################################################### +### code chunk number 28: flup.rnw:693-694 +################################################### +c( deviance(ma), deviance(Ma), deviance(Xa), deviance(xa) ) + + +################################################### +### code chunk number 29: pr-a ################################################### nd <- data.frame( age=40:85, lex.dur=1000 ) -pr.a <- ci.pred( ma, newdata = nd ) -matplot( nd$age, pr.a, - type="l", lty=1, col=1, lwd=c(3,1,1), - log="y", xlab="Age (years)", - ylab="Lunng cancer mortality per 1000 PY") +pr.0 <- ci.pred( ma, newdata = nd ) # mortality per 100 PY +pr.a <- ci.pred( Ma, newdata = nd )*1000 # mortality per 100 PY +summary(pr.0/pr.a) +matshade( nd$age, pr.a, plot=TRUE, + type="l", lty=1, + log="y", xlab="Age (years)", + ylab="DM mortality per 1000 PY") + + +################################################### +### code chunk number 30: flup.rnw:740-744 +################################################### +pm <- glm( cbind(lex.Xst=="Dead",lex.dur) ~ Ns(age,knots=a.kn) + + lex.Cst + sex, + family=poisreg, data = dmCs ) +round( ci.exp( pm ), 3 ) + + +################################################### +### code chunk number 31: flup.rnw:758-762 +################################################### +pm <- glm( cbind(lex.Xst=="Dead",lex.dur) ~ Ns(age,knots=a.kn) + + Ns(tfI,knots=i.kn) + + lex.Cst + sex, + family=poisreg, data = tsNA20(dmCs) ) + + +################################################### +### code chunk number 32: flup.rnw:768-774 +################################################### +Pm <- glm.Lexis( tsNA20(dmCs), + form = ~ Ns(age,knots=a.kn) + + Ns(tfI,knots=i.kn) + + lex.Cst + sex ) +c( deviance(Pm), deviance(pm) ) +identical( model.matrix(Pm), model.matrix(pm) ) + + +################################################### +### code chunk number 33: flup.rnw:780-781 +################################################### +round( ci.exp( Pm, subset="ex" ), 3 ) + + +################################################### +### code chunk number 34: ins-time +################################################### +ndI <- data.frame( expand.grid( tfI=c(NA,seq(0,15,0.1)), + ai=seq(40,80,10) ), + sex="M", + lex.Cst="Ins" ) +ndI <- transform( ndI, age=ai+tfI ) +head( ndI ) +ndA <- data.frame( age= seq(40,100,0.1), tfI=0, lex.Cst="DM", sex="M" ) +pri <- ci.pred( Pm, ndI ) * 1000 +pra <- ci.pred( Pm, ndA ) * 1000 +matshade( ndI$age, pri, plot=TRUE, las=1, + xlab="Age (years)", ylab="DM mortality per 1000 PY", + log="y", lty=1, col="blue" ) +matshade( ndA$age, pra ) + + +################################################### +### code chunk number 35: flup.rnw:818-822 +################################################### +library( survival ) +cm <- coxph( Surv(age,age+lex.dur,lex.Xst=="Dead") ~ + Ns(tfI,knots=i.kn) + lex.Cst + sex, + data = tsNA20(dmCs) ) + + +################################################### +### code chunk number 36: flup.rnw:826-829 +################################################### +Cm <- coxph.Lexis( tsNA20(dmCs), + form= age ~ Ns(tfI,knots=i.kn) + lex.Cst + sex ) +cbind( ci.exp( cm ), ci.exp( Cm ) ) + + +################################################### +### code chunk number 37: flup.rnw:838-841 +################################################### +round( cbind( ci.exp( Pm ), + rbind( matrix(NA,5,3), + ci.exp( cm )[-6,] ) ), 3 ) + + +################################################### +### code chunk number 38: Ieff +################################################### +nd <- data.frame( tfI=seq(0,15,,151), lex.Cst="Ins", sex="M" ) +nr <- data.frame( tfI= 2 , lex.Cst="Ins", sex="M" ) +ppr <- ci.exp( pm, list(nd,nr), xvars="age" ) +cpr <- ci.exp( cm, list(nd,nr) ) +par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +matshade( nd$tfI, cbind(ppr,cpr), plot=T, + lty=c(1,2), log="y", + xlab="Time since insulin (years)", ylab="Rate ratio") +abline( h=1, lty=3 ) + + +################################################### +### code chunk number 39: IeffR +################################################### +nd <- data.frame( tfI=seq(0,15,,151), lex.Cst="Ins", sex="M" ) +nr <- data.frame( tfI= 0 , lex.Cst="DM" , sex="M" ) +ppr <- ci.exp( pm, list(nd,nr), xvars="age" ) +cpr <- ci.exp( cm, list(nd,nr) ) +par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +matshade( nd$tfI, cbind(ppr,cpr), + xlab="Time since insulin (years)", + ylab="Rate ratio relative to non-Insulin", + lty=c(1,2), log="y", plot=T ) + + +################################################### +### code chunk number 40: flup.rnw:946-951 +################################################### +imx <- glm.Lexis( tsNA20(dmCs), + formula = ~ Ns(age ,knots=a.kn) + + Ns( tfI,knots=i.kn) + + Ns(age-tfI,knots=a.kn) + + lex.Cst + sex ) + + +################################################### +### code chunk number 41: flup.rnw:961-971 +################################################### +Im <- glm.Lexis( tsNA20(dmCs), + formula = ~ Ns(age ,knots=a.kn) + + Ns( tfI,knots=i.kn) + + Ns((age-tfI)*(lex.Cst=="Ins"),knots=a.kn) + + lex.Cst + sex ) +im <- glm.Lexis( tsNA20(dmCs), + formula = ~ Ns(age ,knots=a.kn) + + Ns( tfI,knots=i.kn) + + lex.Cst:Ns(age-tfI,knots=a.kn) + + lex.Cst + sex ) + + +################################################### +### code chunk number 42: flup.rnw:986-987 +################################################### +anova( imx, Im, im, test='Chisq') ################################################### -### code chunk number 21: flup.rnw:445-447 +### code chunk number 43: dur-int ################################################### -mat <- update( ma, . ~ . + Ns(tfh,knots=t.kn) ) -summary( mat ) +pxi <- ci.pred( imx, ndI ) +pxa <- ci.pred( imx, ndA ) +pIi <- ci.pred( Im , ndI ) +pIa <- ci.pred( Im , ndA ) +pii <- ci.pred( im , ndI ) +pia <- ci.pred( im , ndA ) +par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +matshade( ndI$age, cbind( pxi, pIi, pii)*1000, plot=T, log="y", + xlab="Age", ylab="Mortality per 1000 PY", + lty=1, lwd=2, col=c("blue","forestgreen","red"), alpha=0.1 ) +matshade( ndA$age, cbind( pxa, pIa, pia)*1000, + lty=1, lwd=2, col=c("blue","forestgreen","red"), alpha=0.1 ) ################################################### -### code chunk number 22: flup.rnw:457-458 +### code chunk number 44: dur-int-RR ################################################### -summary( nickel$age1st ) +ndR <- transform( ndI, tfI=0, lex.Cst="DM" ) +cbind( head(ndI), head(ndR) ) +Rxi <- ci.exp( imx, list(ndI,ndR) ) +Rii <- ci.exp( im , list(ndI,ndR) ) +RIi <- ci.exp( Im , list(ndI,ndR) ) +par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +matshade( ndI$age, cbind( Rxi, RIi, Rii), plot=T, log="y", + xlab="Age (years)", ylab="Rate ratio vs, non-Insulin", + lty=1, lwd=2, col=c("blue","forestgreen","red"), alpha=0.1 ) +abline( h=1 ) +abline( h=ci.exp(imx,subset="lex.Cst")[,1], lty="25", col="blue" ) ################################################### -### code chunk number 23: flup.rnw:462-468 +### code chunk number 45: splint ################################################### -nd <- data.frame( expand.grid( age=c(20:90,NA), age1st=seq(15,45,10) ) ) -nd <- transform( nd, tfh = ifelse( age > age1st, age-age1st, NA ), - lex.dur = 1000 ) -# makes no sense to have age < age1st -nd <- transform( nd, age = ifelse( age > age1st, age, NA ) ) -head( nd ) +gm <- glm.Lexis( tsNA20(dmCs), + formula = ~ Ns(age,knots=a.kn) + + Ns(tfI,knots=i.kn) + + lex.Cst:Ns(age,knots=a.kn):Ns(tfI,knots=i.kn) + + lex.Cst + sex ) +pgi <- ci.pred( gm, ndI ) +pga <- ci.pred( gm, ndA ) +par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +matshade( ndI$age, cbind( pgi, pii )*1000, plot=T, + lty=c("solid","21"), lend="butt", lwd=2, log="y", + xlab="Age (years)", ylab="Mortality rates per 1000 PY", + alpha=c(0.2,0.1), col=c("black","red") ) +matshade( ndA$age, cbind( pga, pia )*1000, + lty=c("solid","21"), lend="butt", lwd=2, + alpha=c(0.2,0.1), col=c("black","red") ) ################################################### -### code chunk number 24: pr-at +### code chunk number 46: RR-int ################################################### -pr.at <- ci.pred( mat, newdata = nd ) -matplot( nd$age, pr.at, - type="l", lty=1, col=1, lwd=c(3,1,1), - log="y", xlab="Age (years)", - ylab="Lunng cancer mortality per 1000 PY") +ndR <- transform( ndI, lex.Cst="DM", tfI=0 ) +iRR <- ci.exp( im, ctr.mat=list(ndI,ndR) ) +gRR <- ci.exp( gm, ctr.mat=list(ndI,ndR) ) +par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +matshade( ndI$age, cbind(gRR,iRR), lty=1, log="y", plot=TRUE, + xlab="Age (years)", ylab="Rate ratio: Ins vs. non-Ins", + col=c("black","red") ) +abline( h=1 ) ################################################### -### code chunk number 25: flup.rnw:492-493 +### code chunk number 47: flup.rnw:1110-1123 ################################################### -anova( ma, mat, test="Chisq" ) +dmd <- glm.Lexis( dmCs, + from="DM", to="Dead", + formula = ~ Ns(age,knots=a.kn) + + sex ) +ind <- glm.Lexis( dmCs, + from="Ins", to="Dead", + formula = ~ Ns(age,knots=a.kn) + + Ns(tfI,knots=i.kn) + + Ns(age-tfI,knots=a.kn) + + sex ) +ini <- ci.pred( ind, ndI ) +dmi <- ci.pred( dmd, ndI ) +dma <- ci.pred( dmd, ndA ) ################################################### -### code chunk number 26: flup.rnw:504-508 +### code chunk number 48: sep-mort ################################################### -( f.kn <- with( subset( nicM, lex.Xst==1 ), quantile( age1st, (1:5-0.5)/5 ) ) ) -maf <- update( ma, . ~ . + Ns(age1st,knots=f.kn) ) -summary( maf ) -anova( maf, ma, mat, test="Chisq" ) +par(mar=c(3,3,1,1),mgp=c(3,1,0)/1.6,las=1,bty="n") +matshade( ndI$age, ini*1000, plot=TRUE, log="y", + xlab="Age (years)", ylab="Mortality rates per 1000 PY", + lwd=2, col="red" ) +matshade( ndA$age, dma*1000, + lwd=2, col="black" ) ################################################### -### code chunk number 27: pr-at-af +### code chunk number 49: sep-HR ################################################### -pr.af <- ci.pred( maf, newdata = nd ) -matplot( nd$age, pr.at, - type="l", lty=1, col=1, lwd=c(3,1,1), - log="y", xlab="Age (years)", - ylab="Lunng cancer mortality per 1000 PY") -matlines( nd$age, pr.af, - type="l", lty=1, col=2, lwd=c(3,0,0) ) +par(mar=c(3,3,1,1),mgp=c(3,1,0)/1.6,las=1,bty="n") +matshade( ndI$age, ci.ratio(ini,dmi), plot=TRUE, log="y", + xlab="Age (years)", ylab="RR insulin vs. no insulin", + lwd=2, col="red" ) +abline( h=1 ) ################################################### -### code chunk number 28: flup.rnw:536-547 +### code chunk number 50: flup.rnw:1169-1177 ################################################### -maft <- update( mat, . ~ . + Ns(age1st,knots=f.kn) ) -summary( maft ) -mft <- update( maft, . ~ . - Ns(age,knots=a.kn) ) -mf <- update( maf , . ~ . - Ns(age,knots=a.kn) ) -mt <- update( mat , . ~ . - Ns(age,knots=a.kn) ) -allp <- anova( maft, mat, ma, maf, mf, mft, mt, mat, - maf, maft, mft, - test="Chisq" ) -mall <- as.matrix( allp ) -cbind( mod = c("maft","mat","ma","maf","mf","mft","mt","mat","maf","maft","mft"), - round( allp[,1:5], 3 ) ) +dmCs <- cutLexis( data = dmS2, + cut = dmS2$doins, + timescale = "per", + new.state = "Ins", + new.scale = "tfI", + precursor.states = "DM", + split.states = TRUE ) +summary( dmCs ) ################################################### -### code chunk number 29: flup.rnw:562-571 +### code chunk number 51: box4 ################################################### -data( nickel ) -nicL <- Lexis( entry = list( per = agein+dob, - age = agein, - tfh = agein-age1st ), - exit = list( age = ageout ), - exit.status = ( icd > 0 ) + ( icd %in% c(162,163) ), - data = nickel ) -summary( nicL ) -subset( nicL, id %in% 8:10 ) +boxes( dmCs, boxpos=list(x=c(15,15,85,85), + y=c(85,15,85,15)), + scale.R=1000, show.BE=TRUE ) ################################################### -### code chunk number 30: flup.rnw:576-585 +### code chunk number 52: flup.rnw:1207-1215 ################################################### -nicL <- Lexis( entry = list( per = agein+dob, - age = agein, - tfh = agein-age1st ), - exit = list( age = ageout ), - exit.status = ( icd > 0 ) + ( icd %in% c(162,163) ), - data = nickel, - states = c("Alive","D.oth","D.lung") ) -summary( nicL ) -str( nicL ) +dmM <- mcutLexis( dmL, + timescale = "per", + wh = c("doins","dooad"), + new.states = c("Ins","OAD"), + new.scales = c("tfI","tfO"), + precursor.states = "DM", + ties.resolve = TRUE ) +summary( dmM, t=T ) ################################################### -### code chunk number 31: flup.rnw:597-605 +### code chunk number 53: flup.rnw:1219-1225 ################################################### -nicL$agehi <- nicL$age1st + 50 / nicL$exposure -nicC <- cutLexis( data = nicL, - cut = nicL$agehi, - timescale = "age", - new.state = "HiExp", - precursor.states = "Alive" ) -subset( nicC, id %in% 8:10 ) -summary( nicC, scale=1000 ) +wh <- c(subset(dmM,lex.Cst=="Ins-OAD")$lex.id[1:2], + subset(dmM,lex.Cst=="OAD-Ins")$lex.id[1:2]) +options( width=110 ) +print( subset( dmM, lex.id %in% wh )[,c('lex.id',names(dmM[1:8]),c("doins","dooad"))], + digits=6, row.names=FALSE ) +summary( dmM, t=T ) ################################################### -### code chunk number 32: flup.rnw:624-633 +### code chunk number 54: mbox ################################################### -nicC <- cutLexis( data = nicL, - cut = nicL$agehi, - timescale = "age", - new.state = "HiExp", - new.scale = "tfe", - split.states = TRUE, - precursor.states = "Alive" ) -subset( nicC, id %in% 8:10 ) -summary( nicC, scale=1000, timeScales=TRUE ) +boxes( dmM, boxpos=list(x=c(15,80,40,40,85,85), + y=c(50,50,90,10,90,10)), + scale.R=1000, show.BE=TRUE ) ################################################### -### code chunk number 33: nic-box +### code chunk number 55: mboxr ################################################### -boxes( nicC, boxpos = list(x=c(10,10,80,80,80,80), - y=c(75,25,87,63,13,37)), - scale.Y = 1000, - show.BE = TRUE ) +summary( dmMr <- Relevel( dmM, list('OAD+Ins'=5:6), first=FALSE) ) +boxes( dmMr, boxpos=list(x=c(15,50,15,85,85), + y=c(85,50,15,85,15)), + scale.R=1000, show.BE=TRUE ) diff -Nru r-cran-epi-2.32/vignettes/flup.rnw r-cran-epi-2.37/vignettes/flup.rnw --- r-cran-epi-2.32/vignettes/flup.rnw 2018-03-05 16:59:16.000000000 +0000 +++ r-cran-epi-2.37/vignettes/flup.rnw 2019-03-01 10:07:59.000000000 +0000 @@ -1,21 +1,16 @@ \SweaveOpts{results=verbatim,keep.source=TRUE,include=FALSE,eps=FALSE} -%\VignetteIndexEntry{Follow-up data with R and Epi} +%\VignetteIndexEntry{Follow-up data with the Lexis functions in Epi} \documentclass[a4paper,dvipsnames,twoside,12pt]{report} - -<>= -options( width=90, - SweaveHooks=list( fig=function() - par(mar=c(3,3,1,1),mgp=c(3,1,0)/1.6,las=1,bty="n") ) ) -@ % -\newcommand{\Title}{Follow-up data with \R\ and \texttt{Epi}} -\newcommand{\Tit}{Follow-up} -\newcommand{\Version}{\ } -\newcommand{\Dates}{December 2017} +\newcommand{\Title}{Follow-up data with the\\ \texttt{Lexis} functions in + \texttt{Epi}} +\newcommand{\Tit}{Follow-up with \texttt{Lexis}} +\newcommand{\Version}{Version 3} +\newcommand{\Dates}{February 2019} \newcommand{\Where}{SDCC} \newcommand{\Homepage}{\url{http://bendixcarstensen.com/} } \newcommand{\Faculty}{\begin{tabular}{rl} Bendix Carstensen - & Steno Diabetes Center, Gentofte, Denmark\\ + & Steno Diabetes Center Copenhagen, Gentofte, Denmark\\ & {\small \& Department of Biostatistics, University of Copenhagen} \\ & \texttt{b@bxc.dk} \\ @@ -24,18 +19,50 @@ \input{topreport} \renewcommand{\rwpre}{./flup} +<>= +options( width=90, + SweaveHooks=list( fig=function() + par(mar=c(3,3,1,1),mgp=c(3,1,0)/1.6,las=1,bty="n") ) ) +@ % + +\chapter*{Introduction} +\addcontentsline{toc}{chapter}{Introduction} + +This is an introduction to the \texttt{Lexis} machinery in the +\texttt{Epi} package. The machinery is intended for representation and +manipulation of follow-up data (event history data) from studies where +exact dates of events are known. It accommodates follow-up through +multiple states and on multiple time scales. + +This vignette uses an example from the \texttt{Epi} package to +illustrate the set-up of a simple \texttt{Lexis} object (a data frame +of follow-up intervals), as well as the subdivision of follow-up +intervals needed for multistate representation and analysis of +transition rates. + +The first chapter is exclusively on manipulation of the follow-up +representation, but it points to the subsequent chapter where analysis +is based on a \texttt{Lexis} representation with very small follow-up +intervals. + +Chapter 2 uses analysis of mortality rates among Danish +diabetes patients currently on insulin treatment or not to illustrate +the use of the the \texttt{Lexis} machinery. -\chapter{Follow-up data in the \texttt{Epi} package} +I owe much thanks to my colleague Lars Jorge Diaz for careful reading +and many constructive suggestions. + +\chapter{Representation of follow-up data in the \texttt{Epi} package} In the \texttt{Epi}-package, follow-up data is represented by adding some extra variables to a data frame. Such a data frame is called a \texttt{Lexis} object. The tools for handling follow-up data then use -the structure of this for special plots, tabulations etc. +the structure of this for special plots, tabulations and modeling. Follow-up data basically consists of a time of entry, a time of exit and an indication of the status at exit (normally either ``alive'' or -``dead''). Implicitly is also assumed a status \emph{during} the -follow-up (usually ``alive''). +``dead'') for each person. Implicitly is also assumed a status +\emph{during} the follow-up (usually ``alive''). \begin{figure}[htbp] \centering @@ -78,24 +105,24 @@ \label{fig:fu2} \end{figure} -\section{Timescales} +\section{Time scales} -A timescale is a variable that varies deterministicly \emph{within} +A time scale is a variable that varies deterministically \emph{within} each person during follow-up, \textit{e.g.}: \begin{itemize} \item Age \item Calendar time - \item Time since treatment + \item Time since start of treatment \item Time since relapse \end{itemize} -All timescales advance at the same pace, so the time followed is the -same on all timescales. Therefore, it suffices to use only the entry -point on each of the time scale, for example: +All time scales advance at the same pace, so the time followed is the +same on all time scales. Therefore, it will suffice to use only the +entry point on each of the time scales, for example: \begin{itemize} - \item Age at entry. - \item Date of entry. - \item Time since treatment (\emph{at} treatment this is 0). - \item Time since relapse (\emph{at} relapse this is 0).. + \item Age at entry + \item Date of entry + \item Time at treatment (\emph{at} treatment this is 0) + \item Time at relapse (\emph{at} relapse this is 0) \end{itemize} For illustration we need to load the \texttt{Epi} package: <<>>= @@ -103,137 +130,175 @@ print( sessionInfo(), l=F ) @ % In the \texttt{Epi} package, follow-up in a cohort is represented in a -\texttt{Lexis} object. A \texttt{Lexis} object is a data frame with a -bit of extra structure representing the follow-up. For the -\texttt{nickel} data we would construct a \texttt{Lexis} object by: -<<>>= -data( nickel ) -nicL <- Lexis( entry = list( per=agein+dob, - age=agein, - tfh=agein-age1st ), - exit = list( age=ageout ), - exit.status = ( icd %in% c(162,163) )*1, - data = nickel ) +\texttt{Lexis} object. As mentioned, a \texttt{Lexis} object is a data +frame with some extra structure representing the follow-up. For the +\texttt{DMlate} data --- follow-up of diabetes patients in Denmark +recording date of birth, date of diabetes, date of insulin use, date +of first oral drug use and date of death --- we can construct a +\texttt{Lexis} object by: +<<>>= +data( DMlate ) +head( DMlate ) +dmL <- Lexis( entry = list( per=dodm, + age=dodm-dobth, + tfD=0 ), + exit = list( per=dox ), + exit.status = factor( !is.na(dodth), labels=c("DM","Dead") ), + data = DMlate ) +timeScales(dmL) @ % +(The excluded persons are persons with date of diabetes equal to date +of death.) + The \texttt{entry} argument is a \emph{named} list with the entry -points on each of the timescales we want to use. It defines the names -of the timescales and the entry points of the follow-up of each +points on each of the time scales we want to use. It defines the names +of the time scales and the entry points of the follow-up of each person. The \texttt{exit} argument gives the exit time on \emph{one} -of the timescales, so the name of the element in this list must match +of the time scales, so the name of the element in this list must match one of the names of the \texttt{entry} list. This is sufficient, because the follow-up time on all time scales is the same, in this -case \texttt{ageout - agein}. Now take a look at the result: -<<>>= -str( nickel ) -str( nicL ) -head( nicL ) -@ -The \texttt{Lexis} object \texttt{nicL} has a variable for each -timescale which is the entry point on this timescale. The follow-up -time is in the variable \texttt{lex.dur} (\textbf{dur}ation). - -There is a \texttt{summary} function for \texttt{Lexis} objects that -list the number of transitions and records as well as the total -amount of follow-up time: -<<>>= -summary( nicL ) -@ -We defined the exit status to be death from lung cancer (ICD7 -162,163), i.e. this variable is 1 if follow-up ended with a death from -this cause. If follow-up ended alive or by death from another cause, -the exit status is coded 0, i.e. as a censoring. +case $\mathtt{dox}$-$\mathtt{dodm}$. +The \texttt{exit.status} is a categorical variable (a \emph{factor}) +that indicates the exit status --- in this case whether the person +(still) is in state \texttt{DM} or exits to \texttt{Dead} at the end +of follow-up. In principle we should also indicate the +\texttt{entry.status}, but the default is to assume that all persons +enter in the \texttt{first} of the mentioned \texttt{exit.state}s --- +in this case \texttt{DM}, because $\mathtt{FALSE}<\mathtt{TRUE}$. + +Now take a look at the result: +<<>>= +str( dmL ) +head( dmL )[,1:10] +@ % +The \texttt{Lexis} object \texttt{dmL} has a variable for each +time scale which is the entry point on this time scale. The follow-up +time is in the variable \texttt{lex.dur} (\texttt{dur}ation). Note that the exit status is in the variable \texttt{lex.Xst} -(e\texttt{X}it \texttt{st}atus. The variable \texttt{lex.Cst} is the +(e\texttt{X}it \texttt{st}ate. The variable \texttt{lex.Cst} is the state where the follow-up takes place (\texttt{C}urrent -\texttt{st}atus), in this case 0 (alive). +\texttt{st}ate), in this case \texttt{DM} (alive with diabetes) for +all persons. This implies that \emph{censored} observations are +characterized by having $\mathtt{lex.Cst}=\mathtt{lex.Xst}$. +There is a \texttt{summary} function for \texttt{Lexis} objects that +lists the number of transitions and records as well as the total amount +of follow-up time; it also (optionally) prints information about the names of the +variables that constitute the time scales: +<<>>= +summary.Lexis( dmL, timeScales=TRUE ) +@ % It is possible to get a visualization of the follow-up along the -timescales chosen by using the \texttt{plot} method for \texttt{Lexis} -objects. \texttt{nicL} is an object of \emph{class} \texttt{Lexis}, so +time scales chosen by using the \texttt{plot} method for \texttt{Lexis} +objects. \texttt{dmL} is an object of \emph{class} \texttt{Lexis}, so using the function \texttt{plot()} on it means that \R\ will look for the function \texttt{plot.Lexis} and use this function. -<>= -plot( nicL ) +<>= +plot( dmL ) @ % The function allows quite a bit of control over the output, and a \texttt{points.Lexis} function allows plotting of the endpoints of follow-up: -<>= +<>= par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6 ) -plot( nicL, 1:2, lwd=1, col=c("blue","red")[(nicL$exp>0)+1], +plot( dmL, 1:2, lwd=1, col=c("blue","red")[dmL$sex], grid=TRUE, lty.grid=1, col.grid=gray(0.7), - xlim=1900+c(0,90), xaxs="i", - ylim= 10+c(0,90), yaxs="i", las=1 ) -points( nicL, 1:2, pch=c(NA,3)[nicL$lex.Xst+1], - col="lightgray", lwd=3, cex=1.5 ) -points( nicL, 1:2, pch=c(NA,3)[nicL$lex.Xst+1], - col=c("blue","red")[(nicL$exp>0)+1], lwd=1, cex=1.5 ) -@ % -The results of these two plotting commands are in figure \ref{fig:Lexis-diagram}. + xlim=1960+c(0,60), xaxs="i", + ylim= 40+c(0,60), yaxs="i", las=1 ) +points( dmL, 1:2, pch=c(NA,3)[dmL$lex.Xst], + col="lightgray", lwd=3, cex=0.3 ) +points( dmL, 1:2, pch=c(NA,3)[dmL$lex.Xst], + col=c("blue","red")[dmL$sex], lwd=1, cex=0.3 ) +box(bty='o') +@ % +In the above code you will note that the values of the arguments +\texttt{col} and \texttt{pch} are indexed by factors, using the +convention in \R\ that the index is taken as \emph{number of the level} +of the supplied factor. Thus \texttt{c("blue","red")[dmL\$sex]} is +\texttt{"blue"} when \texttt{sex} is \texttt{M} (the first level). + +The results of these two plotting commands are in figure +\ref{fig:Lexis-diagram}, p. \pageref{fig:Lexis-diagram}. \begin{figure}[tb] \centering +\includegraphics[width=0.35\textwidth]{flup-dmL1} +\includegraphics[width=0.63\textwidth]{flup-dmL2} +\caption{\it Lexis diagram of the \textrm{\tt DMlate} dataset; left + panel is the default version, right panel: plot with some bells and + whistles. The red lines are for women, blue for men, crosses + indicate deaths.} \label{fig:Lexis-diagram} -\includegraphics[width=0.39\textwidth]{flup-nicL1} -\includegraphics[width=0.59\textwidth]{flup-nicL2} -\caption{\it Lexis diagram of the \textrm{\tt nickel} dataset; left - panel the default version, right panel with bells and whistles. The - red lines are for persons with exposure$>0$, so it is pretty evident - that the oldest ones are the exposed part of the cohort.} \end{figure} -\chapter{Subdividing follow-up for analysis} +\section{Splitting the follow-up time along a time scale} -\section{Splitting the follow-up time along a timescale} - -The follow-up time in a cohort can be subdivided by for example -current age. This is achieved by the \texttt{splitLexis} (note that it -is \emph{not} called \texttt{split.Lexis}). This requires that the -timescale and the breakpoints on this timescale are supplied. Try: -<<>>= -nicS1 <- splitLexis( nicL, "age", breaks=seq(0,100,10) ) -summary( nicL ) -summary( nicS1 ) -@ % -So we see that the number of events and the amount of follow-up is the -same in the two data sets; only the number of records differ --- the -extra records all have \texttt{lex.Cst}=0 and \texttt{lex.Xst}=0. +In next chapter we shall conduct statistical analysis of mortality +rates, and a prerequisite for parametric analysis of rates is that +follow-up time is subdivided in smaller intervals, where we can +reasonably assume that rates are constant. + +The follow-up time in a cohort can be subdivided (``split'') along a +time scale, for example current age. This is achieved by the +\texttt{splitLexis} (note that it is \emph{not} called +\texttt{split.Lexis}). This requires that the time scale and the +breakpoints on this time scale are supplied. Try: +<<>>= +dmS1 <- splitLexis( dmL, "age", breaks=seq(0,100,5) ) +summary( dmL ) +summary( dmS1 ) +@ % +We see that the number of persons and events and the amount of +follow-up is the same in the two data sets; only the number of records +differ --- the extra records all have \texttt{lex.Cst}=\texttt{DM} and +\texttt{lex.Xst}=\texttt{DM}. To see how records are split for each individual, it is useful to list -the results for a few individuals: +the results for a few individuals (whom we selected with a view to the +illustrative usefulness): <<>>= -round( subset( nicS1, id %in% 8:10 ), 2 ) -@ % -The resulting object, \texttt{nicS1}, is again a \texttt{Lexis} -object, and so follow-up may be split further along another -timescale. Subsequently we list the results for individuals 8, 9 and -10 again: +wh.id <- c(9,27,52,484) +subset( dmL , lex.id %in% wh.id )[,1:10] +subset( dmS1, lex.id %in% wh.id )[,1:10] +@ % +The resulting object, \texttt{dmS1}, is again a \texttt{Lexis} object, +and the follow-up may be split further along another time scale, for +example diabetes duration, \texttt{tfD}. Subsequently we list the +results for the chosen individuals: <<>>= -nicS2 <- splitLexis( nicS1, "tfh", breaks=c(0,1,5,10,20,30,100) ) -round( subset( nicS2, id %in% 8:10 ), 2 ) +dmS2 <- splitLexis( dmS1, "tfD", breaks=c(0,1,2,5,10,20,30,40) ) +subset( dmS2, lex.id %in% wh.id )[,1:10] @ % A more efficient (and more intuitive) way of making this double split is to use the \texttt{splitMulti} function from the \texttt{popEpi} package: <<>>= library( popEpi ) -nicM <- splitMulti( nicL, age = seq(0,100,10), - tfh = c(0,1,5,10,20,30,100) ) -summary( nicS2 ) -summary( nicM ) -@ % +dmM <- splitMulti( dmL, age = seq(0,100,5), + tfD = c(0,1,2,5,10,20,30,40), + drop = FALSE ) +summary( dmS2 ) +summary( dmM ) +@ % +Note we used the argument \texttt{drop=FALSE} which will retain +follow-up also outside the window defined by the breaks. Otherwise the +default for \texttt{splitMulti} would be to drop follow-up outside +\texttt{age} [0,100] and \texttt{tfD} [0,40]. This clipping behaviour +is not available in \texttt{splitLexis}, nevertheless this may be +exactly what we want in some situations. + So we see that the two ways of splitting data yields the same amount of follow-up, but the results are not identical: <<>>= -identical( nicS2, nicM ) -class( nicS2 ) -class( nicM ) +identical( dmS2, dmM ) +class( dmS2 ) +class( dmM ) @ % -As we see, this is because the \texttt{nicM} object also is a +As we see, this is because the \texttt{dmM} object also is a \texttt{data.table} object; the \texttt{splitMulti} uses the \texttt{data.table} machinery which makes the splitting substantially faster --- this is of particular interest if you operate on large data -sets ($>1,000,000$ records). +sets ($>100,000$ records). Thus the recommended way of splitting follow-up time is by \texttt{splitMulti}. But you should be aware that the result is a @@ -241,125 +306,246 @@ slightly different from \texttt{data.frame}s. See the manual for \texttt{data.table}. +\section{Cutting follow up time at dates of intermediate events} + +If we have a recording of the date of a specific event as for example +recovery or relapse, we may classify follow-up time as being before or +after this intermediate event, but it requires that follow-up records +that straddle the event be cut in two and placed in separate records, +one representing follow-up \emph{before} the intermediate event, and another +representing follow-up \emph{after} the intermediate event. This is +achieved with the function \texttt{cutLexis}, which takes three +arguments: the time point of the intermediate event, the time scale +that this point refers to, and the value of the (new) state following +the date. Optionally, we may also define a new time scale with the +argument \texttt{new.scale=}. + +We are interested in the time before and after inception of insulin +use, which occurs at the date \texttt{doins}: +<<>>= +whc <- c(names(dmL)[1:7],"dodm","doins") # WHich Columns do we want to see? +subset( dmL, lex.id %in% wh.id )[,whc] +dmC <- cutLexis( data = dmL, + cut = dmL$doins, + timescale = "per", + new.state = "Ins", + new.scale = "tfI", + precursor.states = "DM" ) +whc <- c(names(dmL)[1:8],"doins") # WHich Columns do we want to see? +subset( dmC, lex.id %in% wh.id )[,whc] +@ % +(The \texttt{precursor.states=} argument is explained below). + +Note that the process of cutting time is simplified by having all +types of events referred to the calendar time scale. This is a +generally applicable advice in handling follow-up data: Get all event +times as \emph{dates}, location of events and follow-up on other time +scales can then easily be derived from this. + +Note +that individual 52 has had his follow-up cut at 6.55 years from +diabetes diagnosis and individual 484 at 5.70 years from diabetes +diagnosis. This dataset could then be split along the time scales as we +did before with \texttt{dmL}. + +We can see which of the time scales that are defined as time since +entry into an intermediate state: +<<>>= +timeSince( dmC ) +@ % +The names of the vector are the time scales; each element is the name +of the state entry into which defines the origin of the time +scale. This resulting \texttt{Lexis} object can then be split along +one or more time scales. + +The result of this can however also be achieved by cutting the split +dataset \texttt{dmS2} instead of \texttt{dmL}: +<<>>= +dmS2C <- cutLexis( data = dmS2, + cut = dmS2$doins, + timescale = "per", + new.state = "Ins", + new.scale = "tfI", + precursor.states = "DM" ) +subset( dmS2C, lex.id %in% wh.id )[,whc] +@ % +Thus it does not matter in which order we use \texttt{splitLexis} and +\texttt{cutLexis}. Mathematicians would say that \texttt{splitLexis} +and \texttt{cutLexis} are commutative. + +Note in \texttt{lex.id}=484, that follow-up subsequent to the event is +classified as being in state \texttt{Ins}, but that the final +transition to state \texttt{Dead} is preserved. This is the point of +the \texttt{precursor.states=} argument. It names the states (in this +case \texttt{DM}) that will be over-written by \texttt{new.state} (in +this case \texttt{Ins}), while the state \texttt{Dead} should not be +updated even if it is after the time where the persons moves to state +\texttt{Ins}. In other words, only state \texttt{DM} is a precursor to +state \texttt{Ins}, state \texttt{Dead} is always subsequent to state +\texttt{Ins}. + +Note that we defined a new time scale, \texttt{tfI}, using the argument +\texttt{new.scale=tfI}. This has a special status relative to the other +three time scales, it is defined as time since entry into a state, +namely \texttt{Ins}, this is noted in the time scale part of the +summary of \texttt{Lexis} object --- the information sits in the +attribute \texttt{time.since} of the \texttt{Lexis} object, which can +be accessed by the function \texttt{timeSince()} or through the \texttt{summary()}: +<<>>= +summary( dmS2C, timeScales=TRUE ) +@ % +Finally we can get a quick overview of the states and transitions by +using \texttt{boxes} --- \texttt{scale.R} scales transition rates to +rates per 1000 PY: +<>= +boxes( dmC, boxpos=TRUE, scale.R=1000, show.BE=TRUE ) +@ % +\insfig{box1}{0.8}{States, person years, transitions and rates in the + cut dataset. The numbers \emph{in} the boxes are person-years and + the number of persons \texttt{B}eginning, resp. \texttt{E}nding + their follow-up in each state (triggered by \textrm{\tt + show.BE=TRUE}). The numbers at the arrows are the number of + transitions and transition rates per 1000 (triggered by \textrm{\tt + scale.R=1000}).} + +\chapter{Modeling rates from \texttt{Lexis} objects} + +\section{Covariates} + +In the dataset \texttt{dmS2C} there are three types of covariates that +can be used to describe mortality rates: +\begin{enumerate} +\item time-dependent covariates +\item time scales +\item fixed covariates +\end{enumerate} + +There is only one time-dependent covariate here, namely +\texttt{lex.Cst}, the current state of the person's follow up; it +takes the values \texttt{DM} and \texttt{Ins} according to whether the +person has ever purchased insulin at a given time of follow-up. + +The time-scales are obvious candidates for explanatory variables for +the rates, notably age and time from diagnosis (duration of diabetes) +and insulin. + \subsection{Time scales as covariates} -If we want to model the effect of these timescale variables on +If we want to model the effect of the time scale variables on occurrence rates, we will for each interval use either the value of the left endpoint in each interval or the middle. There is a function \texttt{timeBand} which returns either of these: <<>>= -timeBand( nicM, "age", "middle" )[1:20] -# For nice printing and column labelling use the data.frame() function: -data.frame( nicS2[,c("lex.id","per","age","tfh","lex.dur")], - mid.age=timeBand( nicS2, "age", "middle" ), - mid.t=timeBand( nicS2, "tfh", "middle" ), - left.t=timeBand( nicS2, "tfh", "left" ), - right.t=timeBand( nicS2, "tfh", "right" ), - fact.t=timeBand( nicS2, "tfh", "factor" ) )[1:20,] -@ % -Note that these are characteristics of the intervals defined by -\texttt{breaks=}, \emph{not} the midpoints nor left or right endpoints -of the actual follow-up intervals (which would be \texttt{tfh} and -\texttt{tfh+lex.dur}, respectively). +timeBand( dmS2C, "age", "middle" )[1:10] +# For nice printing and column labelling we use the data.frame() function: +data.frame( dmS2C[,c("per","age","tfD","lex.dur")], + mid.age=timeBand( dmS2C, "age", "middle" ), + mid.t=timeBand( dmS2C, "tfD", "middle" ), + left.t=timeBand( dmS2C, "tfD", "left" ), + right.t=timeBand( dmS2C, "tfD", "right" ), + fact.t=timeBand( dmS2C, "tfD", "factor" ) )[1:15,] +@ % +Note that the values of these functions are characteristics of the +intervals defined by \texttt{breaks=}, \emph{not} the midpoints nor +left or right endpoints of the actual follow-up intervals (which would +be \texttt{tfD} and \texttt{tfD+lex.dur}, respectively). -These functions are intended for modeling timescale variables as +These functions are intended for modeling time scale variables as factors (categorical variables) in which case the coding must be independent of the censoring and mortality pattern --- it should only -depend on the chosen grouping of the timescale. Modeling timescales as +depend on the chosen grouping of the time scale. Modeling time scales as \emph{quantitative} should not be based on these codings but directly -on the values of the time-scale variables. +on the values of the time-scale variables, notably the left endpoints +of the intervals. \subsection{Differences between time scales} -The midpoint (as well as the left and right interval endpoint) should -be used with caution if the variable \texttt{age1st} is modeled too; -the age at hire is logically equal to the difference between current -age (\texttt{age}) and time since hire (\texttt{thf}): +Apparently, the only fixed variable is \texttt{sex}, but formally the +dates of birth (\texttt{dobth}), diagnosis (\texttt{dodm}) and first +insulin purchase (\texttt{doins}) are fixed covariates too. They can +be constructed as origins of time scales referred to the calendar time +scale. Likewise, and possibly of greater interest, we can consider +these origins on the age scale, calculated as the difference between +age and another time scale. + +These would then be age at birth (hardly relevant), age at diabetes +diagnosis and age at insulin treatment. + +\subsection{Keeping the relation between time scales} + +The midpoint (as well as the right interval endpoint) should be used +with caution if the variable age at diagnosis \texttt{dodm-dobth} is +modeled too; the age at diabetes is logically equal to the difference +between current age (\texttt{age}) and time since diabetes diagnosis +(\texttt{tfD}): +<<>>= +summary( (dmS2$age-dmS2$tfD) - (dmS2$dodm-dmS2$dobth) ) +@ % +This calculation refers to the \emph{start} of each interval --- which +are in the time scale variables in the \texttt{Lexis} object. But when +using the middle of the intervals, this relationship is not preserved: <<>>= -summary( (nicS2$age-nicS2$tfh) - nicS2$age1st ) +summary( timeBand( dmS2, "age", "middle" ) - + timeBand( dmS2, "tfD", "middle" ) - (dmS2$dodm-dmS2$dobth) ) @ % -This calculation refer to the \emph{start} of each interval --- the -time scale variables in the \texttt{Lexis} object. But when using the -middle of the intervals, this relationship is not preserved: -<<>>= -summary( timeBand( nicS2, "age", "middle" ) - - timeBand( nicS2, "tfh", "middle" ) - nicS2$age1st ) -@ % -If all three variable are to be included in a model, you must make +If all three variables are to be included in a model, we must make sure that the \emph{substantial} relationship between the variables be -maintained. One way is to recompute age at first hire from the two +maintained. One way is to recompute age at diabetes diagnosis from the two midpoint variables, but more straightforward would be to use the left endpoint of the intervals, that is the time scale variables in the -\texttt{Lexis} object. The latter approach however requires that the -follow-up is split in fairly small chunks. - -\section{Cutting follow up time at a specific date} - -If we have a recording of the date of a specific event as for example -recovery or relapse, we may classify follow-up time as being before or -after this intermediate event, but it requires that follow-up records -that straddle the event be cut into two record. This is achieved with -the function \texttt{cutLexis}, which takes three arguments: the time -point, the timescale, and the value of the (new) state following the -date. - -Now we define the age for the nickel workers where the cumulative -exposure exceeds 50 exposure years: -<<>>= -subset( nicL, id %in% 8:10 ) -agehi <- nicL$age1st + 50 / nicL$exposure -nicC <- cutLexis( data = nicL, - cut = agehi, - timescale = "age", - new.state = 2, - precursor.states = 0 ) -subset( nicC, id %in% 8:10 ) -@ % -(The \texttt{precursor.states=} argument is explained below). Note -that individual 6 has had his follow-up split at 25 years since hire -where 50 exposure-years were attained. This could also have been -achieved in the split dataset \texttt{nicS2} instead of \texttt{nicL}, -try: -<<>>= -subset( nicS2, id %in% 8:10 ) -agehi <- nicS2$age1st + 50 / nicS2$exposure -nicS2C <- cutLexis( data = nicS2, - cut = agehi, - timescale = "age", - new.state = 2, - precursor.states = 0 ) -subset( nicS2C, id %in% 8:10 ) -@ % -The same results would have emerged if we had used the \texttt{nicM} -dataset (the \texttt{data.table} object). Mathematicians would say -that \texttt{splitLexis} and \texttt{cutLexis} are commutative. - -Note that follow-up subsequent to the event is classified as being in -state 2, but that the final transition to state 1 (death from lung -cancer) is preserved. This is the point of the -\texttt{precursor.states=} argument. It names the states (in this case -0, ``Alive'') that will be over-written by \texttt{new.state} (in this -case state 2, ``High exposure''), while state 1 (``Dead'') should not -be updated even if it is after the time where the persons moves to -state 2. In other words, only state 0 is a precursor to state 2, state -1 is always subsequent to state 2. Even if you at a high exposure -level, death is pretty final. - -If the intermediate event is to be used as a time-dependent variable -in a Cox-model, then \texttt{lex.Cst} should be used as the -time-dependent variable, and \texttt{lex.Xst==1} as the event. - -\chapter{Modeling rates} +\texttt{Lexis} object. -\section{Background} - -The purpose of subdividing follow-up data is to be able to model the -effects of the time scale variables as parametric functions. +If we dissolve the relationship between the variables \texttt{age}, +\texttt{tfD} and age at diagnosis by grouping we may obtain +identifiability of the three separate effects, but it will be at the +price of an arbitrary allocation of a linear trend between them. + +For the sake of clarity, consider current age, $a$, duration of +disease, $d$ and age at diagnosis $e$, where +\[ + \text{current age} = + \text{age at diagnosis} + + \text{disease duration}, + \quad \text{\ie} \quad a=e+d \quad + \Leftrightarrow \quad e+d-a=0 +\] +If we model the effect of the quantitative variables $a$, $e$ and $d$ +on the log-rates by three functions $f$, $g$ and $h$: +$ \log(\lambda)=f(a)+g(d)+h(e) $ +then for any $\kappa$: +\begin{align*} + \log(\lambda) & = f(a)+g(d)+h(e)+\kappa(e+d-a)\\ + & = + \big(f(a)-\kappa a \big)+ + \big(g(d)+\kappa d \big)+ + \big(h(e)+\kappa e \big) \\ +& = \tilde f(a)+ \tilde g(d)+ \tilde h(e) +\end{align*} +In practical modeling this will turn op as a singular model matrix +with one parameter aliased, corresponding to some arbitrarily chosen +value of $\kappa$ (depending on software conventions for singular +models). This is well known from age-period-cohort models. + +Thus we see that we can move any slope around between the three terms, +so if we achieve identifiability by using grouping of one of the +variables we will in reality have settled for a particular value of +$\kappa$, without known why we chose just that. The solution is to +resort to predictions which are independent of the particular +parametrization or choose a particular parametrization with explicit constraints. + +\section{Modeling of rates} + +As mentioned, the purpose of subdividing follow-up data in smaller +intervals is to be able to model effects of time scale variables as +parametric functions. When we split along a time scale we can get +intervals that are as small as we want; if they are sufficiently +small, an assumption of constant rates in each interval becomes +reasonable. In a model that assumes a constant occurrence rate in each of the intervals the likelihood contribution from each interval is the same as the likelihood contribution from a Poisson variate $D$, say, with -mean $\lambda\ell$ where $\lambda$ is the rate and $\ell$ is the +mean $\lambda \ell$ where $\lambda$ is the rate and $\ell$ is the interval length, and where the value of the variate $D$ is 1 or 0 according to whether an event has occurred or not. Moreover, the likelihood contributions from all follow-up intervals from a single @@ -367,319 +553,823 @@ survived till the start of the interval in question). This implies that the total contribution to the likelihood from a single person is a product of terms, and hence the same as the likelihood of a number -of independent Poisson terms, one from each interval. +of independent Poisson terms, one from each interval. -Parametric modeling of the rates is obtained by using the \emph{value} -of the timescale for each interval as quantitative explanatory -variables, using for example splines. Thus the model will be one where -the rate is assumed constant in each interval, but where a parametric -form of the \emph{size} of the rate in each interval is imposed by the -model, using the timescale as a covariate. - -\section{Practicalities} - -In the nickel worker study we might want to look at the effects of age -and time since hire. If we want to use splines we must allocate knots -for anchoring the splines at each of the time scales, either by some +Note that variables are neither Poisson distributed (\eg they can only +ever assume values 0 or 1) nor independent --- it is only the +likelihood for the follow-up data that happens to be the same as the +likelihood from independent Poisson variates. Different models can +have the same likelihood, a model cannot be inferred from the +likelihood. + +Parametric modeling of the rates is obtained by using the +\emph{values} of the time scales for each interval as \emph{quantitative} +explanatory variables, using for example splines. And of course also +the values of the fixed covariates and the time-dependent variables +for each interval. Thus the model will be one where the rate is +assumed constant in each (small) interval, but where a parametric form +of the \emph{size} of the rate in each interval is imposed by the +model, using the time scale as a quantitative covariate. + +\subsection{Interval length} + +In the first chapter we illustrated cutting and splitting by listing +the results for a few individuals across a number of intervals. For +illustrational purposes we used 5-year age bands to avoid excessive +listings, but since the doubling time for mortality on the age scale +is only slightly larger than 5 years, the assumption about constant +rates in each interval would be pretty far fetched if we were to use 5 +year intervals. + +Thus, for modeling purposes we split the follow-up in 3 month +intervals. When we use intervals of 3 month's length it is superfluous +to split along multiple time scales --- the precise location of +tightly spaced splits will be irrelevant from any practical point of +view. \texttt{splitLexis} and \texttt{splitMulti} will allocate the +actual split times for all of the time scale variables, so these can be +used directly in modeling. + +So we split the cut dataset in 3-month intervals along the age +scale: +<<>>= +dmCs <- splitMulti( dmC, age = seq(0,110,1/4) ) +summary( dmCs, t=T ) +@ % +We see that we now have 228,748 records and 9996 persons, so about 23 +records per person. The total risk time is 54,275 years, a bit less +than 3 months per record as expected. + +\subsection{Practicalities for splines} + +In this study we want to look at how mortality depend on age +(\texttt{age}) and time since start of insulin use (\texttt{tfI}). If +we want to use splines in the description we must allocate knots for +anchoring the splines at each of the time scales, either by some \textit{ad hoc} method or by using some sort of penalized splines. The -letter will not be treated here. +latter will not be treated here; it belongs in the realm of the +\texttt{mgcv} package. Here we shall use the former approach and allocate 5 knots on each of -the two time-scales. We allocate knots so that we have the event evenly -distributed between the knots: -<<>>= -( a.kn <- with( subset( nicM, lex.Xst==1 ), quantile( age+lex.dur, (1:5-0.5)/5 ) ) ) -( t.kn <- with( subset( nicM, lex.Xst==1 ), quantile( tfh+lex.dur, (1:5-0.5)/5 ) ) ) -@ % -In the \texttt{Epi} package there is a convenience wrapper for the -\texttt{n}atural \texttt{s}pline generator \texttt{ns}, \texttt{Ns}, -that takes the smallest and the largest of a set of supplied knots to -be the boundary knots. - -\section{Models for rates} - -\subsection{One time scale} - -A model that only models lung cancer mortality -rates as a function of age would then be: -<<>>= -ma <- glm( (lex.Xst==1) ~ Ns(age,knots=a.kn), - family = poisson, - offset = log(lex.dur), - data = nicM ) +the time-scales. We allocate knots so that we have the events evenly +distributed between the knots. Since the insulin state starts at 0 for +all individuals we include 0 as the first knot, such that any set of natural +splines with these knots will have the value 0 at 0 on the time +scale. +<<>>= +( a.kn <- with( subset( dmCs, lex.Xst=="Dead" ), + quantile( age+lex.dur, (1:5-0.5)/5 ) ) ) +( i.kn <- c( 0, + with( subset( dmCs, lex.Xst=="Dead" & lex.Cst=="Ins" ), + quantile( tfI+lex.dur, (1:4)/5 ) ) ) ) +@ % +In the \texttt{Epi} package there is a convenience wrapper, +\texttt{Ns}, for the \texttt{n}atural \texttt{s}pline generator +\texttt{ns}, that takes the smallest and the largest of a set of +supplied knots to be the boundary knots, so the explicit definition of +the boundary knots becomes superfluous. + +Note that it is a feature of the \texttt{Ns} (via the features of +\texttt{ns}) that any generated spline function is 0 at the leftmost +knot. + +\subsection{Poisson model} + +A model that describes mortality rates as only a function of age would +then be: +<<>>= +ma <- glm( (lex.Xst=="Dead") ~ Ns(age,knots=a.kn), + family = poisson, + offset = log(lex.dur), + data = dmCs ) summary( ma ) @ % The offset, \texttt{log(lex.dur)} comes from the fact that the -likelihood for the follow-up data is the same as that for independent -Poisson variates with mean $\lambda\ell$, and that the default link -function for the Poisson family is the log, so that we are using a -linear model for the log-mean, that is $\log(\lambda) + \log(\ell)$. -But when we want a model for the log-rate ($\log(\lambda)$), the term -$\log(\ell)$ must be included as a covariate with regression -coefficient fixed to 1; a so-called offset. +likelihood for the follow-up data during $\ell$ time is the same as +that for independent Poisson variates with mean $\lambda \ell$, and +that the default link function for the Poisson family is the log, so +that we are using a linear model for the log-mean, +$\log(\lambda) + \log(\ell)$. But when we want a model for the +log-rate ($\log(\lambda)$), the term $\log(\ell)$ must still be +included as a covariate, but with regression coefficient fixed to 1; a +so-called \emph{offset}. This is however a technicality; it just +exploits that the likelihood of a particular Poisson model and that of +the rates model is the same. + +In the \texttt{Epi} package is a \texttt{glm} family, \texttt{poisreg} +that has a more intuitive interface, where the response is a 2-column +matrix of events and person-time, respectively. This is in concert +with the fact that the outcome variable in follow-up studies is +bivariate: (event, risk time). +<<>>= +Ma <- glm( cbind(lex.Xst=="Dead",lex.dur) ~ Ns(age,knots=a.kn), + family = poisreg, data = dmCs ) +summary( Ma ) +@ % +Exploiting the multistate structure in the \texttt{Lexis} object +there is a multistate convenience wrapper for \texttt{glm} with the +\texttt{poisreg} family, that just requires specification of the +transitions in terms of \texttt{from} and \texttt{to}. Although it is +called \texttt{glm.Lexis} it is \emph{not} an S3 method for +\texttt{Lexis} objects: +<<>>= +Xa <- glm.Lexis( dmCs, from="DM", to="Dead", + formula = ~ Ns(age,knots=a.kn) ) +@ % +The result is a \texttt{glm} object but with an extra attribute, \texttt{Lexis}: +<<>>= +attr( Xa, "Lexis" ) +@ % +There are similar wrappers for \texttt{gam} and \texttt{coxph} models, +\texttt{gam.Lexis} and \texttt{coxph.Lexis}, but these will not be +elaborated in detail. + +The \texttt{from=} and \texttt{to=} can even be omitted, in which case +all possible transitions \emph{into} any of the absorbing states is +modeled: +<<>>= +xa <- glm.Lexis( dmCs, formula = ~ Ns(age,knots=a.kn) ) +@ +We can check if the four models fitted are the same: +<<>>= +c( deviance(ma), deviance(Ma), deviance(Xa), deviance(xa) ) +@ % +Oops! the model \texttt{Xa} is apparently not the same as the other +three? This is because the explicit specification +\verb|from="DM", to="Dead"|, omits modeling contributions from the +$\mathtt{Ins}\rightarrow\mathtt{Dead}$ transition --- the output +actually said so. The other three models all use both transitions --- +and assume that the two transition rates are the same, \ie that start +of insulin has no effect on mortality. We shall relax this assumption +later. The parameters from the model do not have any direct interpretation -\textit{per se}, but we can compute the estimated lung cancer incidence -rates for a range of ages using \texttt{ci.pred} with a suitably -defined prediction data frame. Note that we must specify all -covariates in the model, also the variable in the offset, -\texttt{lex.dur}. We set the latter to 1000, because we want the lung -cancer mortality rates per 1000 PY. By default \texttt{ci.pred} yields a -prediction on the response-scale, that is the rate-scale: +\textit{per se}, but we can compute the estimated mortality rates for +a range of ages using \texttt{ci.pred} with a suitably defined +prediction data frame. + +Note that if we use the \texttt{poisson} family of models, we must +specify all covariates in the model, including the variable in the +offset, \texttt{lex.dur} (remember that this was a covariate with +coefficient fixed at 1). We set the latter to 1000, because we want the +mortality rates per 1000 person-years. Using the \texttt{poisreg} +family, the prediction will ignore any value of \texttt{lex.dur} +specified in the prediction data frame, the returned rates will be per +unit in which \texttt{lex.dur} is recorded. <>= nd <- data.frame( age=40:85, lex.dur=1000 ) -pr.a <- ci.pred( ma, newdata = nd ) -matplot( nd$age, pr.a, - type="l", lty=1, col=1, lwd=c(3,1,1), - log="y", xlab="Age (years)", - ylab="Lunng cancer mortality per 1000 PY") -@ % -\insfig{pr-a}{0.8}{Lung cancer mortality among Nickel smelter workers - by age. We see that the rates increase till about 55 years, and from -then on is approximately flat.} - -\subsection{More time scales} - -There may however also be an effect of time since hire too, so we can -add this term to the model: -<<>>= -mat <- update( ma, . ~ . + Ns(tfh,knots=t.kn) ) -summary( mat ) -@ % -This model has two time scales, age and time since hire, so it makes -little sense to report the effect of age for a \emph{fixed} value of -time since hire --- the time since hire increases by age. -Instead we can show the mortality rates for persons hired at different -ages, and report the \emph{joint} effect of increasing age and time -since hire. +pr.0 <- ci.pred( ma, newdata = nd ) # mortality per 100 PY +pr.a <- ci.pred( Ma, newdata = nd )*1000 # mortality per 100 PY +summary(pr.0/pr.a) +matshade( nd$age, pr.a, plot=TRUE, + type="l", lty=1, + log="y", xlab="Age (years)", + ylab="DM mortality per 1000 PY") +@ % +\insfig{pr-a}{0.8}{Mortality among Danish diabetes patients by age + with 95\% CI as shaded area. We see that the rates increase linearly + on the log-scale, that is exponentially by age.} + +\section{Time dependent covariate} + +A Poisson model approach to mortality by insulin status, would be to +assume that the rate-ratio between patients on insulin and not on +insulin is a fixed quantity, independent of time since start of insulin, +independent of age. This is commonly termed a proportional hazards +assumption, because the rates (hazards) in the two groups are +proportional along the age (baseline time) scale. +<<>>= +pm <- glm( cbind(lex.Xst=="Dead",lex.dur) ~ Ns(age,knots=a.kn) + + lex.Cst + sex, + family=poisreg, data = dmCs ) +round( ci.exp( pm ), 3 ) +@ % +So we see that persons on insulin have about twice the mortality of +persons not on insulin and that women have 2/3 the mortality of men. + +\subsection{Time since insulin start} + +If we want to test whether the excess mortality depends on the time +since start if insulin treatment, we can add a spline terms in +\texttt{tfI}. But since \texttt{tfI} is a time scale defined as time +since entry into a new state (\texttt{Ins}), the variable \texttt{tfI} +will be missing for those in the \texttt{DM} state, so before modeling +we must set the \texttt{NA}s to 0, which we do with \texttt{tsNA20} +(acronym for \texttt{t}ime\texttt{s}cale \texttt{NA}s to zero): +<<>>= +pm <- glm( cbind(lex.Xst=="Dead",lex.dur) ~ Ns(age,knots=a.kn) + + Ns(tfI,knots=i.kn) + + lex.Cst + sex, + family=poisreg, data = tsNA20(dmCs) ) +@ % +As noted before we could do this simpler with \texttt{glm.Lexis}, even +without the \texttt{from=} and \texttt{to=} arguments, because we are +modeling all transitions \emph{into} the absorbing state +(\texttt{Dead}): +<<>>= +Pm <- glm.Lexis( tsNA20(dmCs), + form = ~ Ns(age,knots=a.kn) + + Ns(tfI,knots=i.kn) + + lex.Cst + sex ) +c( deviance(Pm), deviance(pm) ) +identical( model.matrix(Pm), model.matrix(pm) ) +@ % +The coding of the effect of \texttt{tfI} is so that the value is 0 at +0, so the meaning of the estimate of the effect of \texttt{lex.Cst} is +the RR between persons with and without insulin, immediately after +start of insulin: +<<>>= +round( ci.exp( Pm, subset="ex" ), 3 ) +@ % +We see that the effect of sex is pretty much the same as before, but +the effect of \texttt{lex.Cst} is much larger, it now refers to a +different quantity, namely the RR at \texttt{tfI}=0. If we want to see +the effect of time since insulin, it is best viewed jointly with the +effect of age: +<>= +ndI <- data.frame( expand.grid( tfI=c(NA,seq(0,15,0.1)), + ai=seq(40,80,10) ), + sex="M", + lex.Cst="Ins" ) +ndI <- transform( ndI, age=ai+tfI ) +head( ndI ) +ndA <- data.frame( age= seq(40,100,0.1), tfI=0, lex.Cst="DM", sex="M" ) +pri <- ci.pred( Pm, ndI ) * 1000 +pra <- ci.pred( Pm, ndA ) * 1000 +matshade( ndI$age, pri, plot=TRUE, las=1, + xlab="Age (years)", ylab="DM mortality per 1000 PY", + log="y", lty=1, col="blue" ) +matshade( ndA$age, pra ) +@ % +\insfig{ins-time}{0.8}{Mortality rates of persons on insulin, starting +insulin at ages 40,50,\ldots,80 (blue), compared with persons not on +insulin (black curve). Shaded areas are 95\% CI.} + +In figure \ref{fig:ins-time}, p. \pageref{fig:ins-time}, we see that +mortality is high just after insulin start, but falls by almost a +factor 3 during the first year. Also we see that there is a tendency +that mortality in a given age is smallest for those with the longest +duration of insulin use. + +\section{The Cox model} + +Note that in the Cox-model the age is used as response variable, +slightly counter-intuitive. Hence the age part of the linear predictors +is not in that model: +<<>>= +library( survival ) +cm <- coxph( Surv(age,age+lex.dur,lex.Xst=="Dead") ~ + Ns(tfI,knots=i.kn) + lex.Cst + sex, + data = tsNA20(dmCs) ) +@ % +There is also a multistate wrapper for Cox models, requiring a +l.h.s. side for the \texttt{formula=} argument: +<<>>= +Cm <- coxph.Lexis( tsNA20(dmCs), + form= age ~ Ns(tfI,knots=i.kn) + lex.Cst + sex ) +cbind( ci.exp( cm ), ci.exp( Cm ) ) +@ % +We can compare the estimates from the Cox model with those from the +Poisson model --- we must add \texttt{NA}s because the Cox-model does +not give the parameters for the baseline time scale (\texttt{age}), but +also remove one of the parameters, because \texttt{coxph} parametrizes +factors (in this case \texttt{lex.Cst}) by all defined levels and not +only by the levels present in the dataset at hand (note the line of +\texttt{1.0000000}s in the print above): +<<>>= +round( cbind( ci.exp( Pm ), + rbind( matrix(NA,5,3), + ci.exp( cm )[-6,] ) ), 3 ) +@ % +Thus we see that the Poisson and Cox gives pretty much the same +results. You may argue that Cox requires a smaller dataset, because +there is no need to subdivide data in small intervals \emph{before} +insulin use. But certainly the time \emph{after} insulin inception need +to be if the effect of this time should be modeled. + +The drawback of the Cox-modeling is that it is not possible to show +the absolute rates as we did in the graph above. + +\section{Marginal effect of time since insulin} + +When we plot the marginal effect of \texttt{tfI} from the two models +we get pretty much the same; here we plot the RR relative to +\texttt{tfI}=2 years. Note that we are deriving the RR as the ratio of +two sets of predictions, from the data frames \texttt{nd} and +\texttt{nr} --- for further details consult the help page for +\texttt{ci.lin}, specifically the use of a list as the +\texttt{ctr.mat} argument: +<>= +nd <- data.frame( tfI=seq(0,15,,151), lex.Cst="Ins", sex="M" ) +nr <- data.frame( tfI= 2 , lex.Cst="Ins", sex="M" ) +ppr <- ci.exp( pm, list(nd,nr), xvars="age" ) +cpr <- ci.exp( cm, list(nd,nr) ) +par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +matshade( nd$tfI, cbind(ppr,cpr), plot=T, + lty=c(1,2), log="y", + xlab="Time since insulin (years)", ylab="Rate ratio") +abline( h=1, lty=3 ) +@ % +\insfig{Ieff}{0.8}{The naked duration effects relative to 2 years of + duration, black from Poisson model, red from Cox model. The two sets + of estimates are identical, and so are the standard errors, so the + two shaded areas overlap almost perfectly.} + +In figure \ref{fig:Ieff}, p. \pageref{fig:Ieff}, we see that the +duration effect is exactly the same from the two modeling approaches. + +We will also want the RR relative to the non-insulin users --- recall these +are coded 0 on the \texttt{tfI} variable: +<>= +nd <- data.frame( tfI=seq(0,15,,151), lex.Cst="Ins", sex="M" ) +nr <- data.frame( tfI= 0 , lex.Cst="DM" , sex="M" ) +ppr <- ci.exp( pm, list(nd,nr), xvars="age" ) +cpr <- ci.exp( cm, list(nd,nr) ) +par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +matshade( nd$tfI, cbind(ppr,cpr), + xlab="Time since insulin (years)", + ylab="Rate ratio relative to non-Insulin", + lty=c(1,2), log="y", plot=T ) +@ % +\insfig{IeffR}{0.8}{Insulin duration effect (state \textrm{\tt Ins}) + relative to no insulin (state \textrm{\tt DM}), black from Poisson + model, red from Cox model. The \emph{shape} is the same as the + previous figure, but the RR is now relative to non-insulin, instead + of relative to insulin users at 2 years duration. The two sets of + estimates are identical, and so are the standard errors, so the two + shaded areas overlap almost perfectly.} + +In figure \ref{fig:IeffR}, p. \pageref{fig:IeffR}, we see the effect +of increasing duration of insulin use \emph{for a fixed age} which is +a bit artificial, so we would like to see the \emph{joint} effects of +age and insulin duration. What we cannot see is how the duration +affects mortality relative to \texttt{current} age (at the age +attained at the same time as the attained \texttt{tfI}). + +Another way of interpreting this curve is as the rate ratio relative to a +person not on insulin, so we see that the RR (or hazard ratio, HR as +some call it) is over 5 at the start of insulin (the \texttt{lex.Cst} +estimate), and decreases to about 1.5 in the long term. + +Both figure \ref{fig:Ieff} and \ref{fig:IeffR} indicate a declining +RR by insulin duration, but only from figure \ref{fig:ins-time} it is +visible that mortality actually is \emph{in}creasing by age after some +2 years after insulin start. This point would not be available if we +had only fitted a Cox model where we did not have access to the +baseline hazard as a function of age. + +\section{Age$\times$duration interaction} + +The model we fitted assumes that the RR is the same regardless of the +age at start of insulin --- the hazards are multiplicative. Sometimes +this is termed the proportional hazards assumption: For \emph{any} +fixed age the HR is the same as a function of time since insulin, and +vice versa. + +A more correct term would be ``main effects model'' --- there is no +interaction between age (the baseline time scale) and other +covariates. So there is really no need for the term ``proportional +hazards''; well defined and precise statistical terms for it has +existed for aeons. + +\subsection{Age at insulin start} + +In order to check the consistency of the multiplicativity assumption +across the spectrum of age at insulin inception, we can fit an +interaction model. One approach to this would be using a non-linear +effect of age at insulin use (for convenience we use the same knots as +for age) --- note that the prediction data frames are the same as we +used above, because we do not compute age at insulin use as a separate +variable, but rather enter it as the difference between current age +(\texttt{age}) and insulin duration (\texttt{tfI}). + +At first glance we might think of doing: +<<>>= +imx <- glm.Lexis( tsNA20(dmCs), + formula = ~ Ns(age ,knots=a.kn) + + Ns( tfI,knots=i.kn) + + Ns(age-tfI,knots=a.kn) + + lex.Cst + sex ) +@ % +But this will fit a model with a rate-ratio between persons with and +without insulin that depends only on age at insulin start for the time +\emph{after} insulin start, the RR at \texttt{tfI}=0 will be the same +at any age, which really is not the type of interaction we wanted. + +We want the \texttt{age-tfI} term to be specific for the insulin +exposed so we may use one of two other approaches, that are +conceptually alike but mathematically different: +<<>>= +Im <- glm.Lexis( tsNA20(dmCs), + formula = ~ Ns(age ,knots=a.kn) + + Ns( tfI,knots=i.kn) + + Ns((age-tfI)*(lex.Cst=="Ins"),knots=a.kn) + + lex.Cst + sex ) +im <- glm.Lexis( tsNA20(dmCs), + formula = ~ Ns(age ,knots=a.kn) + + Ns( tfI,knots=i.kn) + + lex.Cst:Ns(age-tfI,knots=a.kn) + + lex.Cst + sex ) +@ % +The first model (\texttt{Im}) has a common age-effect (\texttt{Ns(age,...}) for +persons with and without diabetes and a RR depending on insulin +duration \texttt{tfI} and age at insulin (\texttt{age-tfI}). Since the +linear effect of these two terms are in the model as well, a linear +trend in the RR by current age (\texttt{age}) is accommodated as well. + +The second model allows age-effects that differ non-linearly between +person with and without insulin, because the interaction term +\texttt{lex.Cst:Ns(age-tfI...} for persons not on insulin is merely an +age term (since \texttt{tfI} is coded 0 for all follow-up not on +insulin). -In order to get a feeling for the values that can be use we look at \texttt{age1st} +We can compare the models fitted: <<>>= -summary( nickel$age1st ) +anova( imx, Im, im, test='Chisq') @ -Thus we shall show mortality rates in ages 20--90 for persons hired in -ages 15, 25, 35 and 45: -<<>>= -nd <- data.frame( expand.grid( age=c(20:90,NA), age1st=seq(15,45,10) ) ) -nd <- transform( nd, tfh = ifelse( age > age1st, age-age1st, NA ), - lex.dur = 1000 ) -# makes no sense to have age < age1st -nd <- transform( nd, age = ifelse( age > age1st, age, NA ) ) -head( nd ) -@ % -With this in place we can plot the estimated rates as before, only -now we will get 4 separate lines. The purpose of inserting an -\texttt{NA} on the age-scale in the \texttt{expand.grid} is that the -different instances of \texttt{age1st} be separated by \texttt{NA}s, and -hence will not be connected when we plot the curves. The downside of -this trick is that lines cannot be plotted with different colors or -symbols. -<>= -pr.at <- ci.pred( mat, newdata = nd ) -matplot( nd$age, pr.at, - type="l", lty=1, col=1, lwd=c(3,1,1), - log="y", xlab="Age (years)", - ylab="Lunng cancer mortality per 1000 PY") -@ % -\insfig{pr-at}{0.8}{Lung cancer mortality among Nickel smelter workers - by age and age at hire 15, 25,35 and 45. Each line (except the - first) starts at the age of hire; we see that the later in life you - are hired, the smaller the initial risk, but the higher the eventual - risk of lung cancer death.} - -We can check whether the effect of time since hire is actually -improving the model: -<<>>= -anova( ma, mat, test="Chisq" ) -@ % -We see a pretty strong indication that this is the case. - -\subsection{Difference between time scales} - -However it might be the case that it really is the age at first hire -that is the main determinant (recall that -$\mathtt{age}-\mathtt{thf}=\mathtt{age1st}$), so we could fit a model -with this variable instead --- a model with only 1 timescale, namely -\texttt{age}. -<<>>= -( f.kn <- with( subset( nicM, lex.Xst==1 ), quantile( age1st, (1:5-0.5)/5 ) ) ) -maf <- update( ma, . ~ . + Ns(age1st,knots=f.kn) ) -summary( maf ) -anova( maf, ma, mat, test="Chisq" ) -@ % -We see that there is much less indication that the age at first hire has -an effect. - -For the sake of completeness we can draw the predicted values from the -\texttt{maf} model on top of the ones from the \textrm{mat} model: -<>= -pr.af <- ci.pred( maf, newdata = nd ) -matplot( nd$age, pr.at, - type="l", lty=1, col=1, lwd=c(3,1,1), - log="y", xlab="Age (years)", - ylab="Lunng cancer mortality per 1000 PY") -matlines( nd$age, pr.af, - type="l", lty=1, col=2, lwd=c(3,0,0) ) -@ % -\insfig{pr-at-af}{0.8}{Lung cancer mortality among Nickel smelter - workers by age and age at hire 15, 25,35 and 45. Each line (except - the first) starts at the age of hire; we see that the later in life - you are hired, the smaller the initial risk, but the higher the - eventual risk of lung cancer death. The red lines are from the model - \textrm{\tt maf} where the lines are constrained to be parallel, and - which gives a worse fit to data.} - -\subsection{The complete picture --- exercise} - -We could fit the remaining models where one or more of the three -variables are included, and compare all of them: -<<>>= -maft <- update( mat, . ~ . + Ns(age1st,knots=f.kn) ) -summary( maft ) -mft <- update( maft, . ~ . - Ns(age,knots=a.kn) ) -mf <- update( maf , . ~ . - Ns(age,knots=a.kn) ) -mt <- update( mat , . ~ . - Ns(age,knots=a.kn) ) -allp <- anova( maft, mat, ma, maf, mf, mft, mt, mat, - maf, maft, mft, - test="Chisq" ) -mall <- as.matrix( allp ) -cbind( mod = c("maft","mat","ma","maf","mf","mft","mt","mat","maf","maft","mft"), - round( allp[,1:5], 3 ) ) +so we see that the models indeed are different, and moreover that the +last model does not provide substantial further improvement, by +allowing non-linear RR along the age-scale. + +We can illustrate the different estimated rates from the three models +in figure \ref{fig:dur-int}, p. \pageref{fig:dur-int}: +<>= +pxi <- ci.pred( imx, ndI ) +pxa <- ci.pred( imx, ndA ) +pIi <- ci.pred( Im , ndI ) +pIa <- ci.pred( Im , ndA ) +pii <- ci.pred( im , ndI ) +pia <- ci.pred( im , ndA ) +par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +matshade( ndI$age, cbind( pxi, pIi, pii)*1000, plot=T, log="y", + xlab="Age", ylab="Mortality per 1000 PY", + lty=1, lwd=2, col=c("blue","forestgreen","red"), alpha=0.1 ) +matshade( ndA$age, cbind( pxa, pIa, pia)*1000, + lty=1, lwd=2, col=c("blue","forestgreen","red"), alpha=0.1 ) +@ % +\insfig{dur-int}{0.8}{Age at insulin as interaction between age and + duration. Blue curves are from the naive interaction model + \textrm{\tt imx} with identical $\RR$ at \textrm{\tt tfI}=0 at any + age; green curves are from the interaction model with age at + insulin, from the model \textrm{\tt Im} with only linear + differences by age, and red lines from the full interaction model + \textrm{\tt im}.} + +We can also plot the RRs only from these models (figure +\ref{fig:dur-int-RR}, p. \pageref{fig:dur-int-RR}); for this we need +the reference frames, and the machinery from \texttt{ci.exp} allowing +a list of two data frames: +<>= +ndR <- transform( ndI, tfI=0, lex.Cst="DM" ) +cbind( head(ndI), head(ndR) ) +Rxi <- ci.exp( imx, list(ndI,ndR) ) +Rii <- ci.exp( im , list(ndI,ndR) ) +RIi <- ci.exp( Im , list(ndI,ndR) ) +par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +matshade( ndI$age, cbind( Rxi, RIi, Rii), plot=T, log="y", + xlab="Age (years)", ylab="Rate ratio vs, non-Insulin", + lty=1, lwd=2, col=c("blue","forestgreen","red"), alpha=0.1 ) +abline( h=1 ) +abline( h=ci.exp(imx,subset="lex.Cst")[,1], lty="25", col="blue" ) +@ % +\insfig{dur-int-RR}{0.9}{RR from three different interaction + models. The horizontal dotted line is at the estimated effect of + \textrm{\tt lex.Cst}, to illustrate that the first model (blue) + constrains this initial HR to be constant across age. The green + curves are the extended interaction model, and the red the full + one.} + +\clearpage + +\subsection{General interaction} + +As a final illustration we may want to explore a different kind of +interaction, not defined from the duration --- here we simplify the +interaction by not using the second-last knot in the interaction terms +--- figure \ref{fig:splint}, p. \pageref{fig:splint}. Note again that +the prediction code is the same: +<>= +gm <- glm.Lexis( tsNA20(dmCs), + formula = ~ Ns(age,knots=a.kn) + + Ns(tfI,knots=i.kn) + + lex.Cst:Ns(age,knots=a.kn):Ns(tfI,knots=i.kn) + + lex.Cst + sex ) +pgi <- ci.pred( gm, ndI ) +pga <- ci.pred( gm, ndA ) +par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +matshade( ndI$age, cbind( pgi, pii )*1000, plot=T, + lty=c("solid","21"), lend="butt", lwd=2, log="y", + xlab="Age (years)", ylab="Mortality rates per 1000 PY", + alpha=c(0.2,0.1), col=c("black","red") ) +matshade( ndA$age, cbind( pga, pia )*1000, + lty=c("solid","21"), lend="butt", lwd=2, + alpha=c(0.2,0.1), col=c("black","red") ) +@ % +\insfig{splint}{0.8}{Spline-by-spline interaction between age and + duration (model \textrm{\tt gm}, black), and the interaction using a + non-linear effect of age at entry (model \textrm{\tt im}, red), + corresponding to the red curves in figure \ref{fig:dur-int}.} +This is in figure \ref{fig:splint}, p. \pageref{fig:splint}. + +\subsection{Evaluating interactions} + +Here we see that the interaction effect is such that in the older ages +the length of insulin use has an increasing effect on mortality. + +Even though there is no statistically significant interaction between +age and time since start of insulin, it would be illustrative to show +the RR as a function of age at insulin and age at follow-up: +<>= +ndR <- transform( ndI, lex.Cst="DM", tfI=0 ) +iRR <- ci.exp( im, ctr.mat=list(ndI,ndR) ) +gRR <- ci.exp( gm, ctr.mat=list(ndI,ndR) ) +par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +matshade( ndI$age, cbind(gRR,iRR), lty=1, log="y", plot=TRUE, + xlab="Age (years)", ylab="Rate ratio: Ins vs. non-Ins", + col=c("black","red") ) +abline( h=1 ) +@ % $ +\insfig{RR-int}{0.8}{The effect of duration of insulin use at + different ages of follow-up (and age at insulin start). Estimates + are from the model with an interaction term using a non-linear + effect of age at insulin start (model \textrm{\tt im}, red) and + using a general spline interactions (model \textrm{\tt gm}, + black). It appears that the general interaction over-models a bit.} +This is in figure \ref{fig:RR-int}, p. \pageref{fig:RR-int}. + +The advantage of the parametric modeling (be that with age at insulin +or general spline interaction) is that it is straight-forward to +\emph{test} whether we have an interaction. + +\section{Separate models} + +In the above we insisted on making a joint model for the +\texttt{DM}$\rightarrow$\texttt{Dead} and the +\texttt{Ins}$\rightarrow$\texttt{Dead} +transitions, but with the complications demonstrated it would actually +have been more sensible to model the two transitions separately: +<<>>= +dmd <- glm.Lexis( dmCs, + from="DM", to="Dead", + formula = ~ Ns(age,knots=a.kn) + + sex ) +ind <- glm.Lexis( dmCs, + from="Ins", to="Dead", + formula = ~ Ns(age,knots=a.kn) + + Ns(tfI,knots=i.kn) + + Ns(age-tfI,knots=a.kn) + + sex ) +ini <- ci.pred( ind, ndI ) +dmi <- ci.pred( dmd, ndI ) +dma <- ci.pred( dmd, ndA ) +@ % +The estimated mortality rates are shown in figure \ref{fig:sep-mort}, +p. \pageref{fig:sep-mort}, using: +<>= +par(mar=c(3,3,1,1),mgp=c(3,1,0)/1.6,las=1,bty="n") +matshade( ndI$age, ini*1000, plot=TRUE, log="y", + xlab="Age (years)", ylab="Mortality rates per 1000 PY", + lwd=2, col="red" ) +matshade( ndA$age, dma*1000, + lwd=2, col="black" ) +@ % +The estimated RRs are computed using that the estimates from the two +models are uncorrelated, and hence qualify for \texttt{ci.ratio} (this +and the previous graph +appear in figure \ref{fig:Ins-noIns}, p. \pageref{fig:Ins-noIns}) +<>= +par(mar=c(3,3,1,1),mgp=c(3,1,0)/1.6,las=1,bty="n") +matshade( ndI$age, ci.ratio(ini,dmi), plot=TRUE, log="y", + xlab="Age (years)", ylab="RR insulin vs. no insulin", + lwd=2, col="red" ) +abline( h=1 ) @ % -\begin{enumerate} -\item Explain why there are \texttt{NA}s among the parameters in the - model \texttt{maf}. -\item Draw a graph (a ``DAG'') with the models as nodes and the tests - as vertices, put the p-values on the vertices and use the result to - argue that the model with age an time since hire is actually the - most sensible description in this case. -\end{enumerate} - -\chapter{Competing risks --- multiple types of events} - -If we want to consider death from lung cancer and death from other -causes as separate events we can code these as for example 1 and 2. -<<>>= -data( nickel ) -nicL <- Lexis( entry = list( per = agein+dob, - age = agein, - tfh = agein-age1st ), - exit = list( age = ageout ), - exit.status = ( icd > 0 ) + ( icd %in% c(162,163) ), - data = nickel ) -summary( nicL ) -subset( nicL, id %in% 8:10 ) -@ % -In order to have a more readable output we can label the states, we -can enter the names of these in the \texttt{states} parameter, try for -example: -<<>>= -nicL <- Lexis( entry = list( per = agein+dob, - age = agein, - tfh = agein-age1st ), - exit = list( age = ageout ), - exit.status = ( icd > 0 ) + ( icd %in% c(162,163) ), - data = nickel, - states = c("Alive","D.oth","D.lung") ) -summary( nicL ) -str( nicL ) -@ % -Note that the \texttt{Lexis} function automatically assumes that all -persons enter in the first level (given in the \texttt{states=} -argument), corresponding to the numerical values given in -\texttt{exit.status}. - -When we cut at a date as in this case, the date where cumulative -exposure exceeds 50 exposure-years, we get the follow-up \emph{after} -the date classified as being in the new state if the exit -(\texttt{lex.Xst}) was to a state we defined as one of the -\texttt{precursor.states}: -<<>>= -nicL$agehi <- nicL$age1st + 50 / nicL$exposure -nicC <- cutLexis( data = nicL, - cut = nicL$agehi, - timescale = "age", - new.state = "HiExp", - precursor.states = "Alive" ) -subset( nicC, id %in% 8:10 ) -summary( nicC, scale=1000 ) -@ % -Note that the persons-years is the same, but that the number of -events has changed. This is because events are now defined as any -transition, including the transitions to \texttt{HiExp}. +\begin{figure}[tb] +\centering +\includegraphics[width=0.49\textwidth]{flup-sep-mort} +\includegraphics[width=0.49\textwidth]{flup-sep-HR} +\caption{\it Left panel: Mortality rates from separate models for the + two mortality transitions; the \textrm{\tt + DM}$\rightarrow$\textrm{\tt Dead} transition modeled by age alone; + \textrm{\tt Ins}$\rightarrow$\textrm{\tt Dead} transition modeled + with spline effects of current age, time since insulin and and age + at insulin. \newline Right panel: Mortality HR of insulin vs. no insulin.} +\label{fig:Ins-noIns} +\end{figure} -Also note that (so far) it is necessary to specify the variable with -the cut points in full, using only \texttt{cut=agehi} would give an error. +\chapter{More states} \section{Subdividing states} It may be of interest to subdivide the states following the intermediate event according to whether the event has occurred or -not. That is done by the argument \texttt{split.states=TRUE}. +not. This will enable us to address the question of the fraction of +the patients that ever go on insulin. -Moreover, it will also often be of interest to introduce a new -timescale indicating the time since intermediate event. This can be -done by the argument \texttt{new.scale=TRUE}, alternatively -\texttt{new.scale="tfe"}, as illustrated here: -<<>>= -nicC <- cutLexis( data = nicL, - cut = nicL$agehi, - timescale = "age", - new.state = "HiExp", - new.scale = "tfe", - split.states = TRUE, - precursor.states = "Alive" ) -subset( nicC, id %in% 8:10 ) -summary( nicC, scale=1000, timeScales=TRUE ) +This is done by the argument \texttt{split.states=TRUE}. +<<>>= +dmCs <- cutLexis( data = dmS2, + cut = dmS2$doins, + timescale = "per", + new.state = "Ins", + new.scale = "tfI", + precursor.states = "DM", + split.states = TRUE ) +summary( dmCs ) +@ % +We can illustrate the numbers and the transitions (figure +\ref{fig:box4}, p. \pageref{fig:box4}) +<>= +boxes( dmCs, boxpos=list(x=c(15,15,85,85), + y=c(85,15,85,15)), + scale.R=1000, show.BE=TRUE ) @ % $ -Note that the \texttt{timeScales=TRUE} to \texttt{summary} lists the -timescales available in the object, and also indicates which of them -that are defined as time since entry to a particular state. This -facility is not used here, but it is needed when simulating follow-up -data --- see the vignette on \textrm{simLexis}. - -With 6 different states it is quite difficult to get an overview of -the transitions between states from the \texttt{summary()}. Therefore -there is function that gives a graphical display of the states showing -the transitions between the states: -<>= -boxes( nicC, boxpos = list(x=c(10,10,80,80,80,80), - y=c(75,25,87,63,13,37)), - scale.Y = 1000, - show.BE = TRUE ) -@ % -\insfig{nic-box}{0.9}{Transitions between states; the number in the - middle of each box is the person-years (in 1000s --- since - \textrm{\tt scale.Y=1000}), the numbers at the bottom of the boxes - are the number that start, respectively end their follow-up in each - state. The numbers on the arrows are the number of transitions and - crude transition rates (the latter in events per 1000 PY).\newline - The function \textrm{\tt boxes.Lexis} has a zillion arguments to - fine-tune the appearance of the display in terms of colors etc.} - -%% \section{Multiple events of the same type (recurrent events)} -%% Sometimes more events of the same type are recorded for each person and -%% one would then like to count these and put follow-up time in states accordingly. -%% Essentially, each set of cutpoints represents progressions from one -%% state to the next. Therefore the states should be numbered, and the -%% numbering of states subsequently occupied be increased accordingly. - -%% This is a behaviour different from the one outlined above, and it is -%% achieved by the argument \texttt{count=TRUE} to -%% \texttt{cutLexis}. When \texttt{count} is set to \texttt{TRUE}, the -%% value of the arguments \texttt{new.state} and -%% \texttt{precursor.states} are ignored. Actually, when using the -%% argument \texttt{count=TRUE}, the function \texttt{countLexis} is -%% called, so an alternative is to use this directly. - -%% \renewcommand{\bibname}{References} -%% \bibliographystyle{plain} -%% \bibliography{% -%% /home/bendix/art/bibtex/BxC,% -%% /home/bendix/art/bibtex/Stat,% -%% /home/bendix/art/bibtex/DMCa,% -%% /home/bendix/art/bibtex/Diabetes% -%% } -%% \addcontentsline{toc}{section}{\bibname} +\insfig{box4}{0.7}{Transitions between 4 states: the numbers \emph{in} + the boxes are person-years (middle), and below the number of persons + who start, respectively end their follow-up in each of the states.} + +Note that it is only the mortality rates that we have been modeling, +namely the transitions \texttt{DM}$\rightarrow$\texttt{Dead} +and \texttt{Ins}$\rightarrow$\texttt{Dead(Ins)}. +If we were to model the cumulative risk of using insulin we would also +need a model for the DM$\rightarrow$Ins +transition. Subsequent to that we would then compute the probability +of being in each state conditional on suitable starting +conditions. With models where transition rates depend on several time +scales this is not a trivial task. This is treated in more detail in the +vignette on \texttt{simLexis}. + +\section{Multiple intermediate events} + +We may be interested in starting either insulin or OAD (oral +anti-diabetic drugs), thus giving rise to more states and more +time scales. This can be accomplished by the \texttt{mcutLexis} +function, that generalizes \texttt{cutLexis}: +<<>>= +dmM <- mcutLexis( dmL, + timescale = "per", + wh = c("doins","dooad"), + new.states = c("Ins","OAD"), + new.scales = c("tfI","tfO"), + precursor.states = "DM", + ties.resolve = TRUE ) +summary( dmM, t=T ) +@ +We see that we now have two time scales defined as entry since into +states. +<<>>= +wh <- c(subset(dmM,lex.Cst=="Ins-OAD")$lex.id[1:2], + subset(dmM,lex.Cst=="OAD-Ins")$lex.id[1:2]) +options( width=110 ) +print( subset( dmM, lex.id %in% wh )[,c('lex.id',names(dmM[1:8]),c("doins","dooad"))], + digits=6, row.names=FALSE ) +summary( dmM, t=T ) +@ % +We can also illustrate the transitions to the different states, as in +figure \ref{fig:mbox}: +<>= +boxes( dmM, boxpos=list(x=c(15,80,40,40,85,85), + y=c(50,50,90,10,90,10)), + scale.R=1000, show.BE=TRUE ) +@ % +\insfig{mbox}{1.0}{Boxes for the \textrm{\tt dmM} object. The numbers + \emph{in} the boxes are person-years (middle), and below the number + of persons who start, respectively end their follow-up in each of + the states.} +We may not be interested in whether persons were prescribed insulin +before OAD or vice versa, in which case we would combine the levels +with both insulin and OAD to one, regardless of order (figure +\ref{fig:mboxr}): +<>= +summary( dmMr <- Relevel( dmM, list('OAD+Ins'=5:6), first=FALSE) ) +boxes( dmMr, boxpos=list(x=c(15,50,15,85,85), + y=c(85,50,15,85,15)), + scale.R=1000, show.BE=TRUE ) +@ % +\insfig{mboxr}{1.0}{Boxes for the \textrm{\tt dmMr} object with + collapsed states. The numbers \emph{in} the boxes are person-years + (middle), and below the number of persons who start, respectively + end their follow-up in each of the states.} + +Diagrams as those in figures +\ref{fig:mbox} and +\ref{fig:mboxr} gives an overview of the possible transitions, +which states it might be relevant to collapse, and which transitions +to model and how. + +\chapter{\texttt{Lexis} functions} + +The \texttt{Lexis} machinery has evolved over time since it was first +introduced in a workable version in \texttt{Epi\_1.0.5} in August 2008. + +Over the years there have been additions of tools for handling +multistate data. Here is a list of the current functions relating to +\texttt{Lexis} objects with a very brief description; it does not +replace the documentation. Unless otherwise stated, functions named +\texttt{something.Lexis} (with a ``\texttt{.}'') are S3 methods for +\texttt{Lexis} objects, so you can skip the ``\texttt{.Lexis}'' in +daily use. + +\setlist{noitemsep} +\begin{description} + +\item[Define]\ \\ +\begin{description} +\item[\texttt{Lexis}] defines a \texttt{Lexis} object +\end{description} + +\item[Cut and split]\ \\ +\begin{description} +\item[\texttt{cutLexis}] cut follow-up at intermediate event +\item[\texttt{mcutLexis}] cut follow-up at several intermediate events +\item[\texttt{countLexis}] cut follow-up at intermediate event count + the no. events so far +\item[\texttt{splitLexis}] split follow up along a time scale +\item[\texttt{splitMulti}] split follow up along a time scale --- from + the \texttt{popEpi} package, faster and has simpler syntax than + \texttt{splitLexis} +\item[\texttt{addCov.Lexis}] add clinical measurements at a given date to a + \texttt{Lexis} object +\end{description} + +\item[Boxes and plots]\ \\ +\begin{description} +\item[\texttt{boxes.Lexis}] draw a diagram of states and transitions +\item[\texttt{plot.Lexis}] draw a standard Lexis diagram +\item[\texttt{points.Lexis}] add points to a Lexis diagram +\item[\texttt{lines.Lexis}] add lines to a Lexis diagram +\item[\texttt{PY.ann.Lexis}] annotate life lines in a Lexis diagram +\end{description} + +\item[Summarize and query]\ \\ +\begin{description} +\item[\texttt{summary.Lexis}] overview of transitions, risk time etc. +\item[\texttt{levels.Lexis}] what are the states in the \texttt{Lexis} object +\item[\texttt{nid.Lexis}] number of persons in the \texttt{Lexis} + object --- how many unique values of \texttt{lex.id} are present +\item[\texttt{entry}] entry time +\item[\texttt{exit}] exit time +\item[\texttt{status}] status at entry or exit +\item[\texttt{timeBand}] factor of time bands +\item[\texttt{timeScales}] what time scales are in the \texttt{Lexis} object +\item[\texttt{timeSince}] what time scales are defined as time since a given state +\item[\texttt{breaks}] what breaks are currently defined +\item[\texttt{absorbing}] what are the absorbing states +\item[\texttt{transient}] what are the transient states +\item[\texttt{preceding}, \texttt{before}] which states precede this +\item[\texttt{succeeding}, \texttt{after}] which states can follow this +\item[\texttt{tmat.Lexis}] transition matrix for the \texttt{Lexis} object +\end{description} + +\item[Manipulate]\ \\ +\begin{description} +\item[\texttt{subset.Lexis}, \texttt{[}] subset of a \texttt{Lexis} object +\item[\texttt{merge.Lexis}] merges a \texttt{Lexis} objects with a + \texttt{data.frame} +\item[\texttt{cbind.Lexis}] bind a \texttt{data.frame} to a \texttt{Lexis} object +\item[\texttt{rbind.Lexis}] put two \texttt{Lexis} objects head-to-foot +\item[\texttt{transform.Lexis}] transform and add variables +\item[\texttt{tsNA20}] turn \texttt{NA}s to 0s for time scales +\item[\texttt{Relevel.Lexis}, \texttt{factorize.Lexis}] reorder and + combine states +\item[\texttt{bootLexis}] bootstrap sample of \emph{persons} + (\texttt{lex.id}) in the \texttt{Lexis} object +\end{description} + +\item[Simulate]\ \\ +\begin{description} +\item[\texttt{simLexis}] simulate a \texttt{Lexis} object from + specified transition rate models +\item[\texttt{nState}, \texttt{pState}] count state occupancy from a + simulated \texttt{Lexis} object +\item[\texttt{plot.pState}, \texttt{lines.pState}] plot state occupancy from a + \texttt{pState} object +\end{description} + +\item[Stack]\ \\ +\begin{description} +\item[\texttt{stack.Lexis}] make a stacked object for simultaneous + analysis of transitions --- returns a \texttt{stacked.Lexis} object +\item[\texttt{subset.stacked.Lexis}] subsets of a \texttt{stacked.Lexis} object +\item[\texttt{transform.stacked.Lexis}] transform a \texttt{stacked.Lexis} object +\end{description} + +\item[Interface to other packages]\ \\ +\begin{description} +\item[\texttt{msdata.Lexis}] interface to \texttt{mstate} package +\item[\texttt{etm.Lexis}] interface to \texttt{etm} package +\item[\texttt{crr.Lexis}] interface to \texttt{cmprsk} package +\end{description} + +\item[Statistical models] --- these are \emph{not} S3 methods +\begin{description} +\item[\texttt{glm.Lexis}] fit a \texttt{glm} model using the + \texttt{poisreg} family to (hopefully) time split data +\item[\texttt{gam.Lexis}] fit a \texttt{gam} model (from the + \texttt{mgcv} package) using the \texttt{poisreg} family to + (hopefully) time split data +\item[\texttt{coxph.Lexis}] fit a Cox model to follow-up in a + \texttt{Lexis} object +\end{description} +\end{description} \end{document} Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup-RR-int.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup-RR-int.pdf differ diff -Nru r-cran-epi-2.32/vignettes/flup.rwl r-cran-epi-2.37/vignettes/flup.rwl --- r-cran-epi-2.32/vignettes/flup.rwl 2018-05-03 14:34:57.000000000 +0000 +++ r-cran-epi-2.37/vignettes/flup.rwl 2019-05-23 08:21:03.000000000 +0000 @@ -1,48 +1,70 @@ -R version 3.4.4 (2018-03-15) +R version 3.6.0 (2019-04-26) --------------------------------------------- Folder: /home/bendix/stat/R/lib.src/Epi/pkg/vignettes - Started: Thursday 03. May 2018, 16:34:54 + Started: Thursday 23. May 2019, 10:20:21 --------------------------------------------- Writing to file flup.tex Processing code chunks with options ... - 1 : keep.source term verbatim (flup.rnw:5) - 2 : echo keep.source term verbatim (flup.rnw:101) - 3 : echo keep.source term verbatim (flup.rnw:109) - 4 : echo keep.source term verbatim (flup.rnw:126) - 5 : echo keep.source term verbatim (flup.rnw:138) - 6 : echo keep.source term verbatim pdf (label = nicL1, flup.rnw:156) - 7 : echo keep.source term verbatim pdf (label = nicL2, flup.rnw:162) - 8 : echo keep.source term verbatim (flup.rnw:193) - 9 : echo keep.source term verbatim (flup.rnw:204) -10 : echo keep.source term verbatim (flup.rnw:211) -11 : echo keep.source term verbatim (flup.rnw:218) -12 : echo keep.source term verbatim (flup.rnw:227) -13 : echo keep.source term verbatim (flup.rnw:250) -14 : echo keep.source term verbatim (flup.rnw:278) -15 : echo keep.source term verbatim (flup.rnw:284) -16 : echo keep.source term verbatim (flup.rnw:308) -17 : echo keep.source term verbatim (flup.rnw:323) -18 : echo keep.source term verbatim (flup.rnw:390) -19 : echo keep.source term verbatim (flup.rnw:405) -20 : echo keep.source term verbatim pdf (label = pr-a, flup.rnw:429) -21 : echo keep.source term verbatim (flup.rnw:445) -22 : echo keep.source term verbatim (flup.rnw:457) -23 : echo keep.source term verbatim (flup.rnw:462) -24 : echo keep.source term verbatim pdf (label = pr-at, flup.rnw:477) -25 : echo keep.source term verbatim (flup.rnw:492) -26 : echo keep.source term verbatim (flup.rnw:504) -27 : echo keep.source term verbatim pdf (label = pr-at-af, flup.rnw:515) -28 : echo keep.source term verbatim (flup.rnw:536) -29 : echo keep.source term verbatim (flup.rnw:562) -30 : echo keep.source term verbatim (flup.rnw:576) -31 : echo keep.source term verbatim (flup.rnw:597) -32 : echo keep.source term verbatim (flup.rnw:624) -33 : echo keep.source term verbatim pdf (label = nic-box, flup.rnw:645) + 1 : keep.source term verbatim (flup.rnw:22) + 2 : echo keep.source term verbatim (flup.rnw:128) + 3 : echo keep.source term verbatim (flup.rnw:139) + 4 : echo keep.source term verbatim (flup.rnw:171) + 5 : echo keep.source term verbatim (flup.rnw:189) + 6 : echo keep.source term verbatim pdf (label = dmL1, flup.rnw:197) + 7 : echo keep.source term verbatim pdf (label = dmL2, flup.rnw:203) + 8 : echo keep.source term verbatim (flup.rnw:246) + 9 : echo keep.source term verbatim (flup.rnw:259) +10 : echo keep.source term verbatim (flup.rnw:268) +11 : echo keep.source term verbatim (flup.rnw:275) +12 : echo keep.source term verbatim (flup.rnw:292) +13 : echo keep.source term verbatim (flup.rnw:325) +14 : echo keep.source term verbatim (flup.rnw:353) +15 : echo keep.source term verbatim (flup.rnw:363) +16 : echo keep.source term verbatim (flup.rnw:394) +17 : echo keep.source term verbatim pdf (label = box1, flup.rnw:400) +18 : echo keep.source term verbatim (flup.rnw:438) +19 : echo keep.source term verbatim (flup.rnw:481) +20 : echo keep.source term verbatim (flup.rnw:487) +21 : echo keep.source term verbatim (flup.rnw:594) +22 : echo keep.source term verbatim (flup.rnw:618) +23 : echo keep.source term verbatim (flup.rnw:639) +24 : echo keep.source term verbatim (flup.rnw:663) +25 : echo keep.source term verbatim (flup.rnw:674) +26 : echo keep.source term verbatim (flup.rnw:679) +27 : echo keep.source term verbatim (flup.rnw:689) +28 : echo keep.source term verbatim (flup.rnw:693) +29 : echo keep.source term verbatim pdf (label = pr-a, flup.rnw:718) +30 : echo keep.source term verbatim (flup.rnw:740) +31 : echo keep.source term verbatim (flup.rnw:758) +32 : echo keep.source term verbatim (flup.rnw:768) +33 : echo keep.source term verbatim (flup.rnw:780) +34 : echo keep.source term verbatim pdf (label = ins-time, flup.rnw:788) +35 : echo keep.source term verbatim (flup.rnw:818) +36 : echo keep.source term verbatim (flup.rnw:826) +37 : echo keep.source term verbatim (flup.rnw:838) +38 : echo keep.source term verbatim pdf (label = Ieff, flup.rnw:861) +39 : echo keep.source term verbatim pdf (label = IeffR, flup.rnw:882) +40 : echo keep.source term verbatim (flup.rnw:946) +41 : echo keep.source term verbatim (flup.rnw:961) +42 : echo keep.source term verbatim (flup.rnw:986) +43 : echo keep.source term verbatim pdf (label = dur-int, flup.rnw:995) +44 : echo keep.source term verbatim pdf (label = dur-int-RR, flup.rnw:1021) +45 : echo keep.source term verbatim pdf (label = splint, flup.rnw:1050) +46 : echo keep.source term verbatim pdf (label = RR-int, flup.rnw:1081) +47 : echo keep.source term verbatim (flup.rnw:1110) +48 : echo keep.source term verbatim pdf (label = sep-mort, flup.rnw:1127) +49 : echo keep.source term verbatim pdf (label = sep-HR, flup.rnw:1139) +50 : echo keep.source term verbatim (flup.rnw:1169) +51 : echo keep.source term verbatim pdf (label = box4, flup.rnw:1181) +52 : echo keep.source term verbatim (flup.rnw:1207) +53 : echo keep.source term verbatim (flup.rnw:1219) +54 : echo keep.source term verbatim pdf (label = mbox, flup.rnw:1229) +55 : echo keep.source term verbatim pdf (label = mboxr, flup.rnw:1242) You can now run (pdf)latex on ‘flup.tex’ --------------------------------------------- Folder: /home/bendix/stat/R/lib.src/Epi/pkg/vignettes - Ended: Thursday 03. May 2018, 16:34:57 - Elapsed: 00:00:03 + Ended: Thursday 23. May 2019, 10:21:03 + Elapsed: 00:00:41 --------------------------------------------- Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup-sep-HR.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup-sep-HR.pdf differ diff -Nru r-cran-epi-2.32/vignettes/flup-sep-mort.pdf r-cran-epi-2.37/vignettes/flup-sep-mort.pdf --- r-cran-epi-2.32/vignettes/flup-sep-mort.pdf 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-epi-2.37/vignettes/flup-sep-mort.pdf 2019-05-23 08:21:00.000000000 +0000 @@ -0,0 +1,155 @@ +%PDF-1.4 +%ρ\r +1 0 obj +<< +/CreationDate (D:20190523102100) +/ModDate (D:20190523102100) +/Title (R Graphics Output) +/Producer (R 3.6.0) +/Creator (R) +>> +endobj +2 0 obj +<< /Type /Catalog /Pages 3 0 R >> +endobj +7 0 obj +<< /Type /Page /Parent 3 0 R /Contents 8 0 R /Resources 4 0 R >> +endobj +8 0 obj +<< +/Length 22253 /Filter /FlateDecode +>> +stream +xM5Y9_qcؔJ)! JE]$Ŀo7[~nPB=|}lYw_OW/q]_~7?~~*=\WGWF|5Z\?ڏ޿K|^Q?~N?o/Y?/Y}VG}hc}O_w?KϿ[JǗ9~\?o{/?o{/}|y/%cUIjF+cgzn{֏[{?]?/{kGyoz7}1J.l-![JZ~/Nj-}e=q.uۿ۽ۿo7|{V΍ovߊ?}ǯg~ǿ?}}'~tU{>(I?bEſ><5矿׿z~~/SSj1O\BUm{|evQ~i\ +˓Һf{򣯯(RQXf##=3FGf<|BcJZ>'S=<%߾BzTKI)og⓷ڏT*qWHVKGZqMmxcHHz#gEҝ|+Jɑ׼:Og}FjGzgiVIk|TsySzWJH5z=kPCj[ҸsS".Iśk>CG'|o3O\5.MIc.83}~) +v#w?)Y}%~ڐ4[Hn'C5-ճkJң;yJZ+w\=tg4󓹂{\ڃ=DK)sXՔbx<敛pkUrncn8ѱwr-7N;,)=.)5MckyWm[\yW10q̻k-[•λBKx[|x,qn(ԥZ8:Ź]I,";#qӲ3Dbl&ęsr+7Q 1vwY^#>6i_Kzئjj)qjˍ:BH\yj7j5GJ'm3UrRG8>,c1LwXGGݜ'6|lSX|ͯ`7Gx7gzīX|^Gn1iGK4#G̻if{|Vd]8gz{:4y L)#xD,*G^?<P,BhcJ瑪 m5ƫB.UTFyXVUR5W #%TE:4~Ula4pWtuwׇzid|ڎ1MK?22L,V٬??z!(?z!k?ʤ=N]^Tu|LKvմZRgp[QUpb"c%zJ1K&D7P(I)3ʍ0b BTlX+˨. 3-&,o_dyPfg ⴘh+zM1jϖ@Ľ!TPy;оk;z o~B؄͢wBP<)iJ<2]T1+4"aNnipƇ9zgV`Ҝ1w{CY\ʘӰ2ܖ1ms^oj܁⭈_܉ au'{!wKӊ+~5*o2}͐MGl4]io'n^N_Ԓ~|O{{>r>\NqG=8#6]H<4w9Uz,k +srMx_s9}/ê܏#_c?=jG\vc?.ٯ~[+Q?0#Cjf:B= +!W[EtS 3CX=}%>a"|"=7Qlhj=SLڻȎ0ܞa/He{Uk~㛢?=t7Gnxq;Q?J_g؟<+C7jKKSʟu?zV5#+?Ig_|vOv]7}?_َ?i?#K؟~ݏ~[?7Gɟ<O^2|a,ob}x5+rU|7:#;W/Ӻ/'s?zȟ~QQW|@~w>w!EW>77WhY9(WCb?S|I|J#FGyUmOy z|X]_>m6ӧUry*8yܤe߳~;ħt>gLp;LpLLq;%截zcS-̦)-Cǎgm=d3uOrpq$)M-@C>O|Ɛ>*jϣ3|>MZ|ޚP|<9ob{#{)f)L,O_S }OK,~˂Mg<זA]?k!| R1^ p#O=c#CݖCﭴ%@W~͜MYKM}c{akʙH=(xK\Qꇼ<#y6PUL"}72چXo~u~C (4!OScc8f閇bwt߷k,ud0SyqZ^>F<8+K1 nC߷!;5px;ev%;~ȣb$Df:KG0S8B.PO!#Ƌm9,G ؝3r)69iY?E1'e_rۈ+kRMR~R~C<|7Ըޗ# +3 w>}Uj~i}ٺfM92헐~ +-dӌw\w?VRRy-}f)̾g+ϻ&ZY<{C}>HC_2^}IpsG!ӳ&Wl.*ԇvW_[&/}_}riCM3b8|!_k/͔){M77j=e*۫dEJZpׇLy4k6lqȭ|kA^{Ƕ{cl܍u@ɟ'6f wY&mi;fğIomË65 ken r5q@5}Mc~>a#}\?'l/6d |7z ˟de>">sO,㍵\q<,/3V~\#Y>oH&k$sd5j$ND ɴ~F 5A5ƗY#Ը\#qHn+j$_j$k$O/#-Y#Y{W"x5;yf9r',07,rPG,0W,0 g<0w<ʇ,jΗȧ<5[ؿc|59L={jBf|QKio*ߔLQQHU"]C6E1͹]7&uc +Y dv|$ۼhX*1mvLTAlq&w|ll6uc/>eo|(1ʞ8Qa(!(eOt(u&lblSl|\(wxQ[Q6>cKl궎QٔU eb}e}QFʇVG9b}+FƱ-Grlc}|v(!bO9t}(!|c~+u{?+u+gו:S^SCNr<nݽCYns}eRޖ| o )|~ @Asgr/Uy^@|G't]-*tpggDtя,o(D-fr!gڧS>1%jg[>ֲOÇmlWv3>3]%gY:?,|"Θ=$bLsShWGܐwS̠ ++)op+FC!ϒr\G +9mt{1Q+,$^tkLNe VRaܙo6v<-ɍM!V5^Ѭ[-\c9c6.?ru-ΏγZ?m0C,$7 1 *܎O}6BMǠ,r@Nx6xŘkpd CH8TŰƞ \7Pr`^ g1n8G=V>?co^GXQk#~0e|(ƗaF}b#&;DX%[JaOτ 2̓Rׇ?EDoE*qzE<C^ ٟT#{\ +nbʟ gnH%hqjĸʹȟTrwCp實r+}ICzԢ_z#(W~~|]sx!SG2i9S݊YHB晴4jH[3 9Xj$^k$F~S#^j$ևF2U#D5I=j$^#STHs0HeFO5ڨFfFRH*9[H*5hDTwR#75 H*k$uPU5JM5;_RG8RSK>'kqsZ'?|O)^+_ԉלOM_DD[M=gd.r\ؔ):6g$8Yi}cznfC65llnMnU{wގo3Tb 9uccwr~>M#s>QgjbSNL$+!C'‡ Ȳ|u5|>cxQT^zcfVbfb՞G_Q:>cn,41 +g屑>bވ9tj(9-b~b(`QxciVcCm=P M>ʒC!D-@5ez6ϣ':dS,˱29|G}`pyk?jytQb谻:vR 7΍9s!"&N#{8LUY;UR:i"*8Æj#>~OPqʹ>Ԯ=yLYOcޯy} .y}XIX^zWyyЂ +}ܖc1Wrwʁ}6o?@!AXμP?lx޴ِ̦f`*6b,Z7)VcK11&ȌI.)MI1&<y+C~T/cb9MNiF뵆c>>OohZKZqb('^M/1sqjJ懾Wr +J3,W߅Ϙ0~믾lOEpC7Xcp{dߤ-׫ӭZ=7L["ܶlm8g-{n>=/:^)O,V_ٻpkm,D2fôvp#{ny >-a" #F.'˔sԡGR +ׇ?oEjP_X氛=\dOEOU`_Ij}c\nnwu65oKrqp._,j.W jܯU?;^YƵ,-,rqhK$SeIZϚ4xoxpo5LQxr6G434Ŀq<{Ww!%/cRcQ<=a >qhѨq*0QCT>aP#th~BJ>c|GI//S`M%5 kQ>gSB'XtS#Frs'(ߴQ+_iR3)ү>K?#12$bdHKޜj5GXuV b;ea &zTyvc(ϖ`Ň ҀCIm9(2L@lEEfeo[NmO޷%R8('HiE'l5]Hb[U:Q)Md-f,ڼ{Iol|?Ur'?V ˛\GV܁D,\2Q]S+2e!e>N .CőZ˰jVsQ/.˸ .i#pW1:$Ee6n46>).e.E.*3lYn"2I`$S?1[.4rC{dV"$[暾fyl.+P`\!lL쥙{\tܡkHԢḳmCǞYziM~[|T%IləOrCT&d 'fX|Ծܛ-:VQDm؟D3iWlG>0[D FAΠmW|lr|RAqʕW|}jC%k6)o}7wj;~ sgΕ3D_9.f|C-V& ؇97؆E5 [&,z,s=153&Y†O\ +{x SʷMxaÞ\9O憛_$wk}:bCh"3Lgp*/z_<E4bn a޸=q]\[Lf.. f3\sUSI.H\r 'Kx]pɘal)R9 )-d.7&zhUDO[\ :saCT垎`p^T. SE2 TjsS/P:܊&H:I\%8L^Y\'ŅRܧJpeK}qTB:Ku}\ s0WL\2ᚩ|J߈OL5'.ubMNL]Љ\>1 'ƉˉqbH7WPl#}^\BjPq~Um2jQ+~_B:\I`Rjɘkc.f\M .^s9fGpC(LJpE5%L T3\T .$sUeָ<-jKH3l#q+W).WepE}=\cg<~p5淘ˬfs5Ø+n3q=jZ\l .s2s5γ\p1!k%\[\_>=iקi^vW3 }`ms}G>~ >>>rEߝܺuFOܣp}Շj&әJ>מ{O;;ϰs>N{/Q}~s>P ώTM_X;<sۇ1Z` Gۍ;^Q( t:vFu&Fٰl#<81 =^v"6sLYC3+o atg–k웕NVsVn3`tEba%"]|}e󇽴nkv 2*pbfaGLP>1?a_ՉNkqb&>1|3{BJN̬ܟN̬܏N<'^'f30#L#*81|33;:1]*dWgt돬wg}v?sҝ;wO#̟Nhѹ?hwO@ 0]ч9`kfN9u3Lأ\0UtBQ0AYaN~O }33s=vjO-_3Y9b¼bf J3)T0W ӕ-f0__1cL̜1a1ւ)LX fZ30a-̄Ä.ń +(]1a-w!„`*3ւLXLX e&e} :L%bZ.n1a-#`Z ]LXk`"ItHÄ*(muJGW΄<.evUWŭ.HKV9 +tv|,uô@9%Wʋغe|.s +.t/e2$P2_t=|.uWU.st4.EW]02_uu/G8t/G8t<.5PaOw2.sC~>80f2Fr(~NLe>r‚T0TVOL7Z<*x1pW "ueB#•uчb.-:o:<_py]rpWd_AM9\c)p>R)'LPJP)/zgA}F>0!Jߑ11í>b}PZO󹏪D}a4 +}+*p߸ϋ>d +1 +]#c~ }fŘg + +\yc>`rW}pOcF]|^}v4Fx(4}|c淦Oz^,}.QV܇XGSDcX?IV,(1YGEf'}Mg5/ }0}~ϴ]`]oj#>VuP+9M`yy ('/CG=O c7FM>/ Ky}/'^߻NN ln{N591|'f|$wǸY'fk=1퓹ω0Ġ191fottws}b.QNL+'191tqb1sbusk0N Js'usb*L#>1hN ]G'lΠv]]F2 +‚fcU<-Qǃ.V']F-OPF5Z,8jz-TDu= B=y~A = =su N+%H..AKp%9P-{t .OģK]t ."%ܙN lt %NWF 7vt E#?,^%djA"s]@w n%a&r]].mx ]UN7֕%\sgj]1K]3]z]pFFW\5㮺Zu O{FW^5]{ 设S߹/x + 3ρY_֯φTkѫF댯ΆiA]ͧ& z cbMDo9# z3 TDoQy6ZD@]4}:{m  N>?όs1[uЀ_4ш9;3 tg̙gg,d3sƕ3s悳3g3NkϜ Ь̜i^Of8 pftc>fsv{ 3a9>g̙3#f073p3sf3sƜ`",A4bm:3mL~?6QytmlE7Mg悱MWgFMjc.`73_e!5H#5^h? 5‚8B#5ZBj 2x&myzf'Rܟ\/ +R͗y_Fj4 >j#56H!5:|Fj ~f!5:Fjt 5+1 5z|k Rw7|BjRG#5Ϲ3CH "#5:b#5:Fj>gs> +96z s>g>gs>g[3/ >?H9Ç3>g+s^|Λyw!5"^A#~yO9<ssy|XhG >qȍ-$@j$KH 9ΧXR9<<@yh>y$t> |gOW|γ~M7[M4_|$isY_s!>iIsH +9%>i7|ΔsroLҒ|,Q]ĩuG"^H3?'L EH{iٯL$lX r2 9^+Tn!^>BuruIFy?i> yu!_A"v/6|r3zau;^%͐rz;m^S1'r<=,WaK^^T +9`d뵉/3BN>YS6eC|CMLɇ]P9گui&Xۥn'3ߌ-!2|H| ­nߐ~y#dGW壽nѵ!_] B<3#̘!{s4 cx[ȵ1gwF$}rޘШgDX{m>o\؈}^#L~y^y?3 " !_$?MȺ_{uW&~9Y^}3я9#h iZ6}cF̫3^~=a/أ j7 Ȟ<ٻ]g {?&^=.ma߶o2YEk[ ~ycsCֆWg im6E4oNGNggc>3CO/Æ?=9=饳>?^,<N/1AN+;^ xetgxgYxhAD}xj;L(oN'yvl}Lb3CDAo|YJsޜ趷;B>:_ҙ|Jgb󹘩ao_e>Hx:<_X3|2]ԛɇ^QW>1ow^q'?|]>;kown|翹|1Õqgr9:3gxw1NXf0=ϭ'|/3<W9_\ч<3=?=_pFfϮ7w{osJl$+yW|~e?;ngf-<S_Xo޻PO}A,3wMBrzU/9R\O)tR/Q9!Ϯ:C])FS*׋|&ԣW\?]1XRv$\o* lfڰ}h v"W1v#fl~4;XO`_Q\d6&7?P;;7vo_6"65*z, +˜y:e~lv0<΄]xgvwO'xBPx7GAn וwɼO7S)ofoy2|ysSg +z6if<**v?v\ƻ9o5u>V1n;g>\\s>|W>^z; _#>` |f9|p'//~B[^_ TEDSPlKߤ~J-οӯE~.*oVhby y p-2Aj3yQy0IPaĵy#<2^yעcbo hIG'L=#L1L6gτD:4o2y$J.Z?=b2\GC*=/g +[`<)k< {$ T>mdp-Ø֨9֘'<̹шɇ {>ew4?*Rs^wFb 5Na[kWLqUK!_isaF 0>bCz0^xƝgϴ8uE{sGe;<&VYRgS} eP{ @v/W_cPZ=#wtSz `?hw31|4vzH9Wd<߯Hy<2Ńf(K4sKY ?myF$sm餤(%2CnӀLQQLOLM4(,K(iسh1A̘6 +y$Hi.`!QtLC1D1hc-ڏ&Qb s'*ƫ 4i)j/tuhlgQA9㤿ۧP{63y;Jg~DGgIй ?=-ݨz[s,+to7JP7~M8,AJz9"Dۥ?DNqVYQ$t#{]zs\2ҩ1t1@pvKJL]&$`ܸ0ɓl d;CF^6j8m!5͛m``MM{9 -&ᓝXȴ0[!ʥgmɈ=ya-8;r&RAr8&޼ܑ ;+ӹ,21C7EAةH'ivfs$5!5ow{P:vs|s &v +v5PNnNN+[A3^Gvs{b'x.rC|/c횯 +Q: ֯N8+|D/a;bԖ2Y71@*'VJ,-hP - +"{ޡ=rJ#m'A>"T@jᢛ &H{net vS52ׄ5Ɔ7Ik(wM]򩞿;h܋h:0 vϴGR$mN{8"&Psuit8iH{:Ő`eoFJס^R"-eUVֺS g$Kh3 A  Y F6hHZZ҆ҐV#=iIH[M4=i"-|,I6KҪ{HSҲe.ҶtZ > uh 4(iBd +CD$&CIɰC iuȞGG=do]&:iJZexC)+_گiT*kTR 4N 1344HX 1 ^Mi64IA$i{)K] itʰSSr +c=]6spjL(٨mΘP:Sٯ* Fd Qfȡixh$hXAiASdMieY ٵ:4H ,)R椬K/eR l̈'"Pvn atYفS6 w/-vYet yh| #yAS{h74H 4H<4H' 4BMa c؂k`EX"l>Ult`4۸F +Ai 4( AK Lh(C2A =4 fa3n= cr;f#& ؐi I/Fj5,J+C ;eH0Tgl zFi3t$y`3  i|i +52DvU`C4-WFPR`gaJM3ܰ Ű1A $T  1$S L4H ɅCm4HXaׯ`&>ްydoګۇX{.6:s[ʇfinLA2eA  h&#LD4H@Y2 v4Hai a m#WN 46Tm+gZ*4Dn{i r[ ^hiN[ +m7@m4H  6 e$nA$h5A:Lh2 m wHBImUij$۴ܖiۂ.ڀi-CӤ3me-mg hLzh 94HBաA:mzA:m|A:m~A:mAڦm -폁DJ`} h淶C\9m m30Jm+=2 f Ҡ4HOh$AD4H ,eBimԴL)mh[=z(i픶NiO$4HY 9<4HCk #A 2 h;fmɴӶL!m?O[t}ۤڒM;5- s`4H܄iȟ?4H6o Ar A i VN[ŀ';q;z9mg 'hn۟?n6z`> /hmAV eY mgh&@4_.@-C>2m +6wO.X!CAQD;`iZ5m2Z5 s0 Ғ=4Hw$h|ݸ:_AڣEhvH4H,hvI4H4Hm m  Ck"$P mA mA˴4ARm;rڗ/z mUe32~m|+6WvG7РBs9 +m:˥\Drڞ \ ϵa-Pmh-iJ1miۜ-6\Ac +QO<ũendstream +endobj +3 0 obj +<< /Type /Pages /Kids [ 7 0 R ] /Count 1 /MediaBox [0 0 432 432] >> +endobj +4 0 obj +<< +/ProcSet [/PDF /Text] +/Font <> +/ExtGState << /GS1 11 0 R /GS257 12 0 R >> +/ColorSpace << /sRGB 5 0 R >> +>> +endobj +5 0 obj +[/ICCBased 6 0 R] +endobj +6 0 obj +<< /Alternate /DeviceRGB /N 3 /Length 2596 /Filter /FlateDecode >> +stream +xwTSϽ7PkhRH H.*1 J"6DTpDQ2(C"QDqpId߼y͛~kg}ֺLX Xňg` lpBF|،l *?Y"1P\8=W%Oɘ4M0J"Y2Vs,[|e92<se'9`2&ctI@o|N6(.sSdl-c(2-yH_/XZ.$&\SM07#1ؙYrfYym";8980m-m(]v^DW~ emi]P`/u}q|^R,g+\Kk)/C_|Rax8t1C^7nfzDp 柇u$/ED˦L L[B@ٹЖX!@~(* {d+} G͋љς}WL$cGD2QZ4 E@@A(q`1D `'u46ptc48.`R0) +@Rt CXCP%CBH@Rf[(t CQhz#0 Zl`O828.p|O×X +?:0FBx$ !i@ڐH[EE1PL ⢖V6QP>U(j +MFkt,:.FW8c1L&ӎ9ƌaX: rbl1 +{{{;}#tp8_\8"Ey.,X%%Gщ1-9ҀKl.oo/O$&'=JvMޞxǥ{=Vs\x ‰N柜>ucKz=s/ol|ϝ?y ^d]ps~:;/;]7|WpQoH!ɻVsnYs}ҽ~4] =>=:`;cܱ'?e~!ańD#G&}'/?^xI֓?+\wx20;5\ӯ_etWf^Qs-mw3+?~O~endstream +endobj +9 0 obj +<< +/Type /Encoding /BaseEncoding /WinAnsiEncoding +/Differences [ 45/minus 96/quoteleft +144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent +/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space] +>> +endobj +10 0 obj +<< /Type /Font /Subtype /Type1 /Name /F2 /BaseFont /Helvetica +/Encoding 9 0 R >> +endobj +11 0 obj +<< +/Type /ExtGState +/CA 1.000 >> +endobj +12 0 obj +<< +/Type /ExtGState +/ca 0.149 +>> +endobj +xref +0 13 +0000000000 65535 f +0000000021 00000 n +0000000163 00000 n +0000022618 00000 n +0000022701 00000 n +0000022839 00000 n +0000022872 00000 n +0000000212 00000 n +0000000292 00000 n +0000025567 00000 n +0000025824 00000 n +0000025921 00000 n +0000025970 00000 n +trailer +<< /Size 13 /Info 1 0 R /Root 2 0 R >> +startxref +26019 +%%EOF Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/flup-splint.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/flup-splint.pdf differ diff -Nru r-cran-epi-2.32/vignettes/flup.tex r-cran-epi-2.37/vignettes/flup.tex --- r-cran-epi-2.32/vignettes/flup.tex 2018-05-03 14:34:57.000000000 +0000 +++ r-cran-epi-2.37/vignettes/flup.tex 2019-05-23 08:21:03.000000000 +0000 @@ -1,16 +1,16 @@ -%\VignetteIndexEntry{Follow-up data with R and Epi} +%\VignetteIndexEntry{Follow-up data with the Lexis functions in Epi} \documentclass[a4paper,dvipsnames,twoside,12pt]{report} - -\newcommand{\Title}{Follow-up data with \R\ and \texttt{Epi}} -\newcommand{\Tit}{Follow-up} -\newcommand{\Version}{\ } -\newcommand{\Dates}{December 2017} +\newcommand{\Title}{Follow-up data with the\\ \texttt{Lexis} functions in + \texttt{Epi}} +\newcommand{\Tit}{Follow-up with \texttt{Lexis}} +\newcommand{\Version}{Version 3} +\newcommand{\Dates}{February 2019} \newcommand{\Where}{SDCC} \newcommand{\Homepage}{\url{http://bendixcarstensen.com/} } \newcommand{\Faculty}{\begin{tabular}{rl} Bendix Carstensen - & Steno Diabetes Center, Gentofte, Denmark\\ + & Steno Diabetes Center Copenhagen, Gentofte, Denmark\\ & {\small \& Department of Biostatistics, University of Copenhagen} \\ & \texttt{b@bxc.dk} \\ @@ -20,17 +20,44 @@ \input{topreport} \renewcommand{\rwpre}{./flup} -\chapter{Follow-up data in the \texttt{Epi} package} +\chapter*{Introduction} +\addcontentsline{toc}{chapter}{Introduction} + +This is an introduction to the \texttt{Lexis} machinery in the +\texttt{Epi} package. The machinery is intended for representation and +manipulation of follow-up data (event history data) from studies where +exact dates of events are known. It accommodates follow-up through +multiple states and on multiple time scales. + +This vignette uses an example from the \texttt{Epi} package to +illustrate the set-up of a simple \texttt{Lexis} object (a data frame +of follow-up intervals), as well as the subdivision of follow-up +intervals needed for multistate representation and analysis of +transition rates. + +The first chapter is exclusively on manipulation of the follow-up +representation, but it points to the subsequent chapter where analysis +is based on a \texttt{Lexis} representation with very small follow-up +intervals. + +Chapter 2 uses analysis of mortality rates among Danish +diabetes patients currently on insulin treatment or not to illustrate +the use of the the \texttt{Lexis} machinery. + +I owe much thanks to my colleague Lars Jorge Diaz for careful reading +and many constructive suggestions. + +\chapter{Representation of follow-up data in the \texttt{Epi} package} In the \texttt{Epi}-package, follow-up data is represented by adding some extra variables to a data frame. Such a data frame is called a \texttt{Lexis} object. The tools for handling follow-up data then use -the structure of this for special plots, tabulations etc. +the structure of this for special plots, tabulations and modeling. Follow-up data basically consists of a time of entry, a time of exit and an indication of the status at exit (normally either ``alive'' or -``dead''). Implicitly is also assumed a status \emph{during} the -follow-up (usually ``alive''). +``dead'') for each person. Implicitly is also assumed a status +\emph{during} the follow-up (usually ``alive''). \begin{figure}[htbp] \centering @@ -73,24 +100,24 @@ \label{fig:fu2} \end{figure} -\section{Timescales} +\section{Time scales} -A timescale is a variable that varies deterministicly \emph{within} +A time scale is a variable that varies deterministically \emph{within} each person during follow-up, \textit{e.g.}: \begin{itemize} \item Age \item Calendar time - \item Time since treatment + \item Time since start of treatment \item Time since relapse \end{itemize} -All timescales advance at the same pace, so the time followed is the -same on all timescales. Therefore, it suffices to use only the entry -point on each of the time scale, for example: +All time scales advance at the same pace, so the time followed is the +same on all time scales. Therefore, it will suffice to use only the +entry point on each of the time scales, for example: \begin{itemize} - \item Age at entry. - \item Date of entry. - \item Time since treatment (\emph{at} treatment this is 0). - \item Time since relapse (\emph{at} relapse this is 0).. + \item Age at entry + \item Date of entry + \item Time at treatment (\emph{at} treatment this is 0) + \item Time at relapse (\emph{at} relapse this is 0) \end{itemize} For illustration we need to load the \texttt{Epi} package: \begin{Schunk} @@ -99,149 +126,166 @@ > print( sessionInfo(), l=F ) \end{Sinput} \begin{Soutput} -R version 3.4.4 (2018-03-15) +R version 3.6.0 (2019-04-26) Platform: x86_64-pc-linux-gnu (64-bit) -Running under: Ubuntu 14.04.5 LTS +Running under: Ubuntu 14.04.6 LTS Matrix products: default -BLAS: /usr/lib/openblas-base/libopenblas.so.0 +BLAS: /usr/lib/openblas-base/libopenblas.so.0 LAPACK: /usr/lib/lapack/liblapack.so.3.0 attached base packages: [1] utils datasets graphics grDevices stats methods base other attached packages: -[1] Epi_2.29 +[1] Epi_2.37 loaded via a namespace (and not attached): - [1] cmprsk_2.2-7 zoo_1.8-0 MASS_7.3-49 compiler_3.4.4 - [5] Matrix_1.2-14 plyr_1.8.4 parallel_3.4.4 survival_2.42-3 - [9] etm_1.0.1 Rcpp_0.12.12 splines_3.4.4 grid_3.4.4 -[13] data.table_1.10.4 numDeriv_2016.8-1 lattice_0.20-35 + [1] Rcpp_1.0.0 lattice_0.20-38 zoo_1.8-4 MASS_7.3-51.1 + [5] grid_3.6.0 plyr_1.8.4 nlme_3.1-139 etm_1.0.4 + [9] data.table_1.12.0 Matrix_1.2-17 splines_3.6.0 tools_3.6.0 +[13] cmprsk_2.2-7 numDeriv_2016.8-1 survival_2.44-1.1 parallel_3.6.0 +[17] compiler_3.6.0 mgcv_1.8-28 \end{Soutput} \end{Schunk} In the \texttt{Epi} package, follow-up in a cohort is represented in a -\texttt{Lexis} object. A \texttt{Lexis} object is a data frame with a -bit of extra structure representing the follow-up. For the -\texttt{nickel} data we would construct a \texttt{Lexis} object by: +\texttt{Lexis} object. As mentioned, a \texttt{Lexis} object is a data +frame with some extra structure representing the follow-up. For the +\texttt{DMlate} data --- follow-up of diabetes patients in Denmark +recording date of birth, date of diabetes, date of insulin use, date +of first oral drug use and date of death --- we can construct a +\texttt{Lexis} object by: \begin{Schunk} \begin{Sinput} -> data( nickel ) -> nicL <- Lexis( entry = list( per=agein+dob, -+ age=agein, -+ tfh=agein-age1st ), -+ exit = list( age=ageout ), -+ exit.status = ( icd %in% c(162,163) )*1, -+ data = nickel ) +> data( DMlate ) +> head( DMlate ) +\end{Sinput} +\begin{Soutput} + sex dobth dodm dodth dooad doins dox +50185 F 1940.256 1998.917 NA NA NA 2009.997 +307563 M 1939.218 2003.309 NA 2007.446 NA 2009.997 +294104 F 1918.301 2004.552 NA NA NA 2009.997 +336439 F 1965.225 2009.261 NA NA NA 2009.997 +245651 M 1932.877 2008.653 NA NA NA 2009.997 +216824 F 1927.870 2007.886 2009.923 NA NA 2009.923 +\end{Soutput} +\begin{Sinput} +> dmL <- Lexis( entry = list( per=dodm, ++ age=dodm-dobth, ++ tfD=0 ), ++ exit = list( per=dox ), ++ exit.status = factor( !is.na(dodth), labels=c("DM","Dead") ), ++ data = DMlate ) +\end{Sinput} +\begin{Soutput} +NOTE: entry.status has been set to "DM" for all. +NOTE: Dropping 4 rows with duration of follow up < tol +\end{Soutput} +\begin{Sinput} +> timeScales(dmL) \end{Sinput} \begin{Soutput} -NOTE: entry.status has been set to 0 for all. +[1] "per" "age" "tfD" \end{Soutput} \end{Schunk} +(The excluded persons are persons with date of diabetes equal to date +of death.) + The \texttt{entry} argument is a \emph{named} list with the entry -points on each of the timescales we want to use. It defines the names -of the timescales and the entry points of the follow-up of each +points on each of the time scales we want to use. It defines the names +of the time scales and the entry points of the follow-up of each person. The \texttt{exit} argument gives the exit time on \emph{one} -of the timescales, so the name of the element in this list must match +of the time scales, so the name of the element in this list must match one of the names of the \texttt{entry} list. This is sufficient, because the follow-up time on all time scales is the same, in this -case \texttt{ageout - agein}. Now take a look at the result: -\begin{Schunk} -\begin{Sinput} -> str( nickel ) -\end{Sinput} -\begin{Soutput} -'data.frame': 679 obs. of 7 variables: - $ id : num 3 4 6 8 9 10 15 16 17 18 ... - $ icd : num 0 162 163 527 150 163 334 160 420 12 ... - $ exposure: num 5 5 10 9 0 2 0 0.5 0 0 ... - $ dob : num 1889 1886 1881 1886 1880 ... - $ age1st : num 17.5 23.2 25.2 24.7 30 ... - $ agein : num 45.2 48.3 53 47.9 54.7 ... - $ ageout : num 93 63.3 54.2 69.7 76.8 ... -\end{Soutput} -\begin{Sinput} -> str( nicL ) -\end{Sinput} -\begin{Soutput} -Classes ‘Lexis’ and 'data.frame': 679 obs. of 14 variables: - $ per : num 1934 1934 1934 1934 1934 ... - $ age : num 45.2 48.3 53 47.9 54.7 ... - $ tfh : num 27.7 25.1 27.7 23.2 24.8 ... - $ lex.dur : num 47.75 15 1.17 21.77 22.1 ... - $ lex.Cst : num 0 0 0 0 0 0 0 0 0 0 ... - $ lex.Xst : num 0 1 1 0 0 1 0 0 0 0 ... - $ lex.id : int 1 2 3 4 5 6 7 8 9 10 ... - $ id : num 3 4 6 8 9 10 15 16 17 18 ... - $ icd : num 0 162 163 527 150 163 334 160 420 12 ... - $ exposure: num 5 5 10 9 0 2 0 0.5 0 0 ... - $ dob : num 1889 1886 1881 1886 1880 ... - $ age1st : num 17.5 23.2 25.2 24.7 30 ... - $ agein : num 45.2 48.3 53 47.9 54.7 ... - $ ageout : num 93 63.3 54.2 69.7 76.8 ... - - attr(*, "time.scales")= chr "per" "age" "tfh" +case $\mathtt{dox}$-$\mathtt{dodm}$. + +The \texttt{exit.status} is a categorical variable (a \emph{factor}) +that indicates the exit status --- in this case whether the person +(still) is in state \texttt{DM} or exits to \texttt{Dead} at the end +of follow-up. In principle we should also indicate the +\texttt{entry.status}, but the default is to assume that all persons +enter in the \texttt{first} of the mentioned \texttt{exit.state}s --- +in this case \texttt{DM}, because $\mathtt{FALSE}<\mathtt{TRUE}$. + +Now take a look at the result: +\begin{Schunk} +\begin{Sinput} +> str( dmL ) +\end{Sinput} +\begin{Soutput} +Classes ‘Lexis’ and 'data.frame': 9996 obs. of 14 variables: + $ per : num 1999 2003 2005 2009 2009 ... + $ age : num 58.7 64.1 86.3 44 75.8 ... + $ tfD : num 0 0 0 0 0 0 0 0 0 0 ... + $ lex.dur: num 11.08 6.689 5.446 0.736 1.344 ... + $ lex.Cst: Factor w/ 2 levels "DM","Dead": 1 1 1 1 1 1 1 1 1 1 ... + $ lex.Xst: Factor w/ 2 levels "DM","Dead": 1 1 1 1 1 2 1 1 2 1 ... + $ lex.id : int 1 2 3 4 5 6 7 8 9 10 ... + $ sex : Factor w/ 2 levels "M","F": 2 1 2 2 1 2 1 1 2 1 ... + $ dobth : num 1940 1939 1918 1965 1933 ... + $ dodm : num 1999 2003 2005 2009 2009 ... + $ dodth : num NA NA NA NA NA ... + $ dooad : num NA 2007 NA NA NA ... + $ doins : num NA NA NA NA NA NA NA NA NA NA ... + $ dox : num 2010 2010 2010 2010 2010 ... + - attr(*, "time.scales")= chr "per" "age" "tfD" - attr(*, "time.since")= chr "" "" "" - attr(*, "breaks")=List of 3 ..$ per: NULL ..$ age: NULL - ..$ tfh: NULL + ..$ tfD: NULL \end{Soutput} \begin{Sinput} -> head( nicL ) +> head( dmL )[,1:10] \end{Sinput} \begin{Soutput} - per age tfh lex.dur lex.Cst lex.Xst lex.id id icd exposure dob -1 1934.246 45.2273 27.7465 47.7535 0 0 1 3 0 5 1889.019 -2 1934.246 48.2684 25.0820 15.0028 0 1 2 4 162 5 1885.978 -3 1934.246 52.9917 27.7465 1.1727 0 1 3 6 163 10 1881.255 -4 1934.246 47.9067 23.1861 21.7727 0 0 4 8 527 9 1886.340 -5 1934.246 54.7465 24.7890 22.0977 0 0 5 9 150 0 1879.500 -6 1934.246 44.3314 23.0437 18.2099 0 1 6 10 163 2 1889.915 - age1st agein ageout -1 17.4808 45.2273 92.9808 -2 23.1864 48.2684 63.2712 -3 25.2452 52.9917 54.1644 -4 24.7206 47.9067 69.6794 -5 29.9575 54.7465 76.8442 -6 21.2877 44.3314 62.5413 -\end{Soutput} -\end{Schunk} -The \texttt{Lexis} object \texttt{nicL} has a variable for each -timescale which is the entry point on this timescale. The follow-up -time is in the variable \texttt{lex.dur} (\textbf{dur}ation). + per age tfD lex.dur lex.Cst lex.Xst lex.id sex dobth dodm +50185 1998.917 58.66119 0 11.0800821 DM DM 1 F 1940.256 1998.917 +307563 2003.309 64.09035 0 6.6885695 DM DM 2 M 1939.218 2003.309 +294104 2004.552 86.25051 0 5.4455852 DM DM 3 F 1918.301 2004.552 +336439 2009.261 44.03559 0 0.7364819 DM DM 4 F 1965.225 2009.261 +245651 2008.653 75.77550 0 1.3442847 DM DM 5 M 1932.877 2008.653 +216824 2007.886 80.01643 0 2.0369610 DM Dead 6 F 1927.870 2007.886 +\end{Soutput} +\end{Schunk} +The \texttt{Lexis} object \texttt{dmL} has a variable for each +time scale which is the entry point on this time scale. The follow-up +time is in the variable \texttt{lex.dur} (\texttt{dur}ation). +Note that the exit status is in the variable \texttt{lex.Xst} +(e\texttt{X}it \texttt{st}ate. The variable \texttt{lex.Cst} is the +state where the follow-up takes place (\texttt{C}urrent +\texttt{st}ate), in this case \texttt{DM} (alive with diabetes) for +all persons. This implies that \emph{censored} observations are +characterized by having $\mathtt{lex.Cst}=\mathtt{lex.Xst}$. There is a \texttt{summary} function for \texttt{Lexis} objects that -list the number of transitions and records as well as the total -amount of follow-up time: +lists the number of transitions and records as well as the total amount +of follow-up time; it also (optionally) prints information about the names of the +variables that constitute the time scales: \begin{Schunk} \begin{Sinput} -> summary( nicL ) +> summary.Lexis( dmL, timeScales=TRUE ) \end{Sinput} \begin{Soutput} Transitions: To -From 0 1 Records: Events: Risk time: Persons: - 0 542 137 679 137 15348.06 679 +From DM Dead Records: Events: Risk time: Persons: + DM 7497 2499 9996 2499 54273.27 9996 + +Timescales: +per age tfD + "" "" "" \end{Soutput} \end{Schunk} -We defined the exit status to be death from lung cancer (ICD7 -162,163), i.e. this variable is 1 if follow-up ended with a death from -this cause. If follow-up ended alive or by death from another cause, -the exit status is coded 0, i.e. as a censoring. - -Note that the exit status is in the variable \texttt{lex.Xst} -(e\texttt{X}it \texttt{st}atus. The variable \texttt{lex.Cst} is the -state where the follow-up takes place (\texttt{C}urrent -\texttt{st}atus), in this case 0 (alive). - It is possible to get a visualization of the follow-up along the -timescales chosen by using the \texttt{plot} method for \texttt{Lexis} -objects. \texttt{nicL} is an object of \emph{class} \texttt{Lexis}, so +time scales chosen by using the \texttt{plot} method for \texttt{Lexis} +objects. \texttt{dmL} is an object of \emph{class} \texttt{Lexis}, so using the function \texttt{plot()} on it means that \R\ will look for the function \texttt{plot.Lexis} and use this function. \begin{Schunk} \begin{Sinput} -> plot( nicL ) +> plot( dmL ) \end{Sinput} \end{Schunk} The function allows quite a bit of control over the output, and a @@ -250,126 +294,145 @@ \begin{Schunk} \begin{Sinput} > par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6 ) -> plot( nicL, 1:2, lwd=1, col=c("blue","red")[(nicL$exp>0)+1], +> plot( dmL, 1:2, lwd=1, col=c("blue","red")[dmL$sex], + grid=TRUE, lty.grid=1, col.grid=gray(0.7), -+ xlim=1900+c(0,90), xaxs="i", -+ ylim= 10+c(0,90), yaxs="i", las=1 ) -> points( nicL, 1:2, pch=c(NA,3)[nicL$lex.Xst+1], -+ col="lightgray", lwd=3, cex=1.5 ) -> points( nicL, 1:2, pch=c(NA,3)[nicL$lex.Xst+1], -+ col=c("blue","red")[(nicL$exp>0)+1], lwd=1, cex=1.5 ) -\end{Sinput} -\end{Schunk} -The results of these two plotting commands are in figure \ref{fig:Lexis-diagram}. ++ xlim=1960+c(0,60), xaxs="i", ++ ylim= 40+c(0,60), yaxs="i", las=1 ) +> points( dmL, 1:2, pch=c(NA,3)[dmL$lex.Xst], ++ col="lightgray", lwd=3, cex=0.3 ) +> points( dmL, 1:2, pch=c(NA,3)[dmL$lex.Xst], ++ col=c("blue","red")[dmL$sex], lwd=1, cex=0.3 ) +> box(bty='o') +\end{Sinput} +\end{Schunk} +In the above code you will note that the values of the arguments +\texttt{col} and \texttt{pch} are indexed by factors, using the +convention in \R\ that the index is taken as \emph{number of the level} +of the supplied factor. Thus \texttt{c("blue","red")[dmL\$sex]} is +\texttt{"blue"} when \texttt{sex} is \texttt{M} (the first level). + +The results of these two plotting commands are in figure +\ref{fig:Lexis-diagram}, p. \pageref{fig:Lexis-diagram}. \begin{figure}[tb] \centering +\includegraphics[width=0.35\textwidth]{flup-dmL1} +\includegraphics[width=0.63\textwidth]{flup-dmL2} +\caption{\it Lexis diagram of the \textrm{\tt DMlate} dataset; left + panel is the default version, right panel: plot with some bells and + whistles. The red lines are for women, blue for men, crosses + indicate deaths.} \label{fig:Lexis-diagram} -\includegraphics[width=0.39\textwidth]{flup-nicL1} -\includegraphics[width=0.59\textwidth]{flup-nicL2} -\caption{\it Lexis diagram of the \textrm{\tt nickel} dataset; left - panel the default version, right panel with bells and whistles. The - red lines are for persons with exposure$>0$, so it is pretty evident - that the oldest ones are the exposed part of the cohort.} \end{figure} -\chapter{Subdividing follow-up for analysis} - -\section{Splitting the follow-up time along a timescale} +\section{Splitting the follow-up time along a time scale} -The follow-up time in a cohort can be subdivided by for example -current age. This is achieved by the \texttt{splitLexis} (note that it -is \emph{not} called \texttt{split.Lexis}). This requires that the -timescale and the breakpoints on this timescale are supplied. Try: +In next chapter we shall conduct statistical analysis of mortality +rates, and a prerequisite for parametric analysis of rates is that +follow-up time is subdivided in smaller intervals, where we can +reasonably assume that rates are constant. + +The follow-up time in a cohort can be subdivided (``split'') along a +time scale, for example current age. This is achieved by the +\texttt{splitLexis} (note that it is \emph{not} called +\texttt{split.Lexis}). This requires that the time scale and the +breakpoints on this time scale are supplied. Try: \begin{Schunk} \begin{Sinput} -> nicS1 <- splitLexis( nicL, "age", breaks=seq(0,100,10) ) -> summary( nicL ) +> dmS1 <- splitLexis( dmL, "age", breaks=seq(0,100,5) ) +> summary( dmL ) \end{Sinput} \begin{Soutput} Transitions: To -From 0 1 Records: Events: Risk time: Persons: - 0 542 137 679 137 15348.06 679 +From DM Dead Records: Events: Risk time: Persons: + DM 7497 2499 9996 2499 54273.27 9996 \end{Soutput} \begin{Sinput} -> summary( nicS1 ) +> summary( dmS1 ) \end{Sinput} \begin{Soutput} Transitions: To -From 0 1 Records: Events: Risk time: Persons: - 0 2073 137 2210 137 15348.06 679 +From DM Dead Records: Events: Risk time: Persons: + DM 18328 2499 20827 2499 54273.27 9996 \end{Soutput} \end{Schunk} -So we see that the number of events and the amount of follow-up is the -same in the two data sets; only the number of records differ --- the -extra records all have \texttt{lex.Cst}=0 and \texttt{lex.Xst}=0. +We see that the number of persons and events and the amount of +follow-up is the same in the two data sets; only the number of records +differ --- the extra records all have \texttt{lex.Cst}=\texttt{DM} and +\texttt{lex.Xst}=\texttt{DM}. To see how records are split for each individual, it is useful to list -the results for a few individuals: +the results for a few individuals (whom we selected with a view to the +illustrative usefulness): \begin{Schunk} \begin{Sinput} -> round( subset( nicS1, id %in% 8:10 ), 2 ) +> wh.id <- c(9,27,52,484) +> subset( dmL , lex.id %in% wh.id )[,1:10] \end{Sinput} \begin{Soutput} - lex.id per age tfh lex.dur lex.Cst lex.Xst id icd exposure dob age1st -11 4 1934.25 47.91 23.19 2.09 0 0 8 527 9 1886.34 24.72 -12 4 1936.34 50.00 25.28 10.00 0 0 8 527 9 1886.34 24.72 -13 4 1946.34 60.00 35.28 9.68 0 0 8 527 9 1886.34 24.72 -14 5 1934.25 54.75 24.79 5.25 0 0 9 150 0 1879.50 29.96 -15 5 1939.50 60.00 30.04 10.00 0 0 9 150 0 1879.50 29.96 -16 5 1949.50 70.00 40.04 6.84 0 0 9 150 0 1879.50 29.96 -17 6 1934.25 44.33 23.04 5.67 0 0 10 163 2 1889.91 21.29 -18 6 1939.91 50.00 28.71 10.00 0 0 10 163 2 1889.91 21.29 -19 6 1949.91 60.00 38.71 2.54 0 1 10 163 2 1889.91 21.29 - agein ageout -11 47.91 69.68 -12 47.91 69.68 -13 47.91 69.68 -14 54.75 76.84 -15 54.75 76.84 -16 54.75 76.84 -17 44.33 62.54 -18 44.33 62.54 -19 44.33 62.54 -\end{Soutput} -\end{Schunk} -The resulting object, \texttt{nicS1}, is again a \texttt{Lexis} -object, and so follow-up may be split further along another -timescale. Subsequently we list the results for individuals 8, 9 and -10 again: -\begin{Schunk} -\begin{Sinput} -> nicS2 <- splitLexis( nicS1, "tfh", breaks=c(0,1,5,10,20,30,100) ) -> round( subset( nicS2, id %in% 8:10 ), 2 ) -\end{Sinput} -\begin{Soutput} - lex.id per age tfh lex.dur lex.Cst lex.Xst id icd exposure dob age1st -13 4 1934.25 47.91 23.19 2.09 0 0 8 527 9 1886.34 24.72 -14 4 1936.34 50.00 25.28 4.72 0 0 8 527 9 1886.34 24.72 -15 4 1941.06 54.72 30.00 5.28 0 0 8 527 9 1886.34 24.72 -16 4 1946.34 60.00 35.28 9.68 0 0 8 527 9 1886.34 24.72 -17 5 1934.25 54.75 24.79 5.21 0 0 9 150 0 1879.50 29.96 -18 5 1939.46 59.96 30.00 0.04 0 0 9 150 0 1879.50 29.96 -19 5 1939.50 60.00 30.04 10.00 0 0 9 150 0 1879.50 29.96 -20 5 1949.50 70.00 40.04 6.84 0 0 9 150 0 1879.50 29.96 -21 6 1934.25 44.33 23.04 5.67 0 0 10 163 2 1889.91 21.29 -22 6 1939.91 50.00 28.71 1.29 0 0 10 163 2 1889.91 21.29 -23 6 1941.20 51.29 30.00 8.71 0 0 10 163 2 1889.91 21.29 -24 6 1949.91 60.00 38.71 2.54 0 1 10 163 2 1889.91 21.29 - agein ageout -13 47.91 69.68 -14 47.91 69.68 -15 47.91 69.68 -16 47.91 69.68 -17 54.75 76.84 -18 54.75 76.84 -19 54.75 76.84 -20 54.75 76.84 -21 44.33 62.54 -22 44.33 62.54 -23 44.33 62.54 -24 44.33 62.54 + per age tfD lex.dur lex.Cst lex.Xst lex.id sex dobth dodm +430048 1998.956 61.87269 0 9.508556 DM Dead 9 F 1937.083 1998.956 +22671 2000.042 52.71184 0 9.954825 DM DM 27 M 1947.331 2000.042 +338459 1998.249 61.85626 0 11.748118 DM DM 52 F 1936.393 1998.249 +274124 1998.260 62.37919 0 10.929500 DM Dead 484 F 1935.881 1998.260 +\end{Soutput} +\begin{Sinput} +> subset( dmS1, lex.id %in% wh.id )[,1:10] +\end{Sinput} +\begin{Soutput} + lex.id per age tfD lex.dur lex.Cst lex.Xst sex dobth dodm +14 9 1998.956 61.87269 0.000000 3.127310 DM DM F 1937.083 1998.956 +15 9 2002.083 65.00000 3.127310 5.000000 DM DM F 1937.083 1998.956 +16 9 2007.083 70.00000 8.127310 1.381246 DM Dead F 1937.083 1998.956 +54 27 2000.042 52.71184 0.000000 2.288159 DM DM M 1947.331 2000.042 +55 27 2002.331 55.00000 2.288159 5.000000 DM DM M 1947.331 2000.042 +56 27 2007.331 60.00000 7.288159 2.666667 DM DM M 1947.331 2000.042 +108 52 1998.249 61.85626 0.000000 3.143737 DM DM F 1936.393 1998.249 +109 52 2001.393 65.00000 3.143737 5.000000 DM DM F 1936.393 1998.249 +110 52 2006.393 70.00000 8.143737 3.604381 DM DM F 1936.393 1998.249 +1004 484 1998.260 62.37919 0.000000 2.620808 DM DM F 1935.881 1998.260 +1005 484 2000.881 65.00000 2.620808 5.000000 DM DM F 1935.881 1998.260 +1006 484 2005.881 70.00000 7.620808 3.308693 DM Dead F 1935.881 1998.260 +\end{Soutput} +\end{Schunk} +The resulting object, \texttt{dmS1}, is again a \texttt{Lexis} object, +and the follow-up may be split further along another time scale, for +example diabetes duration, \texttt{tfD}. Subsequently we list the +results for the chosen individuals: +\begin{Schunk} +\begin{Sinput} +> dmS2 <- splitLexis( dmS1, "tfD", breaks=c(0,1,2,5,10,20,30,40) ) +> subset( dmS2, lex.id %in% wh.id )[,1:10] +\end{Sinput} +\begin{Soutput} + lex.id per age tfD lex.dur lex.Cst lex.Xst sex dobth dodm +31 9 1998.956 61.87269 0.000000 1.0000000 DM DM F 1937.083 1998.956 +32 9 1999.956 62.87269 1.000000 1.0000000 DM DM F 1937.083 1998.956 +33 9 2000.956 63.87269 2.000000 1.1273101 DM DM F 1937.083 1998.956 +34 9 2002.083 65.00000 3.127310 1.8726899 DM DM F 1937.083 1998.956 +35 9 2003.956 66.87269 5.000000 3.1273101 DM DM F 1937.083 1998.956 +36 9 2007.083 70.00000 8.127310 1.3812457 DM Dead F 1937.083 1998.956 +111 27 2000.042 52.71184 0.000000 1.0000000 DM DM M 1947.331 2000.042 +112 27 2001.042 53.71184 1.000000 1.0000000 DM DM M 1947.331 2000.042 +113 27 2002.042 54.71184 2.000000 0.2881588 DM DM M 1947.331 2000.042 +114 27 2002.331 55.00000 2.288159 2.7118412 DM DM M 1947.331 2000.042 +115 27 2005.042 57.71184 5.000000 2.2881588 DM DM M 1947.331 2000.042 +116 27 2007.331 60.00000 7.288159 2.6666667 DM DM M 1947.331 2000.042 +229 52 1998.249 61.85626 0.000000 1.0000000 DM DM F 1936.393 1998.249 +230 52 1999.249 62.85626 1.000000 1.0000000 DM DM F 1936.393 1998.249 +231 52 2000.249 63.85626 2.000000 1.1437372 DM DM F 1936.393 1998.249 +232 52 2001.393 65.00000 3.143737 1.8562628 DM DM F 1936.393 1998.249 +233 52 2003.249 66.85626 5.000000 3.1437372 DM DM F 1936.393 1998.249 +234 52 2006.393 70.00000 8.143737 1.8562628 DM DM F 1936.393 1998.249 +235 52 2008.249 71.85626 10.000000 1.7481177 DM DM F 1936.393 1998.249 +2084 484 1998.260 62.37919 0.000000 1.0000000 DM DM F 1935.881 1998.260 +2085 484 1999.260 63.37919 1.000000 1.0000000 DM DM F 1935.881 1998.260 +2086 484 2000.260 64.37919 2.000000 0.6208077 DM DM F 1935.881 1998.260 +2087 484 2000.881 65.00000 2.620808 2.3791923 DM DM F 1935.881 1998.260 +2088 484 2003.260 67.37919 5.000000 2.6208077 DM DM F 1935.881 1998.260 +2089 484 2005.881 70.00000 7.620808 2.3791923 DM DM F 1935.881 1998.260 +2090 484 2008.260 72.37919 10.000000 0.9295003 DM Dead F 1935.881 1998.260 \end{Soutput} \end{Schunk} A more efficient (and more intuitive) way of making this double split @@ -378,53 +441,61 @@ \begin{Schunk} \begin{Sinput} > library( popEpi ) -> nicM <- splitMulti( nicL, age = seq(0,100,10), -+ tfh = c(0,1,5,10,20,30,100) ) -> summary( nicS2 ) +> dmM <- splitMulti( dmL, age = seq(0,100,5), ++ tfD = c(0,1,2,5,10,20,30,40), ++ drop = FALSE ) +> summary( dmS2 ) \end{Sinput} \begin{Soutput} Transitions: To -From 0 1 Records: Events: Risk time: Persons: - 0 2992 137 3129 137 15348.06 679 +From DM Dead Records: Events: Risk time: Persons: + DM 40897 2499 43396 2499 54273.27 9996 \end{Soutput} \begin{Sinput} -> summary( nicM ) +> summary( dmM ) \end{Sinput} \begin{Soutput} Transitions: To -From 0 1 Records: Events: Risk time: Persons: - 0 2992 137 3129 137 15348.06 679 +From DM Dead Records: Events: Risk time: Persons: + DM 40897 2499 43396 2499 54273.27 9996 \end{Soutput} \end{Schunk} +Note we used the argument \texttt{drop=FALSE} which will retain +follow-up also outside the window defined by the breaks. Otherwise the +default for \texttt{splitMulti} would be to drop follow-up outside +\texttt{age} [0,100] and \texttt{tfD} [0,40]. This clipping behaviour +is not available in \texttt{splitLexis}, nevertheless this may be +exactly what we want in some situations. + So we see that the two ways of splitting data yields the same amount of follow-up, but the results are not identical: \begin{Schunk} \begin{Sinput} -> identical( nicS2, nicM ) +> identical( dmS2, dmM ) \end{Sinput} \begin{Soutput} [1] FALSE \end{Soutput} \begin{Sinput} -> class( nicS2 ) +> class( dmS2 ) \end{Sinput} \begin{Soutput} [1] "Lexis" "data.frame" \end{Soutput} \begin{Sinput} -> class( nicM ) +> class( dmM ) \end{Sinput} \begin{Soutput} [1] "Lexis" "data.table" "data.frame" \end{Soutput} \end{Schunk} -As we see, this is because the \texttt{nicM} object also is a +As we see, this is because the \texttt{dmM} object also is a \texttt{data.table} object; the \texttt{splitMulti} uses the \texttt{data.table} machinery which makes the splitting substantially faster --- this is of particular interest if you operate on large data -sets ($>1,000,000$ records). +sets ($>100,000$ records). Thus the recommended way of splitting follow-up time is by \texttt{splitMulti}. But you should be aware that the result is a @@ -432,255 +503,358 @@ slightly different from \texttt{data.frame}s. See the manual for \texttt{data.table}. +\section{Cutting follow up time at dates of intermediate events} + +If we have a recording of the date of a specific event as for example +recovery or relapse, we may classify follow-up time as being before or +after this intermediate event, but it requires that follow-up records +that straddle the event be cut in two and placed in separate records, +one representing follow-up \emph{before} the intermediate event, and another +representing follow-up \emph{after} the intermediate event. This is +achieved with the function \texttt{cutLexis}, which takes three +arguments: the time point of the intermediate event, the time scale +that this point refers to, and the value of the (new) state following +the date. Optionally, we may also define a new time scale with the +argument \texttt{new.scale=}. + +We are interested in the time before and after inception of insulin +use, which occurs at the date \texttt{doins}: +\begin{Schunk} +\begin{Sinput} +> whc <- c(names(dmL)[1:7],"dodm","doins") # WHich Columns do we want to see? +> subset( dmL, lex.id %in% wh.id )[,whc] +\end{Sinput} +\begin{Soutput} + per age tfD lex.dur lex.Cst lex.Xst lex.id dodm doins +430048 1998.956 61.87269 0 9.508556 DM Dead 9 1998.956 NA +22671 2000.042 52.71184 0 9.954825 DM DM 27 2000.042 NA +338459 1998.249 61.85626 0 11.748118 DM DM 52 1998.249 2004.804 +274124 1998.260 62.37919 0 10.929500 DM Dead 484 1998.260 2003.960 +\end{Soutput} +\begin{Sinput} +> dmC <- cutLexis( data = dmL, ++ cut = dmL$doins, ++ timescale = "per", ++ new.state = "Ins", ++ new.scale = "tfI", ++ precursor.states = "DM" ) +> whc <- c(names(dmL)[1:8],"doins") # WHich Columns do we want to see? +> subset( dmC, lex.id %in% wh.id )[,whc] +\end{Sinput} +\begin{Soutput} + per age tfD lex.dur lex.Cst lex.Xst lex.id sex doins +9 1998.956 61.87269 0.000000 9.508556 DM Dead 9 F NA +27 2000.042 52.71184 0.000000 9.954825 DM DM 27 M NA +52 1998.249 61.85626 0.000000 6.554415 DM Ins 52 F 2004.804 +10048 2004.804 68.41068 6.554415 5.193703 Ins Ins 52 F 2004.804 +484 1998.260 62.37919 0.000000 5.700205 DM Ins 484 F 2003.960 +10480 2003.960 68.07940 5.700205 5.229295 Ins Dead 484 F 2003.960 +\end{Soutput} +\end{Schunk} +(The \texttt{precursor.states=} argument is explained below). + +Note that the process of cutting time is simplified by having all +types of events referred to the calendar time scale. This is a +generally applicable advice in handling follow-up data: Get all event +times as \emph{dates}, location of events and follow-up on other time +scales can then easily be derived from this. + +Note +that individual 52 has had his follow-up cut at 6.55 years from +diabetes diagnosis and individual 484 at 5.70 years from diabetes +diagnosis. This dataset could then be split along the time scales as we +did before with \texttt{dmL}. + +We can see which of the time scales that are defined as time since +entry into an intermediate state: +\begin{Schunk} +\begin{Sinput} +> timeSince( dmC ) +\end{Sinput} +\begin{Soutput} + per age tfD tfI + "" "" "" "Ins" +\end{Soutput} +\end{Schunk} +The names of the vector are the time scales; each element is the name +of the state entry into which defines the origin of the time +scale. This resulting \texttt{Lexis} object can then be split along +one or more time scales. + +The result of this can however also be achieved by cutting the split +dataset \texttt{dmS2} instead of \texttt{dmL}: +\begin{Schunk} +\begin{Sinput} +> dmS2C <- cutLexis( data = dmS2, ++ cut = dmS2$doins, ++ timescale = "per", ++ new.state = "Ins", ++ new.scale = "tfI", ++ precursor.states = "DM" ) +> subset( dmS2C, lex.id %in% wh.id )[,whc] +\end{Sinput} +\begin{Soutput} + per age tfD lex.dur lex.Cst lex.Xst lex.id sex doins +31 1998.956 61.87269 0.000000 1.0000000 DM DM 9 F NA +32 1999.956 62.87269 1.000000 1.0000000 DM DM 9 F NA +33 2000.956 63.87269 2.000000 1.1273101 DM DM 9 F NA +34 2002.083 65.00000 3.127310 1.8726899 DM DM 9 F NA +35 2003.956 66.87269 5.000000 3.1273101 DM DM 9 F NA +36 2007.083 70.00000 8.127310 1.3812457 DM Dead 9 F NA +111 2000.042 52.71184 0.000000 1.0000000 DM DM 27 M NA +112 2001.042 53.71184 1.000000 1.0000000 DM DM 27 M NA +113 2002.042 54.71184 2.000000 0.2881588 DM DM 27 M NA +114 2002.331 55.00000 2.288159 2.7118412 DM DM 27 M NA +115 2005.042 57.71184 5.000000 2.2881588 DM DM 27 M NA +116 2007.331 60.00000 7.288159 2.6666667 DM DM 27 M NA +229 1998.249 61.85626 0.000000 1.0000000 DM DM 52 F 2004.804 +230 1999.249 62.85626 1.000000 1.0000000 DM DM 52 F 2004.804 +231 2000.249 63.85626 2.000000 1.1437372 DM DM 52 F 2004.804 +232 2001.393 65.00000 3.143737 1.8562628 DM DM 52 F 2004.804 +233 2003.249 66.85626 5.000000 1.5544148 DM Ins 52 F 2004.804 +43629 2004.804 68.41068 6.554415 1.5893224 Ins Ins 52 F 2004.804 +43630 2006.393 70.00000 8.143737 1.8562628 Ins Ins 52 F 2004.804 +43631 2008.249 71.85626 10.000000 1.7481177 Ins Ins 52 F 2004.804 +2084 1998.260 62.37919 0.000000 1.0000000 DM DM 484 F 2003.960 +2085 1999.260 63.37919 1.000000 1.0000000 DM DM 484 F 2003.960 +2086 2000.260 64.37919 2.000000 0.6208077 DM DM 484 F 2003.960 +2087 2000.881 65.00000 2.620808 2.3791923 DM DM 484 F 2003.960 +2088 2003.260 67.37919 5.000000 0.7002053 DM Ins 484 F 2003.960 +45484 2003.960 68.07940 5.700205 1.9206023 Ins Ins 484 F 2003.960 +45485 2005.881 70.00000 7.620808 2.3791923 Ins Ins 484 F 2003.960 +45486 2008.260 72.37919 10.000000 0.9295003 Ins Dead 484 F 2003.960 +\end{Soutput} +\end{Schunk} +Thus it does not matter in which order we use \texttt{splitLexis} and +\texttt{cutLexis}. Mathematicians would say that \texttt{splitLexis} +and \texttt{cutLexis} are commutative. + +Note in \texttt{lex.id}=484, that follow-up subsequent to the event is +classified as being in state \texttt{Ins}, but that the final +transition to state \texttt{Dead} is preserved. This is the point of +the \texttt{precursor.states=} argument. It names the states (in this +case \texttt{DM}) that will be over-written by \texttt{new.state} (in +this case \texttt{Ins}), while the state \texttt{Dead} should not be +updated even if it is after the time where the persons moves to state +\texttt{Ins}. In other words, only state \texttt{DM} is a precursor to +state \texttt{Ins}, state \texttt{Dead} is always subsequent to state +\texttt{Ins}. + +Note that we defined a new time scale, \texttt{tfI}, using the argument +\texttt{new.scale=tfI}. This has a special status relative to the other +three time scales, it is defined as time since entry into a state, +namely \texttt{Ins}, this is noted in the time scale part of the +summary of \texttt{Lexis} object --- the information sits in the +attribute \texttt{time.since} of the \texttt{Lexis} object, which can +be accessed by the function \texttt{timeSince()} or through the \texttt{summary()}: +\begin{Schunk} +\begin{Sinput} +> summary( dmS2C, timeScales=TRUE ) +\end{Sinput} +\begin{Soutput} +Transitions: + To +From DM Ins Dead Records: Events: Risk time: Persons: + DM 35135 1694 2048 38877 3742 45885.49 9899 + Ins 0 5762 451 6213 451 8387.77 1791 + Sum 35135 7456 2499 45090 4193 54273.27 9996 + +Timescales: + per age tfD tfI + "" "" "" "Ins" +\end{Soutput} +\end{Schunk} +Finally we can get a quick overview of the states and transitions by +using \texttt{boxes} --- \texttt{scale.R} scales transition rates to +rates per 1000 PY: +\begin{Schunk} +\begin{Sinput} +> boxes( dmC, boxpos=TRUE, scale.R=1000, show.BE=TRUE ) +\end{Sinput} +\end{Schunk} +\insfig{box1}{0.8}{States, person years, transitions and rates in the + cut dataset. The numbers \emph{in} the boxes are person-years and + the number of persons \texttt{B}eginning, resp. \texttt{E}nding + their follow-up in each state (triggered by \textrm{\tt + show.BE=TRUE}). The numbers at the arrows are the number of + transitions and transition rates per 1000 (triggered by \textrm{\tt + scale.R=1000}).} + +\chapter{Modeling rates from \texttt{Lexis} objects} + +\section{Covariates} + +In the dataset \texttt{dmS2C} there are three types of covariates that +can be used to describe mortality rates: +\begin{enumerate} +\item time-dependent covariates +\item time scales +\item fixed covariates +\end{enumerate} + +There is only one time-dependent covariate here, namely +\texttt{lex.Cst}, the current state of the person's follow up; it +takes the values \texttt{DM} and \texttt{Ins} according to whether the +person has ever purchased insulin at a given time of follow-up. + +The time-scales are obvious candidates for explanatory variables for +the rates, notably age and time from diagnosis (duration of diabetes) +and insulin. + \subsection{Time scales as covariates} -If we want to model the effect of these timescale variables on +If we want to model the effect of the time scale variables on occurrence rates, we will for each interval use either the value of the left endpoint in each interval or the middle. There is a function \texttt{timeBand} which returns either of these: \begin{Schunk} \begin{Sinput} -> timeBand( nicM, "age", "middle" )[1:20] +> timeBand( dmS2C, "age", "middle" )[1:10] \end{Sinput} \begin{Soutput} - [1] 45 45 55 65 75 85 95 45 55 55 65 55 45 55 55 65 55 55 65 75 + [1] 57.5 57.5 62.5 62.5 62.5 67.5 67.5 62.5 67.5 67.5 \end{Soutput} \begin{Sinput} -> # For nice printing and column labelling use the data.frame() function: -> data.frame( nicS2[,c("lex.id","per","age","tfh","lex.dur")], -+ mid.age=timeBand( nicS2, "age", "middle" ), -+ mid.t=timeBand( nicS2, "tfh", "middle" ), -+ left.t=timeBand( nicS2, "tfh", "left" ), -+ right.t=timeBand( nicS2, "tfh", "right" ), -+ fact.t=timeBand( nicS2, "tfh", "factor" ) )[1:20,] -\end{Sinput} -\begin{Soutput} - lex.id per age tfh lex.dur mid.age mid.t left.t right.t fact.t -1 1 1934.246 45.2273 27.7465 2.2535 45 25 20 30 (20,30] -2 1 1936.500 47.4808 30.0000 2.5192 45 65 30 100 (30,100] -3 1 1939.019 50.0000 32.5192 10.0000 55 65 30 100 (30,100] -4 1 1949.019 60.0000 42.5192 10.0000 65 65 30 100 (30,100] -5 1 1959.019 70.0000 52.5192 10.0000 75 65 30 100 (30,100] -6 1 1969.019 80.0000 62.5192 10.0000 85 65 30 100 (30,100] -7 1 1979.019 90.0000 72.5192 2.9808 95 65 30 100 (30,100] -8 2 1934.246 48.2684 25.0820 1.7316 45 25 20 30 (20,30] -9 2 1935.978 50.0000 26.8136 3.1864 55 25 20 30 (20,30] -10 2 1939.164 53.1864 30.0000 6.8136 55 65 30 100 (30,100] -11 2 1945.978 60.0000 36.8136 3.2712 65 65 30 100 (30,100] -12 3 1934.246 52.9917 27.7465 1.1727 55 25 20 30 (20,30] -13 4 1934.246 47.9067 23.1861 2.0933 45 25 20 30 (20,30] -14 4 1936.340 50.0000 25.2794 4.7206 55 25 20 30 (20,30] -15 4 1941.060 54.7206 30.0000 5.2794 55 65 30 100 (30,100] -16 4 1946.340 60.0000 35.2794 9.6794 65 65 30 100 (30,100] -17 5 1934.246 54.7465 24.7890 5.2110 55 25 20 30 (20,30] -18 5 1939.457 59.9575 30.0000 0.0425 55 65 30 100 (30,100] -19 5 1939.500 60.0000 30.0425 10.0000 65 65 30 100 (30,100] -20 5 1949.500 70.0000 40.0425 6.8442 75 65 30 100 (30,100] -\end{Soutput} -\end{Schunk} -Note that these are characteristics of the intervals defined by -\texttt{breaks=}, \emph{not} the midpoints nor left or right endpoints -of the actual follow-up intervals (which would be \texttt{tfh} and -\texttt{tfh+lex.dur}, respectively). +> # For nice printing and column labelling we use the data.frame() function: +> data.frame( dmS2C[,c("per","age","tfD","lex.dur")], ++ mid.age=timeBand( dmS2C, "age", "middle" ), ++ mid.t=timeBand( dmS2C, "tfD", "middle" ), ++ left.t=timeBand( dmS2C, "tfD", "left" ), ++ right.t=timeBand( dmS2C, "tfD", "right" ), ++ fact.t=timeBand( dmS2C, "tfD", "factor" ) )[1:15,] +\end{Sinput} +\begin{Soutput} + per age tfD lex.dur mid.age mid.t left.t right.t fact.t +1 1998.917 58.66119 0.0000000 1.00000000 57.5 0.5 0 1 (0,1] +2 1999.917 59.66119 1.0000000 0.33880903 57.5 1.5 1 2 (1,2] +3 2000.256 60.00000 1.3388090 0.66119097 62.5 1.5 1 2 (1,2] +4 2000.917 60.66119 2.0000000 3.00000000 62.5 3.5 2 5 (2,5] +5 2003.917 63.66119 5.0000000 1.33880903 62.5 7.5 5 10 (5,10] +6 2005.256 65.00000 6.3388090 3.66119097 67.5 7.5 5 10 (5,10] +7 2008.917 68.66119 10.0000000 1.08008214 67.5 15.0 10 20 (10,20] +8 2003.309 64.09035 0.0000000 0.90965092 62.5 0.5 0 1 (0,1] +9 2004.218 65.00000 0.9096509 0.09034908 67.5 0.5 0 1 (0,1] +10 2004.309 65.09035 1.0000000 1.00000000 67.5 1.5 1 2 (1,2] +11 2005.309 66.09035 2.0000000 3.00000000 67.5 3.5 2 5 (2,5] +12 2008.309 69.09035 5.0000000 0.90965092 67.5 7.5 5 10 (5,10] +13 2009.218 70.00000 5.9096509 0.77891855 72.5 7.5 5 10 (5,10] +14 2004.552 86.25051 0.0000000 1.00000000 87.5 0.5 0 1 (0,1] +15 2005.552 87.25051 1.0000000 1.00000000 87.5 1.5 1 2 (1,2] +\end{Soutput} +\end{Schunk} +Note that the values of these functions are characteristics of the +intervals defined by \texttt{breaks=}, \emph{not} the midpoints nor +left or right endpoints of the actual follow-up intervals (which would +be \texttt{tfD} and \texttt{tfD+lex.dur}, respectively). -These functions are intended for modeling timescale variables as +These functions are intended for modeling time scale variables as factors (categorical variables) in which case the coding must be independent of the censoring and mortality pattern --- it should only -depend on the chosen grouping of the timescale. Modeling timescales as +depend on the chosen grouping of the time scale. Modeling time scales as \emph{quantitative} should not be based on these codings but directly -on the values of the time-scale variables. +on the values of the time-scale variables, notably the left endpoints +of the intervals. \subsection{Differences between time scales} -The midpoint (as well as the left and right interval endpoint) should -be used with caution if the variable \texttt{age1st} is modeled too; -the age at hire is logically equal to the difference between current -age (\texttt{age}) and time since hire (\texttt{thf}): +Apparently, the only fixed variable is \texttt{sex}, but formally the +dates of birth (\texttt{dobth}), diagnosis (\texttt{dodm}) and first +insulin purchase (\texttt{doins}) are fixed covariates too. They can +be constructed as origins of time scales referred to the calendar time +scale. Likewise, and possibly of greater interest, we can consider +these origins on the age scale, calculated as the difference between +age and another time scale. + +These would then be age at birth (hardly relevant), age at diabetes +diagnosis and age at insulin treatment. + +\subsection{Keeping the relation between time scales} + +The midpoint (as well as the right interval endpoint) should be used +with caution if the variable age at diagnosis \texttt{dodm-dobth} is +modeled too; the age at diabetes is logically equal to the difference +between current age (\texttt{age}) and time since diabetes diagnosis +(\texttt{tfD}): \begin{Schunk} \begin{Sinput} -> summary( (nicS2$age-nicS2$tfh) - nicS2$age1st ) +> summary( (dmS2$age-dmS2$tfD) - (dmS2$dodm-dmS2$dobth) ) \end{Sinput} \begin{Soutput} - Min. 1st Qu. Median Mean 3rd Qu. Max. --7.105e-15 0.000e+00 0.000e+00 2.214e-17 0.000e+00 7.105e-15 + Min. 1st Qu. Median Mean 3rd Qu. Max. + 0 0 0 0 0 0 \end{Soutput} \end{Schunk} -This calculation refer to the \emph{start} of each interval --- the -time scale variables in the \texttt{Lexis} object. But when using the -middle of the intervals, this relationship is not preserved: +This calculation refers to the \emph{start} of each interval --- which +are in the time scale variables in the \texttt{Lexis} object. But when +using the middle of the intervals, this relationship is not preserved: \begin{Schunk} \begin{Sinput} -> summary( timeBand( nicS2, "age", "middle" ) - -+ timeBand( nicS2, "tfh", "middle" ) - nicS2$age1st ) +> summary( timeBand( dmS2, "age", "middle" ) - ++ timeBand( dmS2, "tfD", "middle" ) - (dmS2$dodm-dmS2$dobth) ) \end{Sinput} \begin{Soutput} Min. 1st Qu. Median Mean 3rd Qu. Max. --39.958 -24.178 -5.103 -10.129 2.575 12.519 +-7.4870 -2.0862 -0.3765 Inf 1.3641 Inf \end{Soutput} \end{Schunk} -If all three variable are to be included in a model, you must make +If all three variables are to be included in a model, we must make sure that the \emph{substantial} relationship between the variables be -maintained. One way is to recompute age at first hire from the two +maintained. One way is to recompute age at diabetes diagnosis from the two midpoint variables, but more straightforward would be to use the left endpoint of the intervals, that is the time scale variables in the -\texttt{Lexis} object. The latter approach however requires that the -follow-up is split in fairly small chunks. +\texttt{Lexis} object. -\section{Cutting follow up time at a specific date} - -If we have a recording of the date of a specific event as for example -recovery or relapse, we may classify follow-up time as being before or -after this intermediate event, but it requires that follow-up records -that straddle the event be cut into two record. This is achieved with -the function \texttt{cutLexis}, which takes three arguments: the time -point, the timescale, and the value of the (new) state following the -date. - -Now we define the age for the nickel workers where the cumulative -exposure exceeds 50 exposure years: -\begin{Schunk} -\begin{Sinput} -> subset( nicL, id %in% 8:10 ) -\end{Sinput} -\begin{Soutput} - per age tfh lex.dur lex.Cst lex.Xst lex.id id icd exposure dob -4 1934.246 47.9067 23.1861 21.7727 0 0 4 8 527 9 1886.340 -5 1934.246 54.7465 24.7890 22.0977 0 0 5 9 150 0 1879.500 -6 1934.246 44.3314 23.0437 18.2099 0 1 6 10 163 2 1889.915 - age1st agein ageout -4 24.7206 47.9067 69.6794 -5 29.9575 54.7465 76.8442 -6 21.2877 44.3314 62.5413 -\end{Soutput} -\begin{Sinput} -> agehi <- nicL$age1st + 50 / nicL$exposure -> nicC <- cutLexis( data = nicL, -+ cut = agehi, -+ timescale = "age", -+ new.state = 2, -+ precursor.states = 0 ) -> subset( nicC, id %in% 8:10 ) -\end{Sinput} -\begin{Soutput} - per age tfh lex.dur lex.Cst lex.Xst lex.id id icd exposure dob -683 1934.246 47.9067 23.1861 21.7727 2 2 4 8 527 9 1886.340 -5 1934.246 54.7465 24.7890 22.0977 0 0 5 9 150 0 1879.500 -6 1934.246 44.3314 23.0437 1.9563 0 2 6 10 163 2 1889.915 -685 1936.203 46.2877 25.0000 16.2536 2 1 6 10 163 2 1889.915 - age1st agein ageout -683 24.7206 47.9067 69.6794 -5 29.9575 54.7465 76.8442 -6 21.2877 44.3314 62.5413 -685 21.2877 44.3314 62.5413 -\end{Soutput} -\end{Schunk} -(The \texttt{precursor.states=} argument is explained below). Note -that individual 6 has had his follow-up split at 25 years since hire -where 50 exposure-years were attained. This could also have been -achieved in the split dataset \texttt{nicS2} instead of \texttt{nicL}, -try: -\begin{Schunk} -\begin{Sinput} -> subset( nicS2, id %in% 8:10 ) -\end{Sinput} -\begin{Soutput} - lex.id per age tfh lex.dur lex.Cst lex.Xst id icd exposure dob -13 4 1934.246 47.9067 23.1861 2.0933 0 0 8 527 9 1886.340 -14 4 1936.340 50.0000 25.2794 4.7206 0 0 8 527 9 1886.340 -15 4 1941.060 54.7206 30.0000 5.2794 0 0 8 527 9 1886.340 -16 4 1946.340 60.0000 35.2794 9.6794 0 0 8 527 9 1886.340 -17 5 1934.246 54.7465 24.7890 5.2110 0 0 9 150 0 1879.500 -18 5 1939.457 59.9575 30.0000 0.0425 0 0 9 150 0 1879.500 -19 5 1939.500 60.0000 30.0425 10.0000 0 0 9 150 0 1879.500 -20 5 1949.500 70.0000 40.0425 6.8442 0 0 9 150 0 1879.500 -21 6 1934.246 44.3314 23.0437 5.6686 0 0 10 163 2 1889.915 -22 6 1939.915 50.0000 28.7123 1.2877 0 0 10 163 2 1889.915 -23 6 1941.203 51.2877 30.0000 8.7123 0 0 10 163 2 1889.915 -24 6 1949.915 60.0000 38.7123 2.5413 0 1 10 163 2 1889.915 - age1st agein ageout -13 24.7206 47.9067 69.6794 -14 24.7206 47.9067 69.6794 -15 24.7206 47.9067 69.6794 -16 24.7206 47.9067 69.6794 -17 29.9575 54.7465 76.8442 -18 29.9575 54.7465 76.8442 -19 29.9575 54.7465 76.8442 -20 29.9575 54.7465 76.8442 -21 21.2877 44.3314 62.5413 -22 21.2877 44.3314 62.5413 -23 21.2877 44.3314 62.5413 -24 21.2877 44.3314 62.5413 -\end{Soutput} -\begin{Sinput} -> agehi <- nicS2$age1st + 50 / nicS2$exposure -> nicS2C <- cutLexis( data = nicS2, -+ cut = agehi, -+ timescale = "age", -+ new.state = 2, -+ precursor.states = 0 ) -> subset( nicS2C, id %in% 8:10 ) -\end{Sinput} -\begin{Soutput} - lex.id per age tfh lex.dur lex.Cst lex.Xst id icd exposure dob -3142 4 1934.246 47.9067 23.1861 2.0933 2 2 8 527 9 1886.340 -3143 4 1936.340 50.0000 25.2794 4.7206 2 2 8 527 9 1886.340 -3144 4 1941.060 54.7206 30.0000 5.2794 2 2 8 527 9 1886.340 -3145 4 1946.340 60.0000 35.2794 9.6794 2 2 8 527 9 1886.340 -17 5 1934.246 54.7465 24.7890 5.2110 0 0 9 150 0 1879.500 -18 5 1939.457 59.9575 30.0000 0.0425 0 0 9 150 0 1879.500 -19 5 1939.500 60.0000 30.0425 10.0000 0 0 9 150 0 1879.500 -20 5 1949.500 70.0000 40.0425 6.8442 0 0 9 150 0 1879.500 -21 6 1934.246 44.3314 23.0437 1.9563 0 2 10 163 2 1889.915 -3150 6 1936.203 46.2877 25.0000 3.7123 2 2 10 163 2 1889.915 -3151 6 1939.915 50.0000 28.7123 1.2877 2 2 10 163 2 1889.915 -3152 6 1941.203 51.2877 30.0000 8.7123 2 2 10 163 2 1889.915 -3153 6 1949.915 60.0000 38.7123 2.5413 2 1 10 163 2 1889.915 - age1st agein ageout -3142 24.7206 47.9067 69.6794 -3143 24.7206 47.9067 69.6794 -3144 24.7206 47.9067 69.6794 -3145 24.7206 47.9067 69.6794 -17 29.9575 54.7465 76.8442 -18 29.9575 54.7465 76.8442 -19 29.9575 54.7465 76.8442 -20 29.9575 54.7465 76.8442 -21 21.2877 44.3314 62.5413 -3150 21.2877 44.3314 62.5413 -3151 21.2877 44.3314 62.5413 -3152 21.2877 44.3314 62.5413 -3153 21.2877 44.3314 62.5413 -\end{Soutput} -\end{Schunk} -The same results would have emerged if we had used the \texttt{nicM} -dataset (the \texttt{data.table} object). Mathematicians would say -that \texttt{splitLexis} and \texttt{cutLexis} are commutative. - -Note that follow-up subsequent to the event is classified as being in -state 2, but that the final transition to state 1 (death from lung -cancer) is preserved. This is the point of the -\texttt{precursor.states=} argument. It names the states (in this case -0, ``Alive'') that will be over-written by \texttt{new.state} (in this -case state 2, ``High exposure''), while state 1 (``Dead'') should not -be updated even if it is after the time where the persons moves to -state 2. In other words, only state 0 is a precursor to state 2, state -1 is always subsequent to state 2. Even if you at a high exposure -level, death is pretty final. - -If the intermediate event is to be used as a time-dependent variable -in a Cox-model, then \texttt{lex.Cst} should be used as the -time-dependent variable, and \texttt{lex.Xst==1} as the event. - -\chapter{Modeling rates} - -\section{Background} - -The purpose of subdividing follow-up data is to be able to model the -effects of the time scale variables as parametric functions. +If we dissolve the relationship between the variables \texttt{age}, +\texttt{tfD} and age at diagnosis by grouping we may obtain +identifiability of the three separate effects, but it will be at the +price of an arbitrary allocation of a linear trend between them. + +For the sake of clarity, consider current age, $a$, duration of +disease, $d$ and age at diagnosis $e$, where +\[ + \text{current age} = + \text{age at diagnosis} + + \text{disease duration}, + \quad \text{\ie} \quad a=e+d \quad + \Leftrightarrow \quad e+d-a=0 +\] +If we model the effect of the quantitative variables $a$, $e$ and $d$ +on the log-rates by three functions $f$, $g$ and $h$: +$ \log(\lambda)=f(a)+g(d)+h(e) $ +then for any $\kappa$: +\begin{align*} + \log(\lambda) & = f(a)+g(d)+h(e)+\kappa(e+d-a)\\ + & = + \big(f(a)-\kappa a \big)+ + \big(g(d)+\kappa d \big)+ + \big(h(e)+\kappa e \big) \\ +& = \tilde f(a)+ \tilde g(d)+ \tilde h(e) +\end{align*} +In practical modeling this will turn op as a singular model matrix +with one parameter aliased, corresponding to some arbitrarily chosen +value of $\kappa$ (depending on software conventions for singular +models). This is well known from age-period-cohort models. + +Thus we see that we can move any slope around between the three terms, +so if we achieve identifiability by using grouping of one of the +variables we will in reality have settled for a particular value of +$\kappa$, without known why we chose just that. The solution is to +resort to predictions which are independent of the particular +parametrization or choose a particular parametrization with explicit constraints. + +\section{Modeling of rates} + +As mentioned, the purpose of subdividing follow-up data in smaller +intervals is to be able to model effects of time scale variables as +parametric functions. When we split along a time scale we can get +intervals that are as small as we want; if they are sufficiently +small, an assumption of constant rates in each interval becomes +reasonable. In a model that assumes a constant occurrence rate in each of the intervals the likelihood contribution from each interval is the same as the likelihood contribution from a Poisson variate $D$, say, with -mean $\lambda\ell$ where $\lambda$ is the rate and $\ell$ is the +mean $\lambda \ell$ where $\lambda$ is the rate and $\ell$ is the interval length, and where the value of the variate $D$ is 1 or 0 according to whether an event has occurred or not. Moreover, the likelihood contributions from all follow-up intervals from a single @@ -688,652 +862,1218 @@ survived till the start of the interval in question). This implies that the total contribution to the likelihood from a single person is a product of terms, and hence the same as the likelihood of a number -of independent Poisson terms, one from each interval. +of independent Poisson terms, one from each interval. + +Note that variables are neither Poisson distributed (\eg they can only +ever assume values 0 or 1) nor independent --- it is only the +likelihood for the follow-up data that happens to be the same as the +likelihood from independent Poisson variates. Different models can +have the same likelihood, a model cannot be inferred from the +likelihood. + +Parametric modeling of the rates is obtained by using the +\emph{values} of the time scales for each interval as \emph{quantitative} +explanatory variables, using for example splines. And of course also +the values of the fixed covariates and the time-dependent variables +for each interval. Thus the model will be one where the rate is +assumed constant in each (small) interval, but where a parametric form +of the \emph{size} of the rate in each interval is imposed by the +model, using the time scale as a quantitative covariate. + +\subsection{Interval length} + +In the first chapter we illustrated cutting and splitting by listing +the results for a few individuals across a number of intervals. For +illustrational purposes we used 5-year age bands to avoid excessive +listings, but since the doubling time for mortality on the age scale +is only slightly larger than 5 years, the assumption about constant +rates in each interval would be pretty far fetched if we were to use 5 +year intervals. + +Thus, for modeling purposes we split the follow-up in 3 month +intervals. When we use intervals of 3 month's length it is superfluous +to split along multiple time scales --- the precise location of +tightly spaced splits will be irrelevant from any practical point of +view. \texttt{splitLexis} and \texttt{splitMulti} will allocate the +actual split times for all of the time scale variables, so these can be +used directly in modeling. + +So we split the cut dataset in 3-month intervals along the age +scale: +\begin{Schunk} +\begin{Sinput} +> dmCs <- splitMulti( dmC, age = seq(0,110,1/4) ) +> summary( dmCs, t=T ) +\end{Sinput} +\begin{Soutput} +Transitions: + To +From DM Ins Dead Records: Events: Risk time: Persons: + DM 189669 1694 2048 193411 3742 45885.49 9899 + Ins 0 34886 451 35337 451 8387.77 1791 + Sum 189669 36580 2499 228748 4193 54273.27 9996 -Parametric modeling of the rates is obtained by using the \emph{value} -of the timescale for each interval as quantitative explanatory -variables, using for example splines. Thus the model will be one where -the rate is assumed constant in each interval, but where a parametric -form of the \emph{size} of the rate in each interval is imposed by the -model, using the timescale as a covariate. - -\section{Practicalities} - -In the nickel worker study we might want to look at the effects of age -and time since hire. If we want to use splines we must allocate knots -for anchoring the splines at each of the time scales, either by some +Timescales: + per age tfD tfI + "" "" "" "Ins" +\end{Soutput} +\end{Schunk} +We see that we now have 228,748 records and 9996 persons, so about 23 +records per person. The total risk time is 54,275 years, a bit less +than 3 months per record as expected. + +\subsection{Practicalities for splines} + +In this study we want to look at how mortality depend on age +(\texttt{age}) and time since start of insulin use (\texttt{tfI}). If +we want to use splines in the description we must allocate knots for +anchoring the splines at each of the time scales, either by some \textit{ad hoc} method or by using some sort of penalized splines. The -letter will not be treated here. +latter will not be treated here; it belongs in the realm of the +\texttt{mgcv} package. Here we shall use the former approach and allocate 5 knots on each of -the two time-scales. We allocate knots so that we have the event evenly -distributed between the knots: +the time-scales. We allocate knots so that we have the events evenly +distributed between the knots. Since the insulin state starts at 0 for +all individuals we include 0 as the first knot, such that any set of natural +splines with these knots will have the value 0 at 0 on the time +scale. \begin{Schunk} \begin{Sinput} -> ( a.kn <- with( subset( nicM, lex.Xst==1 ), quantile( age+lex.dur, (1:5-0.5)/5 ) ) ) +> ( a.kn <- with( subset( dmCs, lex.Xst=="Dead" ), ++ quantile( age+lex.dur, (1:5-0.5)/5 ) ) ) \end{Sinput} \begin{Soutput} 10% 30% 50% 70% 90% -50.11874 55.61674 61.09590 64.88704 73.32220 +60.29350 71.31937 77.72758 82.72745 89.86393 \end{Soutput} \begin{Sinput} -> ( t.kn <- with( subset( nicM, lex.Xst==1 ), quantile( tfh+lex.dur, (1:5-0.5)/5 ) ) ) +> ( i.kn <- c( 0, ++ with( subset( dmCs, lex.Xst=="Dead" & lex.Cst=="Ins" ), ++ quantile( tfI+lex.dur, (1:4)/5 ) ) ) ) \end{Sinput} \begin{Soutput} - 10% 30% 50% 70% 90% -24.25572 30.02202 34.00440 39.84592 45.95512 + 20% 40% 60% 80% +0.0000000 0.3093771 1.1307324 2.5489391 4.9117043 \end{Soutput} \end{Schunk} -In the \texttt{Epi} package there is a convenience wrapper for the -\texttt{n}atural \texttt{s}pline generator \texttt{ns}, \texttt{Ns}, -that takes the smallest and the largest of a set of supplied knots to -be the boundary knots. +In the \texttt{Epi} package there is a convenience wrapper, +\texttt{Ns}, for the \texttt{n}atural \texttt{s}pline generator +\texttt{ns}, that takes the smallest and the largest of a set of +supplied knots to be the boundary knots, so the explicit definition of +the boundary knots becomes superfluous. -\section{Models for rates} +Note that it is a feature of the \texttt{Ns} (via the features of +\texttt{ns}) that any generated spline function is 0 at the leftmost +knot. -\subsection{One time scale} +\subsection{Poisson model} -A model that only models lung cancer mortality -rates as a function of age would then be: +A model that describes mortality rates as only a function of age would +then be: \begin{Schunk} \begin{Sinput} -> ma <- glm( (lex.Xst==1) ~ Ns(age,knots=a.kn), -+ family = poisson, -+ offset = log(lex.dur), -+ data = nicM ) +> ma <- glm( (lex.Xst=="Dead") ~ Ns(age,knots=a.kn), ++ family = poisson, ++ offset = log(lex.dur), ++ data = dmCs ) > summary( ma ) \end{Sinput} \begin{Soutput} Call: -glm(formula = (lex.Xst == 1) ~ Ns(age, knots = a.kn), family = poisson, - data = nicM, offset = log(lex.dur)) +glm(formula = (lex.Xst == "Dead") ~ Ns(age, knots = a.kn), family = poisson, + data = dmCs, offset = log(lex.dur)) Deviance Residuals: Min 1Q Median 3Q Max --0.5074 -0.3896 -0.2143 -0.1203 3.7904 +-0.5883 -0.1688 -0.1144 -0.0766 4.5958 Coefficients: Estimate Std. Error z value Pr(>|z|) -(Intercept) -4.6591 0.1324 -35.199 < 2e-16 -Ns(age, knots = a.kn)1 0.1671 0.2970 0.563 0.57371 -Ns(age, knots = a.kn)2 -0.1315 0.3727 -0.353 0.72411 -Ns(age, knots = a.kn)3 0.7827 0.2885 2.713 0.00667 -Ns(age, knots = a.kn)4 -0.3717 0.2780 -1.337 0.18125 +(Intercept) -3.82830 0.03861 -99.16 <2e-16 +Ns(age, knots = a.kn)1 1.36254 0.08723 15.62 <2e-16 +Ns(age, knots = a.kn)2 1.49446 0.06845 21.83 <2e-16 +Ns(age, knots = a.kn)3 2.63557 0.07058 37.34 <2e-16 +Ns(age, knots = a.kn)4 1.94173 0.05769 33.66 <2e-16 (Dispersion parameter for poisson family taken to be 1) - Null deviance: 1024.38 on 3128 degrees of freedom -Residual deviance: 979.16 on 3124 degrees of freedom -AIC: 1263.2 + Null deviance: 27719 on 228747 degrees of freedom +Residual deviance: 25423 on 228743 degrees of freedom +AIC: 30431 -Number of Fisher Scoring iterations: 7 +Number of Fisher Scoring iterations: 8 \end{Soutput} \end{Schunk} The offset, \texttt{log(lex.dur)} comes from the fact that the -likelihood for the follow-up data is the same as that for independent -Poisson variates with mean $\lambda\ell$, and that the default link -function for the Poisson family is the log, so that we are using a -linear model for the log-mean, that is $\log(\lambda) + \log(\ell)$. -But when we want a model for the log-rate ($\log(\lambda)$), the term -$\log(\ell)$ must be included as a covariate with regression -coefficient fixed to 1; a so-called offset. - -The parameters from the model do not have any direct interpretation -\textit{per se}, but we can compute the estimated lung cancer incidence -rates for a range of ages using \texttt{ci.pred} with a suitably -defined prediction data frame. Note that we must specify all -covariates in the model, also the variable in the offset, -\texttt{lex.dur}. We set the latter to 1000, because we want the lung -cancer mortality rates per 1000 PY. By default \texttt{ci.pred} yields a -prediction on the response-scale, that is the rate-scale: -\begin{Schunk} -\begin{Sinput} -> nd <- data.frame( age=40:85, lex.dur=1000 ) -> pr.a <- ci.pred( ma, newdata = nd ) -> matplot( nd$age, pr.a, -+ type="l", lty=1, col=1, lwd=c(3,1,1), -+ log="y", xlab="Age (years)", -+ ylab="Lunng cancer mortality per 1000 PY") -\end{Sinput} -\end{Schunk} -\insfig{pr-a}{0.8}{Lung cancer mortality among Nickel smelter workers - by age. We see that the rates increase till about 55 years, and from -then on is approximately flat.} - -\subsection{More time scales} - -There may however also be an effect of time since hire too, so we can -add this term to the model: -\begin{Schunk} -\begin{Sinput} -> mat <- update( ma, . ~ . + Ns(tfh,knots=t.kn) ) -> summary( mat ) +likelihood for the follow-up data during $\ell$ time is the same as +that for independent Poisson variates with mean $\lambda \ell$, and +that the default link function for the Poisson family is the log, so +that we are using a linear model for the log-mean, +$\log(\lambda) + \log(\ell)$. But when we want a model for the +log-rate ($\log(\lambda)$), the term $\log(\ell)$ must still be +included as a covariate, but with regression coefficient fixed to 1; a +so-called \emph{offset}. This is however a technicality; it just +exploits that the likelihood of a particular Poisson model and that of +the rates model is the same. + +In the \texttt{Epi} package is a \texttt{glm} family, \texttt{poisreg} +that has a more intuitive interface, where the response is a 2-column +matrix of events and person-time, respectively. This is in concert +with the fact that the outcome variable in follow-up studies is +bivariate: (event, risk time). +\begin{Schunk} +\begin{Sinput} +> Ma <- glm( cbind(lex.Xst=="Dead",lex.dur) ~ Ns(age,knots=a.kn), ++ family = poisreg, data = dmCs ) +> summary( Ma ) \end{Sinput} \begin{Soutput} Call: -glm(formula = (lex.Xst == 1) ~ Ns(age, knots = a.kn) + Ns(tfh, - knots = t.kn), family = poisson, data = nicM, offset = log(lex.dur)) +glm(formula = cbind(lex.Xst == "Dead", lex.dur) ~ Ns(age, knots = a.kn), + family = poisreg, data = dmCs) Deviance Residuals: Min 1Q Median 3Q Max --0.6308 -0.3730 -0.2170 -0.1180 3.8903 +-0.5883 -0.1688 -0.1144 -0.0766 4.5958 Coefficients: Estimate Std. Error z value Pr(>|z|) -(Intercept) -4.65125 0.14844 -31.335 <2e-16 -Ns(age, knots = a.kn)1 0.19029 0.32601 0.584 0.5594 -Ns(age, knots = a.kn)2 0.04239 0.40857 0.104 0.9174 -Ns(age, knots = a.kn)3 0.87848 0.37395 2.349 0.0188 -Ns(age, knots = a.kn)4 0.08124 0.37567 0.216 0.8288 -Ns(tfh, knots = t.kn)1 0.05961 0.45702 0.130 0.8962 -Ns(tfh, knots = t.kn)2 -0.30254 0.39214 -0.771 0.4404 -Ns(tfh, knots = t.kn)3 -0.08144 0.37493 -0.217 0.8281 -Ns(tfh, knots = t.kn)4 -0.63400 0.34055 -1.862 0.0626 +(Intercept) -3.82830 0.03861 -99.15 <2e-16 +Ns(age, knots = a.kn)1 1.36254 0.08723 15.62 <2e-16 +Ns(age, knots = a.kn)2 1.49446 0.06845 21.83 <2e-16 +Ns(age, knots = a.kn)3 2.63557 0.07058 37.34 <2e-16 +Ns(age, knots = a.kn)4 1.94173 0.05769 33.66 <2e-16 (Dispersion parameter for poisson family taken to be 1) - Null deviance: 1024.4 on 3128 degrees of freedom -Residual deviance: 970.7 on 3120 degrees of freedom -AIC: 1262.7 + Null deviance: 27719 on 228747 degrees of freedom +Residual deviance: 25423 on 228743 degrees of freedom +AIC: 30431 Number of Fisher Scoring iterations: 7 \end{Soutput} \end{Schunk} -This model has two time scales, age and time since hire, so it makes -little sense to report the effect of age for a \emph{fixed} value of -time since hire --- the time since hire increases by age. -Instead we can show the mortality rates for persons hired at different -ages, and report the \emph{joint} effect of increasing age and time -since hire. - -In order to get a feeling for the values that can be use we look at \texttt{age1st} +Exploiting the multistate structure in the \texttt{Lexis} object +there is a multistate convenience wrapper for \texttt{glm} with the +\texttt{poisreg} family, that just requires specification of the +transitions in terms of \texttt{from} and \texttt{to}. Although it is +called \texttt{glm.Lexis} it is \emph{not} an S3 method for +\texttt{Lexis} objects: \begin{Schunk} \begin{Sinput} -> summary( nickel$age1st ) +> Xa <- glm.Lexis( dmCs, from="DM", to="Dead", ++ formula = ~ Ns(age,knots=a.kn) ) \end{Sinput} \begin{Soutput} - Min. 1st Qu. Median Mean 3rd Qu. Max. - 10.78 21.80 26.16 26.74 30.63 52.19 +stats::glm Poisson analysis of Lexis object dmCs with log link: +Rates for the transition: DM->Dead +\end{Soutput} +\end{Schunk} +The result is a \texttt{glm} object but with an extra attribute, \texttt{Lexis}: +\begin{Schunk} +\begin{Sinput} +> attr( Xa, "Lexis" ) +\end{Sinput} +\begin{Soutput} +$data +[1] "dmCs" + +$trans +[1] "DM->Dead" + +$formula +~Ns(age, knots = a.kn) + + +$scale +[1] 1 \end{Soutput} \end{Schunk} -Thus we shall show mortality rates in ages 20--90 for persons hired in -ages 15, 25, 35 and 45: +There are similar wrappers for \texttt{gam} and \texttt{coxph} models, +\texttt{gam.Lexis} and \texttt{coxph.Lexis}, but these will not be +elaborated in detail. + +The \texttt{from=} and \texttt{to=} can even be omitted, in which case +all possible transitions \emph{into} any of the absorbing states is +modeled: \begin{Schunk} \begin{Sinput} -> nd <- data.frame( expand.grid( age=c(20:90,NA), age1st=seq(15,45,10) ) ) -> nd <- transform( nd, tfh = ifelse( age > age1st, age-age1st, NA ), -+ lex.dur = 1000 ) -> # makes no sense to have age < age1st -> nd <- transform( nd, age = ifelse( age > age1st, age, NA ) ) -> head( nd ) +> xa <- glm.Lexis( dmCs, formula = ~ Ns(age,knots=a.kn) ) \end{Sinput} \begin{Soutput} - age age1st tfh lex.dur -1 20 15 5 1000 -2 21 15 6 1000 -3 22 15 7 1000 -4 23 15 8 1000 -5 24 15 9 1000 -6 25 15 10 1000 +stats::glm Poisson analysis of Lexis object dmCs with log link: +Rates for transitions: DM->Dead, Ins->Dead \end{Soutput} \end{Schunk} -With this in place we can plot the estimated rates as before, only -now we will get 4 separate lines. The purpose of inserting an -\texttt{NA} on the age-scale in the \texttt{expand.grid} is that the -different instances of \texttt{age1st} be separated by \texttt{NA}s, and -hence will not be connected when we plot the curves. The downside of -this trick is that lines cannot be plotted with different colors or -symbols. +We can check if the four models fitted are the same: \begin{Schunk} \begin{Sinput} -> pr.at <- ci.pred( mat, newdata = nd ) -> matplot( nd$age, pr.at, -+ type="l", lty=1, col=1, lwd=c(3,1,1), -+ log="y", xlab="Age (years)", -+ ylab="Lunng cancer mortality per 1000 PY") +> c( deviance(ma), deviance(Ma), deviance(Xa), deviance(xa) ) \end{Sinput} +\begin{Soutput} +[1] 25422.92 25422.92 20902.31 25422.92 +\end{Soutput} \end{Schunk} -\insfig{pr-at}{0.8}{Lung cancer mortality among Nickel smelter workers - by age and age at hire 15, 25,35 and 45. Each line (except the - first) starts at the age of hire; we see that the later in life you - are hired, the smaller the initial risk, but the higher the eventual - risk of lung cancer death.} +Oops! the model \texttt{Xa} is apparently not the same as the other +three? This is because the explicit specification +\verb|from="DM", to="Dead"|, omits modeling contributions from the +$\mathtt{Ins}\rightarrow\mathtt{Dead}$ transition --- the output +actually said so. The other three models all use both transitions --- +and assume that the two transition rates are the same, \ie that start +of insulin has no effect on mortality. We shall relax this assumption +later. -We can check whether the effect of time since hire is actually -improving the model: +The parameters from the model do not have any direct interpretation +\textit{per se}, but we can compute the estimated mortality rates for +a range of ages using \texttt{ci.pred} with a suitably defined +prediction data frame. + +Note that if we use the \texttt{poisson} family of models, we must +specify all covariates in the model, including the variable in the +offset, \texttt{lex.dur} (remember that this was a covariate with +coefficient fixed at 1). We set the latter to 1000, because we want the +mortality rates per 1000 person-years. Using the \texttt{poisreg} +family, the prediction will ignore any value of \texttt{lex.dur} +specified in the prediction data frame, the returned rates will be per +unit in which \texttt{lex.dur} is recorded. \begin{Schunk} \begin{Sinput} -> anova( ma, mat, test="Chisq" ) +> nd <- data.frame( age=40:85, lex.dur=1000 ) +> pr.0 <- ci.pred( ma, newdata = nd ) # mortality per 100 PY +> pr.a <- ci.pred( Ma, newdata = nd )*1000 # mortality per 100 PY +> summary(pr.0/pr.a) \end{Sinput} \begin{Soutput} -Analysis of Deviance Table + Estimate 2.5% 97.5% + Min. :1 Min. :1 Min. :1 + 1st Qu.:1 1st Qu.:1 1st Qu.:1 + Median :1 Median :1 Median :1 + Mean :1 Mean :1 Mean :1 + 3rd Qu.:1 3rd Qu.:1 3rd Qu.:1 + Max. :1 Max. :1 Max. :1 +\end{Soutput} +\begin{Sinput} +> matshade( nd$age, pr.a, plot=TRUE, ++ type="l", lty=1, ++ log="y", xlab="Age (years)", ++ ylab="DM mortality per 1000 PY") +\end{Sinput} +\end{Schunk} +\insfig{pr-a}{0.8}{Mortality among Danish diabetes patients by age + with 95\% CI as shaded area. We see that the rates increase linearly + on the log-scale, that is exponentially by age.} -Model 1: (lex.Xst == 1) ~ Ns(age, knots = a.kn) -Model 2: (lex.Xst == 1) ~ Ns(age, knots = a.kn) + Ns(tfh, knots = t.kn) - Resid. Df Resid. Dev Df Deviance Pr(>Chi) -1 3124 979.16 -2 3120 970.70 4 8.4626 0.07603 +\section{Time dependent covariate} + +A Poisson model approach to mortality by insulin status, would be to +assume that the rate-ratio between patients on insulin and not on +insulin is a fixed quantity, independent of time since start of insulin, +independent of age. This is commonly termed a proportional hazards +assumption, because the rates (hazards) in the two groups are +proportional along the age (baseline time) scale. +\begin{Schunk} +\begin{Sinput} +> pm <- glm( cbind(lex.Xst=="Dead",lex.dur) ~ Ns(age,knots=a.kn) ++ + lex.Cst + sex, ++ family=poisreg, data = dmCs ) +> round( ci.exp( pm ), 3 ) +\end{Sinput} +\begin{Soutput} + exp(Est.) 2.5% 97.5% +(Intercept) 0.022 0.021 0.024 +Ns(age, knots = a.kn)1 4.248 3.581 5.040 +Ns(age, knots = a.kn)2 5.008 4.376 5.731 +Ns(age, knots = a.kn)3 16.832 14.624 19.373 +Ns(age, knots = a.kn)4 7.994 7.126 8.968 +lex.CstIns 1.985 1.791 2.200 +sexF 0.668 0.617 0.724 \end{Soutput} \end{Schunk} -We see a pretty strong indication that this is the case. +So we see that persons on insulin have about twice the mortality of +persons not on insulin and that women have 2/3 the mortality of men. -\subsection{Difference between time scales} +\subsection{Time since insulin start} -However it might be the case that it really is the age at first hire -that is the main determinant (recall that -$\mathtt{age}-\mathtt{thf}=\mathtt{age1st}$), so we could fit a model -with this variable instead --- a model with only 1 timescale, namely -\texttt{age}. +If we want to test whether the excess mortality depends on the time +since start if insulin treatment, we can add a spline terms in +\texttt{tfI}. But since \texttt{tfI} is a time scale defined as time +since entry into a new state (\texttt{Ins}), the variable \texttt{tfI} +will be missing for those in the \texttt{DM} state, so before modeling +we must set the \texttt{NA}s to 0, which we do with \texttt{tsNA20} +(acronym for \texttt{t}ime\texttt{s}cale \texttt{NA}s to zero): \begin{Schunk} \begin{Sinput} -> ( f.kn <- with( subset( nicM, lex.Xst==1 ), quantile( age1st, (1:5-0.5)/5 ) ) ) +> pm <- glm( cbind(lex.Xst=="Dead",lex.dur) ~ Ns(age,knots=a.kn) ++ + Ns(tfI,knots=i.kn) ++ + lex.Cst + sex, ++ family=poisreg, data = tsNA20(dmCs) ) +\end{Sinput} +\end{Schunk} +As noted before we could do this simpler with \texttt{glm.Lexis}, even +without the \texttt{from=} and \texttt{to=} arguments, because we are +modeling all transitions \emph{into} the absorbing state +(\texttt{Dead}): +\begin{Schunk} +\begin{Sinput} +> Pm <- glm.Lexis( tsNA20(dmCs), ++ form = ~ Ns(age,knots=a.kn) ++ + Ns(tfI,knots=i.kn) ++ + lex.Cst + sex ) \end{Sinput} \begin{Soutput} - 10% 30% 50% 70% 90% -20.25860 22.55422 26.00000 28.36578 33.96910 +stats::glm Poisson analysis of Lexis object tsNA20(dmCs) with log link: +Rates for transitions: DM->Dead, Ins->Dead \end{Soutput} \begin{Sinput} -> maf <- update( ma, . ~ . + Ns(age1st,knots=f.kn) ) -> summary( maf ) +> c( deviance(Pm), deviance(pm) ) \end{Sinput} \begin{Soutput} -Call: -glm(formula = (lex.Xst == 1) ~ Ns(age, knots = a.kn) + Ns(age1st, - knots = f.kn), family = poisson, data = nicM, offset = log(lex.dur)) - -Deviance Residuals: - Min 1Q Median 3Q Max --0.5696 -0.3671 -0.2257 -0.1197 3.7777 - -Coefficients: - Estimate Std. Error z value Pr(>|z|) -(Intercept) -4.62646 0.17564 -26.340 < 2e-16 -Ns(age, knots = a.kn)1 0.21589 0.29742 0.726 0.46792 -Ns(age, knots = a.kn)2 -0.06427 0.37653 -0.171 0.86446 -Ns(age, knots = a.kn)3 0.79456 0.29345 2.708 0.00678 -Ns(age, knots = a.kn)4 -0.31305 0.27976 -1.119 0.26314 -Ns(age1st, knots = f.kn)1 -0.15145 0.38279 -0.396 0.69237 -Ns(age1st, knots = f.kn)2 0.04607 0.27980 0.165 0.86923 -Ns(age1st, knots = f.kn)3 0.26374 0.26156 1.008 0.31331 -Ns(age1st, knots = f.kn)4 -0.22878 0.23117 -0.990 0.32234 +[1] 25096.33 25096.33 +\end{Soutput} +\begin{Sinput} +> identical( model.matrix(Pm), model.matrix(pm) ) +\end{Sinput} +\begin{Soutput} +[1] TRUE +\end{Soutput} +\end{Schunk} +The coding of the effect of \texttt{tfI} is so that the value is 0 at +0, so the meaning of the estimate of the effect of \texttt{lex.Cst} is +the RR between persons with and without insulin, immediately after +start of insulin: +\begin{Schunk} +\begin{Sinput} +> round( ci.exp( Pm, subset="ex" ), 3 ) +\end{Sinput} +\begin{Soutput} + exp(Est.) 2.5% 97.5% +lex.CstIns 5.632 4.430 7.16 +sexF 0.674 0.622 0.73 +\end{Soutput} +\end{Schunk} +We see that the effect of sex is pretty much the same as before, but +the effect of \texttt{lex.Cst} is much larger, it now refers to a +different quantity, namely the RR at \texttt{tfI}=0. If we want to see +the effect of time since insulin, it is best viewed jointly with the +effect of age: +\begin{Schunk} +\begin{Sinput} +> ndI <- data.frame( expand.grid( tfI=c(NA,seq(0,15,0.1)), ++ ai=seq(40,80,10) ), ++ sex="M", ++ lex.Cst="Ins" ) +> ndI <- transform( ndI, age=ai+tfI ) +> head( ndI ) +\end{Sinput} +\begin{Soutput} + tfI ai sex lex.Cst age +1 NA 40 M Ins NA +2 0.0 40 M Ins 40.0 +3 0.1 40 M Ins 40.1 +4 0.2 40 M Ins 40.2 +5 0.3 40 M Ins 40.3 +6 0.4 40 M Ins 40.4 +\end{Soutput} +\begin{Sinput} +> ndA <- data.frame( age= seq(40,100,0.1), tfI=0, lex.Cst="DM", sex="M" ) +> pri <- ci.pred( Pm, ndI ) * 1000 +> pra <- ci.pred( Pm, ndA ) * 1000 +> matshade( ndI$age, pri, plot=TRUE, las=1, ++ xlab="Age (years)", ylab="DM mortality per 1000 PY", ++ log="y", lty=1, col="blue" ) +> matshade( ndA$age, pra ) +\end{Sinput} +\end{Schunk} +\insfig{ins-time}{0.8}{Mortality rates of persons on insulin, starting +insulin at ages 40,50,\ldots,80 (blue), compared with persons not on +insulin (black curve). Shaded areas are 95\% CI.} -(Dispersion parameter for poisson family taken to be 1) +In figure \ref{fig:ins-time}, p. \pageref{fig:ins-time}, we see that +mortality is high just after insulin start, but falls by almost a +factor 3 during the first year. Also we see that there is a tendency +that mortality in a given age is smallest for those with the longest +duration of insulin use. - Null deviance: 1024.4 on 3128 degrees of freedom -Residual deviance: 973.2 on 3120 degrees of freedom -AIC: 1265.2 +\section{The Cox model} -Number of Fisher Scoring iterations: 7 +Note that in the Cox-model the age is used as response variable, +slightly counter-intuitive. Hence the age part of the linear predictors +is not in that model: +\begin{Schunk} +\begin{Sinput} +> library( survival ) +> cm <- coxph( Surv(age,age+lex.dur,lex.Xst=="Dead") ~ ++ Ns(tfI,knots=i.kn) + lex.Cst + sex, ++ data = tsNA20(dmCs) ) +\end{Sinput} +\end{Schunk} +There is also a multistate wrapper for Cox models, requiring a +l.h.s. side for the \texttt{formula=} argument: +\begin{Schunk} +\begin{Sinput} +> Cm <- coxph.Lexis( tsNA20(dmCs), ++ form= age ~ Ns(tfI,knots=i.kn) + lex.Cst + sex ) +\end{Sinput} +\begin{Soutput} +model survival::coxph analysis of Lexis object tsNA20(dmCs): +Rates for transitions DM->Dead, Ins->Dead \end{Soutput} \begin{Sinput} -> anova( maf, ma, mat, test="Chisq" ) +> cbind( ci.exp( cm ), ci.exp( Cm ) ) \end{Sinput} \begin{Soutput} -Analysis of Deviance Table - -Model 1: (lex.Xst == 1) ~ Ns(age, knots = a.kn) + Ns(age1st, knots = f.kn) -Model 2: (lex.Xst == 1) ~ Ns(age, knots = a.kn) -Model 3: (lex.Xst == 1) ~ Ns(age, knots = a.kn) + Ns(tfh, knots = t.kn) - Resid. Df Resid. Dev Df Deviance Pr(>Chi) -1 3120 973.20 -2 3124 979.16 -4 -5.9624 0.20198 -3 3120 970.70 4 8.4626 0.07603 + exp(Est.) 2.5% 97.5% exp(Est.) 2.5% 97.5% +Ns(tfI, knots = i.kn)1 0.2984062 0.19417148 0.4585960 0.2984062 0.19417148 0.4585960 +Ns(tfI, knots = i.kn)2 0.3871151 0.29011380 0.5165495 0.3871151 0.29011380 0.5165495 +Ns(tfI, knots = i.kn)3 0.1239128 0.06287008 0.2442238 0.1239128 0.06287008 0.2442238 +Ns(tfI, knots = i.kn)4 0.4405121 0.34839015 0.5569932 0.4405121 0.34839015 0.5569932 +lex.CstIns 5.6700284 4.45011220 7.2243623 5.6700284 4.45011220 7.2243623 +lex.CstDead 1.0000000 1.00000000 1.0000000 1.0000000 1.00000000 1.0000000 +sexF 0.6753202 0.62316569 0.7318397 0.6753202 0.62316569 0.7318397 +\end{Soutput} +\end{Schunk} +We can compare the estimates from the Cox model with those from the +Poisson model --- we must add \texttt{NA}s because the Cox-model does +not give the parameters for the baseline time scale (\texttt{age}), but +also remove one of the parameters, because \texttt{coxph} parametrizes +factors (in this case \texttt{lex.Cst}) by all defined levels and not +only by the levels present in the dataset at hand (note the line of +\texttt{1.0000000}s in the print above): +\begin{Schunk} +\begin{Sinput} +> round( cbind( ci.exp( Pm ), ++ rbind( matrix(NA,5,3), ++ ci.exp( cm )[-6,] ) ), 3 ) +\end{Sinput} +\begin{Soutput} + exp(Est.) 2.5% 97.5% exp(Est.) 2.5% 97.5% +(Intercept) 0.022 0.021 0.024 NA NA NA +Ns(age, knots = a.kn)1 4.208 3.546 4.993 NA NA NA +Ns(age, knots = a.kn)2 5.012 4.380 5.736 NA NA NA +Ns(age, knots = a.kn)3 16.560 14.386 19.063 NA NA NA +Ns(age, knots = a.kn)4 7.921 7.061 8.885 NA NA NA +Ns(tfI, knots = i.kn)1 0.298 0.194 0.458 0.298 0.194 0.459 +Ns(tfI, knots = i.kn)2 0.385 0.289 0.514 0.387 0.290 0.517 +Ns(tfI, knots = i.kn)3 0.125 0.064 0.246 0.124 0.063 0.244 +Ns(tfI, knots = i.kn)4 0.438 0.346 0.553 0.441 0.348 0.557 +lex.CstIns 5.632 4.430 7.160 5.670 4.450 7.224 +sexF 0.674 0.622 0.730 0.675 0.623 0.732 \end{Soutput} \end{Schunk} -We see that there is much less indication that the age at first hire has -an effect. +Thus we see that the Poisson and Cox gives pretty much the same +results. You may argue that Cox requires a smaller dataset, because +there is no need to subdivide data in small intervals \emph{before} +insulin use. But certainly the time \emph{after} insulin inception need +to be if the effect of this time should be modeled. -For the sake of completeness we can draw the predicted values from the -\texttt{maf} model on top of the ones from the \textrm{mat} model: +The drawback of the Cox-modeling is that it is not possible to show +the absolute rates as we did in the graph above. + +\section{Marginal effect of time since insulin} + +When we plot the marginal effect of \texttt{tfI} from the two models +we get pretty much the same; here we plot the RR relative to +\texttt{tfI}=2 years. Note that we are deriving the RR as the ratio of +two sets of predictions, from the data frames \texttt{nd} and +\texttt{nr} --- for further details consult the help page for +\texttt{ci.lin}, specifically the use of a list as the +\texttt{ctr.mat} argument: \begin{Schunk} \begin{Sinput} -> pr.af <- ci.pred( maf, newdata = nd ) -> matplot( nd$age, pr.at, -+ type="l", lty=1, col=1, lwd=c(3,1,1), -+ log="y", xlab="Age (years)", -+ ylab="Lunng cancer mortality per 1000 PY") -> matlines( nd$age, pr.af, -+ type="l", lty=1, col=2, lwd=c(3,0,0) ) +> nd <- data.frame( tfI=seq(0,15,,151), lex.Cst="Ins", sex="M" ) +> nr <- data.frame( tfI= 2 , lex.Cst="Ins", sex="M" ) +> ppr <- ci.exp( pm, list(nd,nr), xvars="age" ) +> cpr <- ci.exp( cm, list(nd,nr) ) +> par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +> matshade( nd$tfI, cbind(ppr,cpr), plot=T, ++ lty=c(1,2), log="y", ++ xlab="Time since insulin (years)", ylab="Rate ratio") +> abline( h=1, lty=3 ) \end{Sinput} \end{Schunk} -\insfig{pr-at-af}{0.8}{Lung cancer mortality among Nickel smelter - workers by age and age at hire 15, 25,35 and 45. Each line (except - the first) starts at the age of hire; we see that the later in life - you are hired, the smaller the initial risk, but the higher the - eventual risk of lung cancer death. The red lines are from the model - \textrm{\tt maf} where the lines are constrained to be parallel, and - which gives a worse fit to data.} +\insfig{Ieff}{0.8}{The naked duration effects relative to 2 years of + duration, black from Poisson model, red from Cox model. The two sets + of estimates are identical, and so are the standard errors, so the + two shaded areas overlap almost perfectly.} -\subsection{The complete picture --- exercise} +In figure \ref{fig:Ieff}, p. \pageref{fig:Ieff}, we see that the +duration effect is exactly the same from the two modeling approaches. -We could fit the remaining models where one or more of the three -variables are included, and compare all of them: +We will also want the RR relative to the non-insulin users --- recall these +are coded 0 on the \texttt{tfI} variable: \begin{Schunk} \begin{Sinput} -> maft <- update( mat, . ~ . + Ns(age1st,knots=f.kn) ) -> summary( maft ) +> nd <- data.frame( tfI=seq(0,15,,151), lex.Cst="Ins", sex="M" ) +> nr <- data.frame( tfI= 0 , lex.Cst="DM" , sex="M" ) +> ppr <- ci.exp( pm, list(nd,nr), xvars="age" ) +> cpr <- ci.exp( cm, list(nd,nr) ) +> par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +> matshade( nd$tfI, cbind(ppr,cpr), ++ xlab="Time since insulin (years)", ++ ylab="Rate ratio relative to non-Insulin", ++ lty=c(1,2), log="y", plot=T ) \end{Sinput} -\begin{Soutput} -Call: -glm(formula = (lex.Xst == 1) ~ Ns(age, knots = a.kn) + Ns(tfh, - knots = t.kn) + Ns(age1st, knots = f.kn), family = poisson, - data = nicM, offset = log(lex.dur)) +\end{Schunk} +\insfig{IeffR}{0.8}{Insulin duration effect (state \textrm{\tt Ins}) + relative to no insulin (state \textrm{\tt DM}), black from Poisson + model, red from Cox model. The \emph{shape} is the same as the + previous figure, but the RR is now relative to non-insulin, instead + of relative to insulin users at 2 years duration. The two sets of + estimates are identical, and so are the standard errors, so the two + shaded areas overlap almost perfectly.} -Deviance Residuals: - Min 1Q Median 3Q Max --0.5899 -0.3579 -0.2224 -0.1185 3.8687 +In figure \ref{fig:IeffR}, p. \pageref{fig:IeffR}, we see the effect +of increasing duration of insulin use \emph{for a fixed age} which is +a bit artificial, so we would like to see the \emph{joint} effects of +age and insulin duration. What we cannot see is how the duration +affects mortality relative to \texttt{current} age (at the age +attained at the same time as the attained \texttt{tfI}). -Coefficients: (1 not defined because of singularities) - Estimate Std. Error z value Pr(>|z|) -(Intercept) -4.71537 0.16481 -28.612 <2e-16 -Ns(age, knots = a.kn)1 0.01671 0.35152 0.048 0.9621 -Ns(age, knots = a.kn)2 -0.11682 0.44638 -0.262 0.7935 -Ns(age, knots = a.kn)3 0.47689 0.50638 0.942 0.3463 -Ns(age, knots = a.kn)4 -0.18241 0.47318 -0.385 0.6999 -Ns(tfh, knots = t.kn)1 0.35272 0.51329 0.687 0.4920 -Ns(tfh, knots = t.kn)2 -0.11034 0.43043 -0.256 0.7977 -Ns(tfh, knots = t.kn)3 0.26874 0.49133 0.547 0.5844 -Ns(tfh, knots = t.kn)4 -0.30302 0.43585 -0.695 0.4869 -Ns(age1st, knots = f.kn)1 -0.10650 0.37476 -0.284 0.7763 -Ns(age1st, knots = f.kn)2 0.17245 0.20063 0.860 0.3900 -Ns(age1st, knots = f.kn)3 0.47357 0.24239 1.954 0.0507 -Ns(age1st, knots = f.kn)4 NA NA NA NA +Another way of interpreting this curve is as the rate ratio relative to a +person not on insulin, so we see that the RR (or hazard ratio, HR as +some call it) is over 5 at the start of insulin (the \texttt{lex.Cst} +estimate), and decreases to about 1.5 in the long term. -(Dispersion parameter for poisson family taken to be 1) +Both figure \ref{fig:Ieff} and \ref{fig:IeffR} indicate a declining +RR by insulin duration, but only from figure \ref{fig:ins-time} it is +visible that mortality actually is \emph{in}creasing by age after some +2 years after insulin start. This point would not be available if we +had only fitted a Cox model where we did not have access to the +baseline hazard as a function of age. - Null deviance: 1024.38 on 3128 degrees of freedom -Residual deviance: 966.31 on 3117 degrees of freedom -AIC: 1264.3 +\section{Age$\times$duration interaction} -Number of Fisher Scoring iterations: 7 -\end{Soutput} -\begin{Sinput} -> mft <- update( maft, . ~ . - Ns(age,knots=a.kn) ) -> mf <- update( maf , . ~ . - Ns(age,knots=a.kn) ) -> mt <- update( mat , . ~ . - Ns(age,knots=a.kn) ) -> allp <- anova( maft, mat, ma, maf, mf, mft, mt, mat, -+ maf, maft, mft, -+ test="Chisq" ) -> mall <- as.matrix( allp ) -> cbind( mod = c("maft","mat","ma","maf","mf","mft","mt","mat","maf","maft","mft"), -+ round( allp[,1:5], 3 ) ) -\end{Sinput} -\begin{Soutput} - mod Resid. Df Resid. Dev Df Deviance Pr(>Chi) -1 maft 3117 966.306 NA NA NA -2 mat 3120 970.697 -3 -4.391 0.222 -3 ma 3124 979.160 -4 -8.463 0.076 -4 maf 3120 973.197 4 5.962 0.202 -5 mf 3124 1011.593 -4 -38.396 0.000 -6 mft 3120 971.120 4 40.473 0.000 -7 mt 3124 985.734 -4 -14.614 0.006 -8 mat 3120 970.697 4 15.037 0.005 -9 maf 3120 973.197 0 -2.500 NA -10 maft 3117 966.306 3 6.892 0.075 -11 mft 3120 971.120 -3 -4.814 0.186 -\end{Soutput} -\end{Schunk} -\begin{enumerate} -\item Explain why there are \texttt{NA}s among the parameters in the - model \texttt{maf}. -\item Draw a graph (a ``DAG'') with the models as nodes and the tests - as vertices, put the p-values on the vertices and use the result to - argue that the model with age an time since hire is actually the - most sensible description in this case. -\end{enumerate} +The model we fitted assumes that the RR is the same regardless of the +age at start of insulin --- the hazards are multiplicative. Sometimes +this is termed the proportional hazards assumption: For \emph{any} +fixed age the HR is the same as a function of time since insulin, and +vice versa. + +A more correct term would be ``main effects model'' --- there is no +interaction between age (the baseline time scale) and other +covariates. So there is really no need for the term ``proportional +hazards''; well defined and precise statistical terms for it has +existed for aeons. -\chapter{Competing risks --- multiple types of events} +\subsection{Age at insulin start} -If we want to consider death from lung cancer and death from other -causes as separate events we can code these as for example 1 and 2. +In order to check the consistency of the multiplicativity assumption +across the spectrum of age at insulin inception, we can fit an +interaction model. One approach to this would be using a non-linear +effect of age at insulin use (for convenience we use the same knots as +for age) --- note that the prediction data frames are the same as we +used above, because we do not compute age at insulin use as a separate +variable, but rather enter it as the difference between current age +(\texttt{age}) and insulin duration (\texttt{tfI}). + +At first glance we might think of doing: \begin{Schunk} \begin{Sinput} -> data( nickel ) -> nicL <- Lexis( entry = list( per = agein+dob, -+ age = agein, -+ tfh = agein-age1st ), -+ exit = list( age = ageout ), -+ exit.status = ( icd > 0 ) + ( icd %in% c(162,163) ), -+ data = nickel ) +> imx <- glm.Lexis( tsNA20(dmCs), ++ formula = ~ Ns(age ,knots=a.kn) ++ + Ns( tfI,knots=i.kn) ++ + Ns(age-tfI,knots=a.kn) ++ + lex.Cst + sex ) \end{Sinput} \begin{Soutput} -NOTE: entry.status has been set to 0 for all. +stats::glm Poisson analysis of Lexis object tsNA20(dmCs) with log link: +Rates for transitions: DM->Dead, Ins->Dead \end{Soutput} +\end{Schunk} +But this will fit a model with a rate-ratio between persons with and +without insulin that depends only on age at insulin start for the time +\emph{after} insulin start, the RR at \texttt{tfI}=0 will be the same +at any age, which really is not the type of interaction we wanted. + +We want the \texttt{age-tfI} term to be specific for the insulin +exposed so we may use one of two other approaches, that are +conceptually alike but mathematically different: +\begin{Schunk} \begin{Sinput} -> summary( nicL ) +> Im <- glm.Lexis( tsNA20(dmCs), ++ formula = ~ Ns(age ,knots=a.kn) ++ + Ns( tfI,knots=i.kn) ++ + Ns((age-tfI)*(lex.Cst=="Ins"),knots=a.kn) ++ + lex.Cst + sex ) \end{Sinput} \begin{Soutput} -Transitions: - To -From 0 1 2 Records: Events: Risk time: Persons: - 0 47 495 137 679 632 15348.06 679 +stats::glm Poisson analysis of Lexis object tsNA20(dmCs) with log link: +Rates for transitions: DM->Dead, Ins->Dead \end{Soutput} \begin{Sinput} -> subset( nicL, id %in% 8:10 ) +> im <- glm.Lexis( tsNA20(dmCs), ++ formula = ~ Ns(age ,knots=a.kn) ++ + Ns( tfI,knots=i.kn) ++ + lex.Cst:Ns(age-tfI,knots=a.kn) ++ + lex.Cst + sex ) \end{Sinput} \begin{Soutput} - per age tfh lex.dur lex.Cst lex.Xst lex.id id icd exposure dob -4 1934.246 47.9067 23.1861 21.7727 0 1 4 8 527 9 1886.340 -5 1934.246 54.7465 24.7890 22.0977 0 1 5 9 150 0 1879.500 -6 1934.246 44.3314 23.0437 18.2099 0 2 6 10 163 2 1889.915 - age1st agein ageout -4 24.7206 47.9067 69.6794 -5 29.9575 54.7465 76.8442 -6 21.2877 44.3314 62.5413 +stats::glm Poisson analysis of Lexis object tsNA20(dmCs) with log link: +Rates for transitions: DM->Dead, Ins->Dead \end{Soutput} \end{Schunk} -In order to have a more readable output we can label the states, we -can enter the names of these in the \texttt{states} parameter, try for -example: +The first model (\texttt{Im}) has a common age-effect (\texttt{Ns(age,...}) for +persons with and without diabetes and a RR depending on insulin +duration \texttt{tfI} and age at insulin (\texttt{age-tfI}). Since the +linear effect of these two terms are in the model as well, a linear +trend in the RR by current age (\texttt{age}) is accommodated as well. + +The second model allows age-effects that differ non-linearly between +person with and without insulin, because the interaction term +\texttt{lex.Cst:Ns(age-tfI...} for persons not on insulin is merely an +age term (since \texttt{tfI} is coded 0 for all follow-up not on +insulin). + +We can compare the models fitted: \begin{Schunk} \begin{Sinput} -> nicL <- Lexis( entry = list( per = agein+dob, -+ age = agein, -+ tfh = agein-age1st ), -+ exit = list( age = ageout ), -+ exit.status = ( icd > 0 ) + ( icd %in% c(162,163) ), -+ data = nickel, -+ states = c("Alive","D.oth","D.lung") ) +> anova( imx, Im, im, test='Chisq') \end{Sinput} \begin{Soutput} -NOTE: entry.status has been set to 0 for all. +Analysis of Deviance Table + +Model 1: cbind(trt(Lx$lex.Cst, Lx$lex.Xst) %in% trnam, Lx$lex.dur) ~ Ns(age, + knots = a.kn) + Ns(tfI, knots = i.kn) + Ns(age - tfI, knots = a.kn) + + lex.Cst + sex +Model 2: cbind(trt(Lx$lex.Cst, Lx$lex.Xst) %in% trnam, Lx$lex.dur) ~ Ns(age, + knots = a.kn) + Ns(tfI, knots = i.kn) + Ns((age - tfI) * + (lex.Cst == "Ins"), knots = a.kn) + lex.Cst + sex +Model 3: cbind(trt(Lx$lex.Cst, Lx$lex.Xst) %in% trnam, Lx$lex.dur) ~ Ns(age, + knots = a.kn) + Ns(tfI, knots = i.kn) + lex.Cst:Ns(age - + tfI, knots = a.kn) + lex.Cst + sex + Resid. Df Resid. Dev Df Deviance Pr(>Chi) +1 228734 25096 +2 228733 25087 1 8.9631 0.002755 +3 228730 25082 3 4.6804 0.196749 \end{Soutput} +\end{Schunk} +so we see that the models indeed are different, and moreover that the +last model does not provide substantial further improvement, by +allowing non-linear RR along the age-scale. + +We can illustrate the different estimated rates from the three models +in figure \ref{fig:dur-int}, p. \pageref{fig:dur-int}: +\begin{Schunk} \begin{Sinput} -> summary( nicL ) +> pxi <- ci.pred( imx, ndI ) +> pxa <- ci.pred( imx, ndA ) +> pIi <- ci.pred( Im , ndI ) +> pIa <- ci.pred( Im , ndA ) +> pii <- ci.pred( im , ndI ) +> pia <- ci.pred( im , ndA ) +> par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +> matshade( ndI$age, cbind( pxi, pIi, pii)*1000, plot=T, log="y", ++ xlab="Age", ylab="Mortality per 1000 PY", ++ lty=1, lwd=2, col=c("blue","forestgreen","red"), alpha=0.1 ) +> matshade( ndA$age, cbind( pxa, pIa, pia)*1000, ++ lty=1, lwd=2, col=c("blue","forestgreen","red"), alpha=0.1 ) +\end{Sinput} +\end{Schunk} +\insfig{dur-int}{0.8}{Age at insulin as interaction between age and + duration. Blue curves are from the naive interaction model + \textrm{\tt imx} with identical $\RR$ at \textrm{\tt tfI}=0 at any + age; green curves are from the interaction model with age at + insulin, from the model \textrm{\tt Im} with only linear + differences by age, and red lines from the full interaction model + \textrm{\tt im}.} + +We can also plot the RRs only from these models (figure +\ref{fig:dur-int-RR}, p. \pageref{fig:dur-int-RR}); for this we need +the reference frames, and the machinery from \texttt{ci.exp} allowing +a list of two data frames: +\begin{Schunk} +\begin{Sinput} +> ndR <- transform( ndI, tfI=0, lex.Cst="DM" ) +> cbind( head(ndI), head(ndR) ) \end{Sinput} \begin{Soutput} -Transitions: - To -From Alive D.oth D.lung Records: Events: Risk time: Persons: - Alive 47 495 137 679 632 15348.06 679 + tfI ai sex lex.Cst age tfI ai sex lex.Cst age +1 NA 40 M Ins NA 0 40 M DM NA +2 0.0 40 M Ins 40.0 0 40 M DM 40.0 +3 0.1 40 M Ins 40.1 0 40 M DM 40.1 +4 0.2 40 M Ins 40.2 0 40 M DM 40.2 +5 0.3 40 M Ins 40.3 0 40 M DM 40.3 +6 0.4 40 M Ins 40.4 0 40 M DM 40.4 \end{Soutput} \begin{Sinput} -> str( nicL ) +> Rxi <- ci.exp( imx, list(ndI,ndR) ) +> Rii <- ci.exp( im , list(ndI,ndR) ) +> RIi <- ci.exp( Im , list(ndI,ndR) ) +> par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +> matshade( ndI$age, cbind( Rxi, RIi, Rii), plot=T, log="y", ++ xlab="Age (years)", ylab="Rate ratio vs, non-Insulin", ++ lty=1, lwd=2, col=c("blue","forestgreen","red"), alpha=0.1 ) +> abline( h=1 ) +> abline( h=ci.exp(imx,subset="lex.Cst")[,1], lty="25", col="blue" ) +\end{Sinput} +\end{Schunk} +\insfig{dur-int-RR}{0.9}{RR from three different interaction + models. The horizontal dotted line is at the estimated effect of + \textrm{\tt lex.Cst}, to illustrate that the first model (blue) + constrains this initial HR to be constant across age. The green + curves are the extended interaction model, and the red the full + one.} + +\clearpage + +\subsection{General interaction} + +As a final illustration we may want to explore a different kind of +interaction, not defined from the duration --- here we simplify the +interaction by not using the second-last knot in the interaction terms +--- figure \ref{fig:splint}, p. \pageref{fig:splint}. Note again that +the prediction code is the same: +\begin{Schunk} +\begin{Sinput} +> gm <- glm.Lexis( tsNA20(dmCs), ++ formula = ~ Ns(age,knots=a.kn) ++ + Ns(tfI,knots=i.kn) ++ + lex.Cst:Ns(age,knots=a.kn):Ns(tfI,knots=i.kn) ++ + lex.Cst + sex ) \end{Sinput} \begin{Soutput} -Classes ‘Lexis’ and 'data.frame': 679 obs. of 14 variables: - $ per : num 1934 1934 1934 1934 1934 ... - $ age : num 45.2 48.3 53 47.9 54.7 ... - $ tfh : num 27.7 25.1 27.7 23.2 24.8 ... - $ lex.dur : num 47.75 15 1.17 21.77 22.1 ... - $ lex.Cst : Factor w/ 3 levels "Alive","D.oth",..: 1 1 1 1 1 1 1 1 1 1 ... - $ lex.Xst : Factor w/ 3 levels "Alive","D.oth",..: 1 3 3 2 2 3 2 2 2 2 ... - $ lex.id : int 1 2 3 4 5 6 7 8 9 10 ... - $ id : num 3 4 6 8 9 10 15 16 17 18 ... - $ icd : num 0 162 163 527 150 163 334 160 420 12 ... - $ exposure: num 5 5 10 9 0 2 0 0.5 0 0 ... - $ dob : num 1889 1886 1881 1886 1880 ... - $ age1st : num 17.5 23.2 25.2 24.7 30 ... - $ agein : num 45.2 48.3 53 47.9 54.7 ... - $ ageout : num 93 63.3 54.2 69.7 76.8 ... - - attr(*, "time.scales")= chr "per" "age" "tfh" - - attr(*, "time.since")= chr "" "" "" - - attr(*, "breaks")=List of 3 - ..$ per: NULL - ..$ age: NULL - ..$ tfh: NULL +stats::glm Poisson analysis of Lexis object tsNA20(dmCs) with log link: +Rates for transitions: DM->Dead, Ins->Dead \end{Soutput} +\begin{Sinput} +> pgi <- ci.pred( gm, ndI ) +> pga <- ci.pred( gm, ndA ) +> par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +> matshade( ndI$age, cbind( pgi, pii )*1000, plot=T, ++ lty=c("solid","21"), lend="butt", lwd=2, log="y", ++ xlab="Age (years)", ylab="Mortality rates per 1000 PY", ++ alpha=c(0.2,0.1), col=c("black","red") ) +> matshade( ndA$age, cbind( pga, pia )*1000, ++ lty=c("solid","21"), lend="butt", lwd=2, ++ alpha=c(0.2,0.1), col=c("black","red") ) +\end{Sinput} +\end{Schunk} +\insfig{splint}{0.8}{Spline-by-spline interaction between age and + duration (model \textrm{\tt gm}, black), and the interaction using a + non-linear effect of age at entry (model \textrm{\tt im}, red), + corresponding to the red curves in figure \ref{fig:dur-int}.} +This is in figure \ref{fig:splint}, p. \pageref{fig:splint}. + +\subsection{Evaluating interactions} + +Here we see that the interaction effect is such that in the older ages +the length of insulin use has an increasing effect on mortality. + +Even though there is no statistically significant interaction between +age and time since start of insulin, it would be illustrative to show +the RR as a function of age at insulin and age at follow-up: +\begin{Schunk} +\begin{Sinput} +> ndR <- transform( ndI, lex.Cst="DM", tfI=0 ) +> iRR <- ci.exp( im, ctr.mat=list(ndI,ndR) ) +> gRR <- ci.exp( gm, ctr.mat=list(ndI,ndR) ) +> par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1, bty="n" ) +> matshade( ndI$age, cbind(gRR,iRR), lty=1, log="y", plot=TRUE, ++ xlab="Age (years)", ylab="Rate ratio: Ins vs. non-Ins", ++ col=c("black","red") ) +> abline( h=1 ) +\end{Sinput} \end{Schunk} -Note that the \texttt{Lexis} function automatically assumes that all -persons enter in the first level (given in the \texttt{states=} -argument), corresponding to the numerical values given in -\texttt{exit.status}. - -When we cut at a date as in this case, the date where cumulative -exposure exceeds 50 exposure-years, we get the follow-up \emph{after} -the date classified as being in the new state if the exit -(\texttt{lex.Xst}) was to a state we defined as one of the -\texttt{precursor.states}: -\begin{Schunk} -\begin{Sinput} -> nicL$agehi <- nicL$age1st + 50 / nicL$exposure -> nicC <- cutLexis( data = nicL, -+ cut = nicL$agehi, -+ timescale = "age", -+ new.state = "HiExp", -+ precursor.states = "Alive" ) -> subset( nicC, id %in% 8:10 ) -\end{Sinput} -\begin{Soutput} - per age tfh lex.dur lex.Cst lex.Xst lex.id id icd exposure dob -683 1934.246 47.9067 23.1861 21.7727 HiExp D.oth 4 8 527 9 1886.340 -5 1934.246 54.7465 24.7890 22.0977 Alive D.oth 5 9 150 0 1879.500 -6 1934.246 44.3314 23.0437 1.9563 Alive HiExp 6 10 163 2 1889.915 -685 1936.203 46.2877 25.0000 16.2536 HiExp D.lung 6 10 163 2 1889.915 - age1st agein ageout agehi -683 24.7206 47.9067 69.6794 30.27616 -5 29.9575 54.7465 76.8442 Inf -6 21.2877 44.3314 62.5413 46.28770 -685 21.2877 44.3314 62.5413 46.28770 +\insfig{RR-int}{0.8}{The effect of duration of insulin use at + different ages of follow-up (and age at insulin start). Estimates + are from the model with an interaction term using a non-linear + effect of age at insulin start (model \textrm{\tt im}, red) and + using a general spline interactions (model \textrm{\tt gm}, + black). It appears that the general interaction over-models a bit.} +This is in figure \ref{fig:RR-int}, p. \pageref{fig:RR-int}. + +The advantage of the parametric modeling (be that with age at insulin +or general spline interaction) is that it is straight-forward to +\emph{test} whether we have an interaction. + +\section{Separate models} + +In the above we insisted on making a joint model for the +\texttt{DM}$\rightarrow$\texttt{Dead} and the +\texttt{Ins}$\rightarrow$\texttt{Dead} +transitions, but with the complications demonstrated it would actually +have been more sensible to model the two transitions separately: +\begin{Schunk} +\begin{Sinput} +> dmd <- glm.Lexis( dmCs, ++ from="DM", to="Dead", ++ formula = ~ Ns(age,knots=a.kn) ++ + sex ) +\end{Sinput} +\begin{Soutput} +stats::glm Poisson analysis of Lexis object dmCs with log link: +Rates for the transition: DM->Dead \end{Soutput} \begin{Sinput} -> summary( nicC, scale=1000 ) +> ind <- glm.Lexis( dmCs, ++ from="Ins", to="Dead", ++ formula = ~ Ns(age,knots=a.kn) ++ + Ns(tfI,knots=i.kn) ++ + Ns(age-tfI,knots=a.kn) ++ + sex ) \end{Sinput} \begin{Soutput} -Transitions: - To -From Alive HiExp D.oth D.lung Records: Events: Risk time: Persons: - Alive 39 83 279 65 466 427 10.77 466 - HiExp 0 8 216 72 296 288 4.58 296 - Sum 39 91 495 137 762 715 15.35 679 +stats::glm Poisson analysis of Lexis object dmCs with log link: +Rates for the transition: Ins->Dead \end{Soutput} +\begin{Sinput} +> ini <- ci.pred( ind, ndI ) +> dmi <- ci.pred( dmd, ndI ) +> dma <- ci.pred( dmd, ndA ) +\end{Sinput} +\end{Schunk} +The estimated mortality rates are shown in figure \ref{fig:sep-mort}, +p. \pageref{fig:sep-mort}, using: +\begin{Schunk} +\begin{Sinput} +> par(mar=c(3,3,1,1),mgp=c(3,1,0)/1.6,las=1,bty="n") +> matshade( ndI$age, ini*1000, plot=TRUE, log="y", ++ xlab="Age (years)", ylab="Mortality rates per 1000 PY", ++ lwd=2, col="red" ) +> matshade( ndA$age, dma*1000, ++ lwd=2, col="black" ) +\end{Sinput} \end{Schunk} -Note that the persons-years is the same, but that the number of -events has changed. This is because events are now defined as any -transition, including the transitions to \texttt{HiExp}. +The estimated RRs are computed using that the estimates from the two +models are uncorrelated, and hence qualify for \texttt{ci.ratio} (this +and the previous graph +appear in figure \ref{fig:Ins-noIns}, p. \pageref{fig:Ins-noIns}) +\begin{Schunk} +\begin{Sinput} +> par(mar=c(3,3,1,1),mgp=c(3,1,0)/1.6,las=1,bty="n") +> matshade( ndI$age, ci.ratio(ini,dmi), plot=TRUE, log="y", ++ xlab="Age (years)", ylab="RR insulin vs. no insulin", ++ lwd=2, col="red" ) +> abline( h=1 ) +\end{Sinput} +\end{Schunk} +\begin{figure}[tb] +\centering +\includegraphics[width=0.49\textwidth]{flup-sep-mort} +\includegraphics[width=0.49\textwidth]{flup-sep-HR} +\caption{\it Left panel: Mortality rates from separate models for the + two mortality transitions; the \textrm{\tt + DM}$\rightarrow$\textrm{\tt Dead} transition modeled by age alone; + \textrm{\tt Ins}$\rightarrow$\textrm{\tt Dead} transition modeled + with spline effects of current age, time since insulin and and age + at insulin. \newline Right panel: Mortality HR of insulin vs. no insulin.} +\label{fig:Ins-noIns} +\end{figure} -Also note that (so far) it is necessary to specify the variable with -the cut points in full, using only \texttt{cut=agehi} would give an error. +\chapter{More states} \section{Subdividing states} It may be of interest to subdivide the states following the intermediate event according to whether the event has occurred or -not. That is done by the argument \texttt{split.states=TRUE}. +not. This will enable us to address the question of the fraction of +the patients that ever go on insulin. -Moreover, it will also often be of interest to introduce a new -timescale indicating the time since intermediate event. This can be -done by the argument \texttt{new.scale=TRUE}, alternatively -\texttt{new.scale="tfe"}, as illustrated here: -\begin{Schunk} -\begin{Sinput} -> nicC <- cutLexis( data = nicL, -+ cut = nicL$agehi, -+ timescale = "age", -+ new.state = "HiExp", -+ new.scale = "tfe", -+ split.states = TRUE, -+ precursor.states = "Alive" ) -> subset( nicC, id %in% 8:10 ) -\end{Sinput} -\begin{Soutput} - per age tfh tfe lex.dur lex.Cst lex.Xst lex.id id icd -683 1934.246 47.9067 23.1861 17.63054 21.7727 HiExp D.oth(HiExp) 4 8 527 -5 1934.246 54.7465 24.7890 NA 22.0977 Alive D.oth 5 9 150 -6 1934.246 44.3314 23.0437 NA 1.9563 Alive HiExp 6 10 163 -685 1936.203 46.2877 25.0000 0.00000 16.2536 HiExp D.lung(HiExp) 6 10 163 - exposure dob age1st agein ageout agehi -683 9 1886.340 24.7206 47.9067 69.6794 30.27616 -5 0 1879.500 29.9575 54.7465 76.8442 Inf -6 2 1889.915 21.2877 44.3314 62.5413 46.28770 -685 2 1889.915 21.2877 44.3314 62.5413 46.28770 +This is done by the argument \texttt{split.states=TRUE}. +\begin{Schunk} +\begin{Sinput} +> dmCs <- cutLexis( data = dmS2, ++ cut = dmS2$doins, ++ timescale = "per", ++ new.state = "Ins", ++ new.scale = "tfI", ++ precursor.states = "DM", ++ split.states = TRUE ) +> summary( dmCs ) +\end{Sinput} +\begin{Soutput} +Transitions: + To +From DM Ins Dead Dead(Ins) Records: Events: Risk time: Persons: + DM 35135 1694 2048 0 38877 3742 45885.49 9899 + Ins 0 5762 0 451 6213 451 8387.77 1791 + Sum 35135 7456 2048 451 45090 4193 54273.27 9996 +\end{Soutput} +\end{Schunk} +We can illustrate the numbers and the transitions (figure +\ref{fig:box4}, p. \pageref{fig:box4}) +\begin{Schunk} +\begin{Sinput} +> boxes( dmCs, boxpos=list(x=c(15,15,85,85), ++ y=c(85,15,85,15)), ++ scale.R=1000, show.BE=TRUE ) +\end{Sinput} +\end{Schunk} +\insfig{box4}{0.7}{Transitions between 4 states: the numbers \emph{in} + the boxes are person-years (middle), and below the number of persons + who start, respectively end their follow-up in each of the states.} + +Note that it is only the mortality rates that we have been modeling, +namely the transitions \texttt{DM}$\rightarrow$\texttt{Dead} +and \texttt{Ins}$\rightarrow$\texttt{Dead(Ins)}. +If we were to model the cumulative risk of using insulin we would also +need a model for the DM$\rightarrow$Ins +transition. Subsequent to that we would then compute the probability +of being in each state conditional on suitable starting +conditions. With models where transition rates depend on several time +scales this is not a trivial task. This is treated in more detail in the +vignette on \texttt{simLexis}. + +\section{Multiple intermediate events} + +We may be interested in starting either insulin or OAD (oral +anti-diabetic drugs), thus giving rise to more states and more +time scales. This can be accomplished by the \texttt{mcutLexis} +function, that generalizes \texttt{cutLexis}: +\begin{Schunk} +\begin{Sinput} +> dmM <- mcutLexis( dmL, ++ timescale = "per", ++ wh = c("doins","dooad"), ++ new.states = c("Ins","OAD"), ++ new.scales = c("tfI","tfO"), ++ precursor.states = "DM", ++ ties.resolve = TRUE ) +\end{Sinput} +\begin{Soutput} +NOTE: 9996 records with tied events times resolved. + Results only reproducible if the seed for the random number generator is set. \end{Soutput} \begin{Sinput} -> summary( nicC, scale=1000, timeScales=TRUE ) +> summary( dmM, t=T ) \end{Sinput} \begin{Soutput} Transitions: To -From Alive HiExp D.oth D.lung D.lung(HiExp) D.oth(HiExp) Records: Events: Risk time: - Alive 39 83 279 65 0 0 466 427 10.77 - HiExp 0 8 0 0 72 216 296 288 4.58 - Sum 39 91 279 65 72 216 762 715 15.35 - +From DM Dead OAD Ins OAD-Ins Ins-OAD Records: Events: Risk time: Persons: + DM 2830 1056 2957 689 0 0 7532 4702 22920.21 7532 + OAD 0 992 3327 0 1005 0 5324 1997 22965.30 5324 + Ins 0 152 0 462 0 172 786 324 3883.14 786 + OAD-Ins 0 266 0 0 739 0 1005 266 3770.53 1005 + Ins-OAD 0 33 0 0 0 139 172 33 734.09 172 + Sum 2830 2499 6284 1151 1744 311 14819 7322 54273.27 9996 + +Timescales: + per age tfD tfI tfO + "" "" "" "Ins" "OAD" +\end{Soutput} +\end{Schunk} +We see that we now have two time scales defined as entry since into +states. +\begin{Schunk} +\begin{Sinput} +> wh <- c(subset(dmM,lex.Cst=="Ins-OAD")$lex.id[1:2], ++ subset(dmM,lex.Cst=="OAD-Ins")$lex.id[1:2]) +> options( width=110 ) +> print( subset( dmM, lex.id %in% wh )[,c('lex.id',names(dmM[1:8]),c("doins","dooad"))], ++ digits=6, row.names=FALSE ) +\end{Sinput} +\begin{Soutput} + lex.id tfI tfO per age tfD lex.dur lex.Cst lex.Xst doins dooad + 18 NA NA 1996.75 61.7221 0.0000000 1.1690623 DM OAD 2005.99 1997.92 + 18 NA 0.00000 1997.92 62.8912 1.1690623 8.0793977 OAD OAD-Ins 2005.99 1997.92 + 18 0.0000000 8.07940 2005.99 70.9706 9.2484600 4.0027379 OAD-Ins OAD-Ins 2005.99 1997.92 + 25 NA NA 2003.69 60.3422 0.0000000 1.8809035 DM OAD 2008.64 2005.57 + 25 NA 0.00000 2005.57 62.2231 1.8809035 3.0691307 OAD OAD-Ins 2008.64 2005.57 + 25 0.0000000 3.06913 2008.64 65.2923 4.9500342 1.3579740 OAD-Ins OAD-Ins 2008.64 2005.57 + 20 NA NA 2009.25 53.2183 0.0000000 0.0316523 DM Ins 2009.28 2009.29 + 20 0.0000000 NA 2009.28 53.2500 0.0316523 0.0105728 Ins Ins-OAD 2009.28 2009.29 + 20 0.0105728 0.00000 2009.29 53.2606 0.0422251 0.7079461 Ins-OAD Ins-OAD 2009.28 2009.29 + 38 NA NA 2008.37 63.9316 0.0000000 0.0930869 DM Ins 2008.46 2008.67 + 38 0.0000000 NA 2008.46 64.0246 0.0930869 0.2135524 Ins Ins-OAD 2008.46 2008.67 + 38 0.2135524 0.00000 2008.67 64.2382 0.3066393 1.3251198 Ins-OAD Dead 2008.46 2008.67 +\end{Soutput} +\begin{Sinput} +> summary( dmM, t=T ) +\end{Sinput} +\begin{Soutput} Transitions: To -From Persons: - Alive 466 - HiExp 296 - Sum 679 +From DM Dead OAD Ins OAD-Ins Ins-OAD Records: Events: Risk time: Persons: + DM 2830 1056 2957 689 0 0 7532 4702 22920.21 7532 + OAD 0 992 3327 0 1005 0 5324 1997 22965.30 5324 + Ins 0 152 0 462 0 172 786 324 3883.14 786 + OAD-Ins 0 266 0 0 739 0 1005 266 3770.53 1005 + Ins-OAD 0 33 0 0 0 139 172 33 734.09 172 + Sum 2830 2499 6284 1151 1744 311 14819 7322 54273.27 9996 Timescales: - time.scale time.since -1 per -2 age -3 tfh -4 tfe HiExp -\end{Soutput} -\end{Schunk} -Note that the \texttt{timeScales=TRUE} to \texttt{summary} lists the -timescales available in the object, and also indicates which of them -that are defined as time since entry to a particular state. This -facility is not used here, but it is needed when simulating follow-up -data --- see the vignette on \textrm{simLexis}. - -With 6 different states it is quite difficult to get an overview of -the transitions between states from the \texttt{summary()}. Therefore -there is function that gives a graphical display of the states showing -the transitions between the states: -\begin{Schunk} -\begin{Sinput} -> boxes( nicC, boxpos = list(x=c(10,10,80,80,80,80), -+ y=c(75,25,87,63,13,37)), -+ scale.Y = 1000, -+ show.BE = TRUE ) -\end{Sinput} -\end{Schunk} -\insfig{nic-box}{0.9}{Transitions between states; the number in the - middle of each box is the person-years (in 1000s --- since - \textrm{\tt scale.Y=1000}), the numbers at the bottom of the boxes - are the number that start, respectively end their follow-up in each - state. The numbers on the arrows are the number of transitions and - crude transition rates (the latter in events per 1000 PY).\newline - The function \textrm{\tt boxes.Lexis} has a zillion arguments to - fine-tune the appearance of the display in terms of colors etc.} - -%% \section{Multiple events of the same type (recurrent events)} -%% Sometimes more events of the same type are recorded for each person and -%% one would then like to count these and put follow-up time in states accordingly. -%% Essentially, each set of cutpoints represents progressions from one -%% state to the next. Therefore the states should be numbered, and the -%% numbering of states subsequently occupied be increased accordingly. - -%% This is a behaviour different from the one outlined above, and it is -%% achieved by the argument \texttt{count=TRUE} to -%% \texttt{cutLexis}. When \texttt{count} is set to \texttt{TRUE}, the -%% value of the arguments \texttt{new.state} and -%% \texttt{precursor.states} are ignored. Actually, when using the -%% argument \texttt{count=TRUE}, the function \texttt{countLexis} is -%% called, so an alternative is to use this directly. - -%% \renewcommand{\bibname}{References} -%% \bibliographystyle{plain} -%% \bibliography{% -%% /home/bendix/art/bibtex/BxC,% -%% /home/bendix/art/bibtex/Stat,% -%% /home/bendix/art/bibtex/DMCa,% -%% /home/bendix/art/bibtex/Diabetes% -%% } -%% \addcontentsline{toc}{section}{\bibname} + per age tfD tfI tfO + "" "" "" "Ins" "OAD" +\end{Soutput} +\end{Schunk} +We can also illustrate the transitions to the different states, as in +figure \ref{fig:mbox}: +\begin{Schunk} +\begin{Sinput} +> boxes( dmM, boxpos=list(x=c(15,80,40,40,85,85), ++ y=c(50,50,90,10,90,10)), ++ scale.R=1000, show.BE=TRUE ) +\end{Sinput} +\end{Schunk} +\insfig{mbox}{1.0}{Boxes for the \textrm{\tt dmM} object. The numbers + \emph{in} the boxes are person-years (middle), and below the number + of persons who start, respectively end their follow-up in each of + the states.} +We may not be interested in whether persons were prescribed insulin +before OAD or vice versa, in which case we would combine the levels +with both insulin and OAD to one, regardless of order (figure +\ref{fig:mboxr}): +\begin{Schunk} +\begin{Sinput} +> summary( dmMr <- Relevel( dmM, list('OAD+Ins'=5:6), first=FALSE) ) +\end{Sinput} +\begin{Soutput} + type old new +1 lex.Cst DM DM +2 lex.Cst Dead +3 lex.Cst OAD OAD +4 lex.Cst Ins Ins +5 lex.Cst OAD-Ins OAD+Ins +6 lex.Cst Ins-OAD OAD+Ins +7 lex.Xst DM DM +8 lex.Xst Dead Dead +9 lex.Xst OAD OAD +10 lex.Xst Ins Ins +11 lex.Xst OAD-Ins OAD+Ins +12 lex.Xst Ins-OAD OAD+Ins + +Transitions: + To +From DM Dead OAD Ins OAD+Ins Records: Events: Risk time: Persons: + DM 2830 1056 2957 689 0 7532 4702 22920.21 7532 + OAD 0 992 3327 0 1005 5324 1997 22965.30 5324 + Ins 0 152 0 462 172 786 324 3883.14 786 + OAD+Ins 0 299 0 0 878 1177 299 4504.62 1177 + Sum 2830 2499 6284 1151 2055 14819 7322 54273.27 9996 +\end{Soutput} +\begin{Sinput} +> boxes( dmMr, boxpos=list(x=c(15,50,15,85,85), ++ y=c(85,50,15,85,15)), ++ scale.R=1000, show.BE=TRUE ) +\end{Sinput} +\end{Schunk} +\insfig{mboxr}{1.0}{Boxes for the \textrm{\tt dmMr} object with + collapsed states. The numbers \emph{in} the boxes are person-years + (middle), and below the number of persons who start, respectively + end their follow-up in each of the states.} + +Diagrams as those in figures +\ref{fig:mbox} and +\ref{fig:mboxr} gives an overview of the possible transitions, +which states it might be relevant to collapse, and which transitions +to model and how. + +\chapter{\texttt{Lexis} functions} + +The \texttt{Lexis} machinery has evolved over time since it was first +introduced in a workable version in \texttt{Epi\_1.0.5} in August 2008. + +Over the years there have been additions of tools for handling +multistate data. Here is a list of the current functions relating to +\texttt{Lexis} objects with a very brief description; it does not +replace the documentation. Unless otherwise stated, functions named +\texttt{something.Lexis} (with a ``\texttt{.}'') are S3 methods for +\texttt{Lexis} objects, so you can skip the ``\texttt{.Lexis}'' in +daily use. + +\setlist{noitemsep} +\begin{description} + +\item[Define]\ \\ +\begin{description} +\item[\texttt{Lexis}] defines a \texttt{Lexis} object +\end{description} + +\item[Cut and split]\ \\ +\begin{description} +\item[\texttt{cutLexis}] cut follow-up at intermediate event +\item[\texttt{mcutLexis}] cut follow-up at several intermediate events +\item[\texttt{countLexis}] cut follow-up at intermediate event count + the no. events so far +\item[\texttt{splitLexis}] split follow up along a time scale +\item[\texttt{splitMulti}] split follow up along a time scale --- from + the \texttt{popEpi} package, faster and has simpler syntax than + \texttt{splitLexis} +\item[\texttt{addCov.Lexis}] add clinical measurements at a given date to a + \texttt{Lexis} object +\end{description} + +\item[Boxes and plots]\ \\ +\begin{description} +\item[\texttt{boxes.Lexis}] draw a diagram of states and transitions +\item[\texttt{plot.Lexis}] draw a standard Lexis diagram +\item[\texttt{points.Lexis}] add points to a Lexis diagram +\item[\texttt{lines.Lexis}] add lines to a Lexis diagram +\item[\texttt{PY.ann.Lexis}] annotate life lines in a Lexis diagram +\end{description} + +\item[Summarize and query]\ \\ +\begin{description} +\item[\texttt{summary.Lexis}] overview of transitions, risk time etc. +\item[\texttt{levels.Lexis}] what are the states in the \texttt{Lexis} object +\item[\texttt{nid.Lexis}] number of persons in the \texttt{Lexis} + object --- how many unique values of \texttt{lex.id} are present +\item[\texttt{entry}] entry time +\item[\texttt{exit}] exit time +\item[\texttt{status}] status at entry or exit +\item[\texttt{timeBand}] factor of time bands +\item[\texttt{timeScales}] what time scales are in the \texttt{Lexis} object +\item[\texttt{timeSince}] what time scales are defined as time since a given state +\item[\texttt{breaks}] what breaks are currently defined +\item[\texttt{absorbing}] what are the absorbing states +\item[\texttt{transient}] what are the transient states +\item[\texttt{preceding}, \texttt{before}] which states precede this +\item[\texttt{succeeding}, \texttt{after}] which states can follow this +\item[\texttt{tmat.Lexis}] transition matrix for the \texttt{Lexis} object +\end{description} + +\item[Manipulate]\ \\ +\begin{description} +\item[\texttt{subset.Lexis}, \texttt{[}] subset of a \texttt{Lexis} object +\item[\texttt{merge.Lexis}] merges a \texttt{Lexis} objects with a + \texttt{data.frame} +\item[\texttt{cbind.Lexis}] bind a \texttt{data.frame} to a \texttt{Lexis} object +\item[\texttt{rbind.Lexis}] put two \texttt{Lexis} objects head-to-foot +\item[\texttt{transform.Lexis}] transform and add variables +\item[\texttt{tsNA20}] turn \texttt{NA}s to 0s for time scales +\item[\texttt{Relevel.Lexis}, \texttt{factorize.Lexis}] reorder and + combine states +\item[\texttt{bootLexis}] bootstrap sample of \emph{persons} + (\texttt{lex.id}) in the \texttt{Lexis} object +\end{description} + +\item[Simulate]\ \\ +\begin{description} +\item[\texttt{simLexis}] simulate a \texttt{Lexis} object from + specified transition rate models +\item[\texttt{nState}, \texttt{pState}] count state occupancy from a + simulated \texttt{Lexis} object +\item[\texttt{plot.pState}, \texttt{lines.pState}] plot state occupancy from a + \texttt{pState} object +\end{description} + +\item[Stack]\ \\ +\begin{description} +\item[\texttt{stack.Lexis}] make a stacked object for simultaneous + analysis of transitions --- returns a \texttt{stacked.Lexis} object +\item[\texttt{subset.stacked.Lexis}] subsets of a \texttt{stacked.Lexis} object +\item[\texttt{transform.stacked.Lexis}] transform a \texttt{stacked.Lexis} object +\end{description} + +\item[Interface to other packages]\ \\ +\begin{description} +\item[\texttt{msdata.Lexis}] interface to \texttt{mstate} package +\item[\texttt{etm.Lexis}] interface to \texttt{etm} package +\item[\texttt{crr.Lexis}] interface to \texttt{cmprsk} package +\end{description} + +\item[Statistical models] --- these are \emph{not} S3 methods +\begin{description} +\item[\texttt{glm.Lexis}] fit a \texttt{glm} model using the + \texttt{poisreg} family to (hopefully) time split data +\item[\texttt{gam.Lexis}] fit a \texttt{gam} model (from the + \texttt{mgcv} package) using the \texttt{poisreg} family to + (hopefully) time split data +\item[\texttt{coxph.Lexis}] fit a Cox model to follow-up in a + \texttt{Lexis} object +\end{description} +\end{description} \end{document} Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/int-test.Rda and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/int-test.Rda differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/pr.Rda and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/pr.Rda differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/simLexis-boxes.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/simLexis-boxes.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/simLexis-comp-0.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/simLexis-comp-0.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/simLexis-mort-int.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/simLexis-mort-int.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/simLexis.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/simLexis.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/simLexis-pstate0.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/simLexis-pstate0.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/simLexis-pstatex.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/simLexis-pstatex.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/simLexis-pstatey.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/simLexis-pstatey.pdf differ diff -Nru r-cran-epi-2.32/vignettes/simLexis.R r-cran-epi-2.37/vignettes/simLexis.R --- r-cran-epi-2.32/vignettes/simLexis.R 2018-05-03 14:35:00.000000000 +0000 +++ r-cran-epi-2.37/vignettes/simLexis.R 2019-05-23 08:21:05.000000000 +0000 @@ -2,7 +2,7 @@ ### Encoding: UTF-8 ################################################### -### code chunk number 1: simLexis.rnw:24-27 +### code chunk number 1: simLexis.rnw:23-26 ################################################### options( width=90, SweaveHooks=list( fig=function() @@ -35,8 +35,7 @@ new.state = "Ins", new.scale = "t.Ins", split.states = TRUE ) -summary( dmi ) -str(dmi) +summary( dmi, timeScales=T ) ################################################### @@ -50,8 +49,8 @@ ################################################### ### code chunk number 6: split ################################################### -Si <- splitLexis( dmi, 0:30/2, "DMdur" ) -dim( Si ) +Si <- splitLexis( dmi, seq(0,20,1/4), "DMdur" ) +summary( Si ) print( subset( Si, lex.id==97 )[,1:10], digits=6 ) @@ -80,35 +79,48 @@ I(Per-2000) + sex, family=poisson, offset=log(lex.dur), data = subset(Si,lex.Cst=="DM") ) -DM.Dead <- glm( (lex.Xst=="Dead") ~ Ns( Age , knots=ad.kn ) + +ci.exp( DM.Ins ) +class( DM.Ins ) + + +################################################### +### code chunk number 9: simLexis.rnw:282-288 +################################################### +DM.Ins <- glm.Lexis( Si, from = "DM", to = "Ins", + formula = ~ Ns( Age , knots=ai.kn ) + + Ns( DMdur, knots=di.kn ) + + I(Per-2000) + sex ) +ci.exp( DM.Ins ) +class( DM.Ins ) + + +################################################### +### code chunk number 10: simLexis.rnw:293-302 +################################################### +DM.Dead <- glm.Lexis( Si, from = "DM", to = "Dead", + formula = ~ Ns( Age , knots=ad.kn ) + + Ns( DMdur, knots=dd.kn ) + + I(Per-2000) + sex ) +Ins.Dead <- glm.Lexis( Si, from = "Ins", + formula = ~ Ns( Age , knots=ad.kn ) + Ns( DMdur, knots=dd.kn ) + - I(Per-2000) + sex, - family=poisson, offset=log(lex.dur), - data = subset(Si,lex.Cst=="DM") ) -Ins.Dead <- glm( (lex.Xst=="Dead(Ins)") ~ Ns( Age , knots=ad.kn ) + - Ns( DMdur, knots=dd.kn ) + - Ns( t.Ins, knots=ti.kn ) + - I(Per-2000) + sex, - family=poisson, offset=log(lex.dur), - data = subset(Si,lex.Cst=="Ins") ) + Ns( t.Ins, knots=ti.kn ) + + I(Per-2000) + sex ) ################################################### -### code chunk number 9: prop-haz +### code chunk number 11: prop-haz ################################################### -with( Si, table(lex.Cst) ) -All.Dead <- glm( (lex.Xst %in% c("Dead(Ins)","Dead")) ~ - Ns( Age , knots=ad.kn ) + - Ns( DMdur, knots=dd.kn ) + - lex.Cst + - I(Per-2000) + sex, - family=poisson, offset=log(lex.dur), - data = Si ) +All.Dead <- glm.Lexis( Si, to = c("Dead(Ins)","Dead"), + formula = ~ Ns( Age , knots=ad.kn ) + + Ns( DMdur, knots=dd.kn ) + + lex.Cst + + I(Per-2000) + sex ) round( ci.exp( All.Dead ), 3 ) ################################################### -### code chunk number 10: get-dev +### code chunk number 12: get-dev ################################################### what <- c("null.deviance","df.null","deviance","df.residual") ( rD <- unlist( DM.Dead[what] ) ) @@ -118,7 +130,7 @@ ################################################### -### code chunk number 11: pr-array +### code chunk number 13: pr-array ################################################### pr.rates <- NArray( list( DMdur = seq(0,12,0.1), DMage = 4:7*10, @@ -129,19 +141,17 @@ ################################################### -### code chunk number 12: simLexis.rnw:382-383 +### code chunk number 14: mknd ################################################### -ci.pred +nd <- data.frame( DMdur = as.numeric( dimnames(pr.rates)[[1]] ), + lex.Cst = factor( 1, levels=1:4, + labels=levels(Si$lex.Cst) ), + sex = factor( 1, levels=1:2, labels=c("M","F")) ) ################################################### -### code chunk number 13: make-pred +### code chunk number 15: make-pred ################################################### -nd <- data.frame( DMdur = as.numeric( dimnames(pr.rates)[[1]] ), - lex.Cst = factor( 1, levels=1:4, - labels=levels(Si$lex.Cst) ), - sex = factor( 1, levels=1:2, labels=c("M","F")), - lex.dur = 1000 ) for( ia in dimnames(pr.rates)[[2]] ) { dnew <- transform( nd, Age = as.numeric(ia)+DMdur, @@ -161,22 +171,22 @@ ################################################### -### code chunk number 14: mort-int +### code chunk number 16: mort-int ################################################### par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1 ) plot( NA, xlim=c(40,82), ylim=c(5,300), bty="n", log="y", xlab="Age", ylab="Mortality rate per 1000 PY" ) abline( v=seq(40,80,5), h=outer(1:9,10^(0:2),"*"), col=gray(0.8) ) for( aa in 4:7*10 ) for( ii in 1:4 ) - matlines( aa+as.numeric(dimnames(pr.rates)[[1]]), - cbind( pr.rates[,paste(aa),ii,"DM/Ins",], - pr.rates[,paste(aa),ii,"All" ,] ), - type="l", lty=1, lwd=c(3,1,1), - col=rep(c("red","limegreen"),each=3) ) + matshade( aa+as.numeric(dimnames(pr.rates)[[1]]), + cbind( pr.rates[,paste(aa),ii,"DM/Ins",], + pr.rates[,paste(aa),ii,"All" ,] )*1000, + type="l", lty=1, lwd=2, + col=c("red","limegreen") ) ################################################### -### code chunk number 15: Tr +### code chunk number 17: Tr ################################################### Tr <- list( "DM" = list( "Ins" = DM.Ins, "Dead" = DM.Dead ), @@ -184,17 +194,13 @@ ################################################### -### code chunk number 16: make-ini +### code chunk number 18: make-ini ################################################### -str( Si[NULL,1:9] ) -ini <- subset(Si,FALSE,select=1:9) -str( ini ) -ini <- subset(Si,select=1:9)[NULL,] -str( ini ) +str( ini <- Si[NULL,1:9] ) ################################################### -### code chunk number 17: ini-fill +### code chunk number 19: ini-fill ################################################### ini[1:2,"lex.id"] <- 1:2 ini[1:2,"lex.Cst"] <- "DM" @@ -206,7 +212,7 @@ ################################################### -### code chunk number 18: simL +### code chunk number 20: simL ################################################### set.seed( 52381764 ) Nsim <- 5000 @@ -217,13 +223,13 @@ ################################################### -### code chunk number 19: sum-simL +### code chunk number 21: sum-simL ################################################### summary( simL, by="sex" ) ################################################### -### code chunk number 20: Tr.p-simP +### code chunk number 22: Tr.p-simP ################################################### Tr.p <- list( "DM" = list( "Ins" = DM.Ins, "Dead" = All.Dead ), @@ -236,7 +242,7 @@ ################################################### -### code chunk number 21: Cox-dur +### code chunk number 23: Cox-dur ################################################### library( survival ) Cox.Dead <- coxph( Surv( DMdur, DMdur+lex.dur, @@ -246,11 +252,10 @@ I(Per-2000) + sex, data = Si ) round( ci.exp( Cox.Dead ), 3 ) -round( ci.exp( All.Dead ), 3 ) ################################################### -### code chunk number 22: TR.c +### code chunk number 24: TR.c ################################################### Tr.c <- list( "DM" = list( "Ins" = Tr$DM$Ins, "Dead" = Cox.Dead ), @@ -263,7 +268,7 @@ ################################################### -### code chunk number 23: nState +### code chunk number 25: nState ################################################### system.time( nSt <- nState( subset(simL,sex=="M"), @@ -272,7 +277,7 @@ ################################################### -### code chunk number 24: pstate0 +### code chunk number 26: pstate0 ################################################### pM <- pState( nSt, perm=c(1,2,4,3) ) head( pM ) @@ -287,14 +292,14 @@ ################################################### -### code chunk number 25: pstatex +### code chunk number 27: pstatex ################################################### clr <- c("limegreen","orange") # expand with a lighter version of the two chosen colors clx <- c( clr, rgb( t( col2rgb( clr[2:1] )*2 + rep(255,3) ) / 3, max=255 ) ) par( mfrow=c(1,2), las=1, mar=c(3,3,4,2), mgp=c(3,1,0)/1.6 ) # Men -plot( pM, col=clx ) +plot( pM, col=clx, xlab="Date of FU" ) lines( as.numeric(rownames(pM)), pM[,2], lwd=3 ) mtext( "60 year old male, diagnosed 1990, aged 55", side=3, line=2.5, adj=0, col=gray(0.6) ) mtext( "Survival curve", side=3, line=1.5, adj=0 ) @@ -309,19 +314,19 @@ from=1995, time.scale="Per" ), perm=c(1,2,4,3) ) -plot( pF, col=clx ) +plot( pF, col=clx, xlab="Date of FU" ) lines( as.numeric(rownames(pF)), pF[,2], lwd=3 ) mtext( "60 year old female, diagnosed 1990, aged 55", side=3, line=2.5, adj=0, col=gray(0.6) ) mtext( "Survival curve", side=3, line=1.5, adj=0 ) -mtext( "DM, no insulin DM, Insulin", side=3, line=0.5, adj=0, col=clr[1] ) -mtext( "DM, no insulin", side=3, line=0.5, adj=0, col=clr[2] ) +mtext( "DM, no insulin DM, Insulin", side=3, line=0.5, adj=0, col=clr[2] ) +mtext( "DM, no insulin", side=3, line=0.5, adj=0, col=clr[1] ) axis( side=4 ) axis( side=4, at=1:19/20, labels=FALSE ) axis( side=4, at=1:99/100, labels=FALSE, tcl=-0.3 ) ################################################### -### code chunk number 26: pstatey +### code chunk number 28: pstatey ################################################### par( mfrow=c(1,2), las=1, mar=c(3,3,4,2), mgp=c(3,1,0)/1.6 ) # Men @@ -350,8 +355,8 @@ lines( as.numeric(rownames(pF)), pF[,2], lwd=3 ) mtext( "60 year old female, diagnosed 1990, aged 55", side=3, line=2.5, adj=0, col=gray(0.6) ) mtext( "Survival curve", side=3, line=1.5, adj=0 ) -mtext( "DM, no insulin DM, Insulin", side=3, line=0.5, adj=0, col=clr[1] ) -mtext( "DM, no insulin", side=3, line=0.5, adj=0, col=clr[2] ) +mtext( "DM, no insulin DM, Insulin", side=3, line=0.5, adj=0, col=clr[2] ) +mtext( "DM, no insulin", side=3, line=0.5, adj=0, col=clr[1] ) axis( side=4 ) axis( side=4, at=1:9/10, labels=FALSE ) axis( side=4, at=1:19/20, labels=FALSE, tcl=-0.4 ) @@ -359,7 +364,7 @@ ################################################### -### code chunk number 27: comp-0 +### code chunk number 29: comp-0 ################################################### PrM <- pState( nState( subset(simP,sex=="M"), at=seq(0,11,0.2), @@ -387,23 +392,23 @@ lines( PrM, border="blue" , col="transparent", lwd=3 ) lines( CoxM, border="red" , col="transparent", lwd=3 ) text( 60.5, 0.05, "M" ) -box( lwd=3 ) +box( lwd=5, col="white" ) ; box( lwd=2, col="black" ) plot( pF, border="black", col="transparent", lwd=3 ) lines( PrF, border="blue" , col="transparent", lwd=3 ) lines( CoxF, border="red" , col="transparent", lwd=3 ) text( 60.5, 0.05, "F" ) -box( lwd=3 ) +box( lwd=5, col="white" ) ; box( lwd=2, col="black" ) ################################################### -### code chunk number 28: CHANGE1 (eval = FALSE) +### code chunk number 30: CHANGE1 (eval = FALSE) ################################################### ## source( "../R/simLexis.R", keep.source=TRUE ) ################################################### -### code chunk number 29: CHANGE2 +### code chunk number 31: CHANGE2 ################################################### simX <- Epi:::simX sim1 <- Epi:::sim1 @@ -413,63 +418,63 @@ ################################################### -### code chunk number 30: simLexis.rnw:934-937 +### code chunk number 32: simLexis.rnw:972-975 ################################################### cbind( -attr( ini, "time.scale" ), +attr( ini, "time.scales" ), attr( ini, "time.since" ) ) ################################################### -### code chunk number 31: simLexis.rnw:962-963 +### code chunk number 33: simLexis.rnw:1000-1001 ################################################### simLexis ################################################### -### code chunk number 32: simLexis.rnw:980-981 +### code chunk number 34: simLexis.rnw:1018-1019 ################################################### simX ################################################### -### code chunk number 33: simLexis.rnw:993-994 +### code chunk number 35: simLexis.rnw:1031-1032 ################################################### sim1 ################################################### -### code chunk number 34: simLexis.rnw:1006-1007 +### code chunk number 36: simLexis.rnw:1044-1045 ################################################### lint ################################################### -### code chunk number 35: simLexis.rnw:1017-1018 +### code chunk number 37: simLexis.rnw:1055-1056 ################################################### get.next ################################################### -### code chunk number 36: simLexis.rnw:1027-1028 +### code chunk number 38: simLexis.rnw:1065-1066 ################################################### chop.lex ################################################### -### code chunk number 37: simLexis.rnw:1045-1046 +### code chunk number 39: simLexis.rnw:1083-1084 ################################################### nState ################################################### -### code chunk number 38: simLexis.rnw:1055-1056 +### code chunk number 40: simLexis.rnw:1093-1094 ################################################### pState ################################################### -### code chunk number 39: simLexis.rnw:1060-1062 +### code chunk number 41: simLexis.rnw:1098-1100 ################################################### plot.pState lines.pState diff -Nru r-cran-epi-2.32/vignettes/simLexis.rnw r-cran-epi-2.37/vignettes/simLexis.rnw --- r-cran-epi-2.32/vignettes/simLexis.rnw 2018-03-08 11:41:17.000000000 +0000 +++ r-cran-epi-2.37/vignettes/simLexis.rnw 2019-05-23 08:18:15.000000000 +0000 @@ -1,18 +1,17 @@ - \SweaveOpts{results=verbatim,keep.source=TRUE,include=FALSE,eps=FALSE} %\VignetteIndexEntry{Simulation of multistate models with multiple timescales: simLexis} -\documentclass[a4paper,twoside,12pt]{report} +\documentclass[a4paper,dvipsnames,twoside,12pt]{report} \newcommand{\Title}{Simulation of\\ multistate models with\\ multiple timescales:\\ \texttt{simLexis} in the \texttt{Epi} package} \newcommand{\Tit}{Multistate models with multiple timescales} -\newcommand{\Version}{Version 2.4} +\newcommand{\Version}{Version 2.5} \newcommand{\Dates}{\today} -\newcommand{\Where}{SDC} +\newcommand{\Where}{SDCC} \newcommand{\Homepage}{\url{http://BendixCarstensen.com/Epi/simLexis.pdf}} \newcommand{\Faculty}{\begin{tabular}{rl} Bendix Carstensen - & Steno Diabetes Center, Gentofte, Denmark\\ + & Steno Diabetes Center Copenhagen, Gentofte, Denmark\\ & {\small \& Department of Biostatistics, University of Copenhagen} \\ & \texttt{b@bxc.dk}\\ @@ -70,7 +69,7 @@ course be ignored. This first chapter of this vignette shows by an example how to use the -function \texttt{simLexis} and display the results. The subsequent +function \texttt{simLexis} and display the results. The second chapter discusses in more detail how the simulation machinery is implemented and is not needed for the practical use of \texttt{simLexis}. @@ -99,8 +98,15 @@ particular state. Hence, starting conditions should be represented as a \texttt{Lexis} object (where \texttt{lex.dur} and \texttt{lex.Xst} are ignored, since there is no follow-up yet), where the time scale -information is in the attributes \texttt{time.scale} and -\texttt{time.since} respectively. +information is in the attributes \texttt{time.scales} and +\texttt{time.since} respectively. + +Note that \texttt{time.scales} attribute is a vector of names of +variables in the \texttt{Lexis} object, so all of these variables +should be present even if they are not used in the models for the +transitions, and they should be set to 0; if they are not in the +initial dataset, \texttt{simLexis} will crash, if they are +\texttt{NA}, the \texttt{simLexis} will produce an object with 0 rows. Thus there are two main arguments to a function to simulate from a multistate model: @@ -115,9 +121,9 @@ \item A transition object, representing the transition intensities between states, which should be a list of lists of intensity representations. As an intensity representation we mean a function - that for given a \texttt{Lexis} object that can be used to produce + that for a given \texttt{Lexis} object can be used to produce estimates of the transition intensities at a set of supplied time - points since the state represented in the \texttt{Lexis} object. + points. The names of the elements of the transition object (which are lists) will be names of the \emph{transient} states, that is the states @@ -131,10 +137,11 @@ transition intensity from state \texttt{A} to the state \texttt{B}. The entries in the transition object can be either \texttt{glm} - objects, representing Poisson models for the transitions, - \texttt{coxph} objects representing an intensity model along one - time scale, or simply a function that takes a \texttt{Lexis} - object as input returns an estimated intensity for each row. + objects (either with \texttt{poisson} or \texttt{poisreg} family), + representing Poisson models for the transitions, \texttt{coxph} + objects representing an intensity model along one time scale, or + simply a function that takes a \texttt{Lexis} object as input and + returns an estimated intensity for each row. \end{enumerate} @@ -167,8 +174,8 @@ exit.status = factor(!is.na(dodth),labels=c("DM","Dead")), data = DMlate ) @ % -This is just data for a simple survival model with states ``DM'' and -``Dead''. Now we cut the follow-up at insulin start, which for the +This is just data for a simple survival model with states \texttt{DM} and +\texttt{Dead}. Now we cut the follow-up at insulin start, which for the majority of patients (T2D) is a clinical indicator of deterioration of disease regulation. We therefore also introduce a new timescale, and split the non-precursor states, so that we can address the question of @@ -179,9 +186,12 @@ new.state = "Ins", new.scale = "t.Ins", split.states = TRUE ) -summary( dmi ) -str(dmi) +summary( dmi, timeScales=T ) @ % $ +Note that we show the time scales in the \texttt{Lexis} object, and +that it is indicated that the time scale \texttt{t.Ins} is defined as +time since entry into stat state \texttt{Ins.} + We can show how many person-years we have and show the number of transitions and transition rates (per 1000), using the \texttt{boxes.Lexis} function to display the states and the number of @@ -200,25 +210,25 @@ \section{Analysis of rates} In the \texttt{Lexis} object (which is just a data frame) each person -is represented by one record for each transient state he occupies, +is represented by one record for each transient state occupied, thus in this case either 1 or 2 records; those who have a recorded time both without and with insulin have two records. In order to be able to fit Poisson models with occurrence rates varying -by the different time-scales, we split the follow-up in 6-month intervals +by the different time-scales, we split the follow-up in 3-month intervals for modeling: <>= -Si <- splitLexis( dmi, 0:30/2, "DMdur" ) -dim( Si ) +Si <- splitLexis( dmi, seq(0,20,1/4), "DMdur" ) +summary( Si ) print( subset( Si, lex.id==97 )[,1:10], digits=6 ) @ % -Note that when we split the follow-up each person's follow up now +Note that when we split the follow-up, each person's follow up now consists of many records, each with the \emph{current} values of the timescales at the start of the interval represented by the record. In -the modeling we must necessarily assume that the rates are constant -within each 6-month interval, but the \emph{size} of these rates we -model as smooth functions of the time scales (that is the values at -the beginning of each interval). +the modeling we shall assume that the rates are constant within each +6-month interval, but the \emph{size} of these rates we model as +smooth functions of the time scales (that is the values at the +beginning of each interval). The approach often used in epidemiology where one parameter is attached to each interval of time (or age) is not feasible when more @@ -228,13 +238,13 @@ We shall use natural splines (restricted cubic splines) for the analysis of rates, and hence we must allocate knots for the splines. This is done for each of the time-scales, and separately for -the transition out of states ``DM'' and ``Ins''. For age, we place the -knots so that the number of events is the same between each pair of -knots, but only half of this beyond each of the boundary knots, -whereas for the timescales \texttt{DMdur} and \texttt{tIns} where we -have observation from a well-defined 0, we put knots at 0 and place the -remaining knots so that the number of events is the same between each -pair of knots as well as outside the boundary knots. +the transition out of states \texttt{DM} and \texttt{Ins}. For age, we +place the knots so that the number of events is the same between each +pair of knots, but only half of this beyond each of the boundary +knots, whereas for the timescales \texttt{DMdur} and \texttt{tIns} +where we have observation from a well-defined 0, we put knots at 0 and +place the remaining knots so that the number of events is the same +between each pair of knots as well as outside the boundary knots. <>= nk <- 5 ( ai.kn <- with( subset(Si,lex.Xst=="Ins" & lex.Cst!=lex.Xst ), @@ -249,8 +259,8 @@ c(0,quantile( t.Ins+lex.dur, probs=(1:(nk-1))/nk ) )) ) @ % Note that when we tease out the event records for transition to -\emph{transient} states (in this case ``Ins'', that is -verb|lex.Xst=="Ins"|), we should add \verb|lex.Cst!=lex.Xst|, to +\emph{transient} states (in this case \texttt{Ins}, that is +\verb|lex.Xst=="Ins"|), we should add \verb|lex.Cst!=lex.Xst|, to include only transition records and avoiding including records of sojourn time in the transient state. @@ -264,21 +274,39 @@ I(Per-2000) + sex, family=poisson, offset=log(lex.dur), data = subset(Si,lex.Cst=="DM") ) -DM.Dead <- glm( (lex.Xst=="Dead") ~ Ns( Age , knots=ad.kn ) + +ci.exp( DM.Ins ) +class( DM.Ins ) +@ % +We can also fit this model with a slightly simpler syntax using the +\texttt{glm.Lexis} function: +<<>>= +DM.Ins <- glm.Lexis( Si, from = "DM", to = "Ins", + formula = ~ Ns( Age , knots=ai.kn ) + + Ns( DMdur, knots=di.kn ) + + I(Per-2000) + sex ) +ci.exp( DM.Ins ) +class( DM.Ins ) +@ % +So we have a slightly simpler syntax, and we get an informative +message of which transition(s) we are modeling. However we do not have +\texttt{update} method for these objects. +<<>>= +DM.Dead <- glm.Lexis( Si, from = "DM", to = "Dead", + formula = ~ Ns( Age , knots=ad.kn ) + + Ns( DMdur, knots=dd.kn ) + + I(Per-2000) + sex ) +Ins.Dead <- glm.Lexis( Si, from = "Ins", + formula = ~ Ns( Age , knots=ad.kn ) + Ns( DMdur, knots=dd.kn ) + - I(Per-2000) + sex, - family=poisson, offset=log(lex.dur), - data = subset(Si,lex.Cst=="DM") ) -Ins.Dead <- glm( (lex.Xst=="Dead(Ins)") ~ Ns( Age , knots=ad.kn ) + - Ns( DMdur, knots=dd.kn ) + - Ns( t.Ins, knots=ti.kn ) + - I(Per-2000) + sex, - family=poisson, offset=log(lex.dur), - data = subset(Si,lex.Cst=="Ins") ) + Ns( t.Ins, knots=ti.kn ) + + I(Per-2000) + sex ) @ % Note the similarity of the code used to fit the three models, is is -mainly redefining the response variable (``to'' state) and the subset -of the data used (``from'' state). +mainly redefining the response variable (\texttt{to} state) and the subset +of the data used (\texttt{from} state). Also note that the last model need +no specification of \texttt{to}, the default is to model all +transitions from the \texttt{from} state, and his case there is only +one. \section{The mortality rates} @@ -295,24 +323,26 @@ However, there is nothing that prevents us from testing this assumption; we can just fit a model for the mortality rates in the entire data frame \texttt{Si}, and compare the deviance from this with -the sum of the deviances from the separate models: +the sum of the deviances from the separate models using the \texttt{glm.Lexis} +function: <>= -with( Si, table(lex.Cst) ) -All.Dead <- glm( (lex.Xst %in% c("Dead(Ins)","Dead")) ~ - Ns( Age , knots=ad.kn ) + - Ns( DMdur, knots=dd.kn ) + - lex.Cst + - I(Per-2000) + sex, - family=poisson, offset=log(lex.dur), - data = Si ) +All.Dead <- glm.Lexis( Si, to = c("Dead(Ins)","Dead"), + formula = ~ Ns( Age , knots=ad.kn ) + + Ns( DMdur, knots=dd.kn ) + + lex.Cst + + I(Per-2000) + sex ) round( ci.exp( All.Dead ), 3 ) -@ +@ % +Incidentally we could have dispensed with the \texttt{to=} argument +too, because the default is to take \texttt{to} to be all absorbing +states in the model. + From the parameter values we would in a simple setting just claim that start of insulin-treatment was associated with a slightly more than doubling of mortality. The model \texttt{All.dead} assumes that the age- and DM-duration -effects on mortality in the ``DM'' and ``Ins'' states are the same, +effects on mortality in the \texttt{DM} and \texttt{Ins} states are the same, and moreover that there is no effect of insulin duration, but merely a mortality that is larger by a multiplicative constant not depending on insulin duration. The model \texttt{DM.Dead} has 8 parameters to @@ -321,9 +351,9 @@ natural spline with $k$ knots gives $k-1$ parameters, and we chose $k=5$ above). -We can compare the fit of the proportional hazards model with the fit -of the separate models for the two mortality rates, by adding up the -deviances and d.f. from these: +We can compare the fit of the simple proportional hazards model with +the fit of the separate models for the two mortality rates, by adding +up the deviances and d.f. from these: <>= what <- c("null.deviance","df.null","deviance","df.residual") ( rD <- unlist( DM.Dead[what] ) ) @@ -340,10 +370,10 @@ is a difference in the null deviances (and a difference of 1 in the null d.f.) is that the null deviance of \texttt{All.Dead} refer to a model with a single intercept, that is a model with constant and -\emph{identical} mortality rates from the states ``DM'' and ``Ins'', +\emph{identical} mortality rates from the states \texttt{DM} and \texttt{Ins}, whereas the null models for \texttt{DM.Dead} and \texttt{Ins.Dead} have constant but \emph{different} mortality rates from the states -``DM'' and ``Ins''. This is however irrelevant for the comparison of +\texttt{DM} and \texttt{Ins}. This is however irrelevant for the comparison of the \emph{residual} deviances. \subsection{How the mortality rates look} @@ -378,20 +408,25 @@ @ % For convenience the \texttt{Epi} package contains a function that computes predicted (log-)rates with c.i. --- it is merely a wrapper for -\texttt{predict.glm}: -<<>>= -ci.pred -@ -So set up the prediction data frame and modify it in loops over -ages at onset and insulin onset. Note that we set \texttt{lex.dur} to -1000 in the prediction frame, so that we obtain rates in units of -events per 1000 PY. -<>= +\texttt{predict.glm}. + +So we set up the prediction data frame and modify it in loops over +ages at onset and insulin onset in order to collect the predicted +rates in different scenarios: +<>= nd <- data.frame( DMdur = as.numeric( dimnames(pr.rates)[[1]] ), lex.Cst = factor( 1, levels=1:4, labels=levels(Si$lex.Cst) ), - sex = factor( 1, levels=1:2, labels=c("M","F")), - lex.dur = 1000 ) + sex = factor( 1, levels=1:2, labels=c("M","F")) ) +@ % $ +Note that we did \emph{not} insert \texttt{lex.dur} as covariate in +the prediction frame. This would be required if we used the +\texttt{poisson} family with the \texttt{glm}, but the wrapper +\texttt{glm.Lexis} uses the \texttt{poisreg} family, so +\texttt{lex.dur} is ignored and predictions always comes in the +(inverse) units of \texttt{lex.dur}. So we get rates per 1 person-year +in the predictions. +<>= for( ia in dimnames(pr.rates)[[2]] ) { dnew <- transform( nd, Age = as.numeric(ia)+DMdur, @@ -409,21 +444,22 @@ } } @ % $ -So for each age at DM onset we make a plot of the mortality as function -of current age both for those with no insulin treatment at those that -start 1, 3 and 5 years after, thus 4 curves (with c.i.). These curves -are replicated with a different color for the simplified model. +So for each age at DM onset we make a plot of the mortality as +function of current age both for those with no insulin treatment and +those that start insulin treatment 0, 2 and 5 years after diabetes +diagnosis, thus 4 curves (with c.i.). These curves are replicated with +a different color for the simplified model. <>= par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1 ) plot( NA, xlim=c(40,82), ylim=c(5,300), bty="n", log="y", xlab="Age", ylab="Mortality rate per 1000 PY" ) abline( v=seq(40,80,5), h=outer(1:9,10^(0:2),"*"), col=gray(0.8) ) for( aa in 4:7*10 ) for( ii in 1:4 ) - matlines( aa+as.numeric(dimnames(pr.rates)[[1]]), - cbind( pr.rates[,paste(aa),ii,"DM/Ins",], - pr.rates[,paste(aa),ii,"All" ,] ), - type="l", lty=1, lwd=c(3,1,1), - col=rep(c("red","limegreen"),each=3) ) + matshade( aa+as.numeric(dimnames(pr.rates)[[1]]), + cbind( pr.rates[,paste(aa),ii,"DM/Ins",], + pr.rates[,paste(aa),ii,"All" ,] )*1000, + type="l", lty=1, lwd=2, + col=c("red","limegreen") ) @ % \insfig{mort-int}{0.9}{Estimated mortality rates for male diabetes patients with no insulin (lower sets of curves) and insulin (upper @@ -464,6 +500,10 @@ \section{Input to the \texttt{simLexis} function} +We want to estimate the cumulative probability of being in each of the +4 states, so that we can assess the fraction of diabetes pateints that +go on insulin + In order to simulate from the multistate model with the estimated transition rates, and get the follow-up of a hypothetical cohort, we must supply \emph{both} the transition rates and the structure of the @@ -489,23 +529,18 @@ \subsection{The initial cohort} We now define an initial \texttt{Lexis} object of persons with all -relevant covariates defined. Note that we use \texttt{subset} to get a -\texttt{Lexis} object, this conserves the \texttt{time.scale} and -\texttt{time.since} attributes which is needed for the simulation (the -usual ``\texttt{[}'' operator does not preserve these attributes when -you select columns): +relevant covariates defined. Note that we use \texttt{NULL} as row +indicator in the \texttt{Lexis} object we used for modeling; this +conserves the \texttt{time.scale} and \texttt{time.since} attributes +which are needed for the simulation: <>= -str( Si[NULL,1:9] ) -ini <- subset(Si,FALSE,select=1:9) -str( ini ) -ini <- subset(Si,select=1:9)[NULL,] -str( ini ) +str( ini <- Si[NULL,1:9] ) @ % We now have an empty \texttt{Lexis} object with attributes reflecting -the timescales in multistate model we want to simulate, so we must now -enter some data to represent the persons whose follow-up we want to -simulate through the model; we set up an initial dataset with one man -and one woman: +the timescales in the multistate model we want to simulate from. But +we must enter some data to represent the initial state of the persons +whose follow-up we want to simulate through the model; so fill in data +for one man and one woman: <>= ini[1:2,"lex.id"] <- 1:2 ini[1:2,"lex.Cst"] <- "DM" @@ -515,6 +550,10 @@ ini[1:2,"sex"] <- c("M","F") ini @ % +So the persons starts in age 60 in 1995 with 5 years of diabetes +duration. Note that the \texttt{t.Ins} is \texttt{NA}, because this is +a timescale that first comes alive if a transtion to \texttt{Ins} is +simulated. \section{Simulation of the follow-up} @@ -549,8 +588,8 @@ We fitted a proportional mortality model \texttt{All.Dead} (which fitted worse than the other two), this is a model for \emph{both} the -transition from ``DM'' to ``Death'' \emph{and} from ``Ins'' to -``Dead(Ins)'', assuming that they are proportional. But it can easily +transition from \texttt{DM} to \texttt{Death} \emph{and} from \texttt{Ins} to +\texttt{Dead(Ins)}, assuming that they are proportional. But it can easily be used in the simulation set-up, because the state is embedded in the model via the term \texttt{lex.Cst}, which is updated during the simulation. @@ -582,7 +621,6 @@ I(Per-2000) + sex, data = Si ) round( ci.exp( Cox.Dead ), 3 ) -round( ci.exp( All.Dead ), 3 ) @ % Note that in order for this model to be usable for simulation, it is necessary that we use the components of the \texttt{Lexis} object to @@ -656,7 +694,7 @@ clx <- c( clr, rgb( t( col2rgb( clr[2:1] )*2 + rep(255,3) ) / 3, max=255 ) ) par( mfrow=c(1,2), las=1, mar=c(3,3,4,2), mgp=c(3,1,0)/1.6 ) # Men -plot( pM, col=clx ) +plot( pM, col=clx, xlab="Date of FU" ) lines( as.numeric(rownames(pM)), pM[,2], lwd=3 ) mtext( "60 year old male, diagnosed 1990, aged 55", side=3, line=2.5, adj=0, col=gray(0.6) ) mtext( "Survival curve", side=3, line=1.5, adj=0 ) @@ -671,12 +709,12 @@ from=1995, time.scale="Per" ), perm=c(1,2,4,3) ) -plot( pF, col=clx ) +plot( pF, col=clx, xlab="Date of FU" ) lines( as.numeric(rownames(pF)), pF[,2], lwd=3 ) mtext( "60 year old female, diagnosed 1990, aged 55", side=3, line=2.5, adj=0, col=gray(0.6) ) mtext( "Survival curve", side=3, line=1.5, adj=0 ) -mtext( "DM, no insulin DM, Insulin", side=3, line=0.5, adj=0, col=clr[1] ) -mtext( "DM, no insulin", side=3, line=0.5, adj=0, col=clr[2] ) +mtext( "DM, no insulin DM, Insulin", side=3, line=0.5, adj=0, col=clr[2] ) +mtext( "DM, no insulin", side=3, line=0.5, adj=0, col=clr[1] ) axis( side=4 ) axis( side=4, at=1:19/20, labels=FALSE ) axis( side=4, at=1:99/100, labels=FALSE, tcl=-0.3 ) @@ -716,8 +754,8 @@ lines( as.numeric(rownames(pF)), pF[,2], lwd=3 ) mtext( "60 year old female, diagnosed 1990, aged 55", side=3, line=2.5, adj=0, col=gray(0.6) ) mtext( "Survival curve", side=3, line=1.5, adj=0 ) -mtext( "DM, no insulin DM, Insulin", side=3, line=0.5, adj=0, col=clr[1] ) -mtext( "DM, no insulin", side=3, line=0.5, adj=0, col=clr[2] ) +mtext( "DM, no insulin DM, Insulin", side=3, line=0.5, adj=0, col=clr[2] ) +mtext( "DM, no insulin", side=3, line=0.5, adj=0, col=clr[1] ) axis( side=4 ) axis( side=4, at=1:9/10, labels=FALSE ) axis( side=4, at=1:19/20, labels=FALSE, tcl=-0.4 ) @@ -764,13 +802,13 @@ lines( PrM, border="blue" , col="transparent", lwd=3 ) lines( CoxM, border="red" , col="transparent", lwd=3 ) text( 60.5, 0.05, "M" ) -box( lwd=3 ) +box( lwd=5, col="white" ) ; box( lwd=2, col="black" ) plot( pF, border="black", col="transparent", lwd=3 ) lines( PrF, border="blue" , col="transparent", lwd=3 ) lines( CoxF, border="red" , col="transparent", lwd=3 ) text( 60.5, 0.05, "F" ) -box( lwd=3 ) +box( lwd=5, col="white" ) ; box( lwd=2, col="black" ) @ % \insfig{comp-0}{1.0}{Comparison of the simulated state occupancy probabilities using separate Poisson models for the mortality rates @@ -930,10 +968,10 @@ length as the \texttt{time.scales} attribute, with value equal to a state name if the corresponding time scale is defined as time since entry into that state. In this example the 4th timescale is time since -entry into the ``Ins'' state, and hence: +entry into the \texttt{Ins} state, and hence: <<>>= cbind( -attr( ini, "time.scale" ), +attr( ini, "time.scales" ), attr( ini, "time.since" ) ) @ % \texttt{Lexis} objects will have this attribute set for time scales created diff -Nru r-cran-epi-2.32/vignettes/simLexis.rwl r-cran-epi-2.37/vignettes/simLexis.rwl --- r-cran-epi-2.32/vignettes/simLexis.rwl 2018-05-03 14:36:24.000000000 +0000 +++ r-cran-epi-2.37/vignettes/simLexis.rwl 2019-05-23 08:22:22.000000000 +0000 @@ -1,54 +1,56 @@ -R version 3.4.4 (2018-03-15) +R version 3.6.0 (2019-04-26) --------------------------------------------- Folder: /home/bendix/stat/R/lib.src/Epi/pkg/vignettes - Started: Thursday 03. May 2018, 16:35:00 + Started: Thursday 23. May 2019, 10:21:05 --------------------------------------------- Writing to file simLexis.tex Processing code chunks with options ... - 1 : keep.source term verbatim (simLexis.rnw:24) - 2 : echo keep.source term verbatim (label = start, simLexis.rnw:156) - 3 : echo keep.source term verbatim (label = Lexis, simLexis.rnw:163) - 4 : echo keep.source term verbatim (label = cut, simLexis.rnw:176) - 5 : echo keep.source term verbatim pdf (label = boxes, simLexis.rnw:189) - 6 : echo keep.source term verbatim (label = split, simLexis.rnw:210) - 7 : echo keep.source term verbatim (label = knots, simLexis.rnw:238) - 8 : echo keep.source term verbatim (label = Poisson, simLexis.rnw:260) - 9 : echo keep.source term verbatim (label = prop-haz, simLexis.rnw:299) -10 : echo keep.source term verbatim (label = get-dev, simLexis.rnw:327) -11 : echo keep.source term verbatim (label = pr-array, simLexis.rnw:371) -12 : echo keep.source term verbatim (simLexis.rnw:382) -13 : echo keep.source term verbatim (label = make-pred, simLexis.rnw:389) -14 : echo keep.source term verbatim pdf (label = mort-int, simLexis.rnw:416) -15 : echo keep.source term verbatim (label = Tr, simLexis.rnw:478) -16 : echo keep.source term verbatim (label = make-ini, simLexis.rnw:497) -17 : echo keep.source term verbatim (label = ini-fill, simLexis.rnw:509) -18 : echo keep.source term verbatim (label = simL, simLexis.rnw:530) -19 : echo keep.source term verbatim (label = sum-simL, simLexis.rnw:542) -20 : echo keep.source term verbatim (label = Tr.p-simP, simLexis.rnw:559) -21 : echo keep.source term verbatim (label = Cox-dur, simLexis.rnw:576) -22 : echo keep.source term verbatim (label = TR.c, simLexis.rnw:604) -23 : echo keep.source term verbatim (label = nState, simLexis.rnw:621) -24 : echo keep.source term verbatim pdf (label = pstate0, simLexis.rnw:635) -25 : echo keep.source term verbatim pdf (label = pstatex, simLexis.rnw:653) -26 : echo keep.source term verbatim pdf (label = pstatey, simLexis.rnw:691) -27 : echo keep.source term verbatim pdf (label = comp-0, simLexis.rnw:740) -28 : keep.source (label = CHANGE1, simLexis.rnw:909) -29 : keep.source term hide (label = CHANGE2, simLexis.rnw:913) -30 : echo keep.source term verbatim (simLexis.rnw:934) -31 : echo keep.source term verbatim (simLexis.rnw:962) -32 : echo keep.source term verbatim (simLexis.rnw:980) -33 : echo keep.source term verbatim (simLexis.rnw:993) -34 : echo keep.source term verbatim (simLexis.rnw:1006) -35 : echo keep.source term verbatim (simLexis.rnw:1017) -36 : echo keep.source term verbatim (simLexis.rnw:1027) -37 : echo keep.source term verbatim (simLexis.rnw:1045) -38 : echo keep.source term verbatim (simLexis.rnw:1055) -39 : echo keep.source term verbatim (simLexis.rnw:1060) + 1 : keep.source term verbatim (simLexis.rnw:23) + 2 : echo keep.source term verbatim (label = start, simLexis.rnw:163) + 3 : echo keep.source term verbatim (label = Lexis, simLexis.rnw:170) + 4 : echo keep.source term verbatim (label = cut, simLexis.rnw:183) + 5 : echo keep.source term verbatim pdf (label = boxes, simLexis.rnw:199) + 6 : echo keep.source term verbatim (label = split, simLexis.rnw:220) + 7 : echo keep.source term verbatim (label = knots, simLexis.rnw:248) + 8 : echo keep.source term verbatim (label = Poisson, simLexis.rnw:270) + 9 : echo keep.source term verbatim (simLexis.rnw:282) +10 : echo keep.source term verbatim (simLexis.rnw:293) +11 : echo keep.source term verbatim (label = prop-haz, simLexis.rnw:328) +12 : echo keep.source term verbatim (label = get-dev, simLexis.rnw:357) +13 : echo keep.source term verbatim (label = pr-array, simLexis.rnw:401) +14 : echo keep.source term verbatim (label = mknd, simLexis.rnw:416) +15 : echo keep.source term verbatim (label = make-pred, simLexis.rnw:429) +16 : echo keep.source term verbatim pdf (label = mort-int, simLexis.rnw:452) +17 : echo keep.source term verbatim (label = Tr, simLexis.rnw:518) +18 : echo keep.source term verbatim (label = make-ini, simLexis.rnw:536) +19 : echo keep.source term verbatim (label = ini-fill, simLexis.rnw:544) +20 : echo keep.source term verbatim (label = simL, simLexis.rnw:569) +21 : echo keep.source term verbatim (label = sum-simL, simLexis.rnw:581) +22 : echo keep.source term verbatim (label = Tr.p-simP, simLexis.rnw:598) +23 : echo keep.source term verbatim (label = Cox-dur, simLexis.rnw:615) +24 : echo keep.source term verbatim (label = TR.c, simLexis.rnw:642) +25 : echo keep.source term verbatim (label = nState, simLexis.rnw:659) +26 : echo keep.source term verbatim pdf (label = pstate0, simLexis.rnw:673) +27 : echo keep.source term verbatim pdf (label = pstatex, simLexis.rnw:691) +28 : echo keep.source term verbatim pdf (label = pstatey, simLexis.rnw:729) +29 : echo keep.source term verbatim pdf (label = comp-0, simLexis.rnw:778) +30 : keep.source (label = CHANGE1, simLexis.rnw:947) +31 : keep.source term hide (label = CHANGE2, simLexis.rnw:951) +32 : echo keep.source term verbatim (simLexis.rnw:972) +33 : echo keep.source term verbatim (simLexis.rnw:1000) +34 : echo keep.source term verbatim (simLexis.rnw:1018) +35 : echo keep.source term verbatim (simLexis.rnw:1031) +36 : echo keep.source term verbatim (simLexis.rnw:1044) +37 : echo keep.source term verbatim (simLexis.rnw:1055) +38 : echo keep.source term verbatim (simLexis.rnw:1065) +39 : echo keep.source term verbatim (simLexis.rnw:1083) +40 : echo keep.source term verbatim (simLexis.rnw:1093) +41 : echo keep.source term verbatim (simLexis.rnw:1098) You can now run (pdf)latex on ‘simLexis.tex’ --------------------------------------------- Folder: /home/bendix/stat/R/lib.src/Epi/pkg/vignettes - Ended: Thursday 03. May 2018, 16:36:24 - Elapsed: 00:01:24 + Ended: Thursday 23. May 2019, 10:22:22 + Elapsed: 00:01:16 --------------------------------------------- diff -Nru r-cran-epi-2.32/vignettes/simLexis.tex r-cran-epi-2.37/vignettes/simLexis.tex --- r-cran-epi-2.32/vignettes/simLexis.tex 2018-05-03 14:36:24.000000000 +0000 +++ r-cran-epi-2.37/vignettes/simLexis.tex 2019-05-23 08:22:22.000000000 +0000 @@ -1,18 +1,17 @@ - %\VignetteIndexEntry{Simulation of multistate models with multiple timescales: simLexis} -\documentclass[a4paper,twoside,12pt]{report} +\documentclass[a4paper,dvipsnames,twoside,12pt]{report} \newcommand{\Title}{Simulation of\\ multistate models with\\ multiple timescales:\\ \texttt{simLexis} in the \texttt{Epi} package} \newcommand{\Tit}{Multistate models with multiple timescales} -\newcommand{\Version}{Version 2.4} +\newcommand{\Version}{Version 2.5} \newcommand{\Dates}{\today} -\newcommand{\Where}{SDC} +\newcommand{\Where}{SDCC} \newcommand{\Homepage}{\url{http://BendixCarstensen.com/Epi/simLexis.pdf}} \newcommand{\Faculty}{\begin{tabular}{rl} Bendix Carstensen - & Steno Diabetes Center, Gentofte, Denmark\\ + & Steno Diabetes Center Copenhagen, Gentofte, Denmark\\ & {\small \& Department of Biostatistics, University of Copenhagen} \\ & \texttt{b@bxc.dk}\\ @@ -65,7 +64,7 @@ course be ignored. This first chapter of this vignette shows by an example how to use the -function \texttt{simLexis} and display the results. The subsequent +function \texttt{simLexis} and display the results. The second chapter discusses in more detail how the simulation machinery is implemented and is not needed for the practical use of \texttt{simLexis}. @@ -94,8 +93,15 @@ particular state. Hence, starting conditions should be represented as a \texttt{Lexis} object (where \texttt{lex.dur} and \texttt{lex.Xst} are ignored, since there is no follow-up yet), where the time scale -information is in the attributes \texttt{time.scale} and -\texttt{time.since} respectively. +information is in the attributes \texttt{time.scales} and +\texttt{time.since} respectively. + +Note that \texttt{time.scales} attribute is a vector of names of +variables in the \texttt{Lexis} object, so all of these variables +should be present even if they are not used in the models for the +transitions, and they should be set to 0; if they are not in the +initial dataset, \texttt{simLexis} will crash, if they are +\texttt{NA}, the \texttt{simLexis} will produce an object with 0 rows. Thus there are two main arguments to a function to simulate from a multistate model: @@ -110,9 +116,9 @@ \item A transition object, representing the transition intensities between states, which should be a list of lists of intensity representations. As an intensity representation we mean a function - that for given a \texttt{Lexis} object that can be used to produce + that for a given \texttt{Lexis} object can be used to produce estimates of the transition intensities at a set of supplied time - points since the state represented in the \texttt{Lexis} object. + points. The names of the elements of the transition object (which are lists) will be names of the \emph{transient} states, that is the states @@ -126,10 +132,11 @@ transition intensity from state \texttt{A} to the state \texttt{B}. The entries in the transition object can be either \texttt{glm} - objects, representing Poisson models for the transitions, - \texttt{coxph} objects representing an intensity model along one - time scale, or simply a function that takes a \texttt{Lexis} - object as input returns an estimated intensity for each row. + objects (either with \texttt{poisson} or \texttt{poisreg} family), + representing Poisson models for the transitions, \texttt{coxph} + objects representing an intensity model along one time scale, or + simply a function that takes a \texttt{Lexis} object as input and + returns an estimated intensity for each row. \end{enumerate} @@ -155,25 +162,26 @@ > print( sessionInfo(), l=F ) \end{Sinput} \begin{Soutput} -R version 3.4.4 (2018-03-15) +R version 3.6.0 (2019-04-26) Platform: x86_64-pc-linux-gnu (64-bit) -Running under: Ubuntu 14.04.5 LTS +Running under: Ubuntu 14.04.6 LTS Matrix products: default -BLAS: /usr/lib/openblas-base/libopenblas.so.0 +BLAS: /usr/lib/openblas-base/libopenblas.so.0 LAPACK: /usr/lib/lapack/liblapack.so.3.0 attached base packages: [1] utils datasets graphics grDevices stats methods base other attached packages: -[1] Epi_2.29 +[1] Epi_2.37 loaded via a namespace (and not attached): - [1] cmprsk_2.2-7 zoo_1.8-0 MASS_7.3-49 compiler_3.4.4 - [5] Matrix_1.2-14 plyr_1.8.4 parallel_3.4.4 survival_2.42-3 - [9] etm_1.0.1 Rcpp_0.12.12 splines_3.4.4 grid_3.4.4 -[13] data.table_1.10.4 numDeriv_2016.8-1 lattice_0.20-35 + [1] Rcpp_1.0.0 lattice_0.20-38 zoo_1.8-4 MASS_7.3-51.1 + [5] grid_3.6.0 plyr_1.8.4 nlme_3.1-139 etm_1.0.4 + [9] data.table_1.12.0 Matrix_1.2-17 splines_3.6.0 tools_3.6.0 +[13] cmprsk_2.2-7 numDeriv_2016.8-1 survival_2.44-1.1 parallel_3.6.0 +[17] compiler_3.6.0 mgcv_1.8-28 \end{Soutput} \end{Schunk} First we load the diabetes data and set up a simple illness-death @@ -188,10 +196,11 @@ \end{Sinput} \begin{Soutput} NOTE: entry.status has been set to "DM" for all. +NOTE: Dropping 4 rows with duration of follow up < tol \end{Soutput} \end{Schunk} -This is just data for a simple survival model with states ``DM'' and -``Dead''. Now we cut the follow-up at insulin start, which for the +This is just data for a simple survival model with states \texttt{DM} and +\texttt{Dead}. Now we cut the follow-up at insulin start, which for the majority of patients (T2D) is a clinical indicator of deterioration of disease regulation. We therefore also introduce a new timescale, and split the non-precursor states, so that we can address the question of @@ -203,7 +212,7 @@ + new.state = "Ins", + new.scale = "t.Ins", + split.states = TRUE ) -> summary( dmi ) +> summary( dmi, timeScales=T ) \end{Sinput} \begin{Soutput} Transitions: @@ -212,36 +221,16 @@ DM 6157 1694 2048 0 9899 3742 45885.49 9899 Ins 0 1340 0 451 1791 451 8387.77 1791 Sum 6157 3034 2048 451 11690 4193 54273.27 9996 -\end{Soutput} -\begin{Sinput} -> str(dmi) -\end{Sinput} -\begin{Soutput} -Classes ‘Lexis’ and 'data.frame': 11690 obs. of 15 variables: - $ Per : num 1999 2003 2005 2009 2009 ... - $ Age : num 58.7 64.1 86.3 44 75.8 ... - $ DMdur : num 0 0 0 0 0 0 0 0 0 0 ... - $ t.Ins : num NA NA NA NA NA NA NA NA NA NA ... - $ lex.dur: num 11.08 6.689 5.446 0.736 1.344 ... - $ lex.Cst: Factor w/ 4 levels "DM","Ins","Dead",..: 1 1 1 1 1 1 1 1 1 1 ... - $ lex.Xst: Factor w/ 4 levels "DM","Ins","Dead",..: 1 1 1 1 1 3 1 1 3 1 ... - $ lex.id : int 1 2 3 4 5 6 7 8 9 10 ... - $ sex : Factor w/ 2 levels "M","F": 2 1 2 2 1 2 1 1 2 1 ... - $ dobth : num 1940 1939 1918 1965 1933 ... - $ dodm : num 1999 2003 2005 2009 2009 ... - $ dodth : num NA NA NA NA NA ... - $ dooad : num NA 2007 NA NA NA ... - $ doins : num NA NA NA NA NA NA NA NA NA NA ... - $ dox : num 2010 2010 2010 2010 2010 ... - - attr(*, "time.scales")= chr "Per" "Age" "DMdur" "t.Ins" - - attr(*, "time.since")= chr "" "" "" "Ins" - - attr(*, "breaks")=List of 4 - ..$ Per : NULL - ..$ Age : NULL - ..$ DMdur: NULL - ..$ t.Ins: NULL + +Timescales: + Per Age DMdur t.Ins + "" "" "" "Ins" \end{Soutput} \end{Schunk} +Note that we show the time scales in the \texttt{Lexis} object, and +that it is indicated that the time scale \texttt{t.Ins} is defined as +time since entry into stat state \texttt{Ins.} + We can show how many person-years we have and show the number of transitions and transition rates (per 1000), using the \texttt{boxes.Lexis} function to display the states and the number of @@ -262,43 +251,54 @@ \section{Analysis of rates} In the \texttt{Lexis} object (which is just a data frame) each person -is represented by one record for each transient state he occupies, +is represented by one record for each transient state occupied, thus in this case either 1 or 2 records; those who have a recorded time both without and with insulin have two records. In order to be able to fit Poisson models with occurrence rates varying -by the different time-scales, we split the follow-up in 6-month intervals +by the different time-scales, we split the follow-up in 3-month intervals for modeling: \begin{Schunk} \begin{Sinput} -> Si <- splitLexis( dmi, 0:30/2, "DMdur" ) -> dim( Si ) +> Si <- splitLexis( dmi, seq(0,20,1/4), "DMdur" ) +> summary( Si ) \end{Sinput} \begin{Soutput} -[1] 115370 15 +Transitions: + To +From DM Ins Dead Dead(Ins) Records: Events: Risk time: Persons: + DM 184986 1694 2048 0 188728 3742 45885.49 9899 + Ins 0 34707 0 451 35158 451 8387.77 1791 + Sum 184986 36401 2048 451 223886 4193 54273.27 9996 \end{Soutput} \begin{Sinput} > print( subset( Si, lex.id==97 )[,1:10], digits=6 ) \end{Sinput} \begin{Soutput} - lex.id Per Age DMdur t.Ins lex.dur lex.Cst lex.Xst sex dobth -1105 97 1997.55 58.9268 0.00000 NA 0.5000000 DM DM F 1938.62 -1106 97 1998.05 59.4268 0.50000 NA 0.5000000 DM DM F 1938.62 -1107 97 1998.55 59.9268 1.00000 NA 0.5000000 DM DM F 1938.62 -1108 97 1999.05 60.4268 1.50000 NA 0.5000000 DM DM F 1938.62 -1109 97 1999.55 60.9268 2.00000 NA 0.1793292 DM Ins F 1938.62 -1110 97 1999.72 61.1061 2.17933 0.000000 0.3206708 Ins Ins F 1938.62 -1111 97 2000.05 61.4268 2.50000 0.320671 0.5000000 Ins Ins F 1938.62 -1112 97 2000.55 61.9268 3.00000 0.820671 0.0116359 Ins Dead(Ins) F 1938.62 + lex.id Per Age DMdur t.Ins lex.dur lex.Cst lex.Xst sex dobth +2142 97 1997.55 58.9268 0.00000 NA 0.2500000 DM DM F 1938.62 +2143 97 1997.80 59.1768 0.25000 NA 0.2500000 DM DM F 1938.62 +2144 97 1998.05 59.4268 0.50000 NA 0.2500000 DM DM F 1938.62 +2145 97 1998.30 59.6768 0.75000 NA 0.2500000 DM DM F 1938.62 +2146 97 1998.55 59.9268 1.00000 NA 0.2500000 DM DM F 1938.62 +2147 97 1998.80 60.1768 1.25000 NA 0.2500000 DM DM F 1938.62 +2148 97 1999.05 60.4268 1.50000 NA 0.2500000 DM DM F 1938.62 +2149 97 1999.30 60.6768 1.75000 NA 0.2500000 DM DM F 1938.62 +2150 97 1999.55 60.9268 2.00000 NA 0.1793292 DM Ins F 1938.62 +2151 97 1999.72 61.1061 2.17933 0.0000000 0.0706708 Ins Ins F 1938.62 +2152 97 1999.80 61.1768 2.25000 0.0706708 0.2500000 Ins Ins F 1938.62 +2153 97 2000.05 61.4268 2.50000 0.3206708 0.2500000 Ins Ins F 1938.62 +2154 97 2000.30 61.6768 2.75000 0.5706708 0.2500000 Ins Ins F 1938.62 +2155 97 2000.55 61.9268 3.00000 0.8206708 0.0116359 Ins Dead(Ins) F 1938.62 \end{Soutput} \end{Schunk} -Note that when we split the follow-up each person's follow up now +Note that when we split the follow-up, each person's follow up now consists of many records, each with the \emph{current} values of the timescales at the start of the interval represented by the record. In -the modeling we must necessarily assume that the rates are constant -within each 6-month interval, but the \emph{size} of these rates we -model as smooth functions of the time scales (that is the values at -the beginning of each interval). +the modeling we shall assume that the rates are constant within each +6-month interval, but the \emph{size} of these rates we model as +smooth functions of the time scales (that is the values at the +beginning of each interval). The approach often used in epidemiology where one parameter is attached to each interval of time (or age) is not feasible when more @@ -308,13 +308,13 @@ We shall use natural splines (restricted cubic splines) for the analysis of rates, and hence we must allocate knots for the splines. This is done for each of the time-scales, and separately for -the transition out of states ``DM'' and ``Ins''. For age, we place the -knots so that the number of events is the same between each pair of -knots, but only half of this beyond each of the boundary knots, -whereas for the timescales \texttt{DMdur} and \texttt{tIns} where we -have observation from a well-defined 0, we put knots at 0 and place the -remaining knots so that the number of events is the same between each -pair of knots as well as outside the boundary knots. +the transition out of states \texttt{DM} and \texttt{Ins}. For age, we +place the knots so that the number of events is the same between each +pair of knots, but only half of this beyond each of the boundary +knots, whereas for the timescales \texttt{DMdur} and \texttt{tIns} +where we have observation from a well-defined 0, we put knots at 0 and +place the remaining knots so that the number of events is the same +between each pair of knots as well as outside the boundary knots. \begin{Schunk} \begin{Sinput} > nk <- 5 @@ -359,8 +359,8 @@ \end{Soutput} \end{Schunk} Note that when we tease out the event records for transition to -\emph{transient} states (in this case ``Ins'', that is -verb|lex.Xst=="Ins"|), we should add \verb|lex.Cst!=lex.Xst|, to +\emph{transient} states (in this case \texttt{Ins}, that is +\verb|lex.Xst=="Ins"|), we should add \verb|lex.Cst!=lex.Xst|, to include only transition records and avoiding including records of sojourn time in the transient state. @@ -375,22 +375,98 @@ + I(Per-2000) + sex, + family=poisson, offset=log(lex.dur), + data = subset(Si,lex.Cst=="DM") ) -> DM.Dead <- glm( (lex.Xst=="Dead") ~ Ns( Age , knots=ad.kn ) + +> ci.exp( DM.Ins ) +\end{Sinput} +\begin{Soutput} + exp(Est.) 2.5% 97.5% +(Intercept) 1.37516629 1.21414923 1.55753699 +Ns(Age, knots = ai.kn)1 0.23411761 0.19185266 0.28569349 +Ns(Age, knots = ai.kn)2 0.23166177 0.19576557 0.27414000 +Ns(Age, knots = ai.kn)3 0.02835009 0.02284379 0.03518363 +Ns(Age, knots = ai.kn)4 0.38067392 0.32945713 0.43985278 +Ns(DMdur, knots = di.kn)1 0.04462626 0.03373451 0.05903459 +Ns(DMdur, knots = di.kn)2 0.22388988 0.19028663 0.26342723 +Ns(DMdur, knots = di.kn)3 0.03379141 0.02574708 0.04434907 +Ns(DMdur, knots = di.kn)4 0.47100646 0.40783317 0.54396529 +I(Per - 2000) 0.97381513 0.96082049 0.98698552 +sexF 0.73757407 0.66886782 0.81333785 +\end{Soutput} +\begin{Sinput} +> class( DM.Ins ) +\end{Sinput} +\begin{Soutput} +[1] "glm" "lm" +\end{Soutput} +\end{Schunk} +We can also fit this model with a slightly simpler syntax using the +\texttt{glm.Lexis} function: +\begin{Schunk} +\begin{Sinput} +> DM.Ins <- glm.Lexis( Si, from = "DM", to = "Ins", ++ formula = ~ Ns( Age , knots=ai.kn ) + ++ Ns( DMdur, knots=di.kn ) + ++ I(Per-2000) + sex ) +\end{Sinput} +\begin{Soutput} +stats::glm Poisson analysis of Lexis object Si with log link: +Rates for the transition: DM->Ins +\end{Soutput} +\begin{Sinput} +> ci.exp( DM.Ins ) +\end{Sinput} +\begin{Soutput} + exp(Est.) 2.5% 97.5% +(Intercept) 1.37516630 1.21415038 1.55753552 +Ns(Age, knots = ai.kn)1 0.23411761 0.19185214 0.28569426 +Ns(Age, knots = ai.kn)2 0.23166177 0.19576473 0.27414118 +Ns(Age, knots = ai.kn)3 0.02835009 0.02284374 0.03518371 +Ns(Age, knots = ai.kn)4 0.38067392 0.32945601 0.43985427 +Ns(DMdur, knots = di.kn)1 0.04462625 0.03373347 0.05903639 +Ns(DMdur, knots = di.kn)2 0.22388988 0.19028599 0.26342813 +Ns(DMdur, knots = di.kn)3 0.03379141 0.02574705 0.04434912 +Ns(DMdur, knots = di.kn)4 0.47100646 0.40783201 0.54396683 +I(Per - 2000) 0.97381513 0.96082027 0.98698574 +sexF 0.73757407 0.66886641 0.81333956 +\end{Soutput} +\begin{Sinput} +> class( DM.Ins ) +\end{Sinput} +\begin{Soutput} +[1] "glm.lex" "glm" "lm" +\end{Soutput} +\end{Schunk} +So we have a slightly simpler syntax, and we get an informative +message of which transition(s) we are modeling. However we do not have +\texttt{update} method for these objects. +\begin{Schunk} +\begin{Sinput} +> DM.Dead <- glm.Lexis( Si, from = "DM", to = "Dead", ++ formula = ~ Ns( Age , knots=ad.kn ) + ++ Ns( DMdur, knots=dd.kn ) + ++ I(Per-2000) + sex ) +\end{Sinput} +\begin{Soutput} +stats::glm Poisson analysis of Lexis object Si with log link: +Rates for the transition: DM->Dead +\end{Soutput} +\begin{Sinput} +> Ins.Dead <- glm.Lexis( Si, from = "Ins", ++ formula = ~ Ns( Age , knots=ad.kn ) + + Ns( DMdur, knots=dd.kn ) + -+ I(Per-2000) + sex, -+ family=poisson, offset=log(lex.dur), -+ data = subset(Si,lex.Cst=="DM") ) -> Ins.Dead <- glm( (lex.Xst=="Dead(Ins)") ~ Ns( Age , knots=ad.kn ) + -+ Ns( DMdur, knots=dd.kn ) + -+ Ns( t.Ins, knots=ti.kn ) + -+ I(Per-2000) + sex, -+ family=poisson, offset=log(lex.dur), -+ data = subset(Si,lex.Cst=="Ins") ) ++ Ns( t.Ins, knots=ti.kn ) + ++ I(Per-2000) + sex ) \end{Sinput} +\begin{Soutput} +stats::glm Poisson analysis of Lexis object Si with log link: +Rates for the transition: Ins->Dead(Ins) +\end{Soutput} \end{Schunk} Note the similarity of the code used to fit the three models, is is -mainly redefining the response variable (``to'' state) and the subset -of the data used (``from'' state). +mainly redefining the response variable (\texttt{to} state) and the subset +of the data used (\texttt{from} state). Also note that the last model need +no specification of \texttt{to}, the default is to model all +transitions from the \texttt{from} state, and his case there is only +one. \section{The mortality rates} @@ -407,48 +483,49 @@ However, there is nothing that prevents us from testing this assumption; we can just fit a model for the mortality rates in the entire data frame \texttt{Si}, and compare the deviance from this with -the sum of the deviances from the separate models: +the sum of the deviances from the separate models using the \texttt{glm.Lexis} +function: \begin{Schunk} \begin{Sinput} -> with( Si, table(lex.Cst) ) +> All.Dead <- glm.Lexis( Si, to = c("Dead(Ins)","Dead"), ++ formula = ~ Ns( Age , knots=ad.kn ) + ++ Ns( DMdur, knots=dd.kn ) + ++ lex.Cst + ++ I(Per-2000) + sex ) \end{Sinput} \begin{Soutput} -lex.Cst - DM Ins Dead Dead(Ins) - 97039 18331 0 0 +stats::glm Poisson analysis of Lexis object Si with log link: +Rates for transitions: Ins->Dead(Ins), DM->Dead \end{Soutput} \begin{Sinput} -> All.Dead <- glm( (lex.Xst %in% c("Dead(Ins)","Dead")) ~ -+ Ns( Age , knots=ad.kn ) + -+ Ns( DMdur, knots=dd.kn ) + -+ lex.Cst + -+ I(Per-2000) + sex, -+ family=poisson, offset=log(lex.dur), -+ data = Si ) > round( ci.exp( All.Dead ), 3 ) \end{Sinput} \begin{Soutput} exp(Est.) 2.5% 97.5% -(Intercept) 0.049 0.043 0.056 -Ns(Age, knots = ad.kn)1 4.120 3.479 4.879 -Ns(Age, knots = ad.kn)2 4.652 4.054 5.338 -Ns(Age, knots = ad.kn)3 15.460 13.575 17.608 -Ns(Age, knots = ad.kn)4 7.529 6.711 8.447 -Ns(DMdur, knots = dd.kn)1 0.520 0.429 0.629 -Ns(DMdur, knots = dd.kn)2 0.707 0.622 0.803 -Ns(DMdur, knots = dd.kn)3 0.319 0.238 0.428 -Ns(DMdur, knots = dd.kn)4 0.829 0.742 0.926 -lex.CstIns 2.168 1.946 2.414 +(Intercept) 0.057 0.049 0.065 +Ns(Age, knots = ad.kn)1 4.101 3.462 4.858 +Ns(Age, knots = ad.kn)2 4.661 4.064 5.346 +Ns(Age, knots = ad.kn)3 15.434 13.548 17.583 +Ns(Age, knots = ad.kn)4 7.509 6.695 8.421 +Ns(DMdur, knots = dd.kn)1 0.466 0.384 0.565 +Ns(DMdur, knots = dd.kn)2 0.642 0.563 0.731 +Ns(DMdur, knots = dd.kn)3 0.229 0.165 0.318 +Ns(DMdur, knots = dd.kn)4 0.796 0.713 0.888 +lex.CstIns 2.168 1.947 2.415 I(Per - 2000) 0.965 0.954 0.977 -sexF 0.665 0.614 0.720 +sexF 0.665 0.614 0.721 \end{Soutput} \end{Schunk} +Incidentally we could have dispensed with the \texttt{to=} argument +too, because the default is to take \texttt{to} to be all absorbing +states in the model. + From the parameter values we would in a simple setting just claim that start of insulin-treatment was associated with a slightly more than doubling of mortality. The model \texttt{All.dead} assumes that the age- and DM-duration -effects on mortality in the ``DM'' and ``Ins'' states are the same, +effects on mortality in the \texttt{DM} and \texttt{Ins} states are the same, and moreover that there is no effect of insulin duration, but merely a mortality that is larger by a multiplicative constant not depending on insulin duration. The model \texttt{DM.Dead} has 8 parameters to @@ -457,9 +534,9 @@ natural spline with $k$ knots gives $k-1$ parameters, and we chose $k=5$ above). -We can compare the fit of the proportional hazards model with the fit -of the separate models for the two mortality rates, by adding up the -deviances and d.f. from these: +We can compare the fit of the simple proportional hazards model with +the fit of the separate models for the two mortality rates, by adding +up the deviances and d.f. from these: \begin{Schunk} \begin{Sinput} > what <- c("null.deviance","df.null","deviance","df.residual") @@ -467,28 +544,28 @@ \end{Sinput} \begin{Soutput} null.deviance df.null deviance df.residual - 19957.95 97038.00 17849.90 97028.00 + 22535.77 188727.00 20412.81 188717.00 \end{Soutput} \begin{Sinput} > ( rI <- unlist( Ins.Dead[what] ) ) \end{Sinput} \begin{Soutput} null.deviance df.null deviance df.residual - 4329.880 18330.000 3674.067 18316.000 + 4867.127 35157.000 4211.735 35143.000 \end{Soutput} \begin{Sinput} > ( rA <- unlist( All.Dead[what] ) ) \end{Sinput} \begin{Soutput} null.deviance df.null deviance df.residual - 24300.15 115369.00 21608.79 115358.00 + 27415.21 223885.00 24705.70 223874.00 \end{Soutput} \begin{Sinput} > round( c( dd <- rA-(rI+rD), "pVal"=1-pchisq(dd[3],dd[4]+1) ), 3 ) \end{Sinput} \begin{Soutput} null.deviance df.null deviance df.residual pVal.deviance - 12.314 1.000 84.822 14.000 0.000 + 12.314 1.000 81.154 14.000 0.000 \end{Soutput} \end{Schunk} Thus we see there is a substantial non-proportionality of mortality @@ -500,10 +577,10 @@ is a difference in the null deviances (and a difference of 1 in the null d.f.) is that the null deviance of \texttt{All.Dead} refer to a model with a single intercept, that is a model with constant and -\emph{identical} mortality rates from the states ``DM'' and ``Ins'', +\emph{identical} mortality rates from the states \texttt{DM} and \texttt{Ins}, whereas the null models for \texttt{DM.Dead} and \texttt{Ins.Dead} have constant but \emph{different} mortality rates from the states -``DM'' and ``Ins''. This is however irrelevant for the comparison of +\texttt{DM} and \texttt{Ins}. This is however irrelevant for the comparison of the \emph{residual} deviances. \subsection{How the mortality rates look} @@ -549,43 +626,28 @@ \end{Schunk} For convenience the \texttt{Epi} package contains a function that computes predicted (log-)rates with c.i. --- it is merely a wrapper for -\texttt{predict.glm}: +\texttt{predict.glm}. + +So we set up the prediction data frame and modify it in loops over +ages at onset and insulin onset in order to collect the predicted +rates in different scenarios: \begin{Schunk} \begin{Sinput} -> ci.pred +> nd <- data.frame( DMdur = as.numeric( dimnames(pr.rates)[[1]] ), ++ lex.Cst = factor( 1, levels=1:4, ++ labels=levels(Si$lex.Cst) ), ++ sex = factor( 1, levels=1:2, labels=c("M","F")) ) \end{Sinput} -\begin{Soutput} -function (obj, newdata, Exp = NULL, alpha = 0.05) -{ - if (!inherits(obj, "glm")) - stop("Not usable for non-glm objects") - zz <- predict(obj, newdata = newdata, se.fit = TRUE, type = "link") - zz <- cbind(zz$fit, zz$se.fit) %*% ci.mat(alpha = alpha) - if (missing(Exp)) { - return(obj$family$linkinv(zz)) - } - else { - if (Exp) { - return(exp(zz)) - } - else if (!Exp) - return(zz) - } -} - -\end{Soutput} \end{Schunk} -So set up the prediction data frame and modify it in loops over -ages at onset and insulin onset. Note that we set \texttt{lex.dur} to -1000 in the prediction frame, so that we obtain rates in units of -events per 1000 PY. +Note that we did \emph{not} insert \texttt{lex.dur} as covariate in +the prediction frame. This would be required if we used the +\texttt{poisson} family with the \texttt{glm}, but the wrapper +\texttt{glm.Lexis} uses the \texttt{poisreg} family, so +\texttt{lex.dur} is ignored and predictions always comes in the +(inverse) units of \texttt{lex.dur}. So we get rates per 1 person-year +in the predictions. \begin{Schunk} \begin{Sinput} -> nd <- data.frame( DMdur = as.numeric( dimnames(pr.rates)[[1]] ), -+ lex.Cst = factor( 1, levels=1:4, -+ labels=levels(Si$lex.Cst) ), -+ sex = factor( 1, levels=1:2, labels=c("M","F")), -+ lex.dur = 1000 ) > for( ia in dimnames(pr.rates)[[2]] ) + { + dnew <- transform( nd, Age = as.numeric(ia)+DMdur, @@ -604,10 +666,11 @@ + } \end{Sinput} \end{Schunk} -So for each age at DM onset we make a plot of the mortality as function -of current age both for those with no insulin treatment at those that -start 1, 3 and 5 years after, thus 4 curves (with c.i.). These curves -are replicated with a different color for the simplified model. +So for each age at DM onset we make a plot of the mortality as +function of current age both for those with no insulin treatment and +those that start insulin treatment 0, 2 and 5 years after diabetes +diagnosis, thus 4 curves (with c.i.). These curves are replicated with +a different color for the simplified model. \begin{Schunk} \begin{Sinput} > par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1 ) @@ -615,11 +678,11 @@ + log="y", xlab="Age", ylab="Mortality rate per 1000 PY" ) > abline( v=seq(40,80,5), h=outer(1:9,10^(0:2),"*"), col=gray(0.8) ) > for( aa in 4:7*10 ) for( ii in 1:4 ) -+ matlines( aa+as.numeric(dimnames(pr.rates)[[1]]), -+ cbind( pr.rates[,paste(aa),ii,"DM/Ins",], -+ pr.rates[,paste(aa),ii,"All" ,] ), -+ type="l", lty=1, lwd=c(3,1,1), -+ col=rep(c("red","limegreen"),each=3) ) ++ matshade( aa+as.numeric(dimnames(pr.rates)[[1]]), ++ cbind( pr.rates[,paste(aa),ii,"DM/Ins",], ++ pr.rates[,paste(aa),ii,"All" ,] )*1000, ++ type="l", lty=1, lwd=2, ++ col=c("red","limegreen") ) \end{Sinput} \end{Schunk} \insfig{mort-int}{0.9}{Estimated mortality rates for male diabetes @@ -661,6 +724,10 @@ \section{Input to the \texttt{simLexis} function} +We want to estimate the cumulative probability of being in each of the +4 states, so that we can assess the fraction of diabetes pateints that +go on insulin + In order to simulate from the multistate model with the estimated transition rates, and get the follow-up of a hypothetical cohort, we must supply \emph{both} the transition rates and the structure of the @@ -688,60 +755,13 @@ \subsection{The initial cohort} We now define an initial \texttt{Lexis} object of persons with all -relevant covariates defined. Note that we use \texttt{subset} to get a -\texttt{Lexis} object, this conserves the \texttt{time.scale} and -\texttt{time.since} attributes which is needed for the simulation (the -usual ``\texttt{[}'' operator does not preserve these attributes when -you select columns): +relevant covariates defined. Note that we use \texttt{NULL} as row +indicator in the \texttt{Lexis} object we used for modeling; this +conserves the \texttt{time.scale} and \texttt{time.since} attributes +which are needed for the simulation: \begin{Schunk} \begin{Sinput} -> str( Si[NULL,1:9] ) -\end{Sinput} -\begin{Soutput} -Classes ‘Lexis’ and 'data.frame': 0 obs. of 9 variables: - $ lex.id : int - $ Per : num - $ Age : num - $ DMdur : num - $ t.Ins : num - $ lex.dur: num - $ lex.Cst: Factor w/ 4 levels "DM","Ins","Dead",..: - $ lex.Xst: Factor w/ 4 levels "DM","Ins","Dead",..: - $ sex : Factor w/ 2 levels "M","F": - - attr(*, "time.scales")= chr "Per" "Age" "DMdur" "t.Ins" - - attr(*, "time.since")= chr "" "" "" "Ins" - - attr(*, "breaks")=List of 4 - ..$ Per : NULL - ..$ Age : NULL - ..$ DMdur: num 0 0.5 1 1.5 2 2.5 3 3.5 4 4.5 ... - ..$ t.Ins: NULL -\end{Soutput} -\begin{Sinput} -> ini <- subset(Si,FALSE,select=1:9) -> str( ini ) -\end{Sinput} -\begin{Soutput} -Classes ‘Lexis’ and 'data.frame': 0 obs. of 9 variables: - $ lex.id : int - $ Per : num - $ Age : num - $ DMdur : num - $ t.Ins : num - $ lex.dur: num - $ lex.Cst: Factor w/ 4 levels "DM","Ins","Dead",..: - $ lex.Xst: Factor w/ 4 levels "DM","Ins","Dead",..: - $ sex : Factor w/ 2 levels "M","F": - - attr(*, "time.scales")= chr "Per" "Age" "DMdur" "t.Ins" - - attr(*, "time.since")= chr "" "" "" "Ins" - - attr(*, "breaks")=List of 4 - ..$ Per : NULL - ..$ Age : NULL - ..$ DMdur: num 0 0.5 1 1.5 2 2.5 3 3.5 4 4.5 ... - ..$ t.Ins: NULL -\end{Soutput} -\begin{Sinput} -> ini <- subset(Si,select=1:9)[NULL,] -> str( ini ) +> str( ini <- Si[NULL,1:9] ) \end{Sinput} \begin{Soutput} Classes ‘Lexis’ and 'data.frame': 0 obs. of 9 variables: @@ -759,15 +779,15 @@ - attr(*, "breaks")=List of 4 ..$ Per : NULL ..$ Age : NULL - ..$ DMdur: num 0 0.5 1 1.5 2 2.5 3 3.5 4 4.5 ... + ..$ DMdur: num 0 0.25 0.5 0.75 1 1.25 1.5 1.75 2 2.25 ... ..$ t.Ins: NULL \end{Soutput} \end{Schunk} We now have an empty \texttt{Lexis} object with attributes reflecting -the timescales in multistate model we want to simulate, so we must now -enter some data to represent the persons whose follow-up we want to -simulate through the model; we set up an initial dataset with one man -and one woman: +the timescales in the multistate model we want to simulate from. But +we must enter some data to represent the initial state of the persons +whose follow-up we want to simulate through the model; so fill in data +for one man and one woman: \begin{Schunk} \begin{Sinput} > ini[1:2,"lex.id"] <- 1:2 @@ -784,6 +804,10 @@ 2 2 1995 60 5 NA NA DM F \end{Soutput} \end{Schunk} +So the persons starts in age 60 in 1995 with 5 years of diabetes +duration. Note that the \texttt{t.Ins} is \texttt{NA}, because this is +a timescale that first comes alive if a transtion to \texttt{Ins} is +simulated. \section{Simulation of the follow-up} @@ -807,7 +831,7 @@ \end{Sinput} \begin{Soutput} user system elapsed - 19.076 2.634 20.723 + 17.598 1.289 17.857 \end{Soutput} \end{Schunk} The result is a \texttt{Lexis} object --- a data frame representing @@ -824,18 +848,18 @@ Transitions: To From DM Ins Dead Dead(Ins) Records: Events: Risk time: Persons: - DM 1431 2059 1510 0 5000 3569 36693.61 5000 - Ins 0 1410 0 649 2059 649 10931.47 2059 - Sum 1431 3469 1510 649 7059 4218 47625.08 5000 + DM 1438 2048 1514 0 5000 3562 36620.95 5000 + Ins 0 1363 0 685 2048 685 10935.47 2048 + Sum 1438 3411 1514 685 7048 4247 47556.41 5000 $F Transitions: To From DM Ins Dead Dead(Ins) Records: Events: Risk time: Persons: - DM 2171 1697 1132 0 5000 2829 42283.74 5000 - Ins 0 1360 0 337 1697 337 9541.50 1697 - Sum 2171 3057 1132 337 6697 3166 51825.24 5000 + DM 2186 1689 1125 0 5000 2814 42181.63 5000 + Ins 0 1321 0 368 1689 368 9535.20 1689 + Sum 2186 3010 1125 368 6689 3182 51716.83 5000 \end{Soutput} \end{Schunk} @@ -845,8 +869,8 @@ We fitted a proportional mortality model \texttt{All.Dead} (which fitted worse than the other two), this is a model for \emph{both} the -transition from ``DM'' to ``Death'' \emph{and} from ``Ins'' to -``Dead(Ins)'', assuming that they are proportional. But it can easily +transition from \texttt{DM} to \texttt{Death} \emph{and} from \texttt{Ins} to +\texttt{Dead(Ins)}, assuming that they are proportional. But it can easily be used in the simulation set-up, because the state is embedded in the model via the term \texttt{lex.Cst}, which is updated during the simulation. @@ -864,7 +888,7 @@ \end{Sinput} \begin{Soutput} user system elapsed - 18.898 3.572 21.412 + 16.996 0.776 16.802 \end{Soutput} \begin{Sinput} > summary( simP, by="sex" ) @@ -875,18 +899,18 @@ Transitions: To From DM Ins Dead Dead(Ins) Records: Events: Risk time: Persons: - DM 1637 2076 1287 0 5000 3363 37695.03 5000 - Ins 0 1209 0 867 2076 867 10129.68 2076 - Sum 1637 3285 1287 867 7076 4230 47824.71 5000 + DM 1633 1984 1383 0 5000 3367 37674.18 5000 + Ins 0 1135 0 849 1984 849 9807.38 1984 + Sum 1633 3119 1383 849 6984 4216 47481.56 5000 $F Transitions: To From DM Ins Dead Dead(Ins) Records: Events: Risk time: Persons: - DM 2274 1733 993 0 5000 2726 42968.05 5000 - Ins 0 1201 0 532 1733 532 8624.01 1733 - Sum 2274 2934 993 532 6733 3258 51592.05 5000 + DM 2285 1695 1020 0 5000 2715 42892.52 5000 + Ins 0 1191 0 504 1695 504 8833.70 1695 + Sum 2285 2886 1020 504 6695 3219 51726.21 5000 \end{Soutput} \end{Schunk} @@ -909,32 +933,14 @@ \end{Sinput} \begin{Soutput} exp(Est.) 2.5% 97.5% -Ns(Age - DMdur, knots = ad.kn)1 4.172 3.535 4.924 -Ns(Age - DMdur, knots = ad.kn)2 4.503 3.825 5.301 +Ns(Age - DMdur, knots = ad.kn)1 4.172 3.535 4.923 +Ns(Age - DMdur, knots = ad.kn)2 4.502 3.824 5.301 Ns(Age - DMdur, knots = ad.kn)3 16.077 14.087 18.348 -Ns(Age - DMdur, knots = ad.kn)4 7.479 6.500 8.605 -I(lex.Cst == "Ins")TRUE 2.171 1.949 2.419 +Ns(Age - DMdur, knots = ad.kn)4 7.479 6.501 8.605 +I(lex.Cst == "Ins")TRUE 2.170 1.948 2.418 I(Per - 2000) 0.965 0.954 0.977 sexF 0.667 0.616 0.723 \end{Soutput} -\begin{Sinput} -> round( ci.exp( All.Dead ), 3 ) -\end{Sinput} -\begin{Soutput} - exp(Est.) 2.5% 97.5% -(Intercept) 0.049 0.043 0.056 -Ns(Age, knots = ad.kn)1 4.120 3.479 4.879 -Ns(Age, knots = ad.kn)2 4.652 4.054 5.338 -Ns(Age, knots = ad.kn)3 15.460 13.575 17.608 -Ns(Age, knots = ad.kn)4 7.529 6.711 8.447 -Ns(DMdur, knots = dd.kn)1 0.520 0.429 0.629 -Ns(DMdur, knots = dd.kn)2 0.707 0.622 0.803 -Ns(DMdur, knots = dd.kn)3 0.319 0.238 0.428 -Ns(DMdur, knots = dd.kn)4 0.829 0.742 0.926 -lex.CstIns 2.168 1.946 2.414 -I(Per - 2000) 0.965 0.954 0.977 -sexF 0.665 0.614 0.720 -\end{Soutput} \end{Schunk} Note that in order for this model to be usable for simulation, it is necessary that we use the components of the \texttt{Lexis} object to @@ -965,7 +971,7 @@ \end{Sinput} \begin{Soutput} user system elapsed - 20.321 3.608 22.891 + 18.346 0.953 18.291 \end{Soutput} \begin{Sinput} > summary( simC, by="sex" ) @@ -976,18 +982,18 @@ Transitions: To From DM Ins Dead Dead(Ins) Records: Events: Risk time: Persons: - DM 1671 2121 1208 0 5000 3329 37403.75 5000 - Ins 0 1419 0 702 2121 702 10384.18 2121 - Sum 1671 3540 1208 702 7121 4031 47787.93 5000 + DM 1714 2039 1247 0 5000 3286 37280.04 5000 + Ins 0 1348 0 691 2039 691 9990.38 2039 + Sum 1714 3387 1247 691 7039 3977 47270.42 5000 $F Transitions: To From DM Ins Dead Dead(Ins) Records: Events: Risk time: Persons: - DM 2272 1807 921 0 5000 2728 42185.28 5000 - Ins 0 1418 0 389 1807 389 9439.83 1807 - Sum 2272 3225 921 389 6807 3117 51625.11 5000 + DM 2332 1702 966 0 5000 2668 42168.20 5000 + Ins 0 1313 0 389 1702 389 9083.14 1702 + Sum 2332 3015 966 389 6702 3057 51251.34 5000 \end{Soutput} \end{Schunk} @@ -1005,7 +1011,7 @@ \end{Sinput} \begin{Soutput} user system elapsed - 0.747 0.000 0.746 + 0.717 0.001 0.717 \end{Soutput} \begin{Sinput} > nSt[1:10,] @@ -1014,15 +1020,15 @@ State when DM Ins Dead Dead(Ins) 1995 5000 0 0 0 - 1995.2 4933 38 29 0 - 1995.4 4849 87 62 2 - 1995.6 4777 128 89 6 - 1995.8 4709 169 116 6 - 1996 4632 212 148 8 - 1996.2 4549 258 183 10 - 1996.4 4480 292 216 12 - 1996.6 4411 331 240 18 - 1996.8 4340 360 277 23 + 1995.2 4932 38 29 1 + 1995.4 4843 93 62 2 + 1995.6 4773 137 87 3 + 1995.8 4702 179 116 3 + 1996 4621 226 148 5 + 1996.2 4547 269 179 5 + 1996.4 4469 309 212 10 + 1996.6 4407 342 237 14 + 1996.8 4322 386 275 17 \end{Soutput} \end{Schunk} We see that as time goes by, the 5000 men slowly move away from the @@ -1042,11 +1048,11 @@ State when DM Ins Dead(Ins) Dead 1995 1.0000 1.0000 1.0000 1 - 1995.2 0.9866 0.9942 0.9942 1 - 1995.4 0.9698 0.9872 0.9876 1 - 1995.6 0.9554 0.9810 0.9822 1 - 1995.8 0.9418 0.9756 0.9768 1 - 1996 0.9264 0.9688 0.9704 1 + 1995.2 0.9864 0.9940 0.9942 1 + 1995.4 0.9686 0.9872 0.9876 1 + 1995.6 0.9546 0.9820 0.9826 1 + 1995.8 0.9404 0.9762 0.9768 1 + 1996 0.9242 0.9694 0.9704 1 \end{Soutput} \begin{Sinput} > par( mfrow=c(1,2), mar=c(3,3,1,1), mgp=c(3,1,0)/1.6 ) @@ -1072,7 +1078,7 @@ > clx <- c( clr, rgb( t( col2rgb( clr[2:1] )*2 + rep(255,3) ) / 3, max=255 ) ) > par( mfrow=c(1,2), las=1, mar=c(3,3,4,2), mgp=c(3,1,0)/1.6 ) > # Men -> plot( pM, col=clx ) +> plot( pM, col=clx, xlab="Date of FU" ) > lines( as.numeric(rownames(pM)), pM[,2], lwd=3 ) > mtext( "60 year old male, diagnosed 1990, aged 55", side=3, line=2.5, adj=0, col=gray(0.6) ) > mtext( "Survival curve", side=3, line=1.5, adj=0 ) @@ -1087,12 +1093,12 @@ + from=1995, + time.scale="Per" ), + perm=c(1,2,4,3) ) -> plot( pF, col=clx ) +> plot( pF, col=clx, xlab="Date of FU" ) > lines( as.numeric(rownames(pF)), pF[,2], lwd=3 ) > mtext( "60 year old female, diagnosed 1990, aged 55", side=3, line=2.5, adj=0, col=gray(0.6) ) > mtext( "Survival curve", side=3, line=1.5, adj=0 ) -> mtext( "DM, no insulin DM, Insulin", side=3, line=0.5, adj=0, col=clr[1] ) -> mtext( "DM, no insulin", side=3, line=0.5, adj=0, col=clr[2] ) +> mtext( "DM, no insulin DM, Insulin", side=3, line=0.5, adj=0, col=clr[2] ) +> mtext( "DM, no insulin", side=3, line=0.5, adj=0, col=clr[1] ) > axis( side=4 ) > axis( side=4, at=1:19/20, labels=FALSE ) > axis( side=4, at=1:99/100, labels=FALSE, tcl=-0.3 ) @@ -1134,8 +1140,8 @@ > lines( as.numeric(rownames(pF)), pF[,2], lwd=3 ) > mtext( "60 year old female, diagnosed 1990, aged 55", side=3, line=2.5, adj=0, col=gray(0.6) ) > mtext( "Survival curve", side=3, line=1.5, adj=0 ) -> mtext( "DM, no insulin DM, Insulin", side=3, line=0.5, adj=0, col=clr[1] ) -> mtext( "DM, no insulin", side=3, line=0.5, adj=0, col=clr[2] ) +> mtext( "DM, no insulin DM, Insulin", side=3, line=0.5, adj=0, col=clr[2] ) +> mtext( "DM, no insulin", side=3, line=0.5, adj=0, col=clr[1] ) > axis( side=4 ) > axis( side=4, at=1:9/10, labels=FALSE ) > axis( side=4, at=1:19/20, labels=FALSE, tcl=-0.4 ) @@ -1183,12 +1189,12 @@ > lines( PrM, border="blue" , col="transparent", lwd=3 ) > lines( CoxM, border="red" , col="transparent", lwd=3 ) > text( 60.5, 0.05, "M" ) -> box( lwd=3 ) +> box( lwd=5, col="white" ) ; box( lwd=2, col="black" ) > plot( pF, border="black", col="transparent", lwd=3 ) > lines( PrF, border="blue" , col="transparent", lwd=3 ) > lines( CoxF, border="red" , col="transparent", lwd=3 ) > text( 60.5, 0.05, "F" ) -> box( lwd=3 ) +> box( lwd=5, col="white" ) ; box( lwd=2, col="black" ) \end{Sinput} \end{Schunk} \insfig{comp-0}{1.0}{Comparison of the simulated state occupancy @@ -1339,11 +1345,11 @@ length as the \texttt{time.scales} attribute, with value equal to a state name if the corresponding time scale is defined as time since entry into that state. In this example the 4th timescale is time since -entry into the ``Ins'' state, and hence: +entry into the \texttt{Ins} state, and hence: \begin{Schunk} \begin{Sinput} > cbind( -+ attr( ini, "time.scale" ), ++ attr( ini, "time.scales" ), + attr( ini, "time.since" ) ) \end{Sinput} \begin{Soutput} @@ -1437,7 +1443,7 @@ attr(sf, "time.since") <- tF chop.lex(sf, tS, max(time.pts)) } - + \end{Soutput} \end{Schunk} @@ -1474,12 +1480,13 @@ prfrm[, tS] <- prfrm[, tS] + rep(time.pts, nr) prfrm$lex.dur <- il <- min(diff(time.pts)) prfrp <- prfrm + prfrp[, "lex.dur"] <- 1 prfrp[, tS] <- prfrp[, tS] + il/2 rt <- data.frame(lex.id = prfrm$lex.id) for (i in 1:length(Tr[[cst]])) { if (inherits(Tr[[cst]][[i]], "glm")) rt <- cbind(rt, predict(Tr[[cst]][[i]], type = "response", - newdata = prfrp)) + newdata = prfrp) * il) else if (inherits(Tr[[cst]][[i]], "coxph")) rt <- cbind(rt, predict(Tr[[cst]][[i]], type = "expected", newdata = prfrm)) @@ -1497,7 +1504,7 @@ merge(nd, do.call(rbind, lapply(split(rt, rt$lex.id), sim1, time.pts)), by = "lex.id") } - + \end{Soutput} \end{Schunk} @@ -1527,7 +1534,7 @@ colnames(ci)[tt == min(tt)] else NA)) } - + \end{Soutput} \end{Schunk} @@ -1549,7 +1556,7 @@ function (ci, tt, u) { if (any(diff(ci) < 0) | any(diff(tt) < 0)) - stop("Non-icreasing arguments") + stop("Non-increasing arguments") c.u <- min(c(ci[ci > u], max(ci))) c.l <- max(c(ci[ci < u], min(ci))) t.u <- min(c(tt[ci > u], max(tt))) @@ -1557,7 +1564,7 @@ ifelse(c.u == c.l, t.l, t.l + (u - c.l)/(c.u - c.l) * (t.u - t.l)) } - + \end{Soutput} \end{Schunk} @@ -1588,7 +1595,7 @@ nxt$lex.Cst <- nxt$lex.Xst return(nxt) } - + \end{Soutput} \end{Schunk} @@ -1615,7 +1622,7 @@ ww$lex.dur <- pmin(x.dur, ww$lex.dur) ww } - + \end{Soutput} \end{Schunk} @@ -1656,7 +1663,7 @@ tab.ab$State <- tab.ab$lex.Xst with(rbind(tab.ab, tab.tr), table(when, State)) } - + \end{Soutput} \end{Schunk} @@ -1679,6 +1686,7 @@ class(tt) <- c("pState", "matrix") tt } + \end{Soutput} \end{Schunk} @@ -1696,6 +1704,7 @@ yaxs = "i", xaxs = "i", xlab = xlab, ylab = ylab, ...) lines.pState(x, col = col, border = border, ...) } + \end{Soutput} \begin{Sinput} @@ -1715,7 +1724,7 @@ border = border[i - 1], ...) } } - + \end{Soutput} \end{Schunk} Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/sL.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/sL.pdf differ diff -Nru r-cran-epi-2.32/vignettes/sL.tex r-cran-epi-2.37/vignettes/sL.tex --- r-cran-epi-2.32/vignettes/sL.tex 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-epi-2.37/vignettes/sL.tex 2019-05-23 08:17:03.000000000 +0000 @@ -0,0 +1,1859 @@ + +%\VignetteIndexEntry{Simulation of multistate models with multiple timescales: simLexis} +\documentclass[a4paper,dvipsnames,twoside,12pt]{report} + +\newcommand{\Title}{Simulation of\\ multistate models with\\ multiple + timescales:\\ \texttt{simLexis} in the \texttt{Epi} package} +\newcommand{\Tit}{Multistate models with multiple timescales} +\newcommand{\Version}{Version 2.5} +\newcommand{\Dates}{\today} +\newcommand{\Where}{SDCC} +\newcommand{\Homepage}{\url{http://BendixCarstensen.com/Epi/simLexis.pdf}} +\newcommand{\Faculty}{\begin{tabular}{rl} +Bendix Carstensen + & Steno Diabetes Center Copenhagen, Gentofte, Denmark\\ + & {\small \& Department of Biostatistics, + University of Copenhagen} \\ + & \texttt{b@bxc.dk}\\ + & \url{http://BendixCarstensen.com} \\[1em] + \end{tabular}} + +\input{topreport} +\renewcommand{\rwpre}{./simLexis} + +\chapter{Using \texttt{simLexis}} + +\section{Introduction} + +This vignette explains the machinery behind simulation of life +histories through multistate models implemented in +\texttt{simLexis}. In \texttt{simLexis} transition rates are allowed +to depend on multiple time scales, including timescales defined as +time since entry to a particular state (duration). This therefore also +covers the case where time \emph{at} entry into a state is an +explanatory variable for the rates, since time at entry is merely time +minus duration. Thus, the set-up here goes beyond Markov- and +semi-Markov-models, and brings simulation based estimation of +state-occupancy probabilities into the realm of realistic multistate +models. + +The basic idea is to simulate a new \texttt{Lexis} object +\cite{Plummer.2011,Carstensen.2011a} as defined in the \texttt{Epi} +package for \R, based on 1) a multistate model defined by its states +and the transition rates between them and 2) an initial population of +individuals. + +Thus the output will be a \texttt{Lexis} object describing the +transitions of a predefined set of persons through a multistate +model. Therefore, if persons are defined to be identical at start, +then calculation of the probability of being in a particular state at +a given time boils down to a simple enumeration of the fraction of the +persons in the particular state at the given time. Bar of course the +(binomial) simulation error, but this can be brought down by +simulation a sufficiently large number of persons. + +An observed \texttt{Lexis} object with follow-up of persons through a +number of states will normally be the basis for estimation of +transition rates between states, and thus will contain all information +about covariates determining the occurrence rates, in particular the +\emph{timescales} \cite{Iacobelli.2013}. Hence, the natural input to +simulation from an estimated multistate model will typically be an +object of the same structure as the originally observed. Since +transitions and times are what is simulated, any values of +\texttt{lex.Xst} and \texttt{lex.dur} in the input object will of +course be ignored. + +This first chapter of this vignette shows by an example how to use the +function \texttt{simLexis} and display the results. The second +chapter discusses in more detail how the simulation machinery is +implemented and is not needed for the practical use of \texttt{simLexis}. + +\section{\texttt{simLexis} in practice} + +This section is largely a commented walk-trough of the example from +the help-page of \texttt{simLexis}, with a larger number of simulated +persons in order to minimize the pure simulation variation. + +When we want to simulate transition times through a multistate model +where transition rates may depend on time since entry to the current +or a previous state, it is essential that we have a machinery to keep +track of the transition time on \emph{all} time scales, as well as a +mechanism that can initiate a new time scale to 0 when a transition +occurs to a state where we shall use time since entry as determinant +of exit rates from that state. This is provided by \texttt{simLexis}. + +\subsection{Input for the simulation} + +Input for simulation of a single trajectory through a multistate model +requires a representation of the \emph{current status} of a person; +the starting conditions. The object that we supply to the simulation +function must contain information about all covariates and all +timescales upon which transitions depend, and in particular which +one(s) of the timescales that are defined as time since entry into a +particular state. Hence, starting conditions should be represented as +a \texttt{Lexis} object (where \texttt{lex.dur} and \texttt{lex.Xst} +are ignored, since there is no follow-up yet), where the time scale +information is in the attributes \texttt{time.scales} and +\texttt{time.since} respectively. + +Note that \texttt{time.scales} attribute is a vector of names of +variables in the \texttt{Lexis} object, so all of these variables +should be present even if they are not used in the models for the +transitions, and they should be set to 0; if they are not in the +initial dataset, \texttt{simLexis} will crash, if they are +\texttt{NA}, the \texttt{simLexis} will produce an object with 0 rows. + +Thus there are two main arguments to a function to simulate from a +multistate model: +\begin{enumerate} +\item A \texttt{Lexis} object representing the initial states and + covariates of the population to be simulated. This has to have the + same structure as the original \texttt{Lexis} object representing + the multistate model from which transition rates in the model were + estimated. As noted above, the values for \texttt{lex.Xst} and + \texttt{lex.dur} are not required (since these are the quantities + that will be simulated). +\item A transition object, representing the transition intensities + between states, which should be a list of lists of intensity + representations. As an intensity representation we mean a function + that for a given \texttt{Lexis} object can be used to produce + estimates of the transition intensities at a set of supplied time + points. + + The names of the elements of the transition object (which are lists) + will be names of the \emph{transient} states, that is the states + \emph{from} which a transition can occur. The names of the elements + of each of these lists are the names of states \emph{to} which + transitions can occur (which may be either transient or absorbing + states). + + Hence, if the transition object is called \texttt{Tr} then + \verb+TR$A$B+ (or \verb+Tr[["A"]][["B"]]+) will represent the + transition intensity from state \texttt{A} to the state \texttt{B}. + + The entries in the transition object can be either \texttt{glm} + objects (either with \texttt{poisson} or \texttt{poisreg} family), + representing Poisson models for the transitions, \texttt{coxph} + objects representing an intensity model along one time scale, or + simply a function that takes a \texttt{Lexis} object as input and + returns an estimated intensity for each row. + +\end{enumerate} + +In addition to these two input items, there will be a couple of tuning +parameters. + +The output of the function will simply be a \texttt{Lexis} object with +simulated transitions between states. This will be the basis for +deriving sensible statistics from the \texttt{Lexis} object --- see +next section. + +\section{Setting up a \texttt{Lexis} object} + +As an example we will use the \texttt{DMlate} dataset from the +\texttt{Epi} package; it is a dataset simulated to resemble a random +sample of 10,000 patients from the Danish National Diabetes Register. + +We start by loading the \texttt{Epi} package: +\begin{Schunk} +\begin{Sinput} +> options( width=90 ) +> library( Epi ) +> print( sessionInfo(), l=F ) +\end{Sinput} +\begin{Soutput} +R version 3.6.0 (2019-04-26) +Platform: x86_64-pc-linux-gnu (64-bit) +Running under: Ubuntu 14.04.6 LTS + +Matrix products: default +BLAS: /usr/lib/openblas-base/libopenblas.so.0 +LAPACK: /usr/lib/lapack/liblapack.so.3.0 + +attached base packages: +[1] utils datasets graphics grDevices stats methods base + +other attached packages: +[1] Epi_2.37 + +loaded via a namespace (and not attached): + [1] Rcpp_1.0.0 lattice_0.20-38 zoo_1.8-4 MASS_7.3-51.1 + [5] grid_3.6.0 plyr_1.8.4 nlme_3.1-139 etm_1.0.4 + [9] data.table_1.12.0 Matrix_1.2-17 splines_3.6.0 tools_3.6.0 +[13] cmprsk_2.2-7 numDeriv_2016.8-1 survival_2.44-1.1 parallel_3.6.0 +[17] compiler_3.6.0 mgcv_1.8-28 +\end{Soutput} +\end{Schunk} +First we load the diabetes data and set up a simple illness-death +model: +\begin{Schunk} +\begin{Sinput} +> data(DMlate) +> dml <- Lexis( entry = list(Per=dodm, Age=dodm-dobth, DMdur=0 ), ++ exit = list(Per=dox), ++ exit.status = factor(!is.na(dodth),labels=c("DM","Dead")), ++ data = DMlate ) +\end{Sinput} +\begin{Soutput} +NOTE: entry.status has been set to "DM" for all. +NOTE: Dropping 4 rows with duration of follow up < tol +\end{Soutput} +\end{Schunk} +This is just data for a simple survival model with states \texttt{DM} and +\texttt{Dead}. Now we cut the follow-up at insulin start, which for the +majority of patients (T2D) is a clinical indicator of deterioration of +disease regulation. We therefore also introduce a new timescale, and +split the non-precursor states, so that we can address the question of +ever having been on insulin: +\begin{Schunk} +\begin{Sinput} +> dmi <- cutLexis( dml, cut = dml$doins, ++ pre = "DM", ++ new.state = "Ins", ++ new.scale = "t.Ins", ++ split.states = TRUE ) +> summary( dmi, timeScales=T ) +\end{Sinput} +\begin{Soutput} +Transitions: + To +From DM Ins Dead Dead(Ins) Records: Events: Risk time: Persons: + DM 6157 1694 2048 0 9899 3742 45885.49 9899 + Ins 0 1340 0 451 1791 451 8387.77 1791 + Sum 6157 3034 2048 451 11690 4193 54273.27 9996 + +Timescales: + Per Age DMdur t.Ins + "" "" "" "Ins" +\end{Soutput} +\end{Schunk} +Note that we show the time scales in the \texttt{Lexis} object, and +that it is indicated that the time scale \texttt{t.Ins} is defined as +time since entry into stat state \texttt{Ins.} + +We can show how many person-years we have and show the number of +transitions and transition rates (per 1000), using the +\texttt{boxes.Lexis} function to display the states and the number of +transitions between them: +\begin{Schunk} +\begin{Sinput} +> boxes( dmi, boxpos = list(x=c(20,20,80,80), ++ y=c(80,20,80,20)), ++ scale.R = 1000, show.BE = TRUE ) +\end{Sinput} +\end{Schunk} +\insfig{boxes}{0.8}{Data overview for the \textrm{\tt dmi} + dataset. Numbers in the boxes are person-years and the number of + persons who begin, resp. end their follow-up in each state, and + numbers on the arrows are no. of transitions and rates (transition + intensities) per 1000 PY.} + +\section{Analysis of rates} + +In the \texttt{Lexis} object (which is just a data frame) each person +is represented by one record for each transient state occupied, +thus in this case either 1 or 2 records; those who have a recorded +time both without and with insulin have two records. + +In order to be able to fit Poisson models with occurrence rates varying +by the different time-scales, we split the follow-up in 3-month intervals +for modeling: +\begin{Schunk} +\begin{Sinput} +> Si <- splitLexis( dmi, seq(0,20,1/4), "DMdur" ) +> summary( Si ) +\end{Sinput} +\begin{Soutput} +Transitions: + To +From DM Ins Dead Dead(Ins) Records: Events: Risk time: Persons: + DM 184986 1694 2048 0 188728 3742 45885.49 9899 + Ins 0 34707 0 451 35158 451 8387.77 1791 + Sum 184986 36401 2048 451 223886 4193 54273.27 9996 +\end{Soutput} +\begin{Sinput} +> print( subset( Si, lex.id==97 )[,1:10], digits=6 ) +\end{Sinput} +\begin{Soutput} + lex.id Per Age DMdur t.Ins lex.dur lex.Cst lex.Xst sex dobth +2142 97 1997.55 58.9268 0.00000 NA 0.2500000 DM DM F 1938.62 +2143 97 1997.80 59.1768 0.25000 NA 0.2500000 DM DM F 1938.62 +2144 97 1998.05 59.4268 0.50000 NA 0.2500000 DM DM F 1938.62 +2145 97 1998.30 59.6768 0.75000 NA 0.2500000 DM DM F 1938.62 +2146 97 1998.55 59.9268 1.00000 NA 0.2500000 DM DM F 1938.62 +2147 97 1998.80 60.1768 1.25000 NA 0.2500000 DM DM F 1938.62 +2148 97 1999.05 60.4268 1.50000 NA 0.2500000 DM DM F 1938.62 +2149 97 1999.30 60.6768 1.75000 NA 0.2500000 DM DM F 1938.62 +2150 97 1999.55 60.9268 2.00000 NA 0.1793292 DM Ins F 1938.62 +2151 97 1999.72 61.1061 2.17933 0.0000000 0.0706708 Ins Ins F 1938.62 +2152 97 1999.80 61.1768 2.25000 0.0706708 0.2500000 Ins Ins F 1938.62 +2153 97 2000.05 61.4268 2.50000 0.3206708 0.2500000 Ins Ins F 1938.62 +2154 97 2000.30 61.6768 2.75000 0.5706708 0.2500000 Ins Ins F 1938.62 +2155 97 2000.55 61.9268 3.00000 0.8206708 0.0116359 Ins Dead(Ins) F 1938.62 +\end{Soutput} +\end{Schunk} +Note that when we split the follow-up, each person's follow up now +consists of many records, each with the \emph{current} values of the +timescales at the start of the interval represented by the record. In +the modeling we shall assume that the rates are constant within each +6-month interval, but the \emph{size} of these rates we model as +smooth functions of the time scales (that is the values at the +beginning of each interval). + +The approach often used in epidemiology where one parameter is +attached to each interval of time (or age) is not feasible when more +than one time scale is used, because intervals are not classified the +same way on all timescales. + +We shall use natural splines (restricted cubic splines) for the +analysis of rates, and hence we must allocate knots for the +splines. This is done for each of the time-scales, and separately for +the transition out of states \texttt{DM} and \texttt{Ins}. For age, we +place the knots so that the number of events is the same between each +pair of knots, but only half of this beyond each of the boundary +knots, whereas for the timescales \texttt{DMdur} and \texttt{tIns} +where we have observation from a well-defined 0, we put knots at 0 and +place the remaining knots so that the number of events is the same +between each pair of knots as well as outside the boundary knots. +\begin{Schunk} +\begin{Sinput} +> nk <- 5 +> ( ai.kn <- with( subset(Si,lex.Xst=="Ins" & lex.Cst!=lex.Xst ), ++ quantile( Age+lex.dur , probs=(1:nk-0.5)/nk ) ) ) +\end{Sinput} +\begin{Soutput} + 10% 30% 50% 70% 90% +23.23751 48.82218 58.63244 67.79028 78.88542 +\end{Soutput} +\begin{Sinput} +> ( ad.kn <- with( subset(Si,lex.Xst=="Dead"), ++ quantile( Age+lex.dur , probs=(1:nk-0.5)/nk ) ) ) +\end{Sinput} +\begin{Soutput} + 10% 30% 50% 70% 90% +61.91951 72.52731 78.43121 83.32348 90.15195 +\end{Soutput} +\begin{Sinput} +> ( di.kn <- with( subset(Si,lex.Xst=="Ins" & lex.Cst!=lex.Xst ), ++ c(0,quantile( DMdur+lex.dur, probs=(1:(nk-1))/nk ) )) ) +\end{Sinput} +\begin{Soutput} + 20% 40% 60% 80% +0.00000000 0.06570842 0.45448323 3.28761123 6.63764545 +\end{Soutput} +\begin{Sinput} +> ( dd.kn <- with( subset(Si,lex.Xst=="Dead"), ++ c(0,quantile( DMdur+lex.dur, probs=(1:(nk-1))/nk ) )) ) +\end{Sinput} +\begin{Soutput} + 20% 40% 60% 80% +0.0000000 0.7687885 2.1327858 4.0465435 6.5232033 +\end{Soutput} +\begin{Sinput} +> ( ti.kn <- with( subset(Si,lex.Xst=="Dead(Ins)"), ++ c(0,quantile( t.Ins+lex.dur, probs=(1:(nk-1))/nk ) )) ) +\end{Sinput} +\begin{Soutput} + 20% 40% 60% 80% +0.0000000 0.3093771 1.1307324 2.5489391 4.9117043 +\end{Soutput} +\end{Schunk} +Note that when we tease out the event records for transition to +\emph{transient} states (in this case \texttt{Ins}, that is +\verb|lex.Xst=="Ins"|), we should add \verb|lex.Cst!=lex.Xst|, to +include only transition records and avoiding including records of +sojourn time in the transient state. + +We then fit Poisson models to transition rates, using the wrapper +\texttt{Ns} from the \texttt{Epi} package to simplify the +specification of the rates: +\begin{Schunk} +\begin{Sinput} +> library( splines ) +> DM.Ins <- glm( (lex.Xst=="Ins") ~ Ns( Age , knots=ai.kn ) + ++ Ns( DMdur, knots=di.kn ) + ++ I(Per-2000) + sex, ++ family=poisson, offset=log(lex.dur), ++ data = subset(Si,lex.Cst=="DM") ) +> ci.exp( DM.Ins ) +\end{Sinput} +\begin{Soutput} + exp(Est.) 2.5% 97.5% +(Intercept) 1.37516629 1.21414923 1.55753699 +Ns(Age, knots = ai.kn)1 0.23411761 0.19185266 0.28569349 +Ns(Age, knots = ai.kn)2 0.23166177 0.19576557 0.27414000 +Ns(Age, knots = ai.kn)3 0.02835009 0.02284379 0.03518363 +Ns(Age, knots = ai.kn)4 0.38067392 0.32945713 0.43985278 +Ns(DMdur, knots = di.kn)1 0.04462626 0.03373451 0.05903459 +Ns(DMdur, knots = di.kn)2 0.22388988 0.19028663 0.26342723 +Ns(DMdur, knots = di.kn)3 0.03379141 0.02574708 0.04434907 +Ns(DMdur, knots = di.kn)4 0.47100646 0.40783317 0.54396529 +I(Per - 2000) 0.97381513 0.96082049 0.98698552 +sexF 0.73757407 0.66886782 0.81333785 +\end{Soutput} +\begin{Sinput} +> class( DM.Ins ) +\end{Sinput} +\begin{Soutput} +[1] "glm" "lm" +\end{Soutput} +\end{Schunk} +We can also fit this model with a slightly simpler syntax using the +\texttt{glm.Lexis} function: +\begin{Schunk} +\begin{Sinput} +> DM.Ins <- glm.Lexis( Si, from = "DM", to = "Ins", ++ formula = ~ Ns( Age , knots=ai.kn ) + ++ Ns( DMdur, knots=di.kn ) + ++ I(Per-2000) + sex ) +\end{Sinput} +\begin{Soutput} +stats::glm Poisson analysis of Lexis object Si with log link: +Rates for the transition: DM->Ins +\end{Soutput} +\begin{Sinput} +> ci.exp( DM.Ins ) +\end{Sinput} +\begin{Soutput} + exp(Est.) 2.5% 97.5% +(Intercept) 1.37516630 1.21415038 1.55753552 +Ns(Age, knots = ai.kn)1 0.23411761 0.19185214 0.28569426 +Ns(Age, knots = ai.kn)2 0.23166177 0.19576473 0.27414118 +Ns(Age, knots = ai.kn)3 0.02835009 0.02284374 0.03518371 +Ns(Age, knots = ai.kn)4 0.38067392 0.32945601 0.43985427 +Ns(DMdur, knots = di.kn)1 0.04462625 0.03373347 0.05903639 +Ns(DMdur, knots = di.kn)2 0.22388988 0.19028599 0.26342813 +Ns(DMdur, knots = di.kn)3 0.03379141 0.02574705 0.04434912 +Ns(DMdur, knots = di.kn)4 0.47100646 0.40783201 0.54396683 +I(Per - 2000) 0.97381513 0.96082027 0.98698574 +sexF 0.73757407 0.66886641 0.81333956 +\end{Soutput} +\begin{Sinput} +> class( DM.Ins ) +\end{Sinput} +\begin{Soutput} +[1] "glm.lex" "glm" "lm" +\end{Soutput} +\end{Schunk} +So we have a slightly simpler syntax, and we get an informative +message of which transition(s) we are modeling. However we do not have +\texttt{update} method for these objects. +\begin{Schunk} +\begin{Sinput} +> DM.Dead <- glm.Lexis( Si, from = "DM", to = "Dead", ++ formula = ~ Ns( Age , knots=ad.kn ) + ++ Ns( DMdur, knots=dd.kn ) + ++ I(Per-2000) + sex ) +\end{Sinput} +\begin{Soutput} +stats::glm Poisson analysis of Lexis object Si with log link: +Rates for the transition: DM->Dead +\end{Soutput} +\begin{Sinput} +> Ins.Dead <- glm.Lexis( Si, from = "Ins", ++ formula = ~ Ns( Age , knots=ad.kn ) + ++ Ns( DMdur, knots=dd.kn ) + ++ Ns( t.Ins, knots=ti.kn ) + ++ I(Per-2000) + sex ) +\end{Sinput} +\begin{Soutput} +stats::glm Poisson analysis of Lexis object Si with log link: +Rates for the transition: Ins->Dead(Ins) +\end{Soutput} +\end{Schunk} +Note the similarity of the code used to fit the three models, is is +mainly redefining the response variable (\texttt{to} state) and the subset +of the data used (\texttt{from} state). Also note that the last model need +no specification of \texttt{to}, the default is to model all +transitions from the \texttt{from} state, and his case there is only +one. + +\section{The mortality rates} + +This section discusses in some detail how to extract ad display the +mortality rates from the models fitted. But it is not necessary for +understanding how to use \texttt{simLexis} in practice. + +\subsection{Proportionality of mortality rates} + +Note that we have fitted separate models for the three transitions, +there is no assumption of proportionality between the mortality rates +from \texttt{DM} and \texttt{Ins}. + +However, there is nothing that prevents us from testing this +assumption; we can just fit a model for the mortality rates in the +entire data frame \texttt{Si}, and compare the deviance from this with +the sum of the deviances from the separate models using the \texttt{glm.Lexis} +function: +\begin{Schunk} +\begin{Sinput} +> All.Dead <- glm.Lexis( Si, to = c("Dead(Ins)","Dead"), ++ formula = ~ Ns( Age , knots=ad.kn ) + ++ Ns( DMdur, knots=dd.kn ) + ++ lex.Cst + ++ I(Per-2000) + sex ) +\end{Sinput} +\begin{Soutput} +stats::glm Poisson analysis of Lexis object Si with log link: +Rates for transitions: Ins->Dead(Ins), DM->Dead +\end{Soutput} +\begin{Sinput} +> round( ci.exp( All.Dead ), 3 ) +\end{Sinput} +\begin{Soutput} + exp(Est.) 2.5% 97.5% +(Intercept) 0.057 0.049 0.065 +Ns(Age, knots = ad.kn)1 4.101 3.462 4.858 +Ns(Age, knots = ad.kn)2 4.661 4.064 5.346 +Ns(Age, knots = ad.kn)3 15.434 13.548 17.583 +Ns(Age, knots = ad.kn)4 7.509 6.695 8.421 +Ns(DMdur, knots = dd.kn)1 0.466 0.384 0.565 +Ns(DMdur, knots = dd.kn)2 0.642 0.563 0.731 +Ns(DMdur, knots = dd.kn)3 0.229 0.165 0.318 +Ns(DMdur, knots = dd.kn)4 0.796 0.713 0.888 +lex.CstIns 2.168 1.947 2.415 +I(Per - 2000) 0.965 0.954 0.977 +sexF 0.665 0.614 0.721 +\end{Soutput} +\end{Schunk} +Incidentally we could have dispensed with the \texttt{to=} argument +too, because the default is to take \texttt{to} to be all absorbing +states in the model. + +From the parameter values we would in a simple setting just claim that +start of insulin-treatment was associated with a slightly more than +doubling of mortality. + +The model \texttt{All.dead} assumes that the age- and DM-duration +effects on mortality in the \texttt{DM} and \texttt{Ins} states are the same, +and moreover that there is no effect of insulin duration, but merely a +mortality that is larger by a multiplicative constant not depending on +insulin duration. The model \texttt{DM.Dead} has 8 parameters to +describe the dependency on age and DM duration, the model +\texttt{Ins.Dead} has 12 for the same plus the insulin duration (a +natural spline with $k$ knots gives $k-1$ parameters, and we chose +$k=5$ above). + +We can compare the fit of the simple proportional hazards model with +the fit of the separate models for the two mortality rates, by adding +up the deviances and d.f. from these: +\begin{Schunk} +\begin{Sinput} +> what <- c("null.deviance","df.null","deviance","df.residual") +> ( rD <- unlist( DM.Dead[what] ) ) +\end{Sinput} +\begin{Soutput} +null.deviance df.null deviance df.residual + 22535.77 188727.00 20412.81 188717.00 +\end{Soutput} +\begin{Sinput} +> ( rI <- unlist( Ins.Dead[what] ) ) +\end{Sinput} +\begin{Soutput} +null.deviance df.null deviance df.residual + 4867.127 35157.000 4211.735 35143.000 +\end{Soutput} +\begin{Sinput} +> ( rA <- unlist( All.Dead[what] ) ) +\end{Sinput} +\begin{Soutput} +null.deviance df.null deviance df.residual + 27415.21 223885.00 24705.70 223874.00 +\end{Soutput} +\begin{Sinput} +> round( c( dd <- rA-(rI+rD), "pVal"=1-pchisq(dd[3],dd[4]+1) ), 3 ) +\end{Sinput} +\begin{Soutput} +null.deviance df.null deviance df.residual pVal.deviance + 12.314 1.000 81.154 14.000 0.000 +\end{Soutput} +\end{Schunk} +Thus we see there is a substantial non-proportionality of mortality +rates from the two states; but a test provides no clue whatsoever to +the particular \emph{shape} of the non-proportionality. + +To this end, we shall explore the predicted mortalities under the two +models quantitatively in more detail. Note that the reason that there +is a difference in the null deviances (and a difference of 1 in the +null d.f.) is that the null deviance of \texttt{All.Dead} refer to a +model with a single intercept, that is a model with constant and +\emph{identical} mortality rates from the states \texttt{DM} and \texttt{Ins}, +whereas the null models for \texttt{DM.Dead} and \texttt{Ins.Dead} +have constant but \emph{different} mortality rates from the states +\texttt{DM} and \texttt{Ins}. This is however irrelevant for the comparison of +the \emph{residual} deviances. + +\subsection{How the mortality rates look} + +If we want to see how the mortality rates are modelled in +\texttt{DM.Dead} and \texttt{Ins.Dead} in relation to +\texttt{All.Dead}, we make a prediction of rates for say men diagnosed +in different ages and going on insulin at different times after +this. So we consider men diagnosed in ages 40, 50, 60 and 70, and who +either never enter insulin treatment or do it 0, 2 or 5 years after +diagnosis of DM. + +To this end we create a prediction data frame where we have +observation times from diagnosis and 12 years on (longer would not +make sense as this is the extent of the data). + +But we start by setting up an array to hold the predicted mortality +rates, classified by diabetes duration, age at diabetes onset, time of +insulin onset, and of course type of model. What we want to do is to +plot the age-specific mortality rates for persons not on insulin, and +for persons starting insulin at different times after DM. The +mortality curves start at the age where the person gets diabetes and +continues 12 years; for persons on insulin they start at the age when +they initiate insulin. +\begin{Schunk} +\begin{Sinput} +> pr.rates <- NArray( list( DMdur = seq(0,12,0.1), ++ DMage = 4:7*10, ++ r.Ins = c(NA,0,2,5), ++ model = c("DM/Ins","All"), ++ what = c("rate","lo","hi") ) ) +> str( pr.rates ) +\end{Sinput} +\begin{Soutput} + logi [1:121, 1:4, 1:4, 1:2, 1:3] NA NA NA NA NA NA ... + - attr(*, "dimnames")=List of 5 + ..$ DMdur: chr [1:121] "0" "0.1" "0.2" "0.3" ... + ..$ DMage: chr [1:4] "40" "50" "60" "70" + ..$ r.Ins: chr [1:4] NA "0" "2" "5" + ..$ model: chr [1:2] "DM/Ins" "All" + ..$ what : chr [1:3] "rate" "lo" "hi" +\end{Soutput} +\end{Schunk} +For convenience the \texttt{Epi} package contains a function that computes +predicted (log-)rates with c.i. --- it is merely a wrapper for +\texttt{predict.glm}. + +So we set up the prediction data frame and modify it in loops over +ages at onset and insulin onset in order to collect the predicted +rates in different scenarios: +\begin{Schunk} +\begin{Sinput} +> nd <- data.frame( DMdur = as.numeric( dimnames(pr.rates)[[1]] ), ++ lex.Cst = factor( 1, levels=1:4, ++ labels=levels(Si$lex.Cst) ), ++ sex = factor( 1, levels=1:2, labels=c("M","F")) ) +\end{Sinput} +\end{Schunk} +Note that we did \emph{not} insert \texttt{lex.dur} as covariate in +the prediction frame. This would be required if we used the +\texttt{poisson} family with the \texttt{glm}, but the wrapper +\texttt{glm.Lexis} uses the \texttt{poisreg} family, so +\texttt{lex.dur} is ignored and predictions always comes in the +(inverse) units of \texttt{lex.dur}. So we get rates per 1 person-year +in the predictions. +\begin{Schunk} +\begin{Sinput} +> for( ia in dimnames(pr.rates)[[2]] ) ++ { ++ dnew <- transform( nd, Age = as.numeric(ia)+DMdur, ++ Per = 1998+DMdur ) ++ pr.rates[,ia,1,"DM/Ins",] <- ci.pred( DM.Dead, newdata = dnew ) ++ pr.rates[,ia,1,"All" ,] <- ci.pred( All.Dead, newdata = dnew ) ++ for( ii in dimnames(pr.rates)[[3]][-1] ) ++ { ++ dnew = transform( dnew, lex.Cst = factor( 2, levels=1:4, ++ labels=levels(Si$lex.Cst) ), ++ t.Ins = ifelse( (DMdur-as.numeric(ii)) >= 0, ++ DMdur-as.numeric(ii), NA ) ) ++ pr.rates[,ia, ii ,"DM/Ins",] <- ci.pred( Ins.Dead, newdata = dnew ) ++ pr.rates[,ia, ii ,"All" ,] <- ci.pred( All.Dead, newdata = dnew ) ++ } ++ } +\end{Sinput} +\end{Schunk} +So for each age at DM onset we make a plot of the mortality as +function of current age both for those with no insulin treatment and +those that start insulin treatment 0, 2 and 5 years after diabetes +diagnosis, thus 4 curves (with c.i.). These curves are replicated with +a different color for the simplified model. +\begin{Schunk} +\begin{Sinput} +> par( mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, las=1 ) +> plot( NA, xlim=c(40,82), ylim=c(5,300), bty="n", ++ log="y", xlab="Age", ylab="Mortality rate per 1000 PY" ) +> abline( v=seq(40,80,5), h=outer(1:9,10^(0:2),"*"), col=gray(0.8) ) +> for( aa in 4:7*10 ) for( ii in 1:4 ) ++ matshade( aa+as.numeric(dimnames(pr.rates)[[1]]), ++ cbind( pr.rates[,paste(aa),ii,"DM/Ins",], ++ pr.rates[,paste(aa),ii,"All" ,] )*1000, ++ type="l", lty=1, lwd=2, ++ col=c("red","limegreen") ) +\end{Sinput} +\end{Schunk} +\insfig{mort-int}{0.9}{Estimated mortality rates for male diabetes + patients with no insulin (lower sets of curves) and insulin (upper + curves), with DM onset in age 40, 50, 60 and 70. The red curves are + from the models with separate age effects for persons with and + without insulin, and a separate effect of insulin duration. The + green curves are from the model with common age-effects and only a + time-dependent effect of insulin, assuming no effect of insulin + duration (the classical time-dependent variable approach). Hence the + upper green curve is common for any time of insulin inception.} + +From figure \ref{fig:mort-int} we see that there is a substantial +insulin-duration effect which is not accommodated by the simple model +with only one time-dependent variable to describe the insulin +effect. Note that the simple model (green curves) for those on insulin +does not depend in insulin duration, and hence the mortality curves +for those on insulin are just parallel to the mortality curves for +those not on insulin, regardless of diabetes duration (or age) at the +time of insulin initiation. This is the proportional hazards +assumption. Thus the effect of insulin initiation is under-estimated +for short duration of insulin and over-estimated for long duration of +insulin. + +This is the major discrepancy between the two models, and +illustrates the importance of being able to accommodate different time +scales, but there is also a declining overall insulin effect by age which +is not accommodated by the proportional hazards approach. + +Finally, this plot illustrates an important feature in reporting +models with multiple timescales; all timescales must be represented in +the predicted rates, only reporting the effect of one timescale, +conditional on a fixed value of other timescales is misleading since +all timescales by definition advance at the same pace. For example, +the age-effect for a fixed value of insulin duration really is a +misnomer since it does not correspond to any real person's follow-up, +but to the mortality of persons in different ages but with the same +duration of insulin use. + +\section{Input to the \texttt{simLexis} function} + +We want to estimate the cumulative probability of being in each of the +4 states, so that we can assess the fraction of diabetes pateints that +go on insulin + +In order to simulate from the multistate model with the estimated +transition rates, and get the follow-up of a hypothetical cohort, we +must supply \emph{both} the transition rates and the structure of the +model \emph{as well as} the initial cohort status to +\texttt{simLexis}. + +\subsection{The transition object} + +We first put the models into an object representing the transitions; +note this is a list of lists, the latter having \texttt{glm} objects +as elements: +\begin{Schunk} +\begin{Sinput} +> Tr <- list( "DM" = list( "Ins" = DM.Ins, ++ "Dead" = DM.Dead ), ++ "Ins" = list( "Dead(Ins)" = Ins.Dead ) ) +\end{Sinput} +\end{Schunk} +Now we have the description of the rates and of the structure of the +model. The \texttt{Tr} object defines the states and models for all +transitions between them; the object \verb|Tr$A$B| is the model +for the transition intensity from state \texttt{A} to state +\texttt{B}. + +\subsection{The initial cohort} + +We now define an initial \texttt{Lexis} object of persons with all +relevant covariates defined. Note that we use \texttt{NULL} as row +indicator in the \texttt{Lexis} object we used for modeling; this +conserves the \texttt{time.scale} and \texttt{time.since} attributes +which are needed for the simulation: +\begin{Schunk} +\begin{Sinput} +> str( ini <- Si[NULL,1:9] ) +\end{Sinput} +\begin{Soutput} +Classes ‘Lexis’ and 'data.frame': 0 obs. of 9 variables: + $ lex.id : int + $ Per : num + $ Age : num + $ DMdur : num + $ t.Ins : num + $ lex.dur: num + $ lex.Cst: Factor w/ 4 levels "DM","Ins","Dead",..: + $ lex.Xst: Factor w/ 4 levels "DM","Ins","Dead",..: + $ sex : Factor w/ 2 levels "M","F": + - attr(*, "time.scales")= chr "Per" "Age" "DMdur" "t.Ins" + - attr(*, "time.since")= chr "" "" "" "Ins" + - attr(*, "breaks")=List of 4 + ..$ Per : NULL + ..$ Age : NULL + ..$ DMdur: num 0 0.25 0.5 0.75 1 1.25 1.5 1.75 2 2.25 ... + ..$ t.Ins: NULL +\end{Soutput} +\end{Schunk} +We now have an empty \texttt{Lexis} object with attributes reflecting +the timescales in the multistate model we want to simulate from. But +we must enter some data to represent the initial state of the persons +whose follow-up we want to simulate through the model; so fill in data +for one man and one woman: +\begin{Schunk} +\begin{Sinput} +> ini[1:2,"lex.id"] <- 1:2 +> ini[1:2,"lex.Cst"] <- "DM" +> ini[1:2,"Per"] <- 1995 +> ini[1:2,"Age"] <- 60 +> ini[1:2,"DMdur"] <- 5 +> ini[1:2,"sex"] <- c("M","F") +> ini +\end{Sinput} +\begin{Soutput} + lex.id Per Age DMdur t.Ins lex.dur lex.Cst lex.Xst sex +1 1 1995 60 5 NA NA DM M +2 2 1995 60 5 NA NA DM F +\end{Soutput} +\end{Schunk} +So the persons starts in age 60 in 1995 with 5 years of diabetes +duration. Note that the \texttt{t.Ins} is \texttt{NA}, because this is +a timescale that first comes alive if a transtion to \texttt{Ins} is +simulated. + +\section{Simulation of the follow-up} + +Now we simulate life-courses of a 1000 of each of these persons using +the estimated model. The \texttt{t.range} argument gives the times +range where the integrated intensities (cumulative rates) are +evaluated and where linear interpolation is used when simulating +transition times. Note that this must be given in the same units as +\texttt{lex.dur} in the \texttt{Lexis} object used for fitting the +models for the transitions. It is not a parameter that can be easily +determined from the \texttt{TR} object, hence it must be supplied by +the user. +\begin{Schunk} +\begin{Sinput} +> set.seed( 52381764 ) +> Nsim <- 5000 +> system.time( simL <- simLexis( Tr, ++ ini, ++ t.range = 12, ++ N = Nsim ) ) +\end{Sinput} +\begin{Soutput} + user system elapsed + 17.739 1.311 18.010 +\end{Soutput} +\end{Schunk} +The result is a \texttt{Lexis} object --- a data frame representing +the simulated follow-up of 10000 persons (5000 +identical men and 5000 identical women) according to the rates +we estimated from the original dataset. +\begin{Schunk} +\begin{Sinput} +> summary( simL, by="sex" ) +\end{Sinput} +\begin{Soutput} +$M + +Transitions: + To +From DM Ins Dead Dead(Ins) Records: Events: Risk time: Persons: + DM 1438 2048 1514 0 5000 3562 36620.95 5000 + Ins 0 1363 0 685 2048 685 10935.47 2048 + Sum 1438 3411 1514 685 7048 4247 47556.41 5000 + +$F + +Transitions: + To +From DM Ins Dead Dead(Ins) Records: Events: Risk time: Persons: + DM 2186 1689 1125 0 5000 2814 42181.63 5000 + Ins 0 1321 0 368 1689 368 9535.20 1689 + Sum 2186 3010 1125 368 6689 3182 51716.83 5000 +\end{Soutput} +\end{Schunk} + +\subsection{Using other models for simulation} + +\subsubsection{Proportional hazards Poisson model} + +We fitted a proportional mortality model \texttt{All.Dead} (which fitted +worse than the other two), this is a model for \emph{both} the +transition from \texttt{DM} to \texttt{Death} \emph{and} from \texttt{Ins} to +\texttt{Dead(Ins)}, assuming that they are proportional. But it can easily +be used in the simulation set-up, because the state is embedded in the +model via the term \texttt{lex.Cst}, which is updated during the simulation. + +Simulation using this instead just requires that we supply a different +transition object: +\begin{Schunk} +\begin{Sinput} +> Tr.p <- list( "DM" = list( "Ins" = DM.Ins, ++ "Dead" = All.Dead ), ++ "Ins" = list( "Dead(Ins)" = All.Dead ) ) +> system.time( simP <- simLexis( Tr.p, ++ ini, ++ t.range = 12, ++ N = Nsim ) ) +\end{Sinput} +\begin{Soutput} + user system elapsed + 17.009 0.830 16.771 +\end{Soutput} +\begin{Sinput} +> summary( simP, by="sex" ) +\end{Sinput} +\begin{Soutput} +$M + +Transitions: + To +From DM Ins Dead Dead(Ins) Records: Events: Risk time: Persons: + DM 1633 1984 1383 0 5000 3367 37674.18 5000 + Ins 0 1135 0 849 1984 849 9807.38 1984 + Sum 1633 3119 1383 849 6984 4216 47481.56 5000 + +$F + +Transitions: + To +From DM Ins Dead Dead(Ins) Records: Events: Risk time: Persons: + DM 2285 1695 1020 0 5000 2715 42892.52 5000 + Ins 0 1191 0 504 1695 504 8833.70 1695 + Sum 2285 2886 1020 504 6695 3219 51726.21 5000 +\end{Soutput} +\end{Schunk} + +\subsubsection{Proportional hazards Cox model} + +A third possibility would be to replace the two-time scale +proportional mortality model by a one-time-scale Cox-model, using +diabetes duration as time scale, and age at diagnosis of DM as (fixed) +covariate: +\begin{Schunk} +\begin{Sinput} +> library( survival ) +> Cox.Dead <- coxph( Surv( DMdur, DMdur+lex.dur, ++ lex.Xst %in% c("Dead(Ins)","Dead")) ~ ++ Ns( Age-DMdur, knots=ad.kn ) + ++ I(lex.Cst=="Ins") + ++ I(Per-2000) + sex, ++ data = Si ) +> round( ci.exp( Cox.Dead ), 3 ) +\end{Sinput} +\begin{Soutput} + exp(Est.) 2.5% 97.5% +Ns(Age - DMdur, knots = ad.kn)1 4.172 3.535 4.923 +Ns(Age - DMdur, knots = ad.kn)2 4.502 3.824 5.301 +Ns(Age - DMdur, knots = ad.kn)3 16.077 14.087 18.348 +Ns(Age - DMdur, knots = ad.kn)4 7.479 6.501 8.605 +I(lex.Cst == "Ins")TRUE 2.170 1.948 2.418 +I(Per - 2000) 0.965 0.954 0.977 +sexF 0.667 0.616 0.723 +\end{Soutput} +\end{Schunk} +Note that in order for this model to be usable for simulation, it is +necessary that we use the components of the \texttt{Lexis} object to +specify the survival. Each record in the data frame \texttt{Si} +represents follow up from \texttt{DMdur} to \texttt{DMdur+lex.dur}, so +the model is a Cox model with diabetes duration as underlying timescale +and age at diagnosis, \texttt{Age-DMdur}, as covariate. + +Also note that we used \texttt{I(lex.Cst=="Ins")} instead of just +\texttt{lex.Cst}, because \texttt{coxph} assigns design matrix columns +to all levels of \texttt{lex.Cst}, also those not present in data, +which would give \texttt{NA}s among the parameter estimates and +\texttt{NA}s as mortality outcomes. + +We see that the effect of insulin and the other covariates are pretty +much the same as in the two-time-scale model. We can simulate from this +model too; there is no restrictions on what type of model can be used +for different transitions +\begin{Schunk} +\begin{Sinput} +> Tr.c <- list( "DM" = list( "Ins" = Tr$DM$Ins, ++ "Dead" = Cox.Dead ), ++ "Ins" = list( "Dead(Ins)" = Cox.Dead ) ) +> system.time( simC <- simLexis( Tr.c, ++ ini, ++ t.range = 12, ++ N = Nsim ) ) +\end{Sinput} +\begin{Soutput} + user system elapsed + 18.517 0.988 18.471 +\end{Soutput} +\begin{Sinput} +> summary( simC, by="sex" ) +\end{Sinput} +\begin{Soutput} +$M + +Transitions: + To +From DM Ins Dead Dead(Ins) Records: Events: Risk time: Persons: + DM 1714 2039 1247 0 5000 3286 37280.04 5000 + Ins 0 1348 0 691 2039 691 9990.38 2039 + Sum 1714 3387 1247 691 7039 3977 47270.42 5000 + +$F + +Transitions: + To +From DM Ins Dead Dead(Ins) Records: Events: Risk time: Persons: + DM 2332 1702 966 0 5000 2668 42168.20 5000 + Ins 0 1313 0 389 1702 389 9083.14 1702 + Sum 2332 3015 966 389 6702 3057 51251.34 5000 +\end{Soutput} +\end{Schunk} + +\section{Reporting the simulation results} + +We can now tabulate the number of persons in each state at a +predefined set of times on a given time scale. Note that in order for +this to be sensible, the \texttt{from} argument would normally be +equal to the starting time for the simulated individuals. +\begin{Schunk} +\begin{Sinput} +> system.time( ++ nSt <- nState( subset(simL,sex=="M"), ++ at=seq(0,11,0.2), from=1995, time.scale="Per" ) ) +\end{Sinput} +\begin{Soutput} + user system elapsed + 0.712 0.000 0.711 +\end{Soutput} +\begin{Sinput} +> nSt[1:10,] +\end{Sinput} +\begin{Soutput} + State +when DM Ins Dead Dead(Ins) + 1995 5000 0 0 0 + 1995.2 4932 38 29 1 + 1995.4 4843 93 62 2 + 1995.6 4773 137 87 3 + 1995.8 4702 179 116 3 + 1996 4621 226 148 5 + 1996.2 4547 269 179 5 + 1996.4 4469 309 212 10 + 1996.6 4407 342 237 14 + 1996.8 4322 386 275 17 +\end{Soutput} +\end{Schunk} +We see that as time goes by, the 5000 men slowly move away from the +starting state (\texttt{DM}). + +Based on this table (\texttt{nSt} is a table) we can now compute the +fractions in each state, or, rather more relevant, the cumulative fraction +across the states in some specified order, so that a plot of the +stacked probabilities can be made, using either the default rather +colorful layout, or a more minimalist version (both in figure \ref{fig:pstate0}): +\begin{Schunk} +\begin{Sinput} +> pM <- pState( nSt, perm=c(1,2,4,3) ) +> head( pM ) +\end{Sinput} +\begin{Soutput} + State +when DM Ins Dead(Ins) Dead + 1995 1.0000 1.0000 1.0000 1 + 1995.2 0.9864 0.9940 0.9942 1 + 1995.4 0.9686 0.9872 0.9876 1 + 1995.6 0.9546 0.9820 0.9826 1 + 1995.8 0.9404 0.9762 0.9768 1 + 1996 0.9242 0.9694 0.9704 1 +\end{Soutput} +\begin{Sinput} +> par( mfrow=c(1,2), mar=c(3,3,1,1), mgp=c(3,1,0)/1.6 ) +> plot( pM ) +> plot( pM, border="black", col="transparent", lwd=3 ) +> text( rep(as.numeric(rownames(pM)[nrow(pM)-1]),ncol(pM)), ++ pM[nrow(pM),]-diff(c(0,pM[nrow(pM),]))/5, ++ colnames( pM ), adj=1 ) +> box( col="white", lwd=3 ) +> box() +\end{Sinput} +\end{Schunk} +\insfig{pstate0}{1.0}{Default layout of the \textrm{\tt plot.pState} + graph (left), and a version with the state probabilities as lines and + annotation of states.} + +A more useful set-up of the graph would include a more through +annotation and sensible choice of colors, as seen in figure \ref{fig:pstatex}: +\begin{Schunk} +\begin{Sinput} +> clr <- c("limegreen","orange") +> # expand with a lighter version of the two chosen colors +> clx <- c( clr, rgb( t( col2rgb( clr[2:1] )*2 + rep(255,3) ) / 3, max=255 ) ) +> par( mfrow=c(1,2), las=1, mar=c(3,3,4,2), mgp=c(3,1,0)/1.6 ) +> # Men +> plot( pM, col=clx, xlab="Date of FU" ) +> lines( as.numeric(rownames(pM)), pM[,2], lwd=3 ) +> mtext( "60 year old male, diagnosed 1990, aged 55", side=3, line=2.5, adj=0, col=gray(0.6) ) +> mtext( "Survival curve", side=3, line=1.5, adj=0 ) +> mtext( "DM, no insulin DM, Insulin", side=3, line=0.5, adj=0, col=clr[2] ) +> mtext( "DM, no insulin", side=3, line=0.5, adj=0, col=clr[1] ) +> axis( side=4 ) +> axis( side=4, at=1:19/20, labels=FALSE ) +> axis( side=4, at=1:99/100, labels=FALSE, tcl=-0.3 ) +> # Women +> pF <- pState( nState( subset(simL,sex=="F"), ++ at=seq(0,11,0.2), ++ from=1995, ++ time.scale="Per" ), ++ perm=c(1,2,4,3) ) +> plot( pF, col=clx, xlab="Date of FU" ) +> lines( as.numeric(rownames(pF)), pF[,2], lwd=3 ) +> mtext( "60 year old female, diagnosed 1990, aged 55", side=3, line=2.5, adj=0, col=gray(0.6) ) +> mtext( "Survival curve", side=3, line=1.5, adj=0 ) +> mtext( "DM, no insulin DM, Insulin", side=3, line=0.5, adj=0, col=clr[2] ) +> mtext( "DM, no insulin", side=3, line=0.5, adj=0, col=clr[1] ) +> axis( side=4 ) +> axis( side=4, at=1:19/20, labels=FALSE ) +> axis( side=4, at=1:99/100, labels=FALSE, tcl=-0.3 ) +\end{Sinput} +\end{Schunk} +\insfig{pstatex}{1.0}{\textrm{\tt plot.pState} graphs where persons + ever on insulin are given in orange and persons never on insulin in + green, and the overall survival (dead over the line) as a black line.} + +If we instead wanted to show the results on the age-scale, we would +use age as timescale when constructing the probabilities; otherwise the +code is pretty much the same as before (Figure \ref{fig:pstatey}): +\begin{Schunk} +\begin{Sinput} +> par( mfrow=c(1,2), las=1, mar=c(3,3,4,2), mgp=c(3,1,0)/1.6 ) +> # Men +> pM <- pState( nState( subset(simL,sex=="M"), ++ at=seq(0,11,0.2), ++ from=60, ++ time.scale="Age" ), ++ perm=c(1,2,4,3) ) +> plot( pM, col=clx, xlab="Age" ) +> lines( as.numeric(rownames(pM)), pM[,2], lwd=3 ) +> mtext( "60 year old male, diagnosed 1990, aged 55", side=3, line=2.5, adj=0, col=gray(0.6) ) +> mtext( "Survival curve", side=3, line=1.5, adj=0 ) +> mtext( "DM, no insulin DM, Insulin", side=3, line=0.5, adj=0, col=clr[2] ) +> mtext( "DM, no insulin", side=3, line=0.5, adj=0, col=clr[1] ) +> axis( side=4 ) +> axis( side=4, at=1:19/20, labels=FALSE ) +> axis( side=4, at=1:19/20, labels=FALSE, tcl=-0.4 ) +> axis( side=4, at=1:99/100, labels=FALSE, tcl=-0.3 ) +> # Women +> pF <- pState( nState( subset(simL,sex=="F"), ++ at=seq(0,11,0.2), ++ from=60, ++ time.scale="Age" ), ++ perm=c(1,2,4,3) ) +> plot( pF, col=clx, xlab="Age" ) +> lines( as.numeric(rownames(pF)), pF[,2], lwd=3 ) +> mtext( "60 year old female, diagnosed 1990, aged 55", side=3, line=2.5, adj=0, col=gray(0.6) ) +> mtext( "Survival curve", side=3, line=1.5, adj=0 ) +> mtext( "DM, no insulin DM, Insulin", side=3, line=0.5, adj=0, col=clr[2] ) +> mtext( "DM, no insulin", side=3, line=0.5, adj=0, col=clr[1] ) +> axis( side=4 ) +> axis( side=4, at=1:9/10, labels=FALSE ) +> axis( side=4, at=1:19/20, labels=FALSE, tcl=-0.4 ) +> axis( side=4, at=1:99/100, labels=FALSE, tcl=-0.3 ) +\end{Sinput} +\end{Schunk} +Note the several statements with \texttt{axis(side=4,...}; they are +necessary to get the fine tick-marks in the right hand side of the +plots that you will need in order to read off the probabilities at +2006 (or 71 years). + +\insfig{pstatey}{1.0}{\textrm{\tt plot.pState} graphs where persons + ever on insulin are given in orange and persons never on insulin in + green, and the overall survival (dead over the line) as a black line.} + +\subsection{Comparing predictions from different models} + +We have actually fitted different models for the transitions, and we +have simulated Lexis objects from all three approaches, so we can plot +these predictions on top of each other: +\begin{Schunk} +\begin{Sinput} +> PrM <- pState( nState( subset(simP,sex=="M"), ++ at=seq(0,11,0.2), ++ from=60, ++ time.scale="Age" ), ++ perm=c(1,2,4,3) ) +> PrF <- pState( nState( subset(simP,sex=="F"), ++ at=seq(0,11,0.2), ++ from=60, ++ time.scale="Age" ), ++ perm=c(1,2,4,3) ) +> CoxM <- pState( nState( subset(simC,sex=="M"), ++ at=seq(0,11,0.2), ++ from=60, ++ time.scale="Age" ), ++ perm=c(1,2,4,3) ) +> CoxF <- pState( nState( subset(simC,sex=="F"), ++ at=seq(0,11,0.2), ++ from=60, ++ time.scale="Age" ), ++ perm=c(1,2,4,3) ) +> par( mfrow=c(1,2), mar=c(3,3,1,1), mgp=c(3,1,0)/1.6 ) +> plot( pM, border="black", col="transparent", lwd=3 ) +> lines( PrM, border="blue" , col="transparent", lwd=3 ) +> lines( CoxM, border="red" , col="transparent", lwd=3 ) +> text( 60.5, 0.05, "M" ) +> box( lwd=5, col="white" ) ; box( lwd=2, col="black" ) +> plot( pF, border="black", col="transparent", lwd=3 ) +> lines( PrF, border="blue" , col="transparent", lwd=3 ) +> lines( CoxF, border="red" , col="transparent", lwd=3 ) +> text( 60.5, 0.05, "F" ) +> box( lwd=5, col="white" ) ; box( lwd=2, col="black" ) +\end{Sinput} +\end{Schunk} +\insfig{comp-0}{1.0}{Comparison of the simulated state occupancy + probabilities using separate Poisson models for the mortality rates + with and without insulin (black) and using proportional hazards + Poisson models (blue) and Cox-models with diabetes duration as + timescale and age at diabetes diagnosis as covariate (red).} + +From figure \ref{fig:comp-0} it is clear that the two proportional +hazards models (blue and red curves) produce pretty much the same +estimates of the state occupancy probabilities over time, but also that +they relative to the model with separately estimated transition +intensities overestimates the probability of being alive without +insulin and underestimates the probabilities of being dead without +insulin. However both the overall survival, and the fraction of +persons on insulin are quite well in agreement with the more elaborate +model. Thus the proportional hazards models overestimate the relative +mortality of the insulin treated diabetes patients relative to the +non-insulin treated. + +Interestingly, we also see a bump in the estimated probabilities from +the Cox-model based model, but this is entirely an artifact that comes +from the estimation method for the baseline hazard of the Cox-model +that lets the (cumulative) hazard have large jumps at event times +where the risk set is small. So also here it shows up that an +assumption of continuous underlying hazards leads to more credible +estimates. + +\chapter{Simulation of transitions in multistate models} + +\section{Theory} + +Suppose that the rate functions for the transitions out of the current +state to, say, 3 different states are $\lambda_1$, $\lambda_2$ and +$\lambda_3$, and the corresponding cumulative rates are $\Lambda_1$, +$\Lambda_2$ and $\Lambda_3$, and we want to simulate an exit time and +an exit state (that is either 1, 2 or 3). This can be done in two +slightly different ways: +\begin{enumerate} +\item First time, then state: + \begin{enumerate} + \item Compute the survival function, $S(t) = + \exp\bigl(-\Lambda_1(t)-\Lambda_2(t)-\Lambda_3(t)\bigr)$ + \item Simulate a random U(0,1) variate, $u$, say. + \item The simulated exit time is then the solution $t_u$ to + the equation $S(t_u) = u \quad \Leftrightarrow \quad \sum_j\Lambda_j(t_u) = -\log(u)$. + \item A simulated transition at $t_u$ is then found by simulating a + random draw from the multinomial distribution with probabilities + $p_i=\lambda_i(t_u) / \sum_j\lambda_j(t_u)$. + \end{enumerate} +\item Separate cumulative incidences: + \begin{enumerate} + \item Simulate 3 independent U(0,1) random variates $u_1$, $u_2$ and $u_3$. + \item Solve the equations $\Lambda_i(t_i)=-\log(u_i), i=1,2,3$ and get $(t_1,t_2,t_3)$. + \item The simulated survival time is then $\min(t_1,t_2,t_3)$, and + the simulated transition is to the state corresponding to this, + that is $k \in \{1,2,3\}$, where $t_k=\min(t_1,t_2,t_3)$ + \end{enumerate} +\end{enumerate} +The intuitive argument is that with three possible transition there are +3 independent processes running, but only the first transition is observed. +The latter approach is used in the implementation in \texttt{simLexis}. + +The formal argument for the equality of the two approaches goes as +follows: +\begin{enumerate} +\item Equality of the transition times: + \begin{enumerate} + \item In the first approach we simulate from a distribution with + cumulative rate $\Lambda_1(t)+\Lambda_2(t)+\Lambda_3(t)$, hence + from a distribution with survival function: + \begin{align*} + S(t) & = \exp\bigl(-(\Lambda_1(t)+\Lambda_2(t)+\Lambda_3(t))\bigr) \\ + & = \exp\bigl(-\Lambda_1(t)\bigr)\times + \exp\bigl(-\Lambda_2(t)\bigr)\times + \exp\bigl(-\Lambda_3(t)\bigr) + \end{align*} + \item In the second approach we choose the smallest of three + independent survival times, with survival functions + $\exp(-\Lambda_i), i=1,2,3$. Now, the survival function for the + minimum of three independent survival times is: + \begin{align*} + S_\text{min}(t) & = \pmat{\min(t_1,t_2,t_3)>t} \\ + & = \pmat{t_1>t} \times + \pmat{t_2>t} \times + \pmat{t_3>t} \\ + & = \exp\bigl(-\Lambda_1(t)\bigr)\times + \exp\bigl(-\Lambda_2(t)\bigr)\times + \exp\bigl(-\Lambda_3(t)\bigr) + \end{align*} + which is the same survival function as derived above. + \end{enumerate} +\item Type of transition: + \begin{enumerate} + \item In the first instance the probability of a transition to state + $i$, conditional on the transition time being $t$, is as known + from standard probability theory: + $\lambda_i(t)/\bigl(\lambda_1(t)+\lambda_2(t)+\lambda_3(t)\bigr)$. + \item In the second approach we choose the transition corresponding + to the the smallest of the transition times. So when we condition + on the event that a transition takes place at time $t$, we have to + show that the conditional probability that the smallest of the + three simulated transition times was actually the $i$th, is as above. + + But conditional on \emph{survival} till $t$, the probabilities + that events of type $1,2,3$ takes place in the interval $(t,t+\dif + t)$ are $\lambda_1(t)\dif t$, $\lambda_2(t)\dif t$ and + $\lambda_1(t)\dif t$, respectively (assuming that the probability + of more than one event in the interval of length $\dif t$ is + 0). Hence the conditional probabilities \emph{given a transition + time} in $(t,t+\dif t)$ is: + \[ + \frac{\lambda_i(t)\dif t}{\lambda_1(t)\dif t+\lambda_2(t)\dif t+\lambda_3(t)\dif t}= + \frac{\lambda_i(t)}{\lambda_1(t)+\lambda_2(t)+\lambda_3(t)} + \] + --- exactly as above. + \end{enumerate} +\end{enumerate} + +\section{Components of \texttt{simLexis}} + +This section explains the actually existing code for +\texttt{simLexis}, as it is in the current version of \texttt{Epi}. + +% The following code sources the originally formatted code in +% simLexis.R in order to make it printable with all the formatting and +% comments in it. However the sourcing does not work when compiling +% the vignettes in R CMD build. So when checking the code we comment +% this out by putting eval=FALSE, and moreover, because it is only +% simLexis that is exported, we also need to bring the functions simX, +% sim1, lint, get.next and chop.lex into the workspace. So depending +% on whether we actually construct the pdf for the inst/doc folder or +% test (upload) the package, one of the following to chunks are run +% with eval=FALSE and the other with eval=TRUE + +% When compiling the published vignette +% When checking the package +The function \texttt{simLexis} takes a \texttt{Lexis} object as +input. This defines the initial state(s) and times of the start for a +number of persons. Since the purpose is to simulate a history through +the estimated multistate model, the input values of the outcome +variables \texttt{lex.Xst} and \texttt{lex.dur} are ignored --- the +aim is to simulate values for them. + +Note however that the attribute \texttt{time.since} must be present in the +object. This is used for initializing timescales defined as time since +entry into a particular state, it is a character vector of the same +length as the \texttt{time.scales} attribute, with value equal to a +state name if the corresponding time scale is defined as time since +entry into that state. In this example the 4th timescale is time since +entry into the \texttt{Ins} state, and hence: +\begin{Schunk} +\begin{Sinput} +> cbind( ++ attr( ini, "time.scales" ), ++ attr( ini, "time.since" ) ) +\end{Sinput} +\begin{Soutput} + [,1] [,2] +[1,] "Per" "" +[2,] "Age" "" +[3,] "DMdur" "" +[4,] "t.Ins" "Ins" +\end{Soutput} +\end{Schunk} +\texttt{Lexis} objects will have this attribute set for time scales created +using \texttt{cutLexis}. + +The other necessary argument is a transition object \texttt{Tr}, which +is a list of lists. The elements of the lists should be \texttt{glm} +objects derived by fitting Poisson models to a \texttt{Lexis} object +representing follow-up data in a multistate model. It is assumed (but +not checked) that timescales enter in the model via the timescales of +the \texttt{Lexis} object. Formally, there are no assumptions about +how \texttt{lex.dur} enters in the model. + +Optional arguments are \texttt{t.range}, \texttt{n.int} or +\texttt{time.pts}, specifying the times after entry at which the +cumulative rates will be computed (the maximum of which will be taken +as the censoring time), and \texttt{N} a scalar or numerical vector of +the number of persons with a given initial state each record of the +\texttt{init} object should represent. + +The central part of the functions uses a \texttt{do.call} / +\texttt{lapply} / \texttt{split} construction to do simulations for +different initial states. This is the construction in the middle that +calls \texttt{simX}. \texttt{simLexis} also calls \texttt{get.next} +which is further detailed below. +\begin{Schunk} +\begin{Sinput} +> simLexis +\end{Sinput} +\begin{Soutput} +function( Tr, # List of lists of transition objects + init, # Lexis object of persons to simulate. + N = 1, # No. persons simulated per line in init + lex.id, + t.range = 20, # Range for rate computation in the simulation + n.int = 101, # length of time intervals + time.pts = seq(0,t.range,length.out=n.int) + ) +{ +# Expand the input data frame using N and put in lex.id +if( time.pts[1] !=0 ) + stop( "First time point must be 0, time.pts[1:3]= ", + time.pts[1:3] ) + +# Expand init +if( !missing(N) ) + { + if( length(N) == 1 ) + init <- init[rep(1:nrow(init),each=N),] + else init <- init[rep(1:nrow(init), N),] + } +# and update lex.id if necessary +if( !missing(lex.id) ) + { + if( length(lex.id)==nrow(init) ) + init$lex.id <- lex.id + else init$lex.id <- 1:nrow(init) + } +else init$lex.id <- 1:nrow(init) + +# Check/fix attributes +if( is.null( tS <- attr(init,"time.scales") ) ) + stop( "No time.scales attribute for init" ) +if( is.null( tF <- attr(init,"time.since") ) ) + { + attr(init,"time.since") <- tF <- rep( "", tS ) + warning( "'time.since' attribute set to blanks" ) + } + +# Convenience constants +np <- length( time.pts ) +tr.st <- names( Tr ) + +# Set up a NULL object to hold the follow-up records +sf <- NULL + +# Take as initiators only those who start in a transient state +nxt <- init[init$lex.Cst %in% tr.st,] + +# If some are not in a transient state then say so +if( nrow(nxt) < nrow(init) ) + { + tt <- table(init$lex.Cst) + tt <- tt[tt>0] + nt <- length(tt) + warning("\nSome initiators start in a absorbing state\n", + "Initiator states represented are: ", + paste( rbind( names(tt), rep(":",nt), + paste(tt), rep(" ",nt) ), collapse="" ), "\n", + "Transient states are: ", paste( names( Tr ), coll=" " ) ) + if( nrow(nxt)==0 ) stop( "\nNo initiators in transient states!" ) + } + +# Then we update those who are in a transient states and keep on doing +# that till all are in absorbing states or censored +while( nrow(nxt) > 0 ) +{ +nx <- do.call( rbind.data.frame, + lapply( split( nxt, + nxt$lex.Cst ), + simX, + Tr, time.pts, tS ) ) +sf <- rbind.data.frame( sf, nx ) +nxt <- get.next( nx, tr.st, tS, tF ) +} + +# Doctor lex.Xst levels, fix values for the censored +sf$lex.Xst <- factor( sf$lex.Xst, levels=levels(sf$lex.Cst) ) +sf$lex.Xst[is.na(sf$lex.Xst)] <- sf$lex.Cst[is.na(sf$lex.Xst)] + +# Nicely order the output by persons, then times and states +nord <- match( c( "lex.id", tS, + "lex.dur", + "lex.Cst", + "lex.Xst" ), names(sf) ) +noth <- setdiff( 1:ncol(sf), nord ) +sf <- sf[order(sf$lex.id,sf[,tS[1]]),c(nord,noth)] +rownames(sf) <- NULL +# Finally, supply attributes - note we do not supply the "breaks" +# attribute as this is irrelevant for simulated objects +attr( sf, "time.scales" ) <- tS +attr( sf, "time.since" ) <- tF +chop.lex( sf, tS, max(time.pts) ) +} +\end{Soutput} +\end{Schunk} + +\subsection{\texttt{simX}} + +\texttt{simX} is called by \texttt{simLexis} and simulates +transition-times and -types for a set of patients assumed to be in the +same state. It is called from \texttt{simLexis} with a data frame as +argument, uses the state in \texttt{lex.Cst} to select the relevant +component of \texttt{Tr} and compute predicted cumulative intensities +for all states reachable from this state. + +Note that it is here the switch between \texttt{glm}, \texttt{coxph} +and objects of class \texttt{function} is made. + +The dataset on which this prediction is done has +\texttt{length(time.pts)} rows per person. +\begin{Schunk} +\begin{Sinput} +> simX +\end{Sinput} +\begin{Soutput} +function( nd, Tr, time.pts, tS ) +{ +# Simulation is done from the data frame nd, in chunks of starting +# state, lex.Cst. This is necessary because different states have +# different (sets of) exit rates. Therefore, this function simulates +# for a set of persons from the same starting state. +np <- length( time.pts ) +nr <- nrow( nd ) +if( nr==0 ) return( NULL ) + +# The 'as.character' below is necessary because indexing by a factor +# by default is by the number of the level, and we are not indexing by +# this, but by components of Tr which just happens to have names that +# are a subset of the levels of lex.Cst. +cst <- as.character( unique(nd$lex.Cst) ) +if( length(cst)>1 ) stop( "More than one lex.Cst present:\n", cst, "\n" ) + +# Expand each person by the time points +prfrm <- nd[rep(1:nr,each=np),] +prfrm[,tS] <- prfrm[,tS] + rep(time.pts,nr) +prfrm$lex.dur <- il <- min( diff(time.pts) ) +# Poisson-models should use the estimated rate at the midpoint of the +# intervals, and have risk time equal to 1 in order to accommodate +# both poisson and poisreg families - they only produce identical +# predictions if lex.dur is 1 (i.e. offset is 0), scaling is after prediction +prfrp <- prfrm +prfrp[,"lex.dur"] <- 1 +prfrp[,tS] <- prfrp[,tS]+il/2 + +# Make a data frame with predicted rates for each of the transitions +# out of this state for these times +rt <- data.frame( lex.id = prfrm$lex.id ) +for( i in 1:length(Tr[[cst]]) ) + { + if( inherits( Tr[[cst]][[i]], "glm" ) ) + rt <- cbind( rt, predict( Tr[[cst]][[i]], + type="response", + newdata=prfrp ) * il ) # scaled to interval + else + if( inherits( Tr[[cst]][[i]], "coxph" ) ) + rt <- cbind( rt, predict( Tr[[cst]][[i]], + type="expected", + newdata=prfrm ) ) + else + if( is.function( Tr[[cst]][[i]] ) ) + rt <- cbind( rt, Tr[[cst]][[i]](prfrm) ) + else + stop( "Invalid object supplied as transition, elements of the list must be either:\n", + "- a glm(poisson) object fitted to a Lexis object\n", + "- a coxph object fitted to a Lexis object\n", + "- a function that takes a Lexis object as argument and returns\n", + " average rates for each record in the same units as lex.dur.") + } +names( rt )[-1] <- names( Tr[[cst]] ) + +# Then find the transition time and exit state for each person: +xx <- match( c("lex.dur","lex.Xst"), names(nd) ) +if( any(!is.na(xx)) ) nd <- nd[,-xx[!is.na(xx)]] +merge( nd, + do.call( rbind, + lapply( split( rt, + rt$lex.id ), + sim1, + time.pts ) ), + by="lex.id" ) +} +\end{Soutput} +\end{Schunk} +As we see, \texttt{simX} calls \texttt{sim1} which simulates the +transition for one person. + +\subsection{\texttt{sim1}} + +The predicted cumulative intensities are fed, person by person, to +\texttt{sim1} --- again via a \texttt{do.call} / \texttt{lapply} / +\texttt{split} construction --- and the resulting time and state is +appended to the \texttt{nd} data frame. This way we have simulated +\emph{one} transition (time and state) for each person: +\begin{Schunk} +\begin{Sinput} +> sim1 +\end{Sinput} +\begin{Soutput} +function( rt, time.pts ) +{ +# Simulates a single transition time and state based on the data frame +# rt with columns lex.id and timescales. It is assumed that the coumns +# in in rt are the id, followed by the set of estimated transition +# rates to the different states reachable from the current one. +ci <- apply( rbind(0,rt[,-1,drop=FALSE]), 2, cumsum )[1:nrow(rt),,drop=FALSE] +tt <- uu <- -log( runif(ncol(ci)) ) +for( i in 1:ncol(ci) ) tt[i] <- lint( ci[,i], time.pts, uu[i] ) +# Note this resulting data frame has 1 row and is NOT a Lexis object +data.frame( lex.id = rt[1,1], + lex.dur = min(tt,na.rm=TRUE), + lex.Xst = factor( if( min(tt) < max(time.pts) ) + colnames(ci)[tt==min(tt)] + else NA ) ) +} +\end{Soutput} +\end{Schunk} +The \texttt{sim1} function uses \texttt{lint} to do linear interpolation. + +\subsection{\texttt{lint}} + +We do not use \texttt{approx} to do the linear interpolation, because +this function does not do the right thing if the cumulative incidences +(\texttt{ci}) are constant across a number of times. Therefore we have +a custom made linear interpolator that does the interpolation +exploiting the fact the the \texttt{ci} is non-decreasing and +\texttt{tt} is strictly monotonously increasing: +\begin{Schunk} +\begin{Sinput} +> lint +\end{Sinput} +\begin{Soutput} +function( ci, tt, u ) +{ +# Makes a linear interpolation, but does not crash if all ci values are +# identical, but requires that both ci and tt are non-decreasing. +# ci plays the role of cumulative intensity, tt of time +if( any( diff(ci)<0 ) | any( diff(tt)<0 ) ) stop("Non-increasing arguments") +c.u <- min( c( ci[ci>u], max(ci) ) ) +c.l <- max( c( ci[ciu], max(tt) ) ) +t.l <- max( c( tt[ci get.next +\end{Sinput} +\begin{Soutput} +function( sf, tr.st, tS, tF ) +{ +# Produces an initial Lexis object for the next simulation for those +# who have ended up in a transient state. +# Note that this exploits the existence of the "time.since" attribute +# for Lexis objects and assumes that a character vector naming the +# transient states is supplied as argument. +if( nrow(sf)==0 ) return( sf ) +nxt <- sf[sf$lex.Xst %in% tr.st,] +if( nrow(nxt) == 0 ) return( nxt ) +nxt[,tS] <- nxt[,tS] + nxt$lex.dur +wh <- tF +for( i in 1:length(wh) ) + if( wh[i] != "" ) nxt[nxt$lex.Xst==wh[i],tS[i]] <- 0 +nxt$lex.Cst <- nxt$lex.Xst +return( nxt ) +} +\end{Soutput} +\end{Schunk} + +\subsection{\texttt{chop.lex}} + +The operation so far has censored individuals \texttt{max(time.pts)} +after \emph{each} new entry to a transient state. In order to groom +the output data we use \texttt{chop.lex} to censor all persons at the +same designated time after \emph{initial} entry. +\begin{Schunk} +\begin{Sinput} +> chop.lex +\end{Sinput} +\begin{Soutput} +function( obj, tS, cens ) +{ +# A function that chops off all follow-up beyond cens since entry for +# each individual +# Entry times on 1st timescale +zz <- entry( obj, 1, by.id=TRUE ) +# Merge with the revised exit times on this timescale +ww <- merge( obj, data.frame( lex.id = as.numeric(names(zz)), + cens = zz+cens ) ) +# Only retain records with an entry time prior to the revised exit time +ww <- ww[ww[,tS[1]] < ww$cens,] +# Revise the duration according the the revised exit time +x.dur <- pmin( ww$lex.dur, ww[,"cens"]-ww[,tS[1]] ) +# Change lex.Xst to lex.Cst for those with shortened follow-up +ww$lex.Xst[x.dur nState +\end{Sinput} +\begin{Soutput} +function ( obj, + at, + from, + time.scale = 1 ) +{ +# Counts the number of persons in each state of the Lexis object 'obj' +# at the times 'at' from the time 'from' in the time scale +# 'time.scale' + +# Determine timescales and absorbing and transient states +tS <- check.time.scale(obj,time.scale) +TT <- tmat(obj) +absorb <- rownames(TT)[apply(!is.na(TT),1,sum)==0] +transient <- setdiff( rownames(TT), absorb ) + +# Expand each record length(at) times +tab.frm <- obj[rep(1:nrow(obj),each=length(at)), + c(tS,"lex.dur","lex.Cst","lex.Xst")] + +# Stick in the corresponding times on the chosen time scale +tab.frm$when <- rep( at, nrow(obj) ) + from + +# For transient states keep records that includes these points in time +tab.tr <- tab.frm[tab.frm[,tS] <= tab.frm$when & + tab.frm[,tS]+tab.frm$lex.dur > tab.frm$when,] +tab.tr$State <- tab.tr$lex.Cst + +# For absorbing states keep records where follow-up ended before +tab.ab <- tab.frm[tab.frm[,tS]+tab.frm$lex.dur <= tab.frm$when & + tab.frm$lex.Xst %in% absorb,] +tab.ab$State <- tab.ab$lex.Xst + +# Make a table using the combination of those in transient and +# absorbing states. +with( rbind( tab.ab, tab.tr ), table( when, State ) ) +} +\end{Soutput} +\end{Schunk} + +\subsection{\texttt{pState}, \texttt{plot.pState} and \texttt{lines.pState}} + +In order to plot probabilities of state-occupancy it is useful to +compute cumulative probabilities across states in any given order; +this is done by the function \texttt{pState}, which returns a matrix +of class \texttt{pState}: +\begin{Schunk} +\begin{Sinput} +> pState +\end{Sinput} +\begin{Soutput} +function( nSt, perm=1:ncol(nSt) ) +{ +# Compute cumulative proportions of persons across states in order +# designate by 'perm' +tt <- t( apply( nSt[,perm], 1, cumsum ) ) +tt <- sweep( tt, 1, tt[,ncol(tt)], "/" ) +class( tt ) <- c("pState","matrix") +tt +} +\end{Soutput} +\end{Schunk} +There is also a \texttt{plot} and \texttt{lines} method for the +resulting \texttt{pState} objects: +\begin{Schunk} +\begin{Sinput} +> plot.pState +\end{Sinput} +\begin{Soutput} +function( x, + col = rainbow(ncol(x)), + border = "transparent", + xlab = "Time", + ylim = 0:1, + ylab = "Probability", ... ) +{ +# Function to plot cumulative probabilities along the time scale. +matplot( as.numeric(rownames(x)), x, type="n", + ylim=ylim, yaxs="i", xaxs="i", + xlab=xlab, ylab=ylab, ... ) +lines.pState( x, + col = col, + border = border, ... ) +} +\end{Soutput} +\begin{Sinput} +> lines.pState +\end{Sinput} +\begin{Soutput} +function( x, + col = rainbow(ncol(x)), + border = "transparent", ... ) +{ +# Function to plot cumulative probabilities along the time scale. + +# Fixing the colors: +nc <- ncol(x) +col <- rep( col , nc )[1:nc] +border <- rep( border, nc )[1:nc] + +# Just for coding convenience when plotting polygons +pSt <- cbind( 0, x ) +for( i in 2:ncol(pSt) ) + { + polygon( c( as.numeric(rownames(pSt)) , + rev(as.numeric(rownames(pSt))) ), + c( pSt[,i ], + rev(pSt[,i-1]) ), + col=col[i-1], border=border[i-1], ... ) + } +} +\end{Soutput} +\end{Schunk} + +\bibliographystyle{plain} +\begin{thebibliography}{1} + +\bibitem{Carstensen.2011a} +B~Carstensen and M~Plummer. +\newblock Using {L}exis objects for multi-state models in {R}. +\newblock {\em Journal of Statistical Software}, 38(6):1--18, 1 2011. + +\bibitem{Iacobelli.2013} +S~Iacobelli and B~Carstensen. +\newblock {Multiple time scales in multi-state models}. +\newblock {\em Stat Med}, 32(30):5315--5327, Dec 2013. + +\bibitem{Plummer.2011} +M~Plummer and B~Carstensen. +\newblock Lexis: An {R} class for epidemiological studies with long-term + follow-up. +\newblock {\em Journal of Statistical Software}, 38(5):1--12, 1 2011. + +\end{thebibliography} + +\addcontentsline{toc}{chapter}{References} + +\end{document} diff -Nru r-cran-epi-2.32/vignettes/useful.tex r-cran-epi-2.37/vignettes/useful.tex --- r-cran-epi-2.32/vignettes/useful.tex 2018-02-12 06:08:52.000000000 +0000 +++ r-cran-epi-2.37/vignettes/useful.tex 2019-02-12 17:05:24.000000000 +0000 @@ -117,6 +117,9 @@ % Changed to slightly smaller symbols \providecommand{\mpydiv}[0]{\stackrel{\scriptstyle\times}{\scriptstyle\div}} \providecommand{\mie}[1]{{\it #1}} +\providecommand{\ie}{\textit{i.e.} } +\providecommand{\eg}{\textit{e.g.} } +\providecommand{\ea}{\textit{et al.} } \providecommand{\mycircle}[0]{\circle*{5}} \providecommand{\smcircle}[0]{\circle*{1}} \providecommand{\corner}[0]{_{\text{\rm \tiny C}}} @@ -157,66 +160,66 @@ \providecommand{\dif}{{\,\mathrm d}} \providecommand{\pmat}[1]{\Pp\!\left\{#1\right\}} \providecommand{\ptxt}[1]{\Pp\!\left\{\text{#1}\right\}} -\providecommand{\E}{\text{\rm E}} -\providecommand{\V}{\text{\rm V}} -\providecommand{\BLUP}{\text{\rm BLUP}} -\providecommand{\se}{\text{\rm s.e.}} -\providecommand{\sem}{\text{\rm s.e.m.}} -\providecommand{\std}{\text{\rm std}} -\providecommand{\sd}{\text{\rm s.d.}} -\providecommand{\Var}{\text{\rm var}} -\providecommand{\VAR}{\text{\rm var}} -\providecommand{\var}{\text{\rm var}} -\providecommand{\cov}{\text{\rm cov}} -\providecommand{\corr}{\text{\rm corr}} -\providecommand{\mean}{\text{\rm mean}} -\providecommand{\CV}{\text{\rm CV}} -\providecommand{\median}{\text{\rm median}} -\providecommand{\cv}{\text{\rm c.v.}} -\providecommand{\erf}{\text{\rm erf}} -\providecommand{\ef}{\text{\rm ef}} -\providecommand{\SSD}{\text{\rm SSD}} -\providecommand{\SPD}{\text{\rm SPD}} -\providecommand{\odds}{\text{\rm odds}} -\providecommand{\bin}{\text{\rm binom}} +\providecommand{\E}{\operatorname{E}} +\providecommand{\V}{\operatorname{V}} +\providecommand{\BLUP}{\operatorname{BLUP}} +\providecommand{\std}{\operatorname{std}} +\providecommand{\sd}{\operatorname{s.d.}} +\providecommand{\se}{\operatorname{s.e.}} +\providecommand{\sem}{\operatorname{s.e.m.}} +\providecommand{\Var}{\operatorname{var}} +\providecommand{\VAR}{\operatorname{var}} +\providecommand{\var}{\operatorname{var}} +\providecommand{\cov}{\operatorname{cov}} +\providecommand{\corr}{\operatorname{corr}} +\providecommand{\mean}{\operatorname{mean}} +\providecommand{\CV}{\operatorname{CV}} +\providecommand{\median}{\operatorname{median}} +\providecommand{\cv}{\operatorname{c.v.}} +\providecommand{\erf}{\operatorname{erf}} +\providecommand{\ef}{\operatorname{ef}} +\providecommand{\SSD}{\operatorname{SSD}} +\providecommand{\SPD}{\operatorname{SPD}} +\providecommand{\odds}{\operatorname{odds}} +\providecommand{\bin}{\operatorname{binom}} \providecommand{\half}{\frac{1}{2}} % \providecommand{\td}[0]{\stackrel{\textstyle \times}{\div}} % Changed to slightly smaller symbols \providecommand{\td}[0]{\stackrel{\scriptstyle \times}{\scriptstyle \div}} \providecommand{\dt}[0]{\stackrel{\scriptstyle \div}{\scriptstyle \times}} -\providecommand{\diag}{\text{\rm diag}} -\providecommand{\det}{\text{\rm det}} -\providecommand{\dim}{\text{\rm dim}} -\providecommand{\spcol}{\text{\rm span}} -\providecommand{\logit}{\text{\rm logit}} -% \providecommand{\link}{\text{\rm link}} -\providecommand{\spn}{\text{\rm span}} -\providecommand{\CI}{\text{\rm CI}} -\providecommand{\IP}{\text{\rm IP}} -\providecommand{\OR}{\text{\rm OR}} -\providecommand{\RR}{\text{\rm RR}} -\providecommand{\ER}{\text{\rm ER}} -\providecommand{\EM}{\text{\rm EM}} -\providecommand{\EF}{\text{\rm EF}} -\providecommand{\RD}{\text{\rm RD}} -\providecommand{\AC}{\text{\rm AC}} -\providecommand{\AF}{\text{\rm AF}} -\providecommand{\PAF}{\text{\rm PAF}} -\providecommand{\AR}{\text{\rm AR}} -\providecommand{\CR}{\text{\rm CR}} -\providecommand{\PAR}{\text{\rm PAR}} -\providecommand{\EL}{\text{\rm EL}} -\providecommand{\ERL}{\text{\rm ERL}} -\providecommand{\YLL}{\text{\rm YLL}} -\providecommand{\SD}{\text{\rm SD}} -\providecommand{\SE}{\text{\rm SE}} -\providecommand{\SEM}{\text{\rm SEM}} -\providecommand{\SR}{\text{\rm SR}} -\providecommand{\SMR}{\text{\rm SMR}} -\providecommand{\RSR}{\text{\rm RSR}} -\providecommand{\CMF}{\text{\rm CMF}} -\providecommand{\pvp}{\text{\rm PV$+$}} -\providecommand{\pvn}{\text{\rm PV$-$}} +\providecommand{\diag}{\operatorname{diag}} +\providecommand{\det}{\operatorname{det}} +\providecommand{\dim}{\operatorname{dim}} +\providecommand{\logit}{\operatorname{logit}} +% \providecommand{\link}{\operatorname{link}} +\providecommand{\spcol}{\operatorname{span}} +\providecommand{\spn}{\operatorname{span}} +\providecommand{\CI}{\operatorname{CI}} +\providecommand{\IP}{\operatorname{IP}} +\providecommand{\OR}{\operatorname{OR}} +\providecommand{\RR}{\operatorname{RR}} +\providecommand{\ER}{\operatorname{ER}} +\providecommand{\EM}{\operatorname{EM}} +\providecommand{\EF}{\operatorname{EF}} +\providecommand{\RD}{\operatorname{RD}} +\providecommand{\AC}{\operatorname{AC}} +\providecommand{\AF}{\operatorname{AF}} +\providecommand{\PAF}{\operatorname{PAF}} +\providecommand{\AR}{\operatorname{AR}} +\providecommand{\CR}{\operatorname{CR}} +\providecommand{\PAR}{\operatorname{PAR}} +\providecommand{\EL}{\operatorname{EL}} +\providecommand{\ERL}{\operatorname{ERL}} +\providecommand{\YLL}{\operatorname{YLL}} +\providecommand{\SD}{\operatorname{SD}} +\providecommand{\SE}{\operatorname{SE}} +\providecommand{\SEM}{\operatorname{SEM}} +\providecommand{\SR}{\operatorname{SR}} +\providecommand{\SMR}{\operatorname{SMR}} +\providecommand{\RSR}{\operatorname{RSR}} +\providecommand{\CMF}{\operatorname{CMF}} +\providecommand{\pvp}{\operatorname{PV$+$}} +\providecommand{\pvn}{\operatorname{PV$-$}} \providecommand{\R}{{\textsf{\textbf{R}}}} \providecommand{\sas}{\textsl{\textbf{SAS}}} \providecommand{\SAS}{\textsl{\textbf{SAS}}} @@ -233,7 +236,13 @@ %%% Insert output from program in small text %%% (requires package verbatim) \providecommand{\insoutsmall}[1]{ -% \small + \small + \renewcommand{\baselinestretch}{0.8} + \verbatiminput{#1} + \renewcommand{\baselinestretch}{1.0} + \normalsize +} +\providecommand{\insoutfoot}[1]{ \footnotesize \renewcommand{\baselinestretch}{0.8} \verbatiminput{#1} @@ -248,20 +257,20 @@ \normalsize } \providecommand{\insouttiny}[1]{ -\tiny -\renewcommand{\baselinestretch}{0.8} -\verbatiminput{#1} -\renewcommand{\baselinestretch}{1.0} -\normalsize + \tiny + \renewcommand{\baselinestretch}{0.8} + \verbatiminput{#1} + \renewcommand{\baselinestretch}{1.0} + \normalsize } % From Esa: \providecommand{\T}{\text{\rm \small{T}}} -\providecommand{\id}{\text{\rm id}} -\providecommand{\Dev}{\text{\rm Dev}} -\providecommand{\Bin}{\text{\rm Bin}} -\providecommand{\probit}{\text{\rm probit}} -\providecommand{\cloglog}{\text{\rm cloglog}} +\providecommand{\id}{\operatorname{id}} +\providecommand{\Dev}{\operatorname{Dev}} +\providecommand{\Bin}{\operatorname{Bin}} +\providecommand{\probit}{\operatorname{probit}} +\providecommand{\cloglog}{\operatorname{cloglog}} % Special commands to include output from R, Bugs and Stata Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/yll-imm.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/yll-imm.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/yll.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/yll.pdf differ diff -Nru r-cran-epi-2.32/vignettes/yll.rwl r-cran-epi-2.37/vignettes/yll.rwl --- r-cran-epi-2.32/vignettes/yll.rwl 2018-05-03 14:36:44.000000000 +0000 +++ r-cran-epi-2.37/vignettes/yll.rwl 2019-05-23 08:22:41.000000000 +0000 @@ -1,7 +1,7 @@ -R version 3.4.4 (2018-03-15) +R version 3.6.0 (2019-04-26) --------------------------------------------- Folder: /home/bendix/stat/R/lib.src/Epi/pkg/vignettes - Started: Thursday 03. May 2018, 16:36:26 + Started: Thursday 23. May 2019, 10:22:24 --------------------------------------------- Writing to file yll.tex Processing code chunks with options ... @@ -28,6 +28,6 @@ --------------------------------------------- Folder: /home/bendix/stat/R/lib.src/Epi/pkg/vignettes - Ended: Thursday 03. May 2018, 16:36:44 - Elapsed: 00:00:18 + Ended: Thursday 23. May 2019, 10:22:41 + Elapsed: 00:00:16 --------------------------------------------- Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/yll-states.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/yll-states.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/yll-sus.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/yll-sus.pdf differ diff -Nru r-cran-epi-2.32/vignettes/yll.tex r-cran-epi-2.37/vignettes/yll.tex --- r-cran-epi-2.32/vignettes/yll.tex 2018-05-03 14:36:44.000000000 +0000 +++ r-cran-epi-2.37/vignettes/yll.tex 2019-05-23 08:22:41.000000000 +0000 @@ -256,27 +256,27 @@ > str( DMepi ) \end{Sinput} \begin{Soutput} -'data.frame': 4000 obs. of 8 variables: - $ sex : Factor w/ 2 levels "M","F": 1 2 1 2 1 2 1 2 1 2 ... - $ A : num 0 0 1 1 2 2 3 3 4 4 ... - $ P : num 1996 1996 1996 1996 1996 ... - $ X : num 1 9 4 7 7 2 6 5 9 4 ... - $ D.nD: num 28 19 23 19 7 8 8 8 6 7 ... - $ Y.nD: num 35454 33095 36451 34790 35329 ... +'data.frame': 4200 obs. of 8 variables: + $ sex : Factor w/ 2 levels "M","F": 1 1 1 1 1 1 1 1 1 1 ... + $ A : num 0 0 0 0 0 0 0 0 0 0 ... + $ P : num 1996 1997 1998 1999 2000 ... $ D.DM: num 0 0 0 0 0 0 0 0 0 0 ... - $ Y.DM: num 0.476 3.877 4.92 7.248 12.474 ... + $ Y.DM: num 0.484 0.64 1.641 0.552 2.507 ... + $ X : num 1 2 4 4 4 1 1 3 4 1 ... + $ D.nD: num 28 19 20 11 21 16 21 15 16 16 ... + $ Y.nD: num 35469 35085 34240 34056 34002 ... \end{Soutput} \begin{Sinput} > head( DMepi ) \end{Sinput} \begin{Soutput} - sex A P X D.nD Y.nD D.DM Y.DM -1 M 0 1996 1 28 35453.65 0 0.4757016 -2 F 0 1996 9 19 33094.86 0 3.8767967 -3 M 1 1996 4 23 36450.73 0 4.9199179 -4 F 1 1996 7 19 34789.99 0 7.2484600 -5 M 2 1996 7 7 35328.92 0 12.4743326 -6 F 2 1996 2 8 33673.43 0 8.0951403 + sex A P D.DM Y.DM X D.nD Y.nD +2 M 0 1996 0 0.4839151 1 28 35468.92 +3 M 0 1997 0 0.6399726 2 19 35085.18 +4 M 0 1998 0 1.6406571 4 20 34240.14 +5 M 0 1999 0 0.5523614 4 11 34055.52 +6 M 0 2000 0 2.5065024 4 21 34002.22 +7 M 0 2001 0 0.1184120 1 16 34177.39 \end{Soutput} \end{Schunk} For each combination of sex, age, period and date of birth in 1 year @@ -305,13 +305,13 @@ > head(DMepi) \end{Sinput} \begin{Soutput} - sex A P X D.nD Y.nD D.DM Y.DM D.T Y.T -63 M 31 1996 21 51 43909.32 0 291.4107 51 44200.73 -64 F 31 1996 33 16 41376.91 2 287.4969 18 41664.41 -65 M 32 1996 26 67 43159.94 0 299.6571 67 43459.59 -66 F 32 1996 20 23 40706.49 1 275.2615 24 40981.75 -67 M 33 1996 35 54 41251.06 4 321.0397 58 41572.10 -68 F 33 1996 32 23 39102.29 1 277.4463 24 39379.74 + sex A P D.DM Y.DM X D.nD Y.nD D.T Y.T +684 M 31 1996 0 305.9671 29 51 44161.83 51 44467.80 +685 M 31 1997 2 326.2074 31 54 45508.64 56 45834.85 +686 M 31 1998 2 340.1759 34 46 44954.45 48 45294.63 +687 M 31 1999 5 330.9918 24 39 41148.97 44 41479.96 +688 M 31 2000 1 332.0876 41 26 39027.30 27 39359.39 +689 M 31 2001 2 310.8467 35 33 37975.78 35 38286.63 \end{Soutput} \end{Schunk} With the correct age and period coding in the Lexis triangles, we fit @@ -347,14 +347,14 @@ D.nD D.DM X sex M F M F M F cut(A, c(30, a.kn, Inf)) -(30,40] 8899 4525 564 269 9912 8622 -(40,51] 24686 15296 2886 1399 31668 20769 -(51,62] 57747 38968 10276 4916 53803 34495 -(62,73] 102877 78771 24070 13008 51000 38731 -(73,84] 154804 153842 31006 25414 25444 26804 -(84,95] 97698 175484 13972 21231 4726 7852 -(95,Inf] 5800 20563 545 1522 74 238 -Sum 452511 487449 83319 67759 176627 137511 +(30,40] 9135 4650 597 277 12080 9648 +(40,51] 25535 15784 2947 1439 35445 23925 +(51,62] 59698 40171 10838 5253 60539 40034 +(62,73] 106519 81253 26197 14299 55908 44221 +(73,84] 158365 156678 34510 28834 27985 30381 +(84,95] 100880 179466 16194 25317 5272 8967 +(95,Inf] 6095 21414 640 1928 90 288 +Sum 466227 499416 91923 77347 197319 157464 \end{Soutput} \begin{Sinput} > pe <- xtabs( cbind(D.nD,D.DM,X) ~ cut(P,c(1990,p.kn,Inf)) + sex, data=DMepi ) @@ -364,12 +364,12 @@ D.nD D.DM X sex M F M F M F cut(P, c(1990, p.kn, Inf)) -(1990,1997] 51901 54162 6012 5378 12477 10030 -(1997,2003] 145418 157768 21028 17976 43749 34255 -(2003,2009] 133175 144717 25172 20595 56556 43891 -(2009,2015] 122017 130802 31107 23810 63845 49335 -(2015,Inf] 0 0 0 0 0 0 -Sum 452511 487449 83319 67759 176627 137511 +(1990,1997] 51569 53567 6407 5990 13175 10960 +(1997,2003] 144136 155695 22390 19922 49805 41277 +(2003,2009] 131159 141782 26079 22249 60554 48066 +(2009,2015] 119812 127714 31285 24704 63440 49457 +(2015,Inf] 19551 20658 5762 4482 10345 7704 +Sum 466227 499416 91923 77347 197319 157464 \end{Soutput} \begin{Sinput} > ce <- xtabs( cbind(D.nD,D.DM,X) ~ cut(P-A,c(-Inf,c.kn,Inf)) + sex, data=DMepi ) @@ -379,14 +379,14 @@ D.nD D.DM X sex M F M F M F cut(P - A, c(-Inf, c.kn, Inf)) -(-Inf,1.91e+03] 19912 49797 1784 3731 536 1143 -(1.91e+03,1.92e+03] 130691 190012 18160 23709 9765 13519 -(1.92e+03,1.94e+03] 154227 146284 32435 24876 34897 32481 -(1.94e+03,1.95e+03] 93397 67909 22921 11437 65012 44292 -(1.95e+03,1.96e+03] 40948 26234 6724 3326 46155 29891 -(1.96e+03,1.98e+03] 12534 6810 1245 654 19293 15311 -(1.98e+03, Inf] 802 403 50 26 969 874 -Sum 452511 487449 83319 67759 176627 137511 +(-Inf,1.91e+03] 19679 49020 2004 4446 599 1416 +(1.91e+03,1.92e+03] 129799 189192 19780 26870 10665 15275 +(1.92e+03,1.94e+03] 158953 152454 35667 28471 37850 36659 +(1.94e+03,1.95e+03] 99058 72432 25494 13074 71489 50631 +(1.95e+03,1.96e+03] 44129 28315 7534 3731 51750 34255 +(1.96e+03,1.98e+03] 13598 7484 1373 720 22942 17689 +(1.98e+03, Inf] 1011 519 71 35 2024 1539 +Sum 466227 499416 91923 77347 197319 157464 \end{Soutput} \begin{Sinput} > # Fit an APC-model for all transitions, seperately for men and women @@ -462,7 +462,7 @@ \end{Sinput} \begin{Soutput} user system elapsed - 18.571 8.688 16.366 + 16.748 8.634 14.860 \end{Soutput} \begin{Sinput} > round( ftable( aYLL[,,seq(1,61,10),], col.vars=c(3,2) ), 1 ) @@ -471,69 +471,69 @@ age 30 40 50 60 70 80 90 sex M F M F M F M F M F M F M F type date -Imm 1996 11.7 10.8 9.8 9.7 7.8 8.2 5.6 6.3 3.4 3.9 1.5 1.6 0.0 0.0 - 1997 11.5 10.6 9.7 9.4 7.7 7.9 5.6 6.1 3.4 3.9 1.4 1.6 0.0 0.0 - 1998 11.3 10.3 9.6 9.2 7.6 7.7 5.5 5.9 3.4 3.8 1.4 1.5 0.0 0.0 - 1999 11.1 10.0 9.4 9.0 7.5 7.5 5.4 5.7 3.3 3.7 1.4 1.5 0.0 0.0 - 2000 10.9 9.8 9.3 8.7 7.4 7.3 5.4 5.6 3.3 3.6 1.4 1.5 0.0 0.0 - 2001 10.7 9.5 9.1 8.5 7.3 7.1 5.3 5.4 3.3 3.4 1.4 1.4 0.0 0.0 - 2002 10.5 9.2 9.0 8.3 7.1 6.9 5.2 5.2 3.2 3.3 1.3 1.4 0.0 0.0 - 2003 10.3 9.0 8.8 8.1 7.0 6.7 5.1 5.1 3.1 3.2 1.3 1.3 0.0 0.0 - 2004 10.0 8.8 8.6 7.8 6.8 6.5 5.0 4.9 3.1 3.1 1.3 1.3 0.0 0.0 - 2005 9.7 8.5 8.4 7.6 6.6 6.3 4.8 4.8 3.0 3.0 1.2 1.3 0.0 0.0 - 2006 9.4 8.3 8.1 7.5 6.5 6.2 4.7 4.6 2.9 2.9 1.2 1.2 0.0 0.0 - 2007 9.1 8.1 7.9 7.3 6.3 6.0 4.6 4.5 2.8 2.8 1.1 1.2 0.0 0.0 - 2008 8.9 7.9 7.7 7.1 6.1 5.9 4.4 4.3 2.7 2.7 1.1 1.1 0.0 0.0 - 2009 8.6 7.7 7.5 6.9 6.0 5.7 4.3 4.2 2.7 2.6 1.1 1.1 0.0 0.0 - 2010 8.4 7.5 7.3 6.8 5.9 5.6 4.2 4.1 2.6 2.5 1.1 1.1 0.0 0.0 - 2011 8.3 7.3 7.2 6.7 5.8 5.5 4.2 4.0 2.6 2.5 1.0 1.0 0.0 0.0 - 2012 8.1 7.2 7.1 6.5 5.7 5.4 4.1 4.0 2.6 2.4 1.0 1.0 0.0 0.0 - 2013 8.0 7.1 7.0 6.4 5.6 5.3 4.1 3.9 2.5 2.4 1.0 1.0 0.0 0.0 - 2014 7.8 6.9 6.9 6.3 5.6 5.3 4.1 3.8 2.5 2.3 1.0 0.9 0.0 0.0 - 2015 7.7 6.8 6.8 6.2 5.5 5.2 4.0 3.8 2.5 2.2 1.0 0.9 0.0 0.0 - 2016 7.6 6.7 6.7 6.1 5.5 5.1 4.0 3.7 2.5 2.2 1.0 0.9 0.0 0.0 -Tot 1996 11.1 10.4 9.3 9.2 7.3 7.7 5.2 5.9 3.1 3.7 1.3 1.5 0.0 0.0 - 1997 10.9 10.1 9.1 9.0 7.2 7.5 5.1 5.7 3.1 3.6 1.3 1.5 0.0 0.0 - 1998 10.7 9.8 9.0 8.7 7.0 7.3 5.0 5.5 3.1 3.5 1.3 1.4 0.0 0.0 - 1999 10.5 9.6 8.8 8.5 6.9 7.1 5.0 5.4 3.0 3.4 1.3 1.4 0.0 0.0 - 2000 10.3 9.3 8.6 8.3 6.8 6.9 4.9 5.2 3.0 3.3 1.3 1.4 0.0 0.0 - 2001 10.0 9.0 8.5 8.0 6.6 6.6 4.8 5.0 2.9 3.2 1.2 1.3 0.0 0.0 - 2002 9.8 8.8 8.3 7.8 6.5 6.4 4.7 4.8 2.9 3.1 1.2 1.3 0.0 0.0 - 2003 9.5 8.5 8.1 7.6 6.3 6.2 4.5 4.7 2.8 2.9 1.2 1.2 0.0 0.0 - 2004 9.3 8.2 7.8 7.3 6.1 6.0 4.4 4.5 2.7 2.8 1.1 1.2 0.0 0.0 - 2005 9.0 8.0 7.6 7.1 5.9 5.9 4.3 4.3 2.6 2.7 1.1 1.1 0.0 0.0 - 2006 8.7 7.8 7.4 6.9 5.8 5.7 4.1 4.2 2.5 2.6 1.0 1.1 0.0 0.0 - 2007 8.4 7.5 7.1 6.7 5.6 5.5 4.0 4.1 2.4 2.5 1.0 1.1 0.0 0.0 - 2008 8.1 7.3 6.9 6.6 5.4 5.4 3.8 3.9 2.3 2.4 1.0 1.0 0.0 0.0 - 2009 7.8 7.1 6.7 6.4 5.2 5.2 3.7 3.8 2.3 2.3 0.9 1.0 0.0 0.0 - 2010 7.6 7.0 6.5 6.3 5.1 5.1 3.6 3.7 2.2 2.2 0.9 0.9 0.0 0.0 - 2011 7.4 6.8 6.4 6.1 5.0 5.0 3.5 3.6 2.2 2.2 0.9 0.9 0.0 0.0 - 2012 7.3 6.6 6.3 6.0 4.9 4.9 3.5 3.5 2.1 2.1 0.9 0.9 0.0 0.0 - 2013 7.1 6.5 6.1 5.9 4.8 4.8 3.4 3.4 2.1 2.0 0.9 0.8 0.0 0.0 - 2014 7.0 6.3 6.0 5.8 4.8 4.7 3.4 3.4 2.0 2.0 0.8 0.8 0.0 0.0 - 2015 6.8 6.2 5.9 5.6 4.7 4.6 3.3 3.3 2.0 1.9 0.8 0.8 0.0 0.0 - 2016 6.7 6.1 5.8 5.5 4.6 4.6 3.3 3.2 2.0 1.9 0.8 0.7 0.0 0.0 -Sus 1996 10.7 10.1 8.9 9.0 7.1 7.6 5.2 5.9 3.2 3.8 1.4 1.6 0.0 0.0 - 1997 10.5 9.8 8.8 8.8 7.0 7.4 5.1 5.7 3.2 3.7 1.4 1.5 0.0 0.0 - 1998 10.3 9.6 8.7 8.5 6.9 7.2 5.1 5.5 3.2 3.6 1.4 1.5 0.0 0.0 - 1999 10.2 9.3 8.5 8.3 6.8 7.0 5.0 5.4 3.1 3.5 1.4 1.5 0.0 0.0 - 2000 10.0 9.0 8.4 8.1 6.7 6.8 4.9 5.2 3.1 3.4 1.4 1.4 0.0 0.0 - 2001 9.7 8.8 8.2 7.8 6.5 6.6 4.8 5.0 3.1 3.3 1.3 1.4 0.0 0.0 - 2002 9.5 8.5 8.1 7.6 6.4 6.4 4.7 4.9 3.0 3.2 1.3 1.4 0.0 0.0 - 2003 9.2 8.3 7.8 7.4 6.2 6.2 4.6 4.7 2.9 3.1 1.3 1.3 0.0 0.0 - 2004 8.9 8.0 7.6 7.2 6.0 5.9 4.5 4.5 2.9 3.0 1.2 1.3 0.0 0.0 - 2005 8.6 7.7 7.3 6.9 5.8 5.8 4.3 4.4 2.8 2.8 1.2 1.2 0.0 0.0 - 2006 8.3 7.5 7.1 6.7 5.6 5.6 4.2 4.2 2.7 2.7 1.1 1.2 0.0 0.0 - 2007 8.0 7.2 6.8 6.5 5.4 5.4 4.0 4.1 2.6 2.6 1.1 1.1 0.0 0.0 - 2008 7.7 7.0 6.6 6.3 5.2 5.2 3.9 3.9 2.5 2.5 1.1 1.1 0.0 0.0 - 2009 7.4 6.8 6.4 6.2 5.1 5.1 3.8 3.8 2.4 2.4 1.0 1.1 0.0 0.0 - 2010 7.3 6.6 6.2 6.0 5.0 5.0 3.7 3.7 2.4 2.4 1.0 1.0 0.0 0.0 - 2011 7.1 6.5 6.2 5.9 4.9 4.9 3.7 3.7 2.4 2.3 1.0 1.0 0.0 0.0 - 2012 7.0 6.4 6.1 5.8 4.9 4.9 3.6 3.6 2.3 2.3 1.0 1.0 0.0 0.0 - 2013 7.0 6.3 6.0 5.8 4.9 4.8 3.6 3.6 2.3 2.2 1.0 0.9 0.0 0.0 - 2014 6.9 6.3 6.0 5.7 4.9 4.8 3.6 3.5 2.3 2.2 1.0 0.9 0.0 0.0 - 2015 6.8 6.2 6.0 5.7 4.9 4.8 3.6 3.5 2.3 2.1 1.0 0.9 0.0 0.0 - 2016 6.8 6.1 6.0 5.6 4.9 4.7 3.6 3.5 2.3 2.1 1.0 0.8 0.0 0.0 +Imm 1996 11.3 10.2 9.4 9.1 7.4 7.6 5.4 5.9 3.4 3.8 1.5 1.6 0.0 0.0 + 1997 11.1 9.9 9.2 8.9 7.3 7.4 5.3 5.7 3.3 3.7 1.4 1.5 0.0 0.0 + 1998 10.9 9.7 9.1 8.6 7.2 7.2 5.3 5.5 3.3 3.6 1.4 1.5 0.0 0.0 + 1999 10.7 9.4 9.0 8.4 7.1 7.0 5.2 5.3 3.3 3.5 1.4 1.5 0.0 0.0 + 2000 10.5 9.1 8.8 8.2 6.9 6.8 5.1 5.1 3.2 3.4 1.4 1.4 0.0 0.0 + 2001 10.3 8.9 8.6 7.9 6.8 6.6 5.0 5.0 3.1 3.2 1.3 1.4 0.0 0.0 + 2002 10.0 8.6 8.4 7.7 6.6 6.3 4.9 4.8 3.1 3.1 1.3 1.3 0.0 0.0 + 2003 9.7 8.3 8.2 7.4 6.4 6.1 4.7 4.6 3.0 3.0 1.3 1.3 0.0 0.0 + 2004 9.4 8.0 7.9 7.1 6.2 5.9 4.5 4.4 2.8 2.8 1.2 1.2 0.0 0.0 + 2005 9.0 7.6 7.6 6.9 6.0 5.6 4.4 4.1 2.7 2.6 1.1 1.1 0.0 0.0 + 2006 8.6 7.3 7.4 6.6 5.8 5.4 4.2 3.9 2.6 2.5 1.1 1.1 0.0 0.0 + 2007 8.3 7.0 7.1 6.3 5.5 5.2 4.0 3.8 2.5 2.4 1.0 1.0 0.0 0.0 + 2008 8.0 6.8 6.8 6.1 5.4 5.0 3.8 3.6 2.4 2.2 1.0 0.9 0.0 0.0 + 2009 7.7 6.6 6.6 6.0 5.2 4.9 3.7 3.5 2.3 2.1 0.9 0.9 0.0 0.0 + 2010 7.5 6.4 6.5 5.8 5.1 4.8 3.7 3.4 2.2 2.1 0.9 0.9 0.0 0.0 + 2011 7.4 6.3 6.4 5.8 5.1 4.7 3.6 3.4 2.2 2.0 0.9 0.8 0.0 0.0 + 2012 7.3 6.3 6.4 5.7 5.1 4.7 3.6 3.3 2.2 2.0 0.9 0.8 0.0 0.0 + 2013 7.3 6.2 6.4 5.7 5.1 4.7 3.6 3.3 2.2 2.0 0.9 0.8 0.0 0.0 + 2014 7.3 6.2 6.4 5.7 5.1 4.7 3.7 3.4 2.2 2.0 0.9 0.8 0.0 0.0 + 2015 7.3 6.2 6.4 5.7 5.2 4.8 3.7 3.4 2.3 2.0 0.9 0.8 0.0 0.0 + 2016 7.3 6.2 6.4 5.7 5.2 4.8 3.8 3.4 2.3 2.0 0.9 0.8 0.0 0.0 +Tot 1996 10.7 9.8 8.8 8.7 6.9 7.2 5.0 5.5 3.1 3.5 1.3 1.4 0.0 0.0 + 1997 10.5 9.5 8.6 8.4 6.7 7.0 4.9 5.3 3.0 3.4 1.3 1.4 0.0 0.0 + 1998 10.3 9.2 8.5 8.2 6.6 6.8 4.8 5.1 3.0 3.3 1.3 1.4 0.0 0.0 + 1999 10.0 8.9 8.3 7.9 6.5 6.5 4.7 4.9 2.9 3.2 1.3 1.3 0.0 0.0 + 2000 9.8 8.7 8.1 7.7 6.3 6.3 4.6 4.7 2.9 3.1 1.2 1.3 0.0 0.0 + 2001 9.6 8.4 7.9 7.4 6.2 6.1 4.5 4.6 2.8 2.9 1.2 1.2 0.0 0.0 + 2002 9.3 8.1 7.7 7.2 6.0 5.9 4.3 4.4 2.7 2.8 1.2 1.2 0.0 0.0 + 2003 9.0 7.8 7.5 6.9 5.8 5.6 4.2 4.2 2.6 2.7 1.1 1.1 0.0 0.0 + 2004 8.6 7.5 7.2 6.6 5.6 5.4 4.0 4.0 2.5 2.5 1.1 1.1 0.0 0.0 + 2005 8.3 7.1 6.9 6.4 5.3 5.1 3.8 3.7 2.4 2.4 1.0 1.0 0.0 0.0 + 2006 7.9 6.8 6.6 6.1 5.1 4.9 3.6 3.6 2.2 2.2 0.9 0.9 0.0 0.0 + 2007 7.6 6.5 6.4 5.9 4.9 4.7 3.4 3.4 2.1 2.1 0.9 0.9 0.0 0.0 + 2008 7.3 6.3 6.1 5.6 4.7 4.5 3.3 3.2 2.0 2.0 0.8 0.8 0.0 0.0 + 2009 7.0 6.1 5.9 5.5 4.5 4.4 3.2 3.1 1.9 1.9 0.8 0.8 0.0 0.0 + 2010 6.8 5.9 5.8 5.4 4.4 4.3 3.1 3.0 1.8 1.8 0.8 0.7 0.0 0.0 + 2011 6.7 5.8 5.7 5.3 4.4 4.3 3.0 3.0 1.8 1.7 0.7 0.7 0.0 0.0 + 2012 6.6 5.8 5.6 5.2 4.3 4.2 3.0 2.9 1.8 1.7 0.7 0.7 0.0 0.0 + 2013 6.5 5.7 5.6 5.2 4.3 4.2 3.0 2.9 1.8 1.7 0.7 0.7 0.0 0.0 + 2014 6.5 5.7 5.6 5.2 4.4 4.2 3.0 2.9 1.8 1.7 0.7 0.7 0.0 0.0 + 2015 6.4 5.6 5.6 5.2 4.4 4.2 3.1 2.9 1.8 1.7 0.7 0.7 0.0 0.0 + 2016 6.4 5.6 5.6 5.2 4.4 4.3 3.1 3.0 1.8 1.7 0.8 0.7 0.0 0.0 +Sus 1996 10.3 9.4 8.4 8.4 6.6 7.0 5.0 5.5 3.2 3.6 1.4 1.5 0.0 0.0 + 1997 10.1 9.2 8.3 8.2 6.5 6.8 4.9 5.3 3.1 3.5 1.4 1.5 0.0 0.0 + 1998 9.9 8.9 8.1 7.9 6.4 6.6 4.8 5.1 3.1 3.4 1.4 1.5 0.0 0.0 + 1999 9.7 8.6 8.0 7.7 6.3 6.4 4.7 4.9 3.1 3.3 1.4 1.4 0.0 0.0 + 2000 9.4 8.3 7.8 7.4 6.1 6.2 4.6 4.7 3.0 3.2 1.3 1.4 0.0 0.0 + 2001 9.2 8.1 7.6 7.2 6.0 5.9 4.5 4.5 2.9 3.0 1.3 1.3 0.0 0.0 + 2002 8.9 7.8 7.4 6.9 5.8 5.7 4.4 4.4 2.8 2.9 1.3 1.3 0.0 0.0 + 2003 8.6 7.5 7.2 6.7 5.6 5.5 4.2 4.2 2.7 2.8 1.2 1.2 0.0 0.0 + 2004 8.3 7.1 6.9 6.4 5.4 5.3 4.0 4.0 2.6 2.6 1.2 1.2 0.0 0.0 + 2005 7.9 6.8 6.6 6.1 5.2 5.0 3.8 3.8 2.5 2.5 1.1 1.1 0.0 0.0 + 2006 7.5 6.5 6.3 5.9 5.0 4.8 3.7 3.6 2.4 2.3 1.0 1.0 0.0 0.0 + 2007 7.2 6.3 6.1 5.6 4.7 4.6 3.5 3.4 2.3 2.2 1.0 1.0 0.0 0.0 + 2008 6.9 6.0 5.9 5.5 4.6 4.5 3.4 3.3 2.2 2.1 0.9 0.9 0.0 0.0 + 2009 6.7 5.8 5.7 5.3 4.4 4.3 3.3 3.2 2.1 2.0 0.9 0.9 0.0 0.0 + 2010 6.5 5.7 5.6 5.2 4.4 4.3 3.2 3.1 2.0 1.9 0.9 0.8 0.0 0.0 + 2011 6.4 5.6 5.5 5.1 4.3 4.2 3.2 3.1 2.0 1.9 0.9 0.8 0.0 0.0 + 2012 6.4 5.6 5.5 5.1 4.4 4.2 3.2 3.1 2.0 1.9 0.9 0.8 0.0 0.0 + 2013 6.4 5.6 5.5 5.1 4.4 4.3 3.2 3.1 2.0 1.9 0.9 0.8 0.0 0.0 + 2014 6.4 5.6 5.5 5.2 4.5 4.3 3.3 3.1 2.1 1.9 0.9 0.8 0.0 0.0 + 2015 6.4 5.6 5.6 5.2 4.5 4.4 3.3 3.1 2.1 1.9 0.9 0.8 0.0 0.0 + 2016 6.4 5.6 5.6 5.2 4.6 4.4 3.4 3.2 2.1 1.9 0.9 0.8 0.0 0.0 \end{Soutput} \end{Schunk} We now have the relevant points for the graph showing YLL to diabetes @@ -671,7 +671,7 @@ rownames(surv) <- NULL return(surv) } - + \end{Soutput} \end{Schunk} @@ -690,6 +690,7 @@ cbind(age = age, surv = surv, erl = c(musmuc((surv[-1] - diff(surv)/2))/surv[-length(surv)], 0) * int) } + \end{Soutput} \end{Schunk} @@ -749,7 +750,7 @@ colnames(surv)[-1] <- paste("A", c(age.in, Al), sep = "") return(surv) } - + \end{Soutput} \end{Schunk} @@ -801,7 +802,7 @@ } return(erl) } - + \end{Soutput} \end{Schunk} @@ -816,6 +817,7 @@ note = TRUE) erl(int = int, muW = muW, muD = muD, lam = lam, age.in = age.in, A = A, immune = immune, yll = TRUE, note = note)[, "YLL"] + \end{Soutput} \end{Schunk} Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/yll-tot.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/yll-tot.pdf differ Binary files /tmp/tmp9nPJ8m/L6Hcya0i23/r-cran-epi-2.32/vignettes/yl.pdf and /tmp/tmp9nPJ8m/CwTzu5oAY4/r-cran-epi-2.37/vignettes/yl.pdf differ diff -Nru r-cran-epi-2.32/vignettes/yl.tex r-cran-epi-2.37/vignettes/yl.tex --- r-cran-epi-2.32/vignettes/yl.tex 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-epi-2.37/vignettes/yl.tex 2019-05-23 08:15:33.000000000 +0000 @@ -0,0 +1,908 @@ + +%\VignetteIndexEntry{Years of life lost (YLL)} +\documentclass[a4paper,twoside,12pt]{report} + +\newcommand{\Title}{Years of Life Lost (YLL) + to disease\\Diabetes in DK as example} +\newcommand{\Tit}{YLL} +\newcommand{\Version}{February 1.2} +\newcommand{\Dates}{November 2017} +\newcommand{\Where}{SDC} +\newcommand{\Homepage}{\url{http://bendixcarstensen.com/Epi}} +\newcommand{\Faculty}{\begin{tabular}{rl} +Bendix Carstensen + & Steno Diabetes Center, Gentofte, Denmark\\ + & {\small \& Department of Biostatistics, + University of Copenhagen} \\ + & \texttt{b@bxc.dk}\\ + & \url{http://BendixCarstensen.com} \\[1em] + \end{tabular}} +\input{topreport} +\renewcommand{\rwpre}{./yll} + +\chapter{Theory and technicalities} + +This vignette for the \texttt{Epi} package describes the +probabilistic and demographic background for and technical implementation +of the \texttt{erl} and \texttt{yll} functions that computes the +expected residual life time and years of life lost in an illness-death +model. + +\section{Years of life lost (YLL)} + +\ldots to diabetes or any other disease for that matter. + +The general concept in calculation of ``years lost to\ldots'' is the +comparison of the expected lifetime between two groups of persons; one +with and one without disease (in this example DM). The expected +lifetime is the area under the survival curve, so basically the +exercise requires that two survival curves that are deemed relevant be +available. + +The years of life lost is therefore just the area between the survival +curves for those ``Well'', $S_W(t)$, and for those ``Diseased'', +$S_D(t)$: +\[ + \YLL = \int_0^\infty S_W(t) - S_D(t) \dif t +\] +The time $t$ could of course be age, but it could also be ``time after +age 50'' and the survival curves compared would then be survival +curves \emph{conditional} on survival till age 50, and the YLL would +be the years of life lost for a 50-year old person with diabetes. + +If we are referring to the expected lifetime we will more precisely use +the label expected residual lifetime, ERL. + +\section{Constructing the survival curves} + +YLL can be computed in two different ways, depending on the way the +survival curve and hence the expected lifetime of a person +\emph{without} diabetes is computed: +\begin{itemize} +\item Assume that the ``Well'' persons are \emph{immune} to disease + --- using only the non-DM mortality rates throughout for calculation + of expected life time. +\item Assume that the ``Well'' persons \emph{can} acquire the disease and + thereby see an increased mortality, thus involving all three rates + shown in figure \ref{fig:states}. +\end{itemize} +The former gives a higher YLL because the comparison is to persons +assumed immune to DM (and yet with the same mortality as non-immune +prior to diagnosis), the latter gives a more realistic picture of the +comparison of group of persons with and without diabetes at a given +age that can be interpreted in the real world. + +The differences can be illustrated by figure \ref{fig:states}; the +immune approach corresponds to an assumption of $\lambda(t)=0$ in the +calculation of the survival curve for a person in the ``Well'' state. + +Calculation of the survival of a diseased person already in the ``DM'' +state is unaffected by assumptions about $\lambda$. + +\insfig{states}{0.7}{Illness-death model describing diabetes incidence + and -mortality.} + +\subsection{Total mortality --- a shortcut?} + +A practical crude shortcut could be to compare the ERL in the diabetic +population to the ERL for the \emph{entire} population (that is use +the total mortality ignoring diabetes status). + +Note however that this approach also counts the mortality of persons +that acquired the disease earlier, thus making the comparison +population on average more ill than the population we aim at, namely +those well at a given time, which only then become more gradually ill. + +How large these effects are can however be empirically explored, as we +shall do later. + +\subsection{Disease duration} + +In the exposition above there is no explicit provision for the effect of +disease duration, but if we were able to devise mortality rates for +any combination of age and duration, this could be taken into account. + +There are however severe limitations in this as we in principle would +want to have duration effects as long as the age-effects --- in +principle for all $(a,d)$ where $d\leq A$, where $A$ is the age at +which we condition. So even if we were only to compute ERL from +age, say, 40 we would still need duration effects up to 60 years +(namely to age 100). + +The incorporation of duration effects is in principle trivial from a +computational point of view, but we would be forced to entertain +models predicting duration effects way beyond what is actually +observed disease duration in any practical case. + +\subsection{Computing integrals} + +The practical calculations of survival curves, ERL and YLL involves +calculation of (cumulative) integrals of rates and functions of these +as we shall see below. This is easy if we have a closed form +expression of the function, so its value may be computed at any time +point --- this will be the case if we model rates by smooth parametric +functions. + +Computing the (cumulative) integral of a function is done as follows: +\begin{itemize} +\item Compute the value of the function (mortality rate for example) + at the midpoints of a sequence of narrow equidistant intervals --- + for example one- or three month intervals of age, say. +\item Take the cumulative sum of these values multiplied by the + interval length --- this will be a very close approximation to the + cumulative integral evaluated at the end of each interval. +\item If the intervals are really small (like 1/100 year), the + distinction between the value at the middle and at the end of each + interval becomes irrelevant. +\end{itemize} +Note that in the above it is assumed that the rates are given in units +corresponding to the interval length --- or more precisely, as the +cumulative rates over the interval. + +\section{Survival functions in the illness-death model} + +The survival functions for persons in the ``Well'' state can be +computed under two fundamentally different scenarios, depending on +whether persons in the ``Well'' state are assumed to be immune to the +disease ($\lambda(a)=0$) or not. + +\subsection{Immune approach} + +In this case both survival functions for person in the two states are +the usual simple transformation of the cumulative mortality rates: +\[ + S_W(a) = \exp\left(-\int_0^a\!\!\mu_W(u) \dif u \right), \qquad + S_D(a) = \exp\left(-\int_0^a\!\!\mu_D(u) \dif u \right) +\] + +\subsubsection{Conditional survival functions} + +If we want the \emph{conditional} survival functions given survival to +age $A$, say, they are just: +\[ + S_W(a|A) = S_W(a)/S_W(A), \qquad S_D(a|A) = S_D(a)/S_D(A) +\] + +\subsection{Non-immune approach} + +For a diseased person, the survival function in this states is the same +as above, but the survival function for a person without disease (at +age 0) is (see figure \ref{fig:states}): +\[ +S(a) = \ptxt{Well}\!(a) + \ptxt{DM}\!(a) +\] +In the appendix of the paper \cite{Carstensen.2008c} is an indication +of how to compute the probability of being in any of the four states +shown in figure \ref{fig:states}, which I shall repeat here: + +In terms of the rates, the probability of being in the ``Well'' box is +simply the probability of escaping both death (at a rate of $\mu_W(a)$) +and diabetes (at a rate of $\lambda(a)$): +\[ + \ptxt{Well}(a) = \exp\left(\!-\int_0^a\!\!\mu_W(u)+\lambda(u) \right) \dif u +\] +The probability of being alive with diabetes at age $a$, is computed given that + diabetes occurred at age $s$ ($s data( DMepi ) +\end{Sinput} +\end{Schunk} +The dataset \texttt{DMepi} contains diabetes events, deaths and +person-years for persons without diabetes and deaths and person-years +for persons with diabetes: +\begin{Schunk} +\begin{Sinput} +> str( DMepi ) +\end{Sinput} +\begin{Soutput} +'data.frame': 4200 obs. of 8 variables: + $ sex : Factor w/ 2 levels "M","F": 1 1 1 1 1 1 1 1 1 1 ... + $ A : num 0 0 0 0 0 0 0 0 0 0 ... + $ P : num 1996 1997 1998 1999 2000 ... + $ D.DM: num 0 0 0 0 0 0 0 0 0 0 ... + $ Y.DM: num 0.484 0.64 1.641 0.552 2.507 ... + $ X : num 1 2 4 4 4 1 1 3 4 1 ... + $ D.nD: num 28 19 20 11 21 16 21 15 16 16 ... + $ Y.nD: num 35469 35085 34240 34056 34002 ... +\end{Soutput} +\begin{Sinput} +> head( DMepi ) +\end{Sinput} +\begin{Soutput} + sex A P D.DM Y.DM X D.nD Y.nD +2 M 0 1996 0 0.4839151 1 28 35468.92 +3 M 0 1997 0 0.6399726 2 19 35085.18 +4 M 0 1998 0 1.6406571 4 20 34240.14 +5 M 0 1999 0 0.5523614 4 11 34055.52 +6 M 0 2000 0 2.5065024 4 21 34002.22 +7 M 0 2001 0 0.1184120 1 16 34177.39 +\end{Soutput} +\end{Schunk} +For each combination of sex, age, period and date of birth in 1 year +age groups, we have the person-years in the ``Well'' (\texttt{Y.nD}) +and the ``DM'' (\texttt{Y.DM}) states, as well as the number of deaths +from these (\texttt{D.nD}, \texttt{D.DM}) and the number of incident +diabetes cases from the ``Well'' state (\texttt{X}). + +In order to compute the years of life lost to diabetes and how this +has changed over time, we fit models for the mortality and incidence +of both groups (and of course, separately for men and women). The +models we use will be age-period-cohort models \cite{Carstensen.2007a} +providing estimated mortality rates for ages 0--99 and dates +1.1.1996--1.1.2016. + +First we transform the age and period variables to reflect the mean +age and period in each of the Lexis triangles. We also compute the +total number of deaths and amount of risk time, as we are going to +model the total mortality as well. Finally we restrict the dataset to +ages over 30 only: +\begin{Schunk} +\begin{Sinput} +> DMepi <- transform( subset( DMepi, A>30 ), ++ D.T = D.nD + D.DM, ++ Y.T = Y.nD + Y.DM ) +> head(DMepi) +\end{Sinput} +\begin{Soutput} + sex A P D.DM Y.DM X D.nD Y.nD D.T Y.T +684 M 31 1996 0 305.9671 29 51 44161.83 51 44467.80 +685 M 31 1997 2 326.2074 31 54 45508.64 56 45834.85 +686 M 31 1998 2 340.1759 34 46 44954.45 48 45294.63 +687 M 31 1999 5 330.9918 24 39 41148.97 44 41479.96 +688 M 31 2000 1 332.0876 41 26 39027.30 27 39359.39 +689 M 31 2001 2 310.8467 35 33 37975.78 35 38286.63 +\end{Soutput} +\end{Schunk} +With the correct age and period coding in the Lexis triangles, we fit +models for the mortalities and incidences. Note that we for +comparative purposes also fit a model for the \emph{total} mortality, +ignoring the +\begin{Schunk} +\begin{Sinput} +> # Knots used in all models +> ( a.kn <- seq(40,95,,6) ) +\end{Sinput} +\begin{Soutput} +[1] 40 51 62 73 84 95 +\end{Soutput} +\begin{Sinput} +> ( p.kn <- seq(1997,2015,,4) ) +\end{Sinput} +\begin{Soutput} +[1] 1997 2003 2009 2015 +\end{Soutput} +\begin{Sinput} +> ( c.kn <- seq(1910,1976,,6) ) +\end{Sinput} +\begin{Soutput} +[1] 1910.0 1923.2 1936.4 1949.6 1962.8 1976.0 +\end{Soutput} +\begin{Sinput} +> # Check the number of events between knots +> ae <- xtabs( cbind(D.nD,D.DM,X) ~ cut(A,c(30,a.kn,Inf)) + sex, data=DMepi ) +> ftable( addmargins(ae,1), col.vars=3:2 ) +\end{Sinput} +\begin{Soutput} + D.nD D.DM X + sex M F M F M F +cut(A, c(30, a.kn, Inf)) +(30,40] 9135 4650 597 277 12080 9648 +(40,51] 25535 15784 2947 1439 35445 23925 +(51,62] 59698 40171 10838 5253 60539 40034 +(62,73] 106519 81253 26197 14299 55908 44221 +(73,84] 158365 156678 34510 28834 27985 30381 +(84,95] 100880 179466 16194 25317 5272 8967 +(95,Inf] 6095 21414 640 1928 90 288 +Sum 466227 499416 91923 77347 197319 157464 +\end{Soutput} +\begin{Sinput} +> pe <- xtabs( cbind(D.nD,D.DM,X) ~ cut(P,c(1990,p.kn,Inf)) + sex, data=DMepi ) +> ftable( addmargins(pe,1), col.vars=3:2 ) +\end{Sinput} +\begin{Soutput} + D.nD D.DM X + sex M F M F M F +cut(P, c(1990, p.kn, Inf)) +(1990,1997] 51569 53567 6407 5990 13175 10960 +(1997,2003] 144136 155695 22390 19922 49805 41277 +(2003,2009] 131159 141782 26079 22249 60554 48066 +(2009,2015] 119812 127714 31285 24704 63440 49457 +(2015,Inf] 19551 20658 5762 4482 10345 7704 +Sum 466227 499416 91923 77347 197319 157464 +\end{Soutput} +\begin{Sinput} +> ce <- xtabs( cbind(D.nD,D.DM,X) ~ cut(P-A,c(-Inf,c.kn,Inf)) + sex, data=DMepi ) +> ftable( addmargins(ce,1), col.vars=3:2 ) +\end{Sinput} +\begin{Soutput} + D.nD D.DM X + sex M F M F M F +cut(P - A, c(-Inf, c.kn, Inf)) +(-Inf,1.91e+03] 19679 49020 2004 4446 599 1416 +(1.91e+03,1.92e+03] 129799 189192 19780 26870 10665 15275 +(1.92e+03,1.94e+03] 158953 152454 35667 28471 37850 36659 +(1.94e+03,1.95e+03] 99058 72432 25494 13074 71489 50631 +(1.95e+03,1.96e+03] 44129 28315 7534 3731 51750 34255 +(1.96e+03,1.98e+03] 13598 7484 1373 720 22942 17689 +(1.98e+03, Inf] 1011 519 71 35 2024 1539 +Sum 466227 499416 91923 77347 197319 157464 +\end{Soutput} +\begin{Sinput} +> # Fit an APC-model for all transitions, seperately for men and women +> mW.m <- glm( D.nD ~ -1 + Ns(A ,knots=a.kn,int=TRUE) + ++ Ns( P,knots=p.kn,ref=2005) + ++ Ns(P-A,knots=c.kn,ref=1950), ++ offset = log(Y.nD), ++ family = poisson, ++ data = subset( DMepi, sex=="M" ) ) +> mD.m <- update( mW.m, D.DM ~ . , offset=log(Y.DM) ) +> mT.m <- update( mW.m, D.T ~ . , offset=log(Y.T ) ) +> lW.m <- update( mW.m, X ~ . ) +> # Model for women +> mW.f <- update( mW.m, data = subset( DMepi, sex=="F" ) ) +> mD.f <- update( mD.m, data = subset( DMepi, sex=="F" ) ) +> mT.f <- update( mT.m, data = subset( DMepi, sex=="F" ) ) +> lW.f <- update( lW.m, data = subset( DMepi, sex=="F" ) ) +\end{Sinput} +\end{Schunk} + +\section{Residual life time and years lost to DM} + +We now collect the estimated years of life lost classified by method +(immune assumption or not), sex, age and calendar time: +\begin{Schunk} +\begin{Sinput} +> a.ref <- 30:90 +> p.ref <- 1996:2016 +> aYLL <- NArray( list( type = c("Imm","Tot","Sus"), ++ sex = levels( DMepi$sex ), ++ age = a.ref, ++ date = p.ref ) ) +> str( aYLL ) +\end{Sinput} +\begin{Soutput} + logi [1:3, 1:2, 1:61, 1:21] NA NA NA NA NA NA ... + - attr(*, "dimnames")=List of 4 + ..$ type: chr [1:3] "Imm" "Tot" "Sus" + ..$ sex : chr [1:2] "M" "F" + ..$ age : chr [1:61] "30" "31" "32" "33" ... + ..$ date: chr [1:21] "1996" "1997" "1998" "1999" ... +\end{Soutput} +\begin{Sinput} +> system.time( ++ for( ip in p.ref ) ++ { ++ nd <- data.frame( A = seq(30,90,0.2)+0.1, ++ P = ip, ++ Y.nD = 1, ++ Y.DM = 1, ++ Y.T = 1 ) ++ muW.m <- ci.pred( mW.m, nd )[,1] ++ muD.m <- ci.pred( mD.m, nd )[,1] ++ muT.m <- ci.pred( mT.m, nd )[,1] ++ lam.m <- ci.pred( lW.m, nd )[,1] ++ muW.f <- ci.pred( mW.f, nd )[,1] ++ muD.f <- ci.pred( mD.f, nd )[,1] ++ muT.f <- ci.pred( mT.f, nd )[,1] ++ lam.f <- ci.pred( lW.f, nd )[,1] ++ aYLL["Imm","M",,paste(ip)] <- yll( int=0.2, muW.m, muD.m, lam=NULL, ++ A=a.ref, age.in=30, note=FALSE )[-1] ++ aYLL["Imm","F",,paste(ip)] <- yll( int=0.2, muW.f, muD.f, lam=NULL, ++ A=a.ref, age.in=30, note=FALSE )[-1] ++ aYLL["Tot","M",,paste(ip)] <- yll( int=0.2, muT.m, muD.m, lam=NULL, ++ A=a.ref, age.in=30, note=FALSE )[-1] ++ aYLL["Tot","F",,paste(ip)] <- yll( int=0.2, muT.f, muD.f, lam=NULL, ++ A=a.ref, age.in=30, note=FALSE )[-1] ++ aYLL["Sus","M",,paste(ip)] <- yll( int=0.2, muW.m, muD.m, lam=lam.m, ++ A=a.ref, age.in=30, note=FALSE )[-1] ++ aYLL["Sus","F",,paste(ip)] <- yll( int=0.2, muW.f, muD.f, lam=lam.f, ++ A=a.ref, age.in=30, note=FALSE )[-1] ++ } ) +\end{Sinput} +\begin{Soutput} + user system elapsed + 16.510 8.330 14.696 +\end{Soutput} +\begin{Sinput} +> round( ftable( aYLL[,,seq(1,61,10),], col.vars=c(3,2) ), 1 ) +\end{Sinput} +\begin{Soutput} + age 30 40 50 60 70 80 90 + sex M F M F M F M F M F M F M F +type date +Imm 1996 11.3 10.2 9.4 9.1 7.4 7.6 5.4 5.9 3.4 3.8 1.5 1.6 0.0 0.0 + 1997 11.1 9.9 9.2 8.9 7.3 7.4 5.3 5.7 3.3 3.7 1.4 1.5 0.0 0.0 + 1998 10.9 9.7 9.1 8.6 7.2 7.2 5.3 5.5 3.3 3.6 1.4 1.5 0.0 0.0 + 1999 10.7 9.4 9.0 8.4 7.1 7.0 5.2 5.3 3.3 3.5 1.4 1.5 0.0 0.0 + 2000 10.5 9.1 8.8 8.2 6.9 6.8 5.1 5.1 3.2 3.4 1.4 1.4 0.0 0.0 + 2001 10.3 8.9 8.6 7.9 6.8 6.6 5.0 5.0 3.1 3.2 1.3 1.4 0.0 0.0 + 2002 10.0 8.6 8.4 7.7 6.6 6.3 4.9 4.8 3.1 3.1 1.3 1.3 0.0 0.0 + 2003 9.7 8.3 8.2 7.4 6.4 6.1 4.7 4.6 3.0 3.0 1.3 1.3 0.0 0.0 + 2004 9.4 8.0 7.9 7.1 6.2 5.9 4.5 4.4 2.8 2.8 1.2 1.2 0.0 0.0 + 2005 9.0 7.6 7.6 6.9 6.0 5.6 4.4 4.1 2.7 2.6 1.1 1.1 0.0 0.0 + 2006 8.6 7.3 7.4 6.6 5.8 5.4 4.2 3.9 2.6 2.5 1.1 1.1 0.0 0.0 + 2007 8.3 7.0 7.1 6.3 5.5 5.2 4.0 3.8 2.5 2.4 1.0 1.0 0.0 0.0 + 2008 8.0 6.8 6.8 6.1 5.4 5.0 3.8 3.6 2.4 2.2 1.0 0.9 0.0 0.0 + 2009 7.7 6.6 6.6 6.0 5.2 4.9 3.7 3.5 2.3 2.1 0.9 0.9 0.0 0.0 + 2010 7.5 6.4 6.5 5.8 5.1 4.8 3.7 3.4 2.2 2.1 0.9 0.9 0.0 0.0 + 2011 7.4 6.3 6.4 5.8 5.1 4.7 3.6 3.4 2.2 2.0 0.9 0.8 0.0 0.0 + 2012 7.3 6.3 6.4 5.7 5.1 4.7 3.6 3.3 2.2 2.0 0.9 0.8 0.0 0.0 + 2013 7.3 6.2 6.4 5.7 5.1 4.7 3.6 3.3 2.2 2.0 0.9 0.8 0.0 0.0 + 2014 7.3 6.2 6.4 5.7 5.1 4.7 3.7 3.4 2.2 2.0 0.9 0.8 0.0 0.0 + 2015 7.3 6.2 6.4 5.7 5.2 4.8 3.7 3.4 2.3 2.0 0.9 0.8 0.0 0.0 + 2016 7.3 6.2 6.4 5.7 5.2 4.8 3.8 3.4 2.3 2.0 0.9 0.8 0.0 0.0 +Tot 1996 10.7 9.8 8.8 8.7 6.9 7.2 5.0 5.5 3.1 3.5 1.3 1.4 0.0 0.0 + 1997 10.5 9.5 8.6 8.4 6.7 7.0 4.9 5.3 3.0 3.4 1.3 1.4 0.0 0.0 + 1998 10.3 9.2 8.5 8.2 6.6 6.8 4.8 5.1 3.0 3.3 1.3 1.4 0.0 0.0 + 1999 10.0 8.9 8.3 7.9 6.5 6.5 4.7 4.9 2.9 3.2 1.3 1.3 0.0 0.0 + 2000 9.8 8.7 8.1 7.7 6.3 6.3 4.6 4.7 2.9 3.1 1.2 1.3 0.0 0.0 + 2001 9.6 8.4 7.9 7.4 6.2 6.1 4.5 4.6 2.8 2.9 1.2 1.2 0.0 0.0 + 2002 9.3 8.1 7.7 7.2 6.0 5.9 4.3 4.4 2.7 2.8 1.2 1.2 0.0 0.0 + 2003 9.0 7.8 7.5 6.9 5.8 5.6 4.2 4.2 2.6 2.7 1.1 1.1 0.0 0.0 + 2004 8.6 7.5 7.2 6.6 5.6 5.4 4.0 4.0 2.5 2.5 1.1 1.1 0.0 0.0 + 2005 8.3 7.1 6.9 6.4 5.3 5.1 3.8 3.7 2.4 2.4 1.0 1.0 0.0 0.0 + 2006 7.9 6.8 6.6 6.1 5.1 4.9 3.6 3.6 2.2 2.2 0.9 0.9 0.0 0.0 + 2007 7.6 6.5 6.4 5.9 4.9 4.7 3.4 3.4 2.1 2.1 0.9 0.9 0.0 0.0 + 2008 7.3 6.3 6.1 5.6 4.7 4.5 3.3 3.2 2.0 2.0 0.8 0.8 0.0 0.0 + 2009 7.0 6.1 5.9 5.5 4.5 4.4 3.2 3.1 1.9 1.9 0.8 0.8 0.0 0.0 + 2010 6.8 5.9 5.8 5.4 4.4 4.3 3.1 3.0 1.8 1.8 0.8 0.7 0.0 0.0 + 2011 6.7 5.8 5.7 5.3 4.4 4.3 3.0 3.0 1.8 1.7 0.7 0.7 0.0 0.0 + 2012 6.6 5.8 5.6 5.2 4.3 4.2 3.0 2.9 1.8 1.7 0.7 0.7 0.0 0.0 + 2013 6.5 5.7 5.6 5.2 4.3 4.2 3.0 2.9 1.8 1.7 0.7 0.7 0.0 0.0 + 2014 6.5 5.7 5.6 5.2 4.4 4.2 3.0 2.9 1.8 1.7 0.7 0.7 0.0 0.0 + 2015 6.4 5.6 5.6 5.2 4.4 4.2 3.1 2.9 1.8 1.7 0.7 0.7 0.0 0.0 + 2016 6.4 5.6 5.6 5.2 4.4 4.3 3.1 3.0 1.8 1.7 0.8 0.7 0.0 0.0 +Sus 1996 10.3 9.4 8.4 8.4 6.6 7.0 5.0 5.5 3.2 3.6 1.4 1.5 0.0 0.0 + 1997 10.1 9.2 8.3 8.2 6.5 6.8 4.9 5.3 3.1 3.5 1.4 1.5 0.0 0.0 + 1998 9.9 8.9 8.1 7.9 6.4 6.6 4.8 5.1 3.1 3.4 1.4 1.5 0.0 0.0 + 1999 9.7 8.6 8.0 7.7 6.3 6.4 4.7 4.9 3.1 3.3 1.4 1.4 0.0 0.0 + 2000 9.4 8.3 7.8 7.4 6.1 6.2 4.6 4.7 3.0 3.2 1.3 1.4 0.0 0.0 + 2001 9.2 8.1 7.6 7.2 6.0 5.9 4.5 4.5 2.9 3.0 1.3 1.3 0.0 0.0 + 2002 8.9 7.8 7.4 6.9 5.8 5.7 4.4 4.4 2.8 2.9 1.3 1.3 0.0 0.0 + 2003 8.6 7.5 7.2 6.7 5.6 5.5 4.2 4.2 2.7 2.8 1.2 1.2 0.0 0.0 + 2004 8.3 7.1 6.9 6.4 5.4 5.3 4.0 4.0 2.6 2.6 1.2 1.2 0.0 0.0 + 2005 7.9 6.8 6.6 6.1 5.2 5.0 3.8 3.8 2.5 2.5 1.1 1.1 0.0 0.0 + 2006 7.5 6.5 6.3 5.9 5.0 4.8 3.7 3.6 2.4 2.3 1.0 1.0 0.0 0.0 + 2007 7.2 6.3 6.1 5.6 4.7 4.6 3.5 3.4 2.3 2.2 1.0 1.0 0.0 0.0 + 2008 6.9 6.0 5.9 5.5 4.6 4.5 3.4 3.3 2.2 2.1 0.9 0.9 0.0 0.0 + 2009 6.7 5.8 5.7 5.3 4.4 4.3 3.3 3.2 2.1 2.0 0.9 0.9 0.0 0.0 + 2010 6.5 5.7 5.6 5.2 4.4 4.3 3.2 3.1 2.0 1.9 0.9 0.8 0.0 0.0 + 2011 6.4 5.6 5.5 5.1 4.3 4.2 3.2 3.1 2.0 1.9 0.9 0.8 0.0 0.0 + 2012 6.4 5.6 5.5 5.1 4.4 4.2 3.2 3.1 2.0 1.9 0.9 0.8 0.0 0.0 + 2013 6.4 5.6 5.5 5.1 4.4 4.3 3.2 3.1 2.0 1.9 0.9 0.8 0.0 0.0 + 2014 6.4 5.6 5.5 5.2 4.5 4.3 3.3 3.1 2.1 1.9 0.9 0.8 0.0 0.0 + 2015 6.4 5.6 5.6 5.2 4.5 4.4 3.3 3.1 2.1 1.9 0.9 0.8 0.0 0.0 + 2016 6.4 5.6 5.6 5.2 4.6 4.4 3.4 3.2 2.1 1.9 0.9 0.8 0.0 0.0 +\end{Soutput} +\end{Schunk} +We now have the relevant points for the graph showing YLL to diabetes +for men and women by age, and calendar year, both under the immunity +and susceptibility models for the calculation of YLL. +\begin{Schunk} +\begin{Sinput} +> plyll <- function(wh){ ++ par( mfrow=c(1,2), mar=c(3,3,1,1), mgp=c(3,1,0)/1.6, bty="n", las=1 ) ++ ++ matplot( a.ref, aYLL[wh,"M",,], ++ type="l", lty=1, col="blue", lwd=1:2, ++ ylim=c(0,12), xlab="Age", ++ ylab="Years lost to DM", yaxs="i" ) ++ abline(v=50,h=1:10,col=gray(0.7)) ++ text( 90, 11, "Men", col="blue", adj=1 ) ++ text( 40, aYLL[wh,"M","40","1996"], "1996", adj=c(0,0), col="blue" ) ++ text( 43, aYLL[wh,"M","44","2016"], "2016", adj=c(1,1), col="blue" ) ++ ++ matplot( a.ref, aYLL[wh,"F",,], ++ type="l", lty=1, col="red", lwd=1:2, ++ ylim=c(0,12), xlab="Age", ++ ylab="Years lost to DM", yaxs="i" ) ++ abline(v=50,h=1:10,col=gray(0.7)) ++ text( 90, 11, "Women", col="red", adj=1 ) ++ text( 40, aYLL[wh,"F","40","1996"], "1996", adj=c(0,0), col="red" ) ++ text( 43, aYLL[wh,"F","44","2016"], "2016", adj=c(1,1), col="red" ) ++ } +> plyll("Imm") +\end{Sinput} +\end{Schunk} +\begin{Schunk} +\begin{Sinput} +> plyll("Tot") +\end{Sinput} +\end{Schunk} +\begin{Schunk} +\begin{Sinput} +> plyll("Sus") +\end{Sinput} +\end{Schunk} +\begin{figure}[h] + \centering + \includegraphics[width=\textwidth]{yll-imm} + \caption{Years of life lost to DM: the difference in expected + residual life time at different ages between persons with and + without diabetes, assuming the persons without diabetes at a given + age remain free from diabetes (immunity assumption --- not + reasonable). The lines refer to date of evaluation; the top lines + refer to 1.1.1996 the bottom ones to 1.1.2016. Blue curves are + men, red women.} + \label{fig:imm} +\end{figure} + +\begin{figure}[h] + \centering + \includegraphics[width=\textwidth]{yll-sus} + \caption{Years of life lost to DM: the difference in expected + residual life time at different ages between persons with and + without diabetes, allowing the persons without diabetes at a given + to contract diabetes and thus be subject to higher mortality. The + lines refer to date of evaluation; the top lines refer to 1.1.1996 + the bottom ones to 1.1.2016. Blue curves are men, red women.} + \label{fig:sus} +\end{figure} + +\begin{figure}[h] + \centering + \includegraphics[width=\textwidth]{yll-tot} + \caption{Years of life lost to DM: the difference in expected + residual life time at different ages between persons with and + without diabetes. Allowance for susceptibility is approximated by + using the total population mortality instead of non-DM + mortality. The lines refer to date of evaluation; the top lines + refer to 1.1.1996 the bottom ones to 1.1.2016. Blue curves are + men, red women.} + \label{fig:tot} +\end{figure} + +From figure \ref{fig:sus} we see that for men aged 50 the years lost to +diabetes has decreased from a bit over 8 to a bit less than 6 years, +and for women from 8.5 to 5 years; so a greater improvement for women. + +\chapter{Practical implementation} + +We have devised functions that wraps these formulae up for practical +use. + +\section{Function definitions} + +% The following code sources the originally formatted code in erl.R in +% order to make it printable with all the formatting and comments in +% it. However the sourcing does not work when compiling the vignettes +% in R CMD build. So when checking the code we comment this out by +% putting eval=FALSE. So depending on whether we actually construct +% the pdf for the inst/doc folder or test (upload) the package, one of +% the following two chunks are run with eval=FALSE and the other with +% eval=TRUE. +% When checking the package +When using the functions it is assumed that the functions $\mu_W$, +$\mu_D$ and $\lambda$ are given as vectors corresponding to +equidistantly (usually tightly) spaced ages from 0 to $K$ where K is +the age where everyone can safely be assumed dead. + +\texttt{surv1} is a simple function that computes the survival +function from a vector of mortality rates, and optionally the +conditional survival given being alive at prespecified ages: +\begin{Schunk} +\begin{Sinput} +> surv1 +\end{Sinput} +\begin{Soutput} +function( int, mu, age.in=0, A=NULL ) +{ +# Computes the survival function from age A till the end, assuming +# that mu is a vector of mortalities in intervals of length int. +# int and mu should be in compatible units that is T and T^-1 for +# some unit T (months, years, ...) + +# impute 0s for NAs +if( any(is.na(mu)) ){ mu[is.na(mu)] <- 0 ; warning("NAs in agument 'mu' set to 0") } + +# age-class boundaries +age <- 0:length(mu)*int + age.in + +# cumulative rates and survival at the boundaries +Mu <- c( 0, cumsum( mu )*int ) +Sv <- exp( -Mu ) +surv <- data.frame( age=age, surv=Sv ) + +# if a vector of conditioning ages A is given +if( cond <- !is.null(A) ) + { + j <- 0 + # actual conditioning ages + cage <- NULL + for( ia in A ) + { + j <- j+1 + # Where is the age we condition on + cA <- which( diff(age>ia)==1 ) + surv <- cbind( surv, pmin( 1, surv$surv/(surv$surv[cA]) ) ) + cage[j] <- surv$age[cA] + } + } +names( surv )[-1] <- paste( "A", c( age.in, if( cond ) cage else NULL ), sep="" ) +rownames( surv ) <- NULL +return( surv ) +} +\end{Soutput} +\end{Schunk} +\texttt{erl1} basically just expands the result of \texttt{surv1} with +a column of expected residual life times: +\begin{Schunk} +\begin{Sinput} +> erl1 +\end{Sinput} +\begin{Soutput} +function( int, mu, age.in = 0 ) +{ +# Computes expected residual life time at all ages +age <- 0:length(mu)*int + age.in + +# Small utility: cumulative cumulative sum from the end of a vector +musmuc <- function( x ) rev( cumsum( rev(x) ) ) + +# The survival function with a 0 at end, and the integral from the upper end +surv <- surv1( int = int, mu = mu, age.in = age.in )[,2] +cbind( age = age, + surv = surv, + erl = c( musmuc( ( surv[-1]-diff(surv)/2 ) ) / + surv[-length(surv)], 0 ) * int ) +} +\end{Soutput} +\end{Schunk} +We also define a function, \texttt{surv2}, that computes the survival +function for a non-diseased person that may become diseased with rate +\texttt{lam} and after that die at a rate of \texttt{muD} +(corresponding to the formulae above). This is the sane way of +handling years of life lost to a particular illness: +\begin{Schunk} +\begin{Sinput} +> surv2 +\end{Sinput} +\begin{Soutput} +function( int, muW, muD, lam, age.in=0, A=NULL ) +{ +# check the vectors +if( length(muW) != length(muD) | + length(muD) != length(lam) ) + stop( "Vectors with rates must have same length:\n", + "length(muW)=", length(muW), + ", length(muD)=", length(muD), + ", length(lam)=", length(lam) ) + +# Replace NAs with 0s +if( !is.null(lam) ) { +if( any(is.na(lam)) ){ lam[is.na(lam)] <- 0 ; warning("NAs in agument 'lam' set to 0") } } +if( any(is.na(muD)) ){ muD[is.na(muD)] <- 0 ; warning("NAs in agument 'muD' set to 0") } +if( any(is.na(muW)) ){ muW[is.na(muW)] <- 0 ; warning("NAs in agument 'muW' set to 0") } + +# First the workhorse that computes the survival function for a +# person in Well assuming that the mortality rate from this state is +# muW, disease incidence is in lam, and mortality in the diseased +# state is muD, and that all refer to constant rates intervals of +# length int starting from age.in, conditional on survival to A +wsurv2 <- +function( int, muW, muD, lam, age.in=0, A=0 ) +{ +# age-class boundaries - note one longer than rate vectors as it +# refers to boundaries of intervals not midpoints +age <- 0:length(muW)*int + age.in + +# cumulative rates at the boundaries, given survival to A +MuW <- cumsum( c( 0, muW ) * ( age > A ) ) * int +MuD <- cumsum( c( 0, muD ) * ( age > A ) ) * int +Lam <- cumsum( c( 0, lam ) * ( age > A ) ) * int + +# probability of being well +pW <- exp( -( Lam + MuW ) ) + +# probability of diagnosis at s --- first term in the integral for +# P(DM at a). Note that we explicitly add a 0 at the start so we get a +# probability of 0 of transition at the first age point +Dis <- c(0,lam) * ( age > A ) * exp( -(Lam+MuW) ) * int + +# for each age (age[ia]) we compute the integral over the range +# [0,age] of the product of the probability of diagnosis and the +# probability of surviving from diagnosis till age ia +pDM <- Dis * 0 +for( ia in 1:length(age) ) + pDM[ia] <- sum( Dis[1:ia] * exp( -(MuD[ia]-MuD[1:ia]) ) ) + # 1st term as function of s (1:ia) + # 2nd term integral over range s:age + # upper integration limit is age (ia) and the lower + # limit is the intermediate age (at DM) (1:ia) +# Finally, we add the probabilities of being in Well resp. DM to get +# the overall survival: +surv <- data.frame( age = age, surv = pDM + pW ) +return( surv ) +} + +# survival from start +surv <- wsurv2( int, muW, muD, lam, age.in=age.in, A=0 ) + +# add columns for conditioning ages +if( !is.null(A) ) + { + for( j in 1:length(A) ) + { + surv <- cbind( surv, + wsurv2( int, muW, muD, lam, age.in=age.in, A=A[j] )[,2] ) + } + } +Al <- A +for( i in 1:length(A) ) Al[i] <- max( surv$age[surv$age <= A[i]] ) +colnames( surv )[-1] <- paste( "A", c( age.in, Al ), sep="" ) + +# done! +return( surv ) +} +\end{Soutput} +\end{Schunk} +Finally we devised a function using these to compute the expected +residual lifetime at select ages: +\begin{Schunk} +\begin{Sinput} +> erl +\end{Sinput} +\begin{Soutput} +function( int, + muW, + muD, + lam = NULL, + age.in = 0, + A = NULL, + immune = is.null(lam), + yll = TRUE, + note = TRUE ) +{ +# Computes expected residual life time for Well and Dis states +# respectively in an illness-death model, optionally ignoring +# the well->ill transition + +# Utility to integrate a survival function from the last point where +# it is 1, assuming points are 1 apart +trsum <- +function( x ) +{ +x[c(diff(x)==0,TRUE)] <- NA +sum( ( x[-length(x)] + x[-1] ) / 2, na.rm=TRUE ) +} + +# Check sensibility +if( !immune & is.null(lam) ) stop( "'lam' is required when immune=FALSE\n" ) + +# Replace NAs with 0s +if( !is.null(lam) ) { +if( any(is.na(lam)) ){ lam[is.na(lam)] <- 0 ; warning("NAs in agument 'lam' set to 0") } } +if( any(is.na(muD)) ){ muD[is.na(muD)] <- 0 ; warning("NAs in agument 'muD' set to 0") } +if( any(is.na(muW)) ){ muW[is.na(muW)] <- 0 ; warning("NAs in agument 'muW' set to 0") } + +# Survival functions + sD <- surv1( int=int, muD, age.in = age.in, A = A ) +if( immune ) sW <- surv1( int=int, muW, age.in = age.in, A = A ) +else sW <- surv2( int=int, muW, muD, lam, age.in = age.in, A = A ) + +# Area under the survival functions +erl <- cbind( apply( sW[,-1], 2, trsum ), + apply( sD[,-1], 2, trsum ) ) * int +colnames( erl ) <- c("Well","Dis") +rownames( erl ) <- colnames( sW )[-1] + +# Should we compute years of life lost? +if( yll ) erl <- cbind( erl, YLL = erl[,"Well"] - erl[,"Dis"] ) + +# Cautionary note +if( immune ) + { + attr( erl, "NOTE" ) <- "Calculations assume that Well persons cannot get Ill (quite silly!)." + if( note ) cat("NOTE:", attr( erl, "NOTE" ), "\n" ) + } +return( erl ) +} +\end{Soutput} +\end{Schunk} +\ldots and a wrapper for this if we only want the years of life lost +returned: +\begin{Schunk} +\begin{Sinput} +> yll +\end{Sinput} +\begin{Soutput} +function( int, + muW, + muD, + lam = NULL, + age.in = 0, + A = NULL, + immune = is.null(lam), + note = TRUE ) erl( int = int, + muW = muW, + muD = muD, + lam = lam, + age.in = age.in, + A = A, + immune = immune, + yll = TRUE, + note = note )[,"YLL"] +\end{Soutput} +\end{Schunk} + +\bibliographystyle{plain} + +\begin{thebibliography}{1} + +\bibitem{Carstensen.2007a} +B~Carstensen. +\newblock Age-{P}eriod-{C}ohort models for the {L}exis diagram. +\newblock {\em Statistics in Medicine}, 26(15):3018--3045, 2007. + +\bibitem{Carstensen.2008c} +B~Carstensen, JK~Kristensen, P~Ottosen, and K~Borch-Johnsen. +\newblock The {D}anish {N}ational {D}iabetes {R}egister: {T}rends in incidence, + prevalence and mortality. +\newblock {\em Diabetologia}, 51:2187--2196, 2008. + +\end{thebibliography} + +\addcontentsline{toc}{chapter}{References} + +\end{document}